@ -38,230 +38,346 @@
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" City Name Type Package Variety Sub Variety Grade Date \\\n",
"0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n",
"1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n",
"2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n",
"\n",
" Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n",
"0 270.0 280.0 270.0 ... NaN NaN NaN \n",
"1 270.0 280.0 270.0 ... NaN NaN NaN \n",
"2 160.0 160.0 160.0 ... NaN NaN NaN \n",
"3 160.0 160.0 160.0 ... NaN NaN NaN \n",
"4 90.0 100.0 90.0 ... NaN NaN NaN \n",
"\n",
" Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n",
"0 NaN NaN NaN E NaN NaN NaN \n",
"1 NaN NaN NaN E NaN NaN NaN \n",
"2 NaN NaN NaN N NaN NaN NaN \n",
"3 NaN NaN NaN N NaN NaN NaN \n",
"4 NaN NaN NaN N NaN NaN NaN \n",
"\n",
"[5 rows x 26 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>City Name</th>\n <th>Type</th>\n <th>Package</th>\n <th>Variety</th>\n <th>Sub Variety</th>\n <th>Grade</th>\n <th>Date</th>\n <th>Low Price</th>\n <th>High Price</th>\n <th>Mostly Low</th>\n <th>...</th>\n <th>Unit of Sale</th>\n <th>Quality</th>\n <th>Condition</th>\n <th>Appearance</th>\n <th>Storage</th>\n <th>Crop</th>\n <th>Repack</th>\n <th>Trans Mode</th>\n <th>Unnamed: 24</th>\n <th>Unnamed: 25</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>4/29/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>5/6/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>11/5/16</td>\n <td>90.0</td>\n <td>100.0</td>\n <td>90.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
},
"execution_count": null,
"metadata": {},
"execution_count": 22
}
],
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# Use the pumpkin data from Lesso\n",
"\n",
"pumpkins = pd.read_csv('../../../Regression/data/US-pumpkins.csv')\n",
"\n",
"pumpkins.head()\n"
"ufos = pd.read_csv('../data/ufos.csv')\n",
"ufos.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 23 ,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Package Low Price High Price Price\n",
"70 0 5 3 13.636364\n",
"71 0 10 7 16.363636\n",
"72 0 10 7 16.363636\n",
"73 0 9 6 15.454545\n",
"74 0 5 3 13.636364"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Package</th>\n <th>Low Price</th>\n <th>High Price</th>\n <th>Price</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>70</th>\n <td>0</td>\n <td>5</td>\n <td>3</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>71</th>\n <td>0</td>\n <td>10</td>\n <td>7</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>72</th>\n <td>0</td>\n <td>10</td>\n <td>7</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>73</th>\n <td>0</td>\n <td>9</td>\n <td>6</td>\n <td>15.454545</td>\n </tr>\n <tr>\n <th>74</th>\n <td>0</td>\n <td>5</td>\n <td>3</td>\n <td>13.636364</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 23
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 70662 entries, 0 to 80331\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 MB\n"
]
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n",
"\n",
"new_columns = ['Package', 'Low Price', 'High Price']\n",
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
"\n",
"pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1 )\n",
"ufos.dropna(inplace=True)\n",
"\n",
"## price is the average of low and high prices \n",
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country']) \n",
"\n",
"price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2 \n",
"# only view seconds 1-60 \n",
"\n",
"new_pumpkins = pd.DataFrame({ 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price}) \n",
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)] \n",
"\n",
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1 \n",
"ufos.info() \n",
"\n",
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n",
"\n",
"new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n",
"\n",
"new_pumpkins.head()\n"
"\n"
]
},
{
"cell_type": "code",
"execution_count": 24 ,
"execution_count": 3 ,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 415 entries, 70 to 1742\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Package 415 non-null int64 \n 1 Low Price 415 non-null int64 \n 2 High Price 415 non-null int64 \n 3 Price 415 non-null float64\ndtypes: float64(1), int64(3)\nmemory usage: 16.2 KB\n"
]
}
],
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"new_pumpkins.dropna(inplace=True)\n",
"new_pumpkins.info()\n",
"new_columns = ['Seconds','Latitude','Longitude','Country']\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Package Price\n",
"70 0 13.636364\n",
"71 0 16.363636\n",
"72 0 16.363636\n",
"73 0 15.454545\n",
"74 0 13.636364\n",
"... ... ...\n",
"1738 2 30.000000\n",
"1739 2 28.750000\n",
"1740 2 25.750000\n",
"1741 2 24.000000\n",
"1742 2 24.000000\n",
"new_ufos = ufos.drop([c for c in ufos.columns if c not in new_columns], axis=1)\n",
"\n",
"[415 rows x 2 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Package</th>\n <th>Price</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>70</th>\n <td>0</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>71</th>\n <td>0</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>72</th>\n <td>0</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>73</th>\n <td>0</td>\n <td>15.454545</td>\n </tr>\n <tr>\n <th>74</th>\n <td>0</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>1738</th>\n <td>2</td>\n <td>30.000000</td>\n </tr>\n <tr>\n <th>1739</th>\n <td>2</td>\n <td>28.750000</td>\n </tr>\n <tr>\n <th>1740</th>\n <td>2</td>\n <td>25.750000</td>\n </tr>\n <tr>\n <th>1741</th>\n <td>2</td>\n <td>24.000000</td>\n </tr>\n <tr>\n <th>1742</th>\n <td>2</td>\n <td>24.000000</td>\n </tr>\n </tbody>\n</table>\n<p>415 rows × 2 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 25
}
],
"source": [
"new_columns = ['Package', 'Price']\n",
"lin_pumpkins = new_pumpkins.drop([c for c in new_pumpkins.columns if c not in new_columns], axis='columns')\n",
"new_ufos.dropna(inplace=True)\n",
"\n",
"lin_pumpkins\n "
"new_ufos = new_ufos.apply(LabelEncoder().fit_transform)"
]
},
{
"source": [
"Set X and y arrays to correspond to Package and Price"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 26 ,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"X = lin_pumpkins.values[:, :1]\n",
"y = lin_pumpkins.values[:, 1:2]\n"
"from sklearn.model_selection import train_test_split\n",
"\n",
"Selected_features = ['Latitude','Longitude','Country']\n",
"\n",
"X = new_ufos[Selected_features]\n",
"y = new_ufos['Seconds']\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout ",
"name": "stderr ",
"text": [
"Model Accuracy: 0.3315342327998989\n"
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 4 0.00 0.00 0.00 1\n",
" 10 0.00 0.00 0.00 11\n",
" 12 0.00 0.00 0.00 1\n",
" 13 0.00 0.00 0.00 1\n",
" 15 0.00 0.00 0.00 131\n",
" 17 0.00 0.00 0.00 1\n",
" 18 0.00 0.00 0.00 19\n",
" 21 0.00 0.00 0.00 410\n",
" 22 0.00 0.00 0.00 3\n",
" 23 0.00 0.00 0.00 284\n",
" 25 0.00 0.00 0.00 1\n",
" 26 0.00 0.00 0.00 1\n",
" 27 0.00 0.00 0.00 146\n",
" 29 0.00 0.00 0.00 555\n",
" 31 0.00 0.00 0.00 84\n",
" 33 0.00 0.00 0.00 86\n",
" 34 0.00 0.00 0.00 85\n",
" 35 0.00 0.00 0.00 8\n",
" 36 0.00 0.00 0.00 569\n",
" 37 0.00 0.00 0.00 3\n",
" 38 0.00 0.00 0.00 39\n",
" 39 0.00 0.00 0.00 5\n",
" 40 0.00 0.00 0.00 4\n",
" 41 0.00 0.00 0.00 352\n",
" 42 0.00 0.00 0.00 1\n",
" 43 0.00 0.00 0.00 4\n",
" 44 0.00 0.00 0.00 2\n",
" 45 0.00 0.00 0.00 1\n",
" 46 0.00 0.00 0.00 360\n",
" 49 0.00 0.00 0.00 2\n",
" 50 0.00 0.00 0.00 2\n",
" 52 0.00 0.00 0.00 60\n",
" 53 0.00 0.00 0.00 1\n",
" 55 0.00 0.00 0.00 1\n",
" 56 0.00 0.00 0.00 1\n",
" 57 0.00 0.00 0.00 634\n",
" 60 0.00 0.00 0.00 1\n",
" 61 0.00 0.00 0.00 2\n",
" 62 0.00 0.00 0.00 20\n",
" 65 0.00 0.00 0.00 3\n",
" 67 0.00 0.00 0.00 83\n",
" 71 0.00 0.00 0.00 171\n",
" 75 0.00 0.00 0.00 19\n",
" 79 0.00 0.00 0.00 5\n",
" 83 0.00 0.00 0.00 1005\n",
" 86 0.00 0.00 0.00 1\n",
" 88 0.00 0.00 0.00 1\n",
" 94 0.00 0.00 0.00 1\n",
" 95 0.00 0.00 0.00 1\n",
" 97 0.00 0.00 0.00 7\n",
" 99 0.00 0.00 0.00 2\n",
" 102 0.00 0.00 0.00 2\n",
" 104 0.00 0.00 0.00 1\n",
" 105 0.00 0.00 0.00 5\n",
" 107 0.00 0.00 0.00 1\n",
" 109 0.00 0.00 0.00 1\n",
" 110 0.00 0.00 0.00 137\n",
" 111 0.00 0.00 0.00 1\n",
" 113 0.00 0.00 0.00 1\n",
" 114 0.00 0.00 0.00 1\n",
" 117 0.00 0.00 0.00 4\n",
" 122 0.00 0.00 0.00 1270\n",
" 123 0.00 0.00 0.00 1\n",
" 124 0.00 0.00 0.00 1\n",
" 129 0.00 0.00 0.00 1\n",
" 130 0.00 0.00 0.00 1\n",
" 133 0.00 0.00 0.00 9\n",
" 135 0.00 0.00 0.00 1\n",
" 137 0.00 0.00 0.00 2\n",
" 140 0.00 0.00 0.00 862\n",
" 145 0.00 0.00 0.00 2\n",
" 150 0.00 0.00 0.00 2\n",
" 154 0.00 0.00 0.00 375\n",
" 155 0.00 0.00 0.00 1\n",
" 161 0.00 0.00 0.00 1\n",
" 163 0.00 0.00 0.00 1\n",
" 168 0.11 1.00 0.19 1523\n",
" 169 0.00 0.00 0.00 1\n",
" 172 0.00 0.00 0.00 1\n",
" 174 0.00 0.00 0.00 1\n",
" 178 0.00 0.00 0.00 2\n",
" 180 0.00 0.00 0.00 108\n",
" 181 0.00 0.00 0.00 1\n",
" 184 0.00 0.00 0.00 1\n",
" 188 0.00 0.00 0.00 149\n",
" 193 0.00 0.00 0.00 1\n",
" 196 0.00 0.00 0.00 136\n",
" 197 0.00 0.00 0.00 1\n",
" 200 0.00 0.00 0.00 16\n",
" 202 0.00 0.00 0.00 1\n",
" 207 0.00 0.00 0.00 1130\n",
" 215 0.00 0.00 0.00 1\n",
" 220 0.00 0.00 0.00 5\n",
" 223 0.00 0.00 0.00 1\n",
" 226 0.00 0.00 0.00 44\n",
" 228 0.00 0.00 0.00 1\n",
" 233 0.00 0.00 0.00 10\n",
" 237 0.00 0.00 0.00 5\n",
" 239 0.00 0.00 0.00 736\n",
" 241 0.00 0.00 0.00 4\n",
" 245 0.00 0.00 0.00 1\n",
" 248 0.00 0.00 0.00 1\n",
" 251 0.00 0.00 0.00 8\n",
" 258 0.00 0.00 0.00 11\n",
" 265 0.00 0.00 0.00 1\n",
" 267 0.00 0.00 0.00 5\n",
" 268 0.00 0.00 0.00 1\n",
" 273 0.00 0.00 0.00 1\n",
" 275 0.00 0.00 0.00 542\n",
" 277 0.00 0.00 0.00 1\n",
" 279 0.00 0.00 0.00 1\n",
" 281 0.00 0.00 0.00 1\n",
" 284 0.00 0.00 0.00 1\n",
" 287 0.00 0.00 0.00 1\n",
" 288 0.00 0.00 0.00 4\n",
" 290 0.00 0.00 0.00 1\n",
" 293 0.00 0.00 0.00 1\n",
" 298 0.00 0.00 0.00 1\n",
" 300 0.00 0.00 0.00 1\n",
" 301 0.00 0.00 0.00 1\n",
" 302 0.00 0.00 0.00 2\n",
" 305 0.00 0.00 0.00 3\n",
" 306 0.00 0.00 0.00 3\n",
" 308 0.00 0.00 0.00 1\n",
" 309 0.00 0.00 0.00 1\n",
" 313 0.00 0.00 0.00 2\n",
" 314 0.00 0.00 0.00 1\n",
" 316 0.00 0.00 0.00 1\n",
" 317 0.00 0.00 0.00 8\n",
" 324 0.00 0.00 0.00 1\n",
" 330 0.00 0.00 0.00 2\n",
" 331 0.00 0.00 0.00 85\n",
" 332 0.00 0.00 0.00 2\n",
" 333 0.00 0.00 0.00 3\n",
" 334 0.00 0.00 0.00 2\n",
" 336 0.00 0.00 0.00 1\n",
" 337 0.00 0.00 0.00 1\n",
" 338 0.00 0.00 0.00 468\n",
" 339 0.00 0.00 0.00 1\n",
" 343 0.00 0.00 0.00 2\n",
" 345 0.00 0.00 0.00 1\n",
" 346 0.00 0.00 0.00 2\n",
" 347 0.00 0.00 0.00 42\n",
" 348 0.00 0.00 0.00 1\n",
" 350 0.00 0.00 0.00 2\n",
" 353 0.00 0.00 0.00 62\n",
" 355 0.00 0.00 0.00 1\n",
" 359 0.00 0.00 0.00 165\n",
" 364 0.00 0.00 0.00 14\n",
" 365 0.00 0.00 0.00 1\n",
" 366 0.00 0.00 0.00 1\n",
" 368 0.00 0.00 0.00 1\n",
" 372 0.00 0.00 0.00 1\n",
" 373 0.00 0.00 0.00 1\n",
" 375 0.00 0.00 0.00 366\n",
" 377 0.00 0.00 0.00 1\n",
" 381 0.00 0.00 0.00 3\n",
" 383 0.00 0.00 0.00 3\n",
" 384 0.00 0.00 0.00 1\n",
" 387 0.00 0.00 0.00 12\n",
" 390 0.00 0.00 0.00 5\n",
" 392 0.00 0.00 0.00 1\n",
" 394 0.00 0.00 0.00 48\n",
" 395 0.00 0.00 0.00 1\n",
" 397 0.00 0.00 0.00 2\n",
" 398 0.00 0.00 0.00 5\n",
" 400 0.00 0.00 0.00 1\n",
" 404 0.00 0.00 0.00 202\n",
" 405 0.00 0.00 0.00 1\n",
" 409 0.00 0.00 0.00 1\n",
" 410 0.00 0.00 0.00 1\n",
" 416 0.00 0.00 0.00 10\n",
" 417 0.00 0.00 0.00 1\n",
" 418 0.00 0.00 0.00 1\n",
" 419 0.00 0.00 0.00 1\n",
" 420 0.00 0.00 0.00 92\n",
" 423 0.00 0.00 0.00 2\n",
" 424 0.00 0.00 0.00 44\n",
" 427 0.00 0.00 0.00 4\n",
" 428 0.00 0.00 0.00 28\n",
" 431 0.00 0.00 0.00 11\n",
" 433 0.00 0.00 0.00 3\n",
" 434 0.00 0.00 0.00 6\n",
" 436 0.00 0.00 0.00 1\n",
" 438 0.00 0.00 0.00 14\n",
" 441 0.00 0.00 0.00 3\n",
" 443 0.00 0.00 0.00 1\n",
" 444 0.00 0.00 0.00 2\n",
" 450 0.00 0.00 0.00 5\n",
" 451 0.00 0.00 0.00 4\n",
" 453 0.00 0.00 0.00 1\n",
" 456 0.00 0.00 0.00 6\n",
" 459 0.00 0.00 0.00 4\n",
" 460 0.00 0.00 0.00 2\n",
" 462 0.00 0.00 0.00 2\n",
" 463 0.00 0.00 0.00 2\n",
" 465 0.00 0.00 0.00 1\n",
" 466 0.00 0.00 0.00 1\n",
" 469 0.00 0.00 0.00 1\n",
" 470 0.00 0.00 0.00 1\n",
" 474 0.00 0.00 0.00 1\n",
"\n",
" accuracy 0.11 14133\n",
" macro avg 0.00 0.00 0.00 14133\n",
"weighted avg 0.01 0.11 0.02 14133\n",
"\n",
"Predicted labels: [168 168 168 ... 168 168 168]\n",
"Accuracy: 0.10776197551829053\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
"lin_reg = LinearRegression()\n",
"lin_reg.fit(X_train,y_train)\n",
"\n",
"pred = lin_reg.predict(X_test)\n",
"\n",
"accuracy_score = lin_reg.score(X_train,y_train)\n",
"print('Model Accuracy: ', accuracy_score)"
"print(classification_report(y_test, predictions))\n",
"print('Predicted labels: ', predictions)\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[33.627655]]\n"
"[59 ]\n"
]
}
],
"source": [
"import pickle\n",
"s = pickle.dumps(lin_reg)\n",
"model_filename = 'lin-reg-model.pkl'\n",
"s = pickle.dumps(model )\n",
"model_filename = 'ufo -model.pkl'\n",
"# Open the file to save as pkl file\n",
"pickle.dump(lin_reg, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('lin-reg-model.pkl','rb'))\n",
"print(model.predict([[2.85]]))\n",
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"# Close the pickle instances\n",
"# clf2 = pickle.loads(s)\n",
"# clf2.predict([[2.75]])\n"
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
"print(model.predict([[1,2,3]]))\n"
]
},
{