@ -38,230 +38,346 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 22 ,
"execution_count": null ,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [],
{
"output_type": "execute_result",
"data": {
"text/plain": [
" City Name Type Package Variety Sub Variety Grade Date \\\n",
"0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n",
"1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n",
"2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n",
"\n",
" Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n",
"0 270.0 280.0 270.0 ... NaN NaN NaN \n",
"1 270.0 280.0 270.0 ... NaN NaN NaN \n",
"2 160.0 160.0 160.0 ... NaN NaN NaN \n",
"3 160.0 160.0 160.0 ... NaN NaN NaN \n",
"4 90.0 100.0 90.0 ... NaN NaN NaN \n",
"\n",
" Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n",
"0 NaN NaN NaN E NaN NaN NaN \n",
"1 NaN NaN NaN E NaN NaN NaN \n",
"2 NaN NaN NaN N NaN NaN NaN \n",
"3 NaN NaN NaN N NaN NaN NaN \n",
"4 NaN NaN NaN N NaN NaN NaN \n",
"\n",
"[5 rows x 26 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>City Name</th>\n <th>Type</th>\n <th>Package</th>\n <th>Variety</th>\n <th>Sub Variety</th>\n <th>Grade</th>\n <th>Date</th>\n <th>Low Price</th>\n <th>High Price</th>\n <th>Mostly Low</th>\n <th>...</th>\n <th>Unit of Sale</th>\n <th>Quality</th>\n <th>Condition</th>\n <th>Appearance</th>\n <th>Storage</th>\n <th>Crop</th>\n <th>Repack</th>\n <th>Trans Mode</th>\n <th>Unnamed: 24</th>\n <th>Unnamed: 25</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>4/29/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>5/6/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n 
<td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>11/5/16</td>\n <td>90.0</td>\n <td>100.0</td>\n <td>90.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 22
}
],
"source": [
"source": [
"import pandas as pd\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import numpy as np\n",
"\n",
"\n",
"# Use the pumpkin data from the Regression lesson\n",
"ufos = pd.read_csv('../data/ufos.csv')\n",
"\n",
"ufos.head()\n"
"pumpkins = pd.read_csv('../../../Regression/data/US-pumpkins.csv')\n",
"\n",
"pumpkins.head()\n"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 23 ,
"execution_count": 2,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"output_type": "execute_result",
"output_type": "stream",
"data": {
"name": "stdout",
"text/plain": [
"text": [
" Package Low Price High Price Price\n",
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 70662 entries, 0 to 80331\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 MB\n"
"70 0 5 3 13.636364\n",
]
"71 0 10 7 16.363636\n",
"72 0 10 7 16.363636\n",
"73 0 9 6 15.454545\n",
"74 0 5 3 13.636364"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Package</th>\n <th>Low Price</th>\n <th>High Price</th>\n <th>Price</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>70</th>\n <td>0</td>\n <td>5</td>\n <td>3</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>71</th>\n <td>0</td>\n <td>10</td>\n <td>7</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>72</th>\n <td>0</td>\n <td>10</td>\n <td>7</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>73</th>\n <td>0</td>\n <td>9</td>\n <td>6</td>\n <td>15.454545</td>\n </tr>\n <tr>\n <th>74</th>\n <td>0</td>\n <td>5</td>\n <td>3</td>\n <td>13.636364</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 23
}
}
],
],
"source": [
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"\n",
"pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n",
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
"\n",
"new_columns = ['Package', 'Low Price', 'High Price']\n",
"\n",
"pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n",
"\n",
"\n",
"## price is the average of low and high prices \n",
"ufos.dropna(inplace=True)\n",
"\n",
"\n",
"price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2 \n",
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
"\n",
"\n",
"new_pumpkins = pd.DataFrame({ 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price}) \n",
"# only view seconds 1-60\n",
"\n",
"\n",
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1 \n",
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)] \n",
"\n",
"\n",
"new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2 \n",
"ufos.info() \n",
"\n",
"\n",
"new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n",
"\n"
"\n",
"new_pumpkins.head()\n"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 24 ,
"execution_count": 3 ,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [],
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 415 entries, 70 to 1742\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Package 415 non-null int64 \n 1 Low Price 415 non-null int64 \n 2 High Price 415 non-null int64 \n 3 Price 415 non-null float64\ndtypes: float64(1), int64(3)\nmemory usage: 16.2 KB\n"
]
}
],
"source": [
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"\n",
"new_pumpkins.dropna(inplace=True)\n",
"new_columns = ['Seconds','Latitude','Longitude','Country']\n",
"new_pumpkins.info()\n",
"\n",
"\n",
"\n"
"new_ufos = ufos.drop([c for c in ufos.columns if c not in new_columns], axis=1)\n",
]
"\n",
},
"new_ufos.dropna(inplace=True)\n",
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Package Price\n",
"70 0 13.636364\n",
"71 0 16.363636\n",
"72 0 16.363636\n",
"73 0 15.454545\n",
"74 0 13.636364\n",
"... ... ...\n",
"1738 2 30.000000\n",
"1739 2 28.750000\n",
"1740 2 25.750000\n",
"1741 2 24.000000\n",
"1742 2 24.000000\n",
"\n",
"[415 rows x 2 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Package</th>\n <th>Price</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>70</th>\n <td>0</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>71</th>\n <td>0</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>72</th>\n <td>0</td>\n <td>16.363636</td>\n </tr>\n <tr>\n <th>73</th>\n <td>0</td>\n <td>15.454545</td>\n </tr>\n <tr>\n <th>74</th>\n <td>0</td>\n <td>13.636364</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>1738</th>\n <td>2</td>\n <td>30.000000</td>\n </tr>\n <tr>\n <th>1739</th>\n <td>2</td>\n <td>28.750000</td>\n </tr>\n <tr>\n <th>1740</th>\n <td>2</td>\n <td>25.750000</td>\n </tr>\n <tr>\n <th>1741</th>\n <td>2</td>\n <td>24.000000</td>\n </tr>\n <tr>\n <th>1742</th>\n <td>2</td>\n <td>24.000000</td>\n </tr>\n </tbody>\n</table>\n<p>415 rows × 2 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 25
}
],
"source": [
"new_columns = ['Package', 'Price']\n",
"lin_pumpkins = new_pumpkins.drop([c for c in new_pumpkins.columns if c not in new_columns], axis='columns')\n",
"\n",
"\n",
"lin_pumpkins\n "
"new_ufos = new_ufos.apply(LabelEncoder().fit_transform)"
]
]
},
},
{
"source": [
"Set X and y arrays to correspond to Package and Price"
],
"cell_type": "markdown",
"metadata": {}
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 26 ,
"execution_count": 4,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"X = lin_pumpkins.values[:, :1]\n",
"from sklearn.model_selection import train_test_split\n",
"y = lin_pumpkins.values[:, 1:2]\n"
"\n",
"Selected_features = ['Latitude','Longitude','Country']\n",
"\n",
"X = new_ufos[Selected_features]\n",
"y = new_ufos['Seconds']\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 27,
"execution_count": 7,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"output_type": "stream",
"output_type": "stream",
"name": "stdout",
"name": "stderr",
"text": [
"text": [
"Model Accuracy: 0.3315342327998989\n"
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 4 0.00 0.00 0.00 1\n",
" 10 0.00 0.00 0.00 11\n",
" 12 0.00 0.00 0.00 1\n",
" 13 0.00 0.00 0.00 1\n",
" 15 0.00 0.00 0.00 131\n",
" 17 0.00 0.00 0.00 1\n",
" 18 0.00 0.00 0.00 19\n",
" 21 0.00 0.00 0.00 410\n",
" 22 0.00 0.00 0.00 3\n",
" 23 0.00 0.00 0.00 284\n",
" 25 0.00 0.00 0.00 1\n",
" 26 0.00 0.00 0.00 1\n",
" 27 0.00 0.00 0.00 146\n",
" 29 0.00 0.00 0.00 555\n",
" 31 0.00 0.00 0.00 84\n",
" 33 0.00 0.00 0.00 86\n",
" 34 0.00 0.00 0.00 85\n",
" 35 0.00 0.00 0.00 8\n",
" 36 0.00 0.00 0.00 569\n",
" 37 0.00 0.00 0.00 3\n",
" 38 0.00 0.00 0.00 39\n",
" 39 0.00 0.00 0.00 5\n",
" 40 0.00 0.00 0.00 4\n",
" 41 0.00 0.00 0.00 352\n",
" 42 0.00 0.00 0.00 1\n",
" 43 0.00 0.00 0.00 4\n",
" 44 0.00 0.00 0.00 2\n",
" 45 0.00 0.00 0.00 1\n",
" 46 0.00 0.00 0.00 360\n",
" 49 0.00 0.00 0.00 2\n",
" 50 0.00 0.00 0.00 2\n",
" 52 0.00 0.00 0.00 60\n",
" 53 0.00 0.00 0.00 1\n",
" 55 0.00 0.00 0.00 1\n",
" 56 0.00 0.00 0.00 1\n",
" 57 0.00 0.00 0.00 634\n",
" 60 0.00 0.00 0.00 1\n",
" 61 0.00 0.00 0.00 2\n",
" 62 0.00 0.00 0.00 20\n",
" 65 0.00 0.00 0.00 3\n",
" 67 0.00 0.00 0.00 83\n",
" 71 0.00 0.00 0.00 171\n",
" 75 0.00 0.00 0.00 19\n",
" 79 0.00 0.00 0.00 5\n",
" 83 0.00 0.00 0.00 1005\n",
" 86 0.00 0.00 0.00 1\n",
" 88 0.00 0.00 0.00 1\n",
" 94 0.00 0.00 0.00 1\n",
" 95 0.00 0.00 0.00 1\n",
" 97 0.00 0.00 0.00 7\n",
" 99 0.00 0.00 0.00 2\n",
" 102 0.00 0.00 0.00 2\n",
" 104 0.00 0.00 0.00 1\n",
" 105 0.00 0.00 0.00 5\n",
" 107 0.00 0.00 0.00 1\n",
" 109 0.00 0.00 0.00 1\n",
" 110 0.00 0.00 0.00 137\n",
" 111 0.00 0.00 0.00 1\n",
" 113 0.00 0.00 0.00 1\n",
" 114 0.00 0.00 0.00 1\n",
" 117 0.00 0.00 0.00 4\n",
" 122 0.00 0.00 0.00 1270\n",
" 123 0.00 0.00 0.00 1\n",
" 124 0.00 0.00 0.00 1\n",
" 129 0.00 0.00 0.00 1\n",
" 130 0.00 0.00 0.00 1\n",
" 133 0.00 0.00 0.00 9\n",
" 135 0.00 0.00 0.00 1\n",
" 137 0.00 0.00 0.00 2\n",
" 140 0.00 0.00 0.00 862\n",
" 145 0.00 0.00 0.00 2\n",
" 150 0.00 0.00 0.00 2\n",
" 154 0.00 0.00 0.00 375\n",
" 155 0.00 0.00 0.00 1\n",
" 161 0.00 0.00 0.00 1\n",
" 163 0.00 0.00 0.00 1\n",
" 168 0.11 1.00 0.19 1523\n",
" 169 0.00 0.00 0.00 1\n",
" 172 0.00 0.00 0.00 1\n",
" 174 0.00 0.00 0.00 1\n",
" 178 0.00 0.00 0.00 2\n",
" 180 0.00 0.00 0.00 108\n",
" 181 0.00 0.00 0.00 1\n",
" 184 0.00 0.00 0.00 1\n",
" 188 0.00 0.00 0.00 149\n",
" 193 0.00 0.00 0.00 1\n",
" 196 0.00 0.00 0.00 136\n",
" 197 0.00 0.00 0.00 1\n",
" 200 0.00 0.00 0.00 16\n",
" 202 0.00 0.00 0.00 1\n",
" 207 0.00 0.00 0.00 1130\n",
" 215 0.00 0.00 0.00 1\n",
" 220 0.00 0.00 0.00 5\n",
" 223 0.00 0.00 0.00 1\n",
" 226 0.00 0.00 0.00 44\n",
" 228 0.00 0.00 0.00 1\n",
" 233 0.00 0.00 0.00 10\n",
" 237 0.00 0.00 0.00 5\n",
" 239 0.00 0.00 0.00 736\n",
" 241 0.00 0.00 0.00 4\n",
" 245 0.00 0.00 0.00 1\n",
" 248 0.00 0.00 0.00 1\n",
" 251 0.00 0.00 0.00 8\n",
" 258 0.00 0.00 0.00 11\n",
" 265 0.00 0.00 0.00 1\n",
" 267 0.00 0.00 0.00 5\n",
" 268 0.00 0.00 0.00 1\n",
" 273 0.00 0.00 0.00 1\n",
" 275 0.00 0.00 0.00 542\n",
" 277 0.00 0.00 0.00 1\n",
" 279 0.00 0.00 0.00 1\n",
" 281 0.00 0.00 0.00 1\n",
" 284 0.00 0.00 0.00 1\n",
" 287 0.00 0.00 0.00 1\n",
" 288 0.00 0.00 0.00 4\n",
" 290 0.00 0.00 0.00 1\n",
" 293 0.00 0.00 0.00 1\n",
" 298 0.00 0.00 0.00 1\n",
" 300 0.00 0.00 0.00 1\n",
" 301 0.00 0.00 0.00 1\n",
" 302 0.00 0.00 0.00 2\n",
" 305 0.00 0.00 0.00 3\n",
" 306 0.00 0.00 0.00 3\n",
" 308 0.00 0.00 0.00 1\n",
" 309 0.00 0.00 0.00 1\n",
" 313 0.00 0.00 0.00 2\n",
" 314 0.00 0.00 0.00 1\n",
" 316 0.00 0.00 0.00 1\n",
" 317 0.00 0.00 0.00 8\n",
" 324 0.00 0.00 0.00 1\n",
" 330 0.00 0.00 0.00 2\n",
" 331 0.00 0.00 0.00 85\n",
" 332 0.00 0.00 0.00 2\n",
" 333 0.00 0.00 0.00 3\n",
" 334 0.00 0.00 0.00 2\n",
" 336 0.00 0.00 0.00 1\n",
" 337 0.00 0.00 0.00 1\n",
" 338 0.00 0.00 0.00 468\n",
" 339 0.00 0.00 0.00 1\n",
" 343 0.00 0.00 0.00 2\n",
" 345 0.00 0.00 0.00 1\n",
" 346 0.00 0.00 0.00 2\n",
" 347 0.00 0.00 0.00 42\n",
" 348 0.00 0.00 0.00 1\n",
" 350 0.00 0.00 0.00 2\n",
" 353 0.00 0.00 0.00 62\n",
" 355 0.00 0.00 0.00 1\n",
" 359 0.00 0.00 0.00 165\n",
" 364 0.00 0.00 0.00 14\n",
" 365 0.00 0.00 0.00 1\n",
" 366 0.00 0.00 0.00 1\n",
" 368 0.00 0.00 0.00 1\n",
" 372 0.00 0.00 0.00 1\n",
" 373 0.00 0.00 0.00 1\n",
" 375 0.00 0.00 0.00 366\n",
" 377 0.00 0.00 0.00 1\n",
" 381 0.00 0.00 0.00 3\n",
" 383 0.00 0.00 0.00 3\n",
" 384 0.00 0.00 0.00 1\n",
" 387 0.00 0.00 0.00 12\n",
" 390 0.00 0.00 0.00 5\n",
" 392 0.00 0.00 0.00 1\n",
" 394 0.00 0.00 0.00 48\n",
" 395 0.00 0.00 0.00 1\n",
" 397 0.00 0.00 0.00 2\n",
" 398 0.00 0.00 0.00 5\n",
" 400 0.00 0.00 0.00 1\n",
" 404 0.00 0.00 0.00 202\n",
" 405 0.00 0.00 0.00 1\n",
" 409 0.00 0.00 0.00 1\n",
" 410 0.00 0.00 0.00 1\n",
" 416 0.00 0.00 0.00 10\n",
" 417 0.00 0.00 0.00 1\n",
" 418 0.00 0.00 0.00 1\n",
" 419 0.00 0.00 0.00 1\n",
" 420 0.00 0.00 0.00 92\n",
" 423 0.00 0.00 0.00 2\n",
" 424 0.00 0.00 0.00 44\n",
" 427 0.00 0.00 0.00 4\n",
" 428 0.00 0.00 0.00 28\n",
" 431 0.00 0.00 0.00 11\n",
" 433 0.00 0.00 0.00 3\n",
" 434 0.00 0.00 0.00 6\n",
" 436 0.00 0.00 0.00 1\n",
" 438 0.00 0.00 0.00 14\n",
" 441 0.00 0.00 0.00 3\n",
" 443 0.00 0.00 0.00 1\n",
" 444 0.00 0.00 0.00 2\n",
" 450 0.00 0.00 0.00 5\n",
" 451 0.00 0.00 0.00 4\n",
" 453 0.00 0.00 0.00 1\n",
" 456 0.00 0.00 0.00 6\n",
" 459 0.00 0.00 0.00 4\n",
" 460 0.00 0.00 0.00 2\n",
" 462 0.00 0.00 0.00 2\n",
" 463 0.00 0.00 0.00 2\n",
" 465 0.00 0.00 0.00 1\n",
" 466 0.00 0.00 0.00 1\n",
" 469 0.00 0.00 0.00 1\n",
" 470 0.00 0.00 0.00 1\n",
" 474 0.00 0.00 0.00 1\n",
"\n",
" accuracy 0.11 14133\n",
" macro avg 0.00 0.00 0.00 14133\n",
"weighted avg 0.01 0.11 0.02 14133\n",
"\n",
"Predicted labels: [168 168 168 ... 168 168 168]\n",
"Accuracy: 0.10776197551829053\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
]
}
}
],
],
"source": [
"source": [
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"\n",
"\n",
"print(classification_report(y_test, predictions))\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
"print('Predicted labels: ', predictions)\n",
"lin_reg = LinearRegression()\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))\n"
"lin_reg.fit(X_train,y_train)\n",
"\n",
"pred = lin_reg.predict(X_test)\n",
"\n",
"accuracy_score = lin_reg.score(X_train,y_train)\n",
"print('Model Accuracy: ', accuracy_score)"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 41,
"execution_count": 10,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"output_type": "stream",
"output_type": "stream",
"name": "stdout",
"name": "stdout",
"text": [
"text": [
"[[33.627655]]\n"
"[59 ]\n"
]
]
}
}
],
],
"source": [
"source": [
"import pickle\n",
"import pickle\n",
"s = pickle.dumps(lin_reg)\n",
"s = pickle.dumps(model )\n",
"model_filename = 'lin-reg-model.pkl'\n",
"model_filename = 'ufo-model.pkl'\n",
"# Open the file to save as pkl file\n",
"# Open the file to save as pkl file\n",
"pickle.dump(lin_reg, open(model_filename,'wb'))\n",
"with open(model_filename, 'wb') as model_file:\n",
"    pickle.dump(model, model_file)\n",
"\n",
"model = pickle.load(open('lin-reg-model.pkl','rb'))\n",
"print(model.predict([[2.85]]))\n",
"\n",
"\n",
"# Close the pickle instances\n",
"# Reload from the same path the model was just written to (only unpickle files you trust)\n",
"with open(model_filename, 'rb') as model_file:\n",
"    model = pickle.load(model_file)\n",
"# clf2 = pickle.loads(s)\n",
"print(model.predict([[1,2,3]]))\n"
"# clf2.predict([[2.75]])\n"
]
]
},
},
{
{