@ -38,9 +38,47 @@
},
{
"cell_type": "code",
"execution_count": null ,
"execution_count": 77 ,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
"3 10/10/1956 21:00 edna tx us circle \n",
"4 10/10/1960 20:00 kaneohe hi us light \n",
"\n",
" duration (seconds) duration (hours/min) \\\n",
"0 2700.0 45 minutes \n",
"1 7200.0 1-2 hrs \n",
"2 20.0 20 seconds \n",
"3 20.0 1/2 hour \n",
"4 900.0 15 minutes \n",
"\n",
" comments date posted latitude \\\n",
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
"1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n",
"2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n",
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
"\n",
" longitude \n",
"0 -97.941111 \n",
"1 -98.581082 \n",
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>datetime</th>\n <th>city</th>\n <th>state</th>\n <th>country</th>\n <th>shape</th>\n <th>duration (seconds)</th>\n <th>duration (hours/min)</th>\n <th>comments</th>\n <th>date posted</th>\n <th>latitude</th>\n <th>longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/10/1949 20:30</td>\n <td>san marcos</td>\n <td>tx</td>\n <td>us</td>\n <td>cylinder</td>\n <td>2700.0</td>\n <td>45 minutes</td>\n <td>This event took place in early fall around 194...</td>\n <td>4/27/2004</td>\n <td>29.883056</td>\n <td>-97.941111</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10/10/1949 21:00</td>\n <td>lackland afb</td>\n <td>tx</td>\n <td>NaN</td>\n <td>light</td>\n <td>7200.0</td>\n <td>1-2 hrs</td>\n <td>1949 Lackland AFB&#44 TX. Lights racing acros...</td>\n <td>12/16/2005</td>\n <td>29.384210</td>\n <td>-98.581082</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10/10/1955 17:00</td>\n <td>chester (uk/england)</td>\n <td>NaN</td>\n <td>gb</td>\n <td>circle</td>\n <td>20.0</td>\n <td>20 seconds</td>\n <td>Green/Orange circular disc over Chester&#44 En...</td>\n <td>1/21/2008</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10/10/1956 21:00</td>\n <td>edna</td>\n <td>tx</td>\n <td>us</td>\n <td>circle</td>\n <td>20.0</td>\n <td>1/2 hour</td>\n <td>My older brother and twin sister were leaving ...</td>\n <td>1/17/2004</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10/10/1960 20:00</td>\n <td>kaneohe</td>\n <td>hi</td>\n <td>us</td>\n <td>light</td>\n <td>900.0</td>\n <td>15 minutes</td>\n <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n <td>1/22/2004</td>\n <td>21.418056</td>\n <td>-157.803611</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 77
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
@ -52,14 +90,14 @@
},
{
"cell_type": "code",
"execution_count": 2 ,
"execution_count": 78 ,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 70662 entries, 0 to 80331 \nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 M B\n"
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330 \nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null int64 \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 1010.3 K B\n"
]
}
],
@ -83,33 +121,52 @@
},
{
"cell_type": "code",
"execution_count": 3 ,
"execution_count": 79 ,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
"3 20.0 4 28.978333 -96.645833\n",
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Seconds</th>\n <th>Country</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>20.0</td>\n <td>3</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>20.0</td>\n <td>4</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>14</th>\n <td>30.0</td>\n <td>4</td>\n <td>35.823889</td>\n <td>-80.253611</td>\n </tr>\n <tr>\n <th>23</th>\n <td>60.0</td>\n <td>4</td>\n <td>45.582778</td>\n <td>-122.352222</td>\n </tr>\n <tr>\n <th>24</th>\n <td>3.0</td>\n <td>3</td>\n <td>51.783333</td>\n <td>-0.783333</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 79
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"new_columns = ['Seconds','Latitude','Longitude','Country']\n",
"new_columns = ['Seconds','Country',' Latitude','Longitude']\n",
"\n",
"new_ufos = ufos.drop([c for c in ufos.columns if c not in new_columns], axis=1)\n",
"\n",
"new_ufos.dropna(inplace=True)\n",
"\n",
"new_ufos = new_ufos.apply(LabelEncoder().fit_transform)"
"new_ufos['Country'] = LabelEncoder().fit_transform(new_ufos['Country'])\n",
"\n",
"new_ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 88 ,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"Selected_features = ['Latitude','Longitude','Country ']\n",
"Selected_features = ['Seconds',' Latitude','Longitude']\n",
"\n",
"X = new_ufos[Selected_features]\n",
"y = new_ufos['Seconds ']\n",
"y = new_ufos['Country ']\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
@ -117,227 +174,31 @@
},
{
"cell_type": "code",
"execution_count": 7 ,
"execution_count": 89 ,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr ",
"name": "stdout ",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 4 0.00 0.00 0.00 1\n",
" 10 0.00 0.00 0.00 11\n",
" 12 0.00 0.00 0.00 1\n",
" 13 0.00 0.00 0.00 1\n",
" 15 0.00 0.00 0.00 131\n",
" 17 0.00 0.00 0.00 1\n",
" 18 0.00 0.00 0.00 19\n",
" 21 0.00 0.00 0.00 410\n",
" 22 0.00 0.00 0.00 3\n",
" 23 0.00 0.00 0.00 284\n",
" 25 0.00 0.00 0.00 1\n",
" 26 0.00 0.00 0.00 1\n",
" 27 0.00 0.00 0.00 146\n",
" 29 0.00 0.00 0.00 555\n",
" 31 0.00 0.00 0.00 84\n",
" 33 0.00 0.00 0.00 86\n",
" 34 0.00 0.00 0.00 85\n",
" 35 0.00 0.00 0.00 8\n",
" 36 0.00 0.00 0.00 569\n",
" 37 0.00 0.00 0.00 3\n",
" 38 0.00 0.00 0.00 39\n",
" 39 0.00 0.00 0.00 5\n",
" 40 0.00 0.00 0.00 4\n",
" 41 0.00 0.00 0.00 352\n",
" 42 0.00 0.00 0.00 1\n",
" 43 0.00 0.00 0.00 4\n",
" 44 0.00 0.00 0.00 2\n",
" 45 0.00 0.00 0.00 1\n",
" 46 0.00 0.00 0.00 360\n",
" 49 0.00 0.00 0.00 2\n",
" 50 0.00 0.00 0.00 2\n",
" 52 0.00 0.00 0.00 60\n",
" 53 0.00 0.00 0.00 1\n",
" 55 0.00 0.00 0.00 1\n",
" 56 0.00 0.00 0.00 1\n",
" 57 0.00 0.00 0.00 634\n",
" 60 0.00 0.00 0.00 1\n",
" 61 0.00 0.00 0.00 2\n",
" 62 0.00 0.00 0.00 20\n",
" 65 0.00 0.00 0.00 3\n",
" 67 0.00 0.00 0.00 83\n",
" 71 0.00 0.00 0.00 171\n",
" 75 0.00 0.00 0.00 19\n",
" 79 0.00 0.00 0.00 5\n",
" 83 0.00 0.00 0.00 1005\n",
" 86 0.00 0.00 0.00 1\n",
" 88 0.00 0.00 0.00 1\n",
" 94 0.00 0.00 0.00 1\n",
" 95 0.00 0.00 0.00 1\n",
" 97 0.00 0.00 0.00 7\n",
" 99 0.00 0.00 0.00 2\n",
" 102 0.00 0.00 0.00 2\n",
" 104 0.00 0.00 0.00 1\n",
" 105 0.00 0.00 0.00 5\n",
" 107 0.00 0.00 0.00 1\n",
" 109 0.00 0.00 0.00 1\n",
" 110 0.00 0.00 0.00 137\n",
" 111 0.00 0.00 0.00 1\n",
" 113 0.00 0.00 0.00 1\n",
" 114 0.00 0.00 0.00 1\n",
" 117 0.00 0.00 0.00 4\n",
" 122 0.00 0.00 0.00 1270\n",
" 123 0.00 0.00 0.00 1\n",
" 124 0.00 0.00 0.00 1\n",
" 129 0.00 0.00 0.00 1\n",
" 130 0.00 0.00 0.00 1\n",
" 133 0.00 0.00 0.00 9\n",
" 135 0.00 0.00 0.00 1\n",
" 137 0.00 0.00 0.00 2\n",
" 140 0.00 0.00 0.00 862\n",
" 145 0.00 0.00 0.00 2\n",
" 150 0.00 0.00 0.00 2\n",
" 154 0.00 0.00 0.00 375\n",
" 155 0.00 0.00 0.00 1\n",
" 161 0.00 0.00 0.00 1\n",
" 163 0.00 0.00 0.00 1\n",
" 168 0.11 1.00 0.19 1523\n",
" 169 0.00 0.00 0.00 1\n",
" 172 0.00 0.00 0.00 1\n",
" 174 0.00 0.00 0.00 1\n",
" 178 0.00 0.00 0.00 2\n",
" 180 0.00 0.00 0.00 108\n",
" 181 0.00 0.00 0.00 1\n",
" 184 0.00 0.00 0.00 1\n",
" 188 0.00 0.00 0.00 149\n",
" 193 0.00 0.00 0.00 1\n",
" 196 0.00 0.00 0.00 136\n",
" 197 0.00 0.00 0.00 1\n",
" 200 0.00 0.00 0.00 16\n",
" 202 0.00 0.00 0.00 1\n",
" 207 0.00 0.00 0.00 1130\n",
" 215 0.00 0.00 0.00 1\n",
" 220 0.00 0.00 0.00 5\n",
" 223 0.00 0.00 0.00 1\n",
" 226 0.00 0.00 0.00 44\n",
" 228 0.00 0.00 0.00 1\n",
" 233 0.00 0.00 0.00 10\n",
" 237 0.00 0.00 0.00 5\n",
" 239 0.00 0.00 0.00 736\n",
" 241 0.00 0.00 0.00 4\n",
" 245 0.00 0.00 0.00 1\n",
" 248 0.00 0.00 0.00 1\n",
" 251 0.00 0.00 0.00 8\n",
" 258 0.00 0.00 0.00 11\n",
" 265 0.00 0.00 0.00 1\n",
" 267 0.00 0.00 0.00 5\n",
" 268 0.00 0.00 0.00 1\n",
" 273 0.00 0.00 0.00 1\n",
" 275 0.00 0.00 0.00 542\n",
" 277 0.00 0.00 0.00 1\n",
" 279 0.00 0.00 0.00 1\n",
" 281 0.00 0.00 0.00 1\n",
" 284 0.00 0.00 0.00 1\n",
" 287 0.00 0.00 0.00 1\n",
" 288 0.00 0.00 0.00 4\n",
" 290 0.00 0.00 0.00 1\n",
" 293 0.00 0.00 0.00 1\n",
" 298 0.00 0.00 0.00 1\n",
" 300 0.00 0.00 0.00 1\n",
" 301 0.00 0.00 0.00 1\n",
" 302 0.00 0.00 0.00 2\n",
" 305 0.00 0.00 0.00 3\n",
" 306 0.00 0.00 0.00 3\n",
" 308 0.00 0.00 0.00 1\n",
" 309 0.00 0.00 0.00 1\n",
" 313 0.00 0.00 0.00 2\n",
" 314 0.00 0.00 0.00 1\n",
" 316 0.00 0.00 0.00 1\n",
" 317 0.00 0.00 0.00 8\n",
" 324 0.00 0.00 0.00 1\n",
" 330 0.00 0.00 0.00 2\n",
" 331 0.00 0.00 0.00 85\n",
" 332 0.00 0.00 0.00 2\n",
" 333 0.00 0.00 0.00 3\n",
" 334 0.00 0.00 0.00 2\n",
" 336 0.00 0.00 0.00 1\n",
" 337 0.00 0.00 0.00 1\n",
" 338 0.00 0.00 0.00 468\n",
" 339 0.00 0.00 0.00 1\n",
" 343 0.00 0.00 0.00 2\n",
" 345 0.00 0.00 0.00 1\n",
" 346 0.00 0.00 0.00 2\n",
" 347 0.00 0.00 0.00 42\n",
" 348 0.00 0.00 0.00 1\n",
" 350 0.00 0.00 0.00 2\n",
" 353 0.00 0.00 0.00 62\n",
" 355 0.00 0.00 0.00 1\n",
" 359 0.00 0.00 0.00 165\n",
" 364 0.00 0.00 0.00 14\n",
" 365 0.00 0.00 0.00 1\n",
" 366 0.00 0.00 0.00 1\n",
" 368 0.00 0.00 0.00 1\n",
" 372 0.00 0.00 0.00 1\n",
" 373 0.00 0.00 0.00 1\n",
" 375 0.00 0.00 0.00 366\n",
" 377 0.00 0.00 0.00 1\n",
" 381 0.00 0.00 0.00 3\n",
" 383 0.00 0.00 0.00 3\n",
" 384 0.00 0.00 0.00 1\n",
" 387 0.00 0.00 0.00 12\n",
" 390 0.00 0.00 0.00 5\n",
" 392 0.00 0.00 0.00 1\n",
" 394 0.00 0.00 0.00 48\n",
" 395 0.00 0.00 0.00 1\n",
" 397 0.00 0.00 0.00 2\n",
" 398 0.00 0.00 0.00 5\n",
" 400 0.00 0.00 0.00 1\n",
" 404 0.00 0.00 0.00 202\n",
" 405 0.00 0.00 0.00 1\n",
" 409 0.00 0.00 0.00 1\n",
" 410 0.00 0.00 0.00 1\n",
" 416 0.00 0.00 0.00 10\n",
" 417 0.00 0.00 0.00 1\n",
" 418 0.00 0.00 0.00 1\n",
" 419 0.00 0.00 0.00 1\n",
" 420 0.00 0.00 0.00 92\n",
" 423 0.00 0.00 0.00 2\n",
" 424 0.00 0.00 0.00 44\n",
" 427 0.00 0.00 0.00 4\n",
" 428 0.00 0.00 0.00 28\n",
" 431 0.00 0.00 0.00 11\n",
" 433 0.00 0.00 0.00 3\n",
" 434 0.00 0.00 0.00 6\n",
" 436 0.00 0.00 0.00 1\n",
" 438 0.00 0.00 0.00 14\n",
" 441 0.00 0.00 0.00 3\n",
" 443 0.00 0.00 0.00 1\n",
" 444 0.00 0.00 0.00 2\n",
" 450 0.00 0.00 0.00 5\n",
" 451 0.00 0.00 0.00 4\n",
" 453 0.00 0.00 0.00 1\n",
" 456 0.00 0.00 0.00 6\n",
" 459 0.00 0.00 0.00 4\n",
" 460 0.00 0.00 0.00 2\n",
" 462 0.00 0.00 0.00 2\n",
" 463 0.00 0.00 0.00 2\n",
" 465 0.00 0.00 0.00 1\n",
" 466 0.00 0.00 0.00 1\n",
" 469 0.00 0.00 0.00 1\n",
" 470 0.00 0.00 0.00 1\n",
" 474 0.00 0.00 0.00 1\n",
" 0 1.00 1.00 1.00 41\n",
" 1 1.00 0.02 0.05 250\n",
" 2 0.00 0.00 0.00 8\n",
" 3 0.94 1.00 0.97 131\n",
" 4 0.95 1.00 0.97 4743\n",
"\n",
" accuracy 0.11 1413 3\n",
" macro avg 0.00 0.00 0.00 1413 3\n",
"weighted avg 0.01 0.11 0.02 1413 3\n",
" accuracy 0.95 5173\n",
" macro avg 0.78 0.60 0.60 5173\n",
"weighted avg 0.95 0.95 0.93 5173\n",
"\n",
"Predicted labels: [168 168 168 ... 168 168 168]\n",
"Accuracy: 0.10776197551829053\n",
"Predicted labels: [4 4 4 ... 3 4 4]\n",
"Accuracy: 0.9512855209742895\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
@ -358,14 +219,14 @@
},
{
"cell_type": "code",
"execution_count": 10 ,
"execution_count": 97 ,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[59 ]\n"
"[3 ]\n"
]
}
],
@ -377,7 +238,7 @@
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
"print(model.predict([[1, 2,3 ]]))\n"
"print(model.predict([[2,44,- 12]]))\n"
]
},
{