diff --git a/API/1-API/solution/notebook.ipynb b/API/1-API/solution/notebook.ipynb index ec388331..7e68127b 100644 --- a/API/1-API/solution/notebook.ipynb +++ b/API/1-API/solution/notebook.ipynb @@ -38,9 +38,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " datetime city state country shape \\\n", + "0 10/10/1949 20:30 san marcos tx us cylinder \n", + "1 10/10/1949 21:00 lackland afb tx NaN light \n", + "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", + "3 10/10/1956 21:00 edna tx us circle \n", + "4 10/10/1960 20:00 kaneohe hi us light \n", + "\n", + " duration (seconds) duration (hours/min) \\\n", + "0 2700.0 45 minutes \n", + "1 7200.0 1-2 hrs \n", + "2 20.0 20 seconds \n", + "3 20.0 1/2 hour \n", + "4 900.0 15 minutes \n", + "\n", + " comments date posted latitude \\\n", + "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", + "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", + "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", + "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", + "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", + "\n", + " longitude \n", + "0 -97.941111 \n", + "1 -98.581082 \n", + "2 -2.916667 \n", + "3 -96.645833 \n", + "4 -157.803611 " + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" + }, + "metadata": {}, + "execution_count": 77 + } + ], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", @@ -52,14 +90,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 78, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "\nInt64Index: 70662 entries, 0 to 80331\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 MB\n" + "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null int64 \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 1010.3 KB\n" ] } ], @@ -83,33 +121,52 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 79, "metadata": {}, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Seconds Country Latitude Longitude\n", + "2 20.0 3 53.200000 -2.916667\n", + "3 20.0 4 28.978333 -96.645833\n", + "14 30.0 4 35.823889 -80.253611\n", + "23 60.0 4 45.582778 -122.352222\n", + "24 3.0 3 51.783333 -0.783333" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" + }, + "metadata": {}, + "execution_count": 79 + } + ], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", - "new_columns = ['Seconds','Latitude','Longitude','Country']\n", + "new_columns = ['Seconds','Country','Latitude','Longitude']\n", "\n", "new_ufos = ufos.drop([c for c in ufos.columns if c not in new_columns], axis=1)\n", "\n", "new_ufos.dropna(inplace=True)\n", "\n", - "new_ufos = new_ufos.apply(LabelEncoder().fit_transform)" + "new_ufos['Country'] = LabelEncoder().fit_transform(new_ufos['Country'])\n", + "\n", + "new_ufos.head()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", - "Selected_features = ['Latitude','Longitude','Country']\n", + "Selected_features = ['Seconds','Latitude','Longitude']\n", "\n", "X = new_ufos[Selected_features]\n", - "y = new_ufos['Seconds']\n", + "y = new_ufos['Country']\n", "\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" @@ -117,227 +174,31 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 89, "metadata": {}, "outputs": [ { "output_type": "stream", - "name": "stderr", + "name": "stdout", "text": [ - "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", - " FutureWarning)\n", - "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", - " \"this warning.\", FutureWarning)\n", " precision recall f1-score support\n", "\n", - " 4 0.00 0.00 0.00 1\n", - " 10 0.00 0.00 0.00 11\n", - " 12 0.00 0.00 0.00 1\n", - " 13 0.00 0.00 0.00 1\n", - " 15 0.00 0.00 0.00 131\n", - " 17 0.00 0.00 0.00 1\n", - " 18 0.00 0.00 0.00 19\n", - " 21 0.00 0.00 0.00 410\n", - " 22 0.00 0.00 0.00 3\n", - " 23 0.00 0.00 0.00 284\n", - " 25 0.00 0.00 0.00 1\n", - " 26 0.00 0.00 0.00 1\n", - " 27 0.00 0.00 0.00 146\n", - " 29 0.00 0.00 0.00 555\n", - " 31 0.00 0.00 0.00 84\n", - " 33 0.00 0.00 0.00 86\n", - " 34 0.00 0.00 0.00 85\n", - " 35 0.00 0.00 0.00 8\n", - " 36 0.00 0.00 0.00 569\n", - " 37 0.00 0.00 0.00 3\n", - " 38 0.00 0.00 0.00 39\n", - " 39 0.00 0.00 0.00 5\n", - " 40 0.00 0.00 0.00 4\n", - " 41 0.00 0.00 0.00 352\n", - " 42 0.00 0.00 0.00 1\n", - " 43 0.00 0.00 0.00 4\n", - " 44 0.00 0.00 0.00 2\n", - " 45 0.00 0.00 0.00 1\n", - " 46 0.00 0.00 0.00 360\n", - " 49 0.00 0.00 0.00 2\n", - " 50 0.00 0.00 0.00 2\n", - " 52 0.00 0.00 0.00 60\n", - " 53 0.00 0.00 0.00 1\n", - " 55 0.00 0.00 0.00 1\n", - " 56 0.00 0.00 0.00 1\n", - " 57 0.00 0.00 0.00 634\n", - " 60 0.00 0.00 0.00 1\n", - " 61 0.00 0.00 0.00 2\n", - " 62 0.00 0.00 0.00 20\n", - " 65 0.00 0.00 0.00 3\n", - " 67 0.00 0.00 0.00 83\n", - " 71 0.00 0.00 0.00 171\n", - " 75 0.00 0.00 0.00 19\n", - " 79 0.00 0.00 0.00 5\n", - " 83 0.00 0.00 0.00 1005\n", - " 86 0.00 0.00 0.00 1\n", - " 88 0.00 0.00 0.00 1\n", - " 94 0.00 0.00 0.00 1\n", - " 95 0.00 0.00 0.00 1\n", - " 97 0.00 0.00 0.00 7\n", - " 99 0.00 0.00 0.00 2\n", - " 102 0.00 0.00 0.00 2\n", - " 104 0.00 0.00 0.00 1\n", - " 105 0.00 0.00 0.00 5\n", - " 107 0.00 0.00 0.00 1\n", - " 109 0.00 0.00 0.00 1\n", - " 110 0.00 0.00 0.00 137\n", - " 111 0.00 0.00 0.00 1\n", - " 113 0.00 0.00 0.00 1\n", - " 114 0.00 0.00 0.00 1\n", - " 117 0.00 0.00 0.00 4\n", - " 122 0.00 0.00 0.00 1270\n", - " 123 0.00 0.00 0.00 1\n", - " 124 0.00 0.00 0.00 1\n", - " 129 0.00 0.00 0.00 1\n", - " 130 0.00 0.00 0.00 1\n", - " 133 0.00 0.00 0.00 9\n", - " 135 0.00 0.00 0.00 1\n", - " 137 0.00 0.00 0.00 2\n", - " 140 0.00 0.00 0.00 862\n", - " 145 0.00 0.00 0.00 2\n", - " 150 0.00 0.00 0.00 2\n", - " 154 0.00 0.00 0.00 375\n", - " 155 0.00 0.00 0.00 1\n", - " 161 0.00 0.00 0.00 1\n", - " 163 0.00 0.00 0.00 1\n", - " 168 0.11 1.00 0.19 1523\n", - " 169 0.00 0.00 0.00 1\n", - " 172 0.00 0.00 0.00 1\n", - " 174 0.00 0.00 0.00 1\n", - " 178 0.00 0.00 0.00 2\n", - " 180 0.00 0.00 0.00 108\n", - " 181 0.00 0.00 0.00 1\n", - " 184 0.00 0.00 0.00 1\n", - " 188 0.00 0.00 0.00 149\n", - " 193 0.00 0.00 0.00 1\n", - " 196 0.00 0.00 0.00 136\n", - " 197 0.00 0.00 0.00 1\n", - " 200 0.00 0.00 0.00 16\n", - " 202 0.00 0.00 0.00 1\n", - " 207 0.00 0.00 0.00 1130\n", - " 215 0.00 0.00 0.00 1\n", - " 220 0.00 0.00 0.00 5\n", - " 223 0.00 0.00 0.00 1\n", - " 226 0.00 0.00 0.00 44\n", - " 228 0.00 0.00 0.00 1\n", - " 233 0.00 0.00 0.00 10\n", - " 237 0.00 0.00 0.00 5\n", - " 239 0.00 0.00 0.00 736\n", - " 241 0.00 0.00 0.00 4\n", - " 245 0.00 0.00 0.00 1\n", - " 248 0.00 0.00 0.00 1\n", - " 251 0.00 0.00 0.00 8\n", - " 258 0.00 0.00 0.00 11\n", - " 265 0.00 0.00 0.00 1\n", - " 267 0.00 0.00 0.00 5\n", - " 268 0.00 0.00 0.00 1\n", - " 273 0.00 0.00 0.00 1\n", - " 275 0.00 0.00 0.00 542\n", - " 277 0.00 0.00 0.00 1\n", - " 279 0.00 0.00 0.00 1\n", - " 281 0.00 0.00 0.00 1\n", - " 284 0.00 0.00 0.00 1\n", - " 287 0.00 0.00 0.00 1\n", - " 288 0.00 0.00 0.00 4\n", - " 290 0.00 0.00 0.00 1\n", - " 293 0.00 0.00 0.00 1\n", - " 298 0.00 0.00 0.00 1\n", - " 300 0.00 0.00 0.00 1\n", - " 301 0.00 0.00 0.00 1\n", - " 302 0.00 0.00 0.00 2\n", - " 305 0.00 0.00 0.00 3\n", - " 306 0.00 0.00 0.00 3\n", - " 308 0.00 0.00 0.00 1\n", - " 309 0.00 0.00 0.00 1\n", - " 313 0.00 0.00 0.00 2\n", - " 314 0.00 0.00 0.00 1\n", - " 316 0.00 0.00 0.00 1\n", - " 317 0.00 0.00 0.00 8\n", - " 324 0.00 0.00 0.00 1\n", - " 330 0.00 0.00 0.00 2\n", - " 331 0.00 0.00 0.00 85\n", - " 332 0.00 0.00 0.00 2\n", - " 333 0.00 0.00 0.00 3\n", - " 334 0.00 0.00 0.00 2\n", - " 336 0.00 0.00 0.00 1\n", - " 337 0.00 0.00 0.00 1\n", - " 338 0.00 0.00 0.00 468\n", - " 339 0.00 0.00 0.00 1\n", - " 343 0.00 0.00 0.00 2\n", - " 345 0.00 0.00 0.00 1\n", - " 346 0.00 0.00 0.00 2\n", - " 347 0.00 0.00 0.00 42\n", - " 348 0.00 0.00 0.00 1\n", - " 350 0.00 0.00 0.00 2\n", - " 353 0.00 0.00 0.00 62\n", - " 355 0.00 0.00 0.00 1\n", - " 359 0.00 0.00 0.00 165\n", - " 364 0.00 0.00 0.00 14\n", - " 365 0.00 0.00 0.00 1\n", - " 366 0.00 0.00 0.00 1\n", - " 368 0.00 0.00 0.00 1\n", - " 372 0.00 0.00 0.00 1\n", - " 373 0.00 0.00 0.00 1\n", - " 375 0.00 0.00 0.00 366\n", - " 377 0.00 0.00 0.00 1\n", - " 381 0.00 0.00 0.00 3\n", - " 383 0.00 0.00 0.00 3\n", - " 384 0.00 0.00 0.00 1\n", - " 387 0.00 0.00 0.00 12\n", - " 390 0.00 0.00 0.00 5\n", - " 392 0.00 0.00 0.00 1\n", - " 394 0.00 0.00 0.00 48\n", - " 395 0.00 0.00 0.00 1\n", - " 397 0.00 0.00 0.00 2\n", - " 398 0.00 0.00 0.00 5\n", - " 400 0.00 0.00 0.00 1\n", - " 404 0.00 0.00 0.00 202\n", - " 405 0.00 0.00 0.00 1\n", - " 409 0.00 0.00 0.00 1\n", - " 410 0.00 0.00 0.00 1\n", - " 416 0.00 0.00 0.00 10\n", - " 417 0.00 0.00 0.00 1\n", - " 418 0.00 0.00 0.00 1\n", - " 419 0.00 0.00 0.00 1\n", - " 420 0.00 0.00 0.00 92\n", - " 423 0.00 0.00 0.00 2\n", - " 424 0.00 0.00 0.00 44\n", - " 427 0.00 0.00 0.00 4\n", - " 428 0.00 0.00 0.00 28\n", - " 431 0.00 0.00 0.00 11\n", - " 433 0.00 0.00 0.00 3\n", - " 434 0.00 0.00 0.00 6\n", - " 436 0.00 0.00 0.00 1\n", - " 438 0.00 0.00 0.00 14\n", - " 441 0.00 0.00 0.00 3\n", - " 443 0.00 0.00 0.00 1\n", - " 444 0.00 0.00 0.00 2\n", - " 450 0.00 0.00 0.00 5\n", - " 451 0.00 0.00 0.00 4\n", - " 453 0.00 0.00 0.00 1\n", - " 456 0.00 0.00 0.00 6\n", - " 459 0.00 0.00 0.00 4\n", - " 460 0.00 0.00 0.00 2\n", - " 462 0.00 0.00 0.00 2\n", - " 463 0.00 0.00 0.00 2\n", - " 465 0.00 0.00 0.00 1\n", - " 466 0.00 0.00 0.00 1\n", - " 469 0.00 0.00 0.00 1\n", - " 470 0.00 0.00 0.00 1\n", - " 474 0.00 0.00 0.00 1\n", + " 0 1.00 1.00 1.00 41\n", + " 1 1.00 0.02 0.05 250\n", + " 2 0.00 0.00 0.00 8\n", + " 3 0.94 1.00 0.97 131\n", + " 4 0.95 1.00 0.97 4743\n", "\n", - " accuracy 0.11 14133\n", - " macro avg 0.00 0.00 0.00 14133\n", - "weighted avg 0.01 0.11 0.02 14133\n", + " accuracy 0.95 5173\n", + " macro avg 0.78 0.60 0.60 5173\n", + "weighted avg 0.95 0.95 0.93 5173\n", "\n", - "Predicted labels: [168 168 168 ... 168 168 168]\n", - "Accuracy: 0.10776197551829053\n", + "Predicted labels: [4 4 4 ... 3 4 4]\n", + "Accuracy: 0.9512855209742895\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", " 'precision', 'predicted', average, warn_for)\n" ] @@ -358,14 +219,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 97, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[59]\n" + "[3]\n" ] } ], @@ -377,7 +238,7 @@ "pickle.dump(model, open(model_filename,'wb'))\n", "\n", "model = pickle.load(open('ufo-model.pkl','rb'))\n", - "print(model.predict([[1,2,3]]))\n" + "print(model.predict([[2,44,-12]]))\n" ] }, { diff --git a/API/1-API/solution/ufo-model.pkl b/API/1-API/solution/ufo-model.pkl index 8e18367b..523962f2 100644 Binary files a/API/1-API/solution/ufo-model.pkl and b/API/1-API/solution/ufo-model.pkl differ