You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
391 lines
18 KiB
391 lines
18 KiB
{
|
|
"metadata": {
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.0"
|
|
},
|
|
"orig_nbformat": 2,
|
|
"kernelspec": {
|
|
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
|
|
"display_name": "Python 3.7.0 64-bit ('3.7')"
|
|
},
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2,
|
|
"cells": [
|
|
{
|
|
"source": [
|
|
"## Build an API with two different models\n",
|
|
"\n",
|
|
"- Linear Regression\n",
|
|
"- Classification"
|
|
],
|
|
"cell_type": "markdown",
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"ufos = pd.read_csv('../data/ufos.csv')\n",
|
|
"ufos.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 70662 entries, 0 to 80331\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 MB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"\n",
|
|
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
|
|
"\n",
|
|
"ufos.dropna(inplace=True)\n",
|
|
"\n",
|
|
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
|
|
"\n",
|
|
"# only view seconds 1-60\n",
|
|
"\n",
|
|
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
|
|
"\n",
|
|
"ufos.info()\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"\n",
|
|
"new_columns = ['Seconds','Latitude','Longitude','Country']\n",
|
|
"\n",
|
|
"new_ufos = ufos.drop([c for c in ufos.columns if c not in new_columns], axis=1)\n",
|
|
"\n",
|
|
"new_ufos.dropna(inplace=True)\n",
|
|
"\n",
|
|
"new_ufos = new_ufos.apply(LabelEncoder().fit_transform)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"Selected_features = ['Latitude','Longitude','Country']\n",
|
|
"\n",
|
|
"X = new_ufos[Selected_features]\n",
|
|
"y = new_ufos['Seconds']\n",
|
|
"\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
|
" FutureWarning)\n",
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
|
|
" \"this warning.\", FutureWarning)\n",
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" 4 0.00 0.00 0.00 1\n",
|
|
" 10 0.00 0.00 0.00 11\n",
|
|
" 12 0.00 0.00 0.00 1\n",
|
|
" 13 0.00 0.00 0.00 1\n",
|
|
" 15 0.00 0.00 0.00 131\n",
|
|
" 17 0.00 0.00 0.00 1\n",
|
|
" 18 0.00 0.00 0.00 19\n",
|
|
" 21 0.00 0.00 0.00 410\n",
|
|
" 22 0.00 0.00 0.00 3\n",
|
|
" 23 0.00 0.00 0.00 284\n",
|
|
" 25 0.00 0.00 0.00 1\n",
|
|
" 26 0.00 0.00 0.00 1\n",
|
|
" 27 0.00 0.00 0.00 146\n",
|
|
" 29 0.00 0.00 0.00 555\n",
|
|
" 31 0.00 0.00 0.00 84\n",
|
|
" 33 0.00 0.00 0.00 86\n",
|
|
" 34 0.00 0.00 0.00 85\n",
|
|
" 35 0.00 0.00 0.00 8\n",
|
|
" 36 0.00 0.00 0.00 569\n",
|
|
" 37 0.00 0.00 0.00 3\n",
|
|
" 38 0.00 0.00 0.00 39\n",
|
|
" 39 0.00 0.00 0.00 5\n",
|
|
" 40 0.00 0.00 0.00 4\n",
|
|
" 41 0.00 0.00 0.00 352\n",
|
|
" 42 0.00 0.00 0.00 1\n",
|
|
" 43 0.00 0.00 0.00 4\n",
|
|
" 44 0.00 0.00 0.00 2\n",
|
|
" 45 0.00 0.00 0.00 1\n",
|
|
" 46 0.00 0.00 0.00 360\n",
|
|
" 49 0.00 0.00 0.00 2\n",
|
|
" 50 0.00 0.00 0.00 2\n",
|
|
" 52 0.00 0.00 0.00 60\n",
|
|
" 53 0.00 0.00 0.00 1\n",
|
|
" 55 0.00 0.00 0.00 1\n",
|
|
" 56 0.00 0.00 0.00 1\n",
|
|
" 57 0.00 0.00 0.00 634\n",
|
|
" 60 0.00 0.00 0.00 1\n",
|
|
" 61 0.00 0.00 0.00 2\n",
|
|
" 62 0.00 0.00 0.00 20\n",
|
|
" 65 0.00 0.00 0.00 3\n",
|
|
" 67 0.00 0.00 0.00 83\n",
|
|
" 71 0.00 0.00 0.00 171\n",
|
|
" 75 0.00 0.00 0.00 19\n",
|
|
" 79 0.00 0.00 0.00 5\n",
|
|
" 83 0.00 0.00 0.00 1005\n",
|
|
" 86 0.00 0.00 0.00 1\n",
|
|
" 88 0.00 0.00 0.00 1\n",
|
|
" 94 0.00 0.00 0.00 1\n",
|
|
" 95 0.00 0.00 0.00 1\n",
|
|
" 97 0.00 0.00 0.00 7\n",
|
|
" 99 0.00 0.00 0.00 2\n",
|
|
" 102 0.00 0.00 0.00 2\n",
|
|
" 104 0.00 0.00 0.00 1\n",
|
|
" 105 0.00 0.00 0.00 5\n",
|
|
" 107 0.00 0.00 0.00 1\n",
|
|
" 109 0.00 0.00 0.00 1\n",
|
|
" 110 0.00 0.00 0.00 137\n",
|
|
" 111 0.00 0.00 0.00 1\n",
|
|
" 113 0.00 0.00 0.00 1\n",
|
|
" 114 0.00 0.00 0.00 1\n",
|
|
" 117 0.00 0.00 0.00 4\n",
|
|
" 122 0.00 0.00 0.00 1270\n",
|
|
" 123 0.00 0.00 0.00 1\n",
|
|
" 124 0.00 0.00 0.00 1\n",
|
|
" 129 0.00 0.00 0.00 1\n",
|
|
" 130 0.00 0.00 0.00 1\n",
|
|
" 133 0.00 0.00 0.00 9\n",
|
|
" 135 0.00 0.00 0.00 1\n",
|
|
" 137 0.00 0.00 0.00 2\n",
|
|
" 140 0.00 0.00 0.00 862\n",
|
|
" 145 0.00 0.00 0.00 2\n",
|
|
" 150 0.00 0.00 0.00 2\n",
|
|
" 154 0.00 0.00 0.00 375\n",
|
|
" 155 0.00 0.00 0.00 1\n",
|
|
" 161 0.00 0.00 0.00 1\n",
|
|
" 163 0.00 0.00 0.00 1\n",
|
|
" 168 0.11 1.00 0.19 1523\n",
|
|
" 169 0.00 0.00 0.00 1\n",
|
|
" 172 0.00 0.00 0.00 1\n",
|
|
" 174 0.00 0.00 0.00 1\n",
|
|
" 178 0.00 0.00 0.00 2\n",
|
|
" 180 0.00 0.00 0.00 108\n",
|
|
" 181 0.00 0.00 0.00 1\n",
|
|
" 184 0.00 0.00 0.00 1\n",
|
|
" 188 0.00 0.00 0.00 149\n",
|
|
" 193 0.00 0.00 0.00 1\n",
|
|
" 196 0.00 0.00 0.00 136\n",
|
|
" 197 0.00 0.00 0.00 1\n",
|
|
" 200 0.00 0.00 0.00 16\n",
|
|
" 202 0.00 0.00 0.00 1\n",
|
|
" 207 0.00 0.00 0.00 1130\n",
|
|
" 215 0.00 0.00 0.00 1\n",
|
|
" 220 0.00 0.00 0.00 5\n",
|
|
" 223 0.00 0.00 0.00 1\n",
|
|
" 226 0.00 0.00 0.00 44\n",
|
|
" 228 0.00 0.00 0.00 1\n",
|
|
" 233 0.00 0.00 0.00 10\n",
|
|
" 237 0.00 0.00 0.00 5\n",
|
|
" 239 0.00 0.00 0.00 736\n",
|
|
" 241 0.00 0.00 0.00 4\n",
|
|
" 245 0.00 0.00 0.00 1\n",
|
|
" 248 0.00 0.00 0.00 1\n",
|
|
" 251 0.00 0.00 0.00 8\n",
|
|
" 258 0.00 0.00 0.00 11\n",
|
|
" 265 0.00 0.00 0.00 1\n",
|
|
" 267 0.00 0.00 0.00 5\n",
|
|
" 268 0.00 0.00 0.00 1\n",
|
|
" 273 0.00 0.00 0.00 1\n",
|
|
" 275 0.00 0.00 0.00 542\n",
|
|
" 277 0.00 0.00 0.00 1\n",
|
|
" 279 0.00 0.00 0.00 1\n",
|
|
" 281 0.00 0.00 0.00 1\n",
|
|
" 284 0.00 0.00 0.00 1\n",
|
|
" 287 0.00 0.00 0.00 1\n",
|
|
" 288 0.00 0.00 0.00 4\n",
|
|
" 290 0.00 0.00 0.00 1\n",
|
|
" 293 0.00 0.00 0.00 1\n",
|
|
" 298 0.00 0.00 0.00 1\n",
|
|
" 300 0.00 0.00 0.00 1\n",
|
|
" 301 0.00 0.00 0.00 1\n",
|
|
" 302 0.00 0.00 0.00 2\n",
|
|
" 305 0.00 0.00 0.00 3\n",
|
|
" 306 0.00 0.00 0.00 3\n",
|
|
" 308 0.00 0.00 0.00 1\n",
|
|
" 309 0.00 0.00 0.00 1\n",
|
|
" 313 0.00 0.00 0.00 2\n",
|
|
" 314 0.00 0.00 0.00 1\n",
|
|
" 316 0.00 0.00 0.00 1\n",
|
|
" 317 0.00 0.00 0.00 8\n",
|
|
" 324 0.00 0.00 0.00 1\n",
|
|
" 330 0.00 0.00 0.00 2\n",
|
|
" 331 0.00 0.00 0.00 85\n",
|
|
" 332 0.00 0.00 0.00 2\n",
|
|
" 333 0.00 0.00 0.00 3\n",
|
|
" 334 0.00 0.00 0.00 2\n",
|
|
" 336 0.00 0.00 0.00 1\n",
|
|
" 337 0.00 0.00 0.00 1\n",
|
|
" 338 0.00 0.00 0.00 468\n",
|
|
" 339 0.00 0.00 0.00 1\n",
|
|
" 343 0.00 0.00 0.00 2\n",
|
|
" 345 0.00 0.00 0.00 1\n",
|
|
" 346 0.00 0.00 0.00 2\n",
|
|
" 347 0.00 0.00 0.00 42\n",
|
|
" 348 0.00 0.00 0.00 1\n",
|
|
" 350 0.00 0.00 0.00 2\n",
|
|
" 353 0.00 0.00 0.00 62\n",
|
|
" 355 0.00 0.00 0.00 1\n",
|
|
" 359 0.00 0.00 0.00 165\n",
|
|
" 364 0.00 0.00 0.00 14\n",
|
|
" 365 0.00 0.00 0.00 1\n",
|
|
" 366 0.00 0.00 0.00 1\n",
|
|
" 368 0.00 0.00 0.00 1\n",
|
|
" 372 0.00 0.00 0.00 1\n",
|
|
" 373 0.00 0.00 0.00 1\n",
|
|
" 375 0.00 0.00 0.00 366\n",
|
|
" 377 0.00 0.00 0.00 1\n",
|
|
" 381 0.00 0.00 0.00 3\n",
|
|
" 383 0.00 0.00 0.00 3\n",
|
|
" 384 0.00 0.00 0.00 1\n",
|
|
" 387 0.00 0.00 0.00 12\n",
|
|
" 390 0.00 0.00 0.00 5\n",
|
|
" 392 0.00 0.00 0.00 1\n",
|
|
" 394 0.00 0.00 0.00 48\n",
|
|
" 395 0.00 0.00 0.00 1\n",
|
|
" 397 0.00 0.00 0.00 2\n",
|
|
" 398 0.00 0.00 0.00 5\n",
|
|
" 400 0.00 0.00 0.00 1\n",
|
|
" 404 0.00 0.00 0.00 202\n",
|
|
" 405 0.00 0.00 0.00 1\n",
|
|
" 409 0.00 0.00 0.00 1\n",
|
|
" 410 0.00 0.00 0.00 1\n",
|
|
" 416 0.00 0.00 0.00 10\n",
|
|
" 417 0.00 0.00 0.00 1\n",
|
|
" 418 0.00 0.00 0.00 1\n",
|
|
" 419 0.00 0.00 0.00 1\n",
|
|
" 420 0.00 0.00 0.00 92\n",
|
|
" 423 0.00 0.00 0.00 2\n",
|
|
" 424 0.00 0.00 0.00 44\n",
|
|
" 427 0.00 0.00 0.00 4\n",
|
|
" 428 0.00 0.00 0.00 28\n",
|
|
" 431 0.00 0.00 0.00 11\n",
|
|
" 433 0.00 0.00 0.00 3\n",
|
|
" 434 0.00 0.00 0.00 6\n",
|
|
" 436 0.00 0.00 0.00 1\n",
|
|
" 438 0.00 0.00 0.00 14\n",
|
|
" 441 0.00 0.00 0.00 3\n",
|
|
" 443 0.00 0.00 0.00 1\n",
|
|
" 444 0.00 0.00 0.00 2\n",
|
|
" 450 0.00 0.00 0.00 5\n",
|
|
" 451 0.00 0.00 0.00 4\n",
|
|
" 453 0.00 0.00 0.00 1\n",
|
|
" 456 0.00 0.00 0.00 6\n",
|
|
" 459 0.00 0.00 0.00 4\n",
|
|
" 460 0.00 0.00 0.00 2\n",
|
|
" 462 0.00 0.00 0.00 2\n",
|
|
" 463 0.00 0.00 0.00 2\n",
|
|
" 465 0.00 0.00 0.00 1\n",
|
|
" 466 0.00 0.00 0.00 1\n",
|
|
" 469 0.00 0.00 0.00 1\n",
|
|
" 470 0.00 0.00 0.00 1\n",
|
|
" 474 0.00 0.00 0.00 1\n",
|
|
"\n",
|
|
" accuracy 0.11 14133\n",
|
|
" macro avg 0.00 0.00 0.00 14133\n",
|
|
"weighted avg 0.01 0.11 0.02 14133\n",
|
|
"\n",
|
|
"Predicted labels: [168 168 168 ... 168 168 168]\n",
|
|
"Accuracy: 0.10776197551829053\n",
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
|
|
" 'precision', 'predicted', average, warn_for)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.metrics import accuracy_score, classification_report \n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"model = LogisticRegression()\n",
|
|
"model.fit(X_train, y_train)\n",
|
|
"predictions = model.predict(X_test)\n",
|
|
"\n",
|
|
"print(classification_report(y_test, predictions))\n",
|
|
"print('Predicted labels: ', predictions)\n",
|
|
"print('Accuracy: ', accuracy_score(y_test, predictions))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"[59]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pickle\n",
|
|
"s = pickle.dumps(model)\n",
|
|
"model_filename = 'ufo-model.pkl'\n",
|
|
"# Open the file to save as pkl file\n",
|
|
"pickle.dump(model, open(model_filename,'wb'))\n",
|
|
"\n",
|
|
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
|
|
"print(model.predict([[1,2,3]]))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
]
|
|
} |