You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ML-For-Beginners/API/1-API/solution/notebook.ipynb

391 lines
18 KiB

{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"## Build an API with two different models\n",
"\n",
"- Linear Regression\n",
"- Classification"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# Load ../data/ufos.csv into a DataFrame.\n",
"ufos = pd.read_csv(filepath_or_buffer='../data/ufos.csv')\n",
"\n",
"# Display the first five rows as a quick check of the load.\n",
"ufos.head(n=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 70662 entries, 0 to 80331\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 70662 non-null float64\n 1 Country 70662 non-null int64 \n 2 Latitude 70662 non-null float64\n 3 Longitude 70662 non-null float64\ndtypes: float64(3), int64(1)\nmemory usage: 2.7 MB\n"
]
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Raw column name -> name used in the rest of the notebook.\n",
"column_map = {\n",
"    'duration (seconds)': 'Seconds',\n",
"    'country': 'Country',\n",
"    'latitude': 'Latitude',\n",
"    'longitude': 'Longitude',\n",
"}\n",
"\n",
"# Keep just those four columns, drop every row with a missing value in any\n",
"# of them, then swap each country label for an integer code.\n",
"ufos = (\n",
"    ufos[list(column_map)]\n",
"    .rename(columns=column_map)\n",
"    .dropna()\n",
"    .assign(Country=lambda frame: LabelEncoder().fit_transform(frame['Country']))\n",
")\n",
"\n",
"# only view seconds 1-60 (both bounds included)\n",
"ufos = ufos[ufos['Seconds'].between(1, 60)]\n",
"\n",
"ufos.info()\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"new_columns = ['Seconds','Latitude','Longitude','Country']\n",
"\n",
"# Keep only the modelling columns and drop rows with missing values.\n",
"# .copy() makes new_ufos an independent frame, so the assignment below\n",
"# never writes into a slice of ufos.\n",
"new_ufos = ufos[[c for c in ufos.columns if c in new_columns]].dropna().copy()\n",
"\n",
"# Do NOT label-encode every column: LabelEncoder replaces each value with its\n",
"# position among the column's sorted unique values, which turns real\n",
"# coordinates and durations into arbitrary indices. Latitude and Longitude\n",
"# stay numeric, and Country was already converted to integer codes when ufos\n",
"# was prepared.\n",
"# Seconds is the classifier target, so round it to whole-second integer labels.\n",
"new_ufos['Seconds'] = new_ufos['Seconds'].round().astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Feature columns, in the order they are passed to the model.\n",
"Selected_features = ['Latitude', 'Longitude', 'Country']\n",
"\n",
"# Features and target are taken from the same frame, so rows stay aligned.\n",
"X = new_ufos.loc[:, Selected_features]\n",
"y = new_ufos.loc[:, 'Seconds']\n",
"\n",
"# Hold out 20% of the rows for evaluation; the fixed random_state makes the\n",
"# split repeatable between runs.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.2,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 4 0.00 0.00 0.00 1\n",
" 10 0.00 0.00 0.00 11\n",
" 12 0.00 0.00 0.00 1\n",
" 13 0.00 0.00 0.00 1\n",
" 15 0.00 0.00 0.00 131\n",
" 17 0.00 0.00 0.00 1\n",
" 18 0.00 0.00 0.00 19\n",
" 21 0.00 0.00 0.00 410\n",
" 22 0.00 0.00 0.00 3\n",
" 23 0.00 0.00 0.00 284\n",
" 25 0.00 0.00 0.00 1\n",
" 26 0.00 0.00 0.00 1\n",
" 27 0.00 0.00 0.00 146\n",
" 29 0.00 0.00 0.00 555\n",
" 31 0.00 0.00 0.00 84\n",
" 33 0.00 0.00 0.00 86\n",
" 34 0.00 0.00 0.00 85\n",
" 35 0.00 0.00 0.00 8\n",
" 36 0.00 0.00 0.00 569\n",
" 37 0.00 0.00 0.00 3\n",
" 38 0.00 0.00 0.00 39\n",
" 39 0.00 0.00 0.00 5\n",
" 40 0.00 0.00 0.00 4\n",
" 41 0.00 0.00 0.00 352\n",
" 42 0.00 0.00 0.00 1\n",
" 43 0.00 0.00 0.00 4\n",
" 44 0.00 0.00 0.00 2\n",
" 45 0.00 0.00 0.00 1\n",
" 46 0.00 0.00 0.00 360\n",
" 49 0.00 0.00 0.00 2\n",
" 50 0.00 0.00 0.00 2\n",
" 52 0.00 0.00 0.00 60\n",
" 53 0.00 0.00 0.00 1\n",
" 55 0.00 0.00 0.00 1\n",
" 56 0.00 0.00 0.00 1\n",
" 57 0.00 0.00 0.00 634\n",
" 60 0.00 0.00 0.00 1\n",
" 61 0.00 0.00 0.00 2\n",
" 62 0.00 0.00 0.00 20\n",
" 65 0.00 0.00 0.00 3\n",
" 67 0.00 0.00 0.00 83\n",
" 71 0.00 0.00 0.00 171\n",
" 75 0.00 0.00 0.00 19\n",
" 79 0.00 0.00 0.00 5\n",
" 83 0.00 0.00 0.00 1005\n",
" 86 0.00 0.00 0.00 1\n",
" 88 0.00 0.00 0.00 1\n",
" 94 0.00 0.00 0.00 1\n",
" 95 0.00 0.00 0.00 1\n",
" 97 0.00 0.00 0.00 7\n",
" 99 0.00 0.00 0.00 2\n",
" 102 0.00 0.00 0.00 2\n",
" 104 0.00 0.00 0.00 1\n",
" 105 0.00 0.00 0.00 5\n",
" 107 0.00 0.00 0.00 1\n",
" 109 0.00 0.00 0.00 1\n",
" 110 0.00 0.00 0.00 137\n",
" 111 0.00 0.00 0.00 1\n",
" 113 0.00 0.00 0.00 1\n",
" 114 0.00 0.00 0.00 1\n",
" 117 0.00 0.00 0.00 4\n",
" 122 0.00 0.00 0.00 1270\n",
" 123 0.00 0.00 0.00 1\n",
" 124 0.00 0.00 0.00 1\n",
" 129 0.00 0.00 0.00 1\n",
" 130 0.00 0.00 0.00 1\n",
" 133 0.00 0.00 0.00 9\n",
" 135 0.00 0.00 0.00 1\n",
" 137 0.00 0.00 0.00 2\n",
" 140 0.00 0.00 0.00 862\n",
" 145 0.00 0.00 0.00 2\n",
" 150 0.00 0.00 0.00 2\n",
" 154 0.00 0.00 0.00 375\n",
" 155 0.00 0.00 0.00 1\n",
" 161 0.00 0.00 0.00 1\n",
" 163 0.00 0.00 0.00 1\n",
" 168 0.11 1.00 0.19 1523\n",
" 169 0.00 0.00 0.00 1\n",
" 172 0.00 0.00 0.00 1\n",
" 174 0.00 0.00 0.00 1\n",
" 178 0.00 0.00 0.00 2\n",
" 180 0.00 0.00 0.00 108\n",
" 181 0.00 0.00 0.00 1\n",
" 184 0.00 0.00 0.00 1\n",
" 188 0.00 0.00 0.00 149\n",
" 193 0.00 0.00 0.00 1\n",
" 196 0.00 0.00 0.00 136\n",
" 197 0.00 0.00 0.00 1\n",
" 200 0.00 0.00 0.00 16\n",
" 202 0.00 0.00 0.00 1\n",
" 207 0.00 0.00 0.00 1130\n",
" 215 0.00 0.00 0.00 1\n",
" 220 0.00 0.00 0.00 5\n",
" 223 0.00 0.00 0.00 1\n",
" 226 0.00 0.00 0.00 44\n",
" 228 0.00 0.00 0.00 1\n",
" 233 0.00 0.00 0.00 10\n",
" 237 0.00 0.00 0.00 5\n",
" 239 0.00 0.00 0.00 736\n",
" 241 0.00 0.00 0.00 4\n",
" 245 0.00 0.00 0.00 1\n",
" 248 0.00 0.00 0.00 1\n",
" 251 0.00 0.00 0.00 8\n",
" 258 0.00 0.00 0.00 11\n",
" 265 0.00 0.00 0.00 1\n",
" 267 0.00 0.00 0.00 5\n",
" 268 0.00 0.00 0.00 1\n",
" 273 0.00 0.00 0.00 1\n",
" 275 0.00 0.00 0.00 542\n",
" 277 0.00 0.00 0.00 1\n",
" 279 0.00 0.00 0.00 1\n",
" 281 0.00 0.00 0.00 1\n",
" 284 0.00 0.00 0.00 1\n",
" 287 0.00 0.00 0.00 1\n",
" 288 0.00 0.00 0.00 4\n",
" 290 0.00 0.00 0.00 1\n",
" 293 0.00 0.00 0.00 1\n",
" 298 0.00 0.00 0.00 1\n",
" 300 0.00 0.00 0.00 1\n",
" 301 0.00 0.00 0.00 1\n",
" 302 0.00 0.00 0.00 2\n",
" 305 0.00 0.00 0.00 3\n",
" 306 0.00 0.00 0.00 3\n",
" 308 0.00 0.00 0.00 1\n",
" 309 0.00 0.00 0.00 1\n",
" 313 0.00 0.00 0.00 2\n",
" 314 0.00 0.00 0.00 1\n",
" 316 0.00 0.00 0.00 1\n",
" 317 0.00 0.00 0.00 8\n",
" 324 0.00 0.00 0.00 1\n",
" 330 0.00 0.00 0.00 2\n",
" 331 0.00 0.00 0.00 85\n",
" 332 0.00 0.00 0.00 2\n",
" 333 0.00 0.00 0.00 3\n",
" 334 0.00 0.00 0.00 2\n",
" 336 0.00 0.00 0.00 1\n",
" 337 0.00 0.00 0.00 1\n",
" 338 0.00 0.00 0.00 468\n",
" 339 0.00 0.00 0.00 1\n",
" 343 0.00 0.00 0.00 2\n",
" 345 0.00 0.00 0.00 1\n",
" 346 0.00 0.00 0.00 2\n",
" 347 0.00 0.00 0.00 42\n",
" 348 0.00 0.00 0.00 1\n",
" 350 0.00 0.00 0.00 2\n",
" 353 0.00 0.00 0.00 62\n",
" 355 0.00 0.00 0.00 1\n",
" 359 0.00 0.00 0.00 165\n",
" 364 0.00 0.00 0.00 14\n",
" 365 0.00 0.00 0.00 1\n",
" 366 0.00 0.00 0.00 1\n",
" 368 0.00 0.00 0.00 1\n",
" 372 0.00 0.00 0.00 1\n",
" 373 0.00 0.00 0.00 1\n",
" 375 0.00 0.00 0.00 366\n",
" 377 0.00 0.00 0.00 1\n",
" 381 0.00 0.00 0.00 3\n",
" 383 0.00 0.00 0.00 3\n",
" 384 0.00 0.00 0.00 1\n",
" 387 0.00 0.00 0.00 12\n",
" 390 0.00 0.00 0.00 5\n",
" 392 0.00 0.00 0.00 1\n",
" 394 0.00 0.00 0.00 48\n",
" 395 0.00 0.00 0.00 1\n",
" 397 0.00 0.00 0.00 2\n",
" 398 0.00 0.00 0.00 5\n",
" 400 0.00 0.00 0.00 1\n",
" 404 0.00 0.00 0.00 202\n",
" 405 0.00 0.00 0.00 1\n",
" 409 0.00 0.00 0.00 1\n",
" 410 0.00 0.00 0.00 1\n",
" 416 0.00 0.00 0.00 10\n",
" 417 0.00 0.00 0.00 1\n",
" 418 0.00 0.00 0.00 1\n",
" 419 0.00 0.00 0.00 1\n",
" 420 0.00 0.00 0.00 92\n",
" 423 0.00 0.00 0.00 2\n",
" 424 0.00 0.00 0.00 44\n",
" 427 0.00 0.00 0.00 4\n",
" 428 0.00 0.00 0.00 28\n",
" 431 0.00 0.00 0.00 11\n",
" 433 0.00 0.00 0.00 3\n",
" 434 0.00 0.00 0.00 6\n",
" 436 0.00 0.00 0.00 1\n",
" 438 0.00 0.00 0.00 14\n",
" 441 0.00 0.00 0.00 3\n",
" 443 0.00 0.00 0.00 1\n",
" 444 0.00 0.00 0.00 2\n",
" 450 0.00 0.00 0.00 5\n",
" 451 0.00 0.00 0.00 4\n",
" 453 0.00 0.00 0.00 1\n",
" 456 0.00 0.00 0.00 6\n",
" 459 0.00 0.00 0.00 4\n",
" 460 0.00 0.00 0.00 2\n",
" 462 0.00 0.00 0.00 2\n",
" 463 0.00 0.00 0.00 2\n",
" 465 0.00 0.00 0.00 1\n",
" 466 0.00 0.00 0.00 1\n",
" 469 0.00 0.00 0.00 1\n",
" 470 0.00 0.00 0.00 1\n",
" 474 0.00 0.00 0.00 1\n",
"\n",
" accuracy 0.11 14133\n",
" macro avg 0.00 0.00 0.00 14133\n",
"weighted avg 0.01 0.11 0.02 14133\n",
"\n",
"Predicted labels: [168 168 168 ... 168 168 168]\n",
"Accuracy: 0.10776197551829053\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
"# Fit a logistic-regression classifier with default settings on the training split.\n",
"model = LogisticRegression().fit(X_train, y_train)\n",
"\n",
"# Predict a label for every held-out row.\n",
"predictions = model.predict(X_test)\n",
"\n",
"# Per-class precision/recall/F1 first, then the raw predictions and overall accuracy.\n",
"report = classification_report(y_test, predictions)\n",
"print(report)\n",
"print('Predicted labels: ', predictions)\n",
"accuracy = accuracy_score(y_test, predictions)\n",
"print('Accuracy: ', accuracy)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[59]\n"
]
}
],
"source": [
"import pickle\n",
"model_filename = 'ufo-model.pkl'\n",
"\n",
"# Save the trained model. The with-block closes the file, so every byte is\n",
"# flushed to disk before the file is read back below.\n",
"with open(model_filename, 'wb') as model_file:\n",
"    pickle.dump(model, model_file)\n",
"\n",
"# Reload from disk to confirm the saved file round-trips.\n",
"# NOTE: unpickling can execute arbitrary code; only load pickle files you created or trust.\n",
"with open(model_filename, 'rb') as model_file:\n",
"    model = pickle.load(model_file)\n",
"\n",
"# Smoke-test the reloaded model on one hand-written row\n",
"# (feature order: Latitude, Longitude, Country).\n",
"print(model.predict([[1,2,3]]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}