{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" }, "orig_nbformat": 2, "kernelspec": { "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", "display_name": "Python 3.7.0 64-bit ('3.7')" }, "metadata": { "interpreter": { "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "## Build a Web App using a Regression model to learn about UFO sightings" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " datetime city state country shape \\\n", "0 10/10/1949 20:30 san marcos tx us cylinder \n", "1 10/10/1949 21:00 lackland afb tx NaN light \n", "2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n", "3 10/10/1956 21:00 edna tx us circle \n", "4 10/10/1960 20:00 kaneohe hi us light \n", "\n", " duration (seconds) duration (hours/min) \\\n", "0 2700.0 45 minutes \n", "1 7200.0 1-2 hrs \n", "2 20.0 20 seconds \n", "3 20.0 1/2 hour \n", "4 900.0 15 minutes \n", "\n", " comments date posted latitude \\\n", "0 This event took place in early fall around 194... 4/27/2004 29.883056 \n", "1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n", "2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n", "3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n", "4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n", "\n", " longitude \n", "0 -97.941111 \n", "1 -98.581082 \n", "2 -2.916667 \n", "3 -96.645833 \n", "4 -157.803611 " ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
datetimecitystatecountryshapeduration (seconds)duration (hours/min)commentsdate postedlatitudelongitude
010/10/1949 20:30san marcostxuscylinder2700.045 minutesThis event took place in early fall around 194...4/27/200429.883056-97.941111
110/10/1949 21:00lackland afbtxNaNlight7200.01-2 hrs1949 Lackland AFB&#44 TX. Lights racing acros...12/16/200529.384210-98.581082
210/10/1955 17:00chester (uk/england)NaNgbcircle20.020 secondsGreen/Orange circular disc over Chester&#44 En...1/21/200853.200000-2.916667
310/10/1956 21:00ednatxuscircle20.01/2 hourMy older brother and twin sister were leaving ...1/17/200428.978333-96.645833
410/10/1960 20:00kaneohehiuslight900.015 minutesAS a Marine 1st Lt. flying an FJ4B fighter/att...1/22/200421.418056-157.803611
\n
" }, "metadata": {}, "execution_count": 23 } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "# Keep the raw CSV under its own name so the column-selection cell below can be re-run without reloading the file.\n", "ufos_raw = pd.read_csv('../data/ufos.csv')\n", "ufos_raw.head()\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)" ] }, "metadata": {}, "execution_count": 24 } ], "source": [ "# Keep only the columns used for modelling, under shorter names.\n", "ufos = pd.DataFrame({'Seconds': ufos_raw['duration (seconds)'], 'Country': ufos_raw['country'],'Latitude': ufos_raw['latitude'],'Longitude': ufos_raw['longitude']})\n", "\n", "ufos.Country.unique()\n", "\n", "# Country codes after label encoding: 0 au, 1 ca, 2 de, 3 gb, 4 us" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n" ] } ], "source": [ "# Drop rows with any missing value, then keep sightings that lasted between 1 and 60 seconds (inclusive).\n", "ufos = ufos.dropna()\n", "ufos = ufos[ufos['Seconds'].between(1, 60)]\n", "\n", "ufos.info()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Seconds Country Latitude Longitude\n", "2 20.0 3 53.200000 -2.916667\n", "3 20.0 4 28.978333 -96.645833\n", "14 30.0 4 35.823889 -80.253611\n", "23 60.0 4 45.582778 -122.352222\n", "24 3.0 3 51.783333 -0.783333" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
SecondsCountryLatitudeLongitude
220.0353.200000-2.916667
320.0428.978333-96.645833
1430.0435.823889-80.253611
2360.0445.582778-122.352222
243.0351.783333-0.783333
\n
" }, "metadata": {}, "execution_count": 26 } ], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "# Swap the country strings for integer class codes.\n", "ufos = ufos.assign(Country=LabelEncoder().fit_transform(ufos['Country']))\n", "\n", "ufos.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# Duration and location are the features; the encoded country is the target.\n", "Selected_features = ['Seconds', 'Latitude', 'Longitude']\n", "\n", "X, y = ufos[Selected_features], ufos['Country']\n", "\n", "# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    X, y, random_state=0, test_size=0.2\n", ")\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", " FutureWarning)\n", "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", " \"this warning.\", FutureWarning)\n", " precision recall f1-score support\n", "\n", " 0 1.00 1.00 1.00 41\n", " 1 1.00 0.02 0.05 250\n", " 2 0.00 0.00 0.00 8\n", " 3 0.94 1.00 0.97 131\n", " 4 0.95 1.00 0.97 4743\n", "\n", " accuracy 0.95 5173\n", " macro avg 0.78 0.60 0.60 5173\n", "weighted avg 0.95 0.95 0.93 5173\n", "\n", "Predicted labels: [4 4 4 ... 
3 4 4]\n", "Accuracy: 0.9512855209742895\n", "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n", " 'precision', 'predicted', average, warn_for)\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, classification_report \n", "from sklearn.linear_model import LogisticRegression\n", "\n", "# Fit a logistic-regression classifier on the training split, then score it on the held-out split.\n", "model = LogisticRegression()\n", "model.fit(X_train, y_train)\n", "predictions = model.predict(X_test)\n", "\n", "print(classification_report(y_test, predictions))\n", "print('Predicted labels: ', predictions)\n", "print('Accuracy: ', accuracy_score(y_test, predictions))\n", "\n" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[3]\n" ] } ], "source": [ "import pickle\n", "\n", "model_filename = 'ufo-model.pkl'\n", "\n", "# A context manager guarantees the pickle is flushed and closed before it is read back.\n", "with open(model_filename, 'wb') as model_file:\n", "    pickle.dump(model, model_file)\n", "\n", "# Round-trip check: reload the file just written and classify one sample\n", "# given in Selected_features order (seconds, latitude, longitude).\n", "with open(model_filename, 'rb') as model_file:\n", "    model = pickle.load(model_file)\n", "\n", "print(model.predict([[50,44,-12]]))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }