{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"## Build a Web App using a Regression model to learn about UFO sighting"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" datetime city state country shape \\\n",
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
"3 10/10/1956 21:00 edna tx us circle \n",
"4 10/10/1960 20:00 kaneohe hi us light \n",
"\n",
" duration (seconds) duration (hours/min) \\\n",
"0 2700.0 45 minutes \n",
"1 7200.0 1-2 hrs \n",
"2 20.0 20 seconds \n",
"3 20.0 1/2 hour \n",
"4 900.0 15 minutes \n",
"\n",
" comments date posted latitude \\\n",
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
"1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n",
"2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n",
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
"\n",
" longitude \n",
"0 -97.941111 \n",
"1 -98.581082 \n",
"2 -2.916667 \n",
"3 -96.645833 \n",
"4 -157.803611 "
],
"text/html": "
\n\n
\n \n \n | \n datetime | \n city | \n state | \n country | \n shape | \n duration (seconds) | \n duration (hours/min) | \n comments | \n date posted | \n latitude | \n longitude | \n
\n \n \n \n 0 | \n 10/10/1949 20:30 | \n san marcos | \n tx | \n us | \n cylinder | \n 2700.0 | \n 45 minutes | \n This event took place in early fall around 194... | \n 4/27/2004 | \n 29.883056 | \n -97.941111 | \n
\n \n 1 | \n 10/10/1949 21:00 | \n lackland afb | \n tx | \n NaN | \n light | \n 7200.0 | \n 1-2 hrs | \n 1949 Lackland AFB, TX. Lights racing acros... | \n 12/16/2005 | \n 29.384210 | \n -98.581082 | \n
\n \n 2 | \n 10/10/1955 17:00 | \n chester (uk/england) | \n NaN | \n gb | \n circle | \n 20.0 | \n 20 seconds | \n Green/Orange circular disc over Chester, En... | \n 1/21/2008 | \n 53.200000 | \n -2.916667 | \n
\n \n 3 | \n 10/10/1956 21:00 | \n edna | \n tx | \n us | \n circle | \n 20.0 | \n 1/2 hour | \n My older brother and twin sister were leaving ... | \n 1/17/2004 | \n 28.978333 | \n -96.645833 | \n
\n \n 4 | \n 10/10/1960 20:00 | \n kaneohe | \n hi | \n us | \n light | \n 900.0 | \n 15 minutes | \n AS a Marine 1st Lt. flying an FJ4B fighter/att... | \n 1/22/2004 | \n 21.418056 | \n -157.803611 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 23
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"ufos = pd.read_csv('../data/ufos.csv')\n",
"ufos.head()\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
]
},
"metadata": {},
"execution_count": 24
}
],
"source": [
"\n",
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
"\n",
"ufos.Country.unique()\n",
"\n",
"# 0 au, 1 ca, 2 de, 3 gb, 4 us"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n"
]
}
],
"source": [
"ufos.dropna(inplace=True)\n",
"\n",
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
"\n",
"ufos.info()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Seconds Country Latitude Longitude\n",
"2 20.0 3 53.200000 -2.916667\n",
"3 20.0 4 28.978333 -96.645833\n",
"14 30.0 4 35.823889 -80.253611\n",
"23 60.0 4 45.582778 -122.352222\n",
"24 3.0 3 51.783333 -0.783333"
],
"text/html": "\n\n
\n \n \n | \n Seconds | \n Country | \n Latitude | \n Longitude | \n
\n \n \n \n 2 | \n 20.0 | \n 3 | \n 53.200000 | \n -2.916667 | \n
\n \n 3 | \n 20.0 | \n 4 | \n 28.978333 | \n -96.645833 | \n
\n \n 14 | \n 30.0 | \n 4 | \n 35.823889 | \n -80.253611 | \n
\n \n 23 | \n 60.0 | \n 4 | \n 45.582778 | \n -122.352222 | \n
\n \n 24 | \n 3.0 | \n 3 | \n 51.783333 | \n -0.783333 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 26
}
],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
"\n",
"ufos.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"Selected_features = ['Seconds','Latitude','Longitude']\n",
"\n",
"X = ufos[Selected_features]\n",
"y = ufos['Country']\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n",
" precision recall f1-score support\n",
"\n",
" 0 1.00 1.00 1.00 41\n",
" 1 1.00 0.02 0.05 250\n",
" 2 0.00 0.00 0.00 8\n",
" 3 0.94 1.00 0.97 131\n",
" 4 0.95 1.00 0.97 4743\n",
"\n",
" accuracy 0.95 5173\n",
" macro avg 0.78 0.60 0.60 5173\n",
"weighted avg 0.95 0.95 0.93 5173\n",
"\n",
"Predicted labels: [4 4 4 ... 3 4 4]\n",
"Accuracy: 0.9512855209742895\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n"
]
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, classification_report \n",
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"print(classification_report(y_test, predictions))\n",
"print('Predicted labels: ', predictions)\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[3]\n"
]
}
],
"source": [
"import pickle\n",
"model_filename = 'ufo-model.pkl'\n",
"pickle.dump(model, open(model_filename,'wb'))\n",
"\n",
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
"print(model.predict([[50,44,-12]]))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}