You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
256 lines
12 KiB
256 lines
12 KiB
{
|
|
"metadata": {
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.0"
|
|
},
|
|
"orig_nbformat": 2,
|
|
"kernelspec": {
|
|
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
|
|
"display_name": "Python 3.7.0 64-bit ('3.7')"
|
|
},
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2,
|
|
"cells": [
|
|
{
|
|
"source": [
|
|
"## Build a Web App using a Regression model to learn about UFO sightings"
|
|
],
|
|
"cell_type": "markdown",
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
" datetime city state country shape \\\n",
|
|
"0 10/10/1949 20:30 san marcos tx us cylinder \n",
|
|
"1 10/10/1949 21:00 lackland afb tx NaN light \n",
|
|
"2 10/10/1955 17:00 chester (uk/england) NaN gb circle \n",
|
|
"3 10/10/1956 21:00 edna tx us circle \n",
|
|
"4 10/10/1960 20:00 kaneohe hi us light \n",
|
|
"\n",
|
|
" duration (seconds) duration (hours/min) \\\n",
|
|
"0 2700.0 45 minutes \n",
|
|
"1 7200.0 1-2 hrs \n",
|
|
"2 20.0 20 seconds \n",
|
|
"3 20.0 1/2 hour \n",
|
|
"4 900.0 15 minutes \n",
|
|
"\n",
|
|
" comments date posted latitude \\\n",
|
|
"0 This event took place in early fall around 194... 4/27/2004 29.883056 \n",
|
|
"1 1949 Lackland AFB, TX. Lights racing acros... 12/16/2005 29.384210 \n",
|
|
"2 Green/Orange circular disc over Chester, En... 1/21/2008 53.200000 \n",
|
|
"3 My older brother and twin sister were leaving ... 1/17/2004 28.978333 \n",
|
|
"4 AS a Marine 1st Lt. flying an FJ4B fighter/att... 1/22/2004 21.418056 \n",
|
|
"\n",
|
|
" longitude \n",
|
|
"0 -97.941111 \n",
|
|
"1 -98.581082 \n",
|
|
"2 -2.916667 \n",
|
|
"3 -96.645833 \n",
|
|
"4 -157.803611 "
|
|
],
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>datetime</th>\n <th>city</th>\n <th>state</th>\n <th>country</th>\n <th>shape</th>\n <th>duration (seconds)</th>\n <th>duration (hours/min)</th>\n <th>comments</th>\n <th>date posted</th>\n <th>latitude</th>\n <th>longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>10/10/1949 20:30</td>\n <td>san marcos</td>\n <td>tx</td>\n <td>us</td>\n <td>cylinder</td>\n <td>2700.0</td>\n <td>45 minutes</td>\n <td>This event took place in early fall around 194...</td>\n <td>4/27/2004</td>\n <td>29.883056</td>\n <td>-97.941111</td>\n </tr>\n <tr>\n <th>1</th>\n <td>10/10/1949 21:00</td>\n <td>lackland afb</td>\n <td>tx</td>\n <td>NaN</td>\n <td>light</td>\n <td>7200.0</td>\n <td>1-2 hrs</td>\n <td>1949 Lackland AFB&#44 TX. Lights racing acros...</td>\n <td>12/16/2005</td>\n <td>29.384210</td>\n <td>-98.581082</td>\n </tr>\n <tr>\n <th>2</th>\n <td>10/10/1955 17:00</td>\n <td>chester (uk/england)</td>\n <td>NaN</td>\n <td>gb</td>\n <td>circle</td>\n <td>20.0</td>\n <td>20 seconds</td>\n <td>Green/Orange circular disc over Chester&#44 En...</td>\n <td>1/21/2008</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>10/10/1956 21:00</td>\n <td>edna</td>\n <td>tx</td>\n <td>us</td>\n <td>circle</td>\n <td>20.0</td>\n <td>1/2 hour</td>\n <td>My older brother and twin sister were leaving ...</td>\n <td>1/17/2004</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>4</th>\n <td>10/10/1960 20:00</td>\n <td>kaneohe</td>\n <td>hi</td>\n <td>us</td>\n <td>light</td>\n <td>900.0</td>\n <td>15 minutes</td>\n <td>AS a Marine 1st Lt. 
flying an FJ4B fighter/att...</td>\n <td>1/22/2004</td>\n <td>21.418056</td>\n <td>-157.803611</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 23
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"ufos = pd.read_csv('../data/ufos.csv')\n",
|
|
"ufos.head()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 24
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
|
|
"\n",
|
|
"ufos.Country.unique()\n",
|
|
"\n",
|
|
"# Integer codes LabelEncoder assigns later (classes sorted alphabetically): 0 au, 1 ca, 2 de, 3 gb, 4 us"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Seconds 25863 non-null float64\n 1 Country 25863 non-null object \n 2 Latitude 25863 non-null float64\n 3 Longitude 25863 non-null float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ufos.dropna(inplace=True)\n",
|
|
"\n",
|
|
"ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
|
|
"\n",
|
|
"ufos.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
" Seconds Country Latitude Longitude\n",
|
|
"2 20.0 3 53.200000 -2.916667\n",
|
|
"3 20.0 4 28.978333 -96.645833\n",
|
|
"14 30.0 4 35.823889 -80.253611\n",
|
|
"23 60.0 4 45.582778 -122.352222\n",
|
|
"24 3.0 3 51.783333 -0.783333"
|
|
],
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Seconds</th>\n <th>Country</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2</th>\n <td>20.0</td>\n <td>3</td>\n <td>53.200000</td>\n <td>-2.916667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>20.0</td>\n <td>4</td>\n <td>28.978333</td>\n <td>-96.645833</td>\n </tr>\n <tr>\n <th>14</th>\n <td>30.0</td>\n <td>4</td>\n <td>35.823889</td>\n <td>-80.253611</td>\n </tr>\n <tr>\n <th>23</th>\n <td>60.0</td>\n <td>4</td>\n <td>45.582778</td>\n <td>-122.352222</td>\n </tr>\n <tr>\n <th>24</th>\n <td>3.0</td>\n <td>3</td>\n <td>51.783333</td>\n <td>-0.783333</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 26
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|
"\n",
|
|
"ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
|
|
"\n",
|
|
"ufos.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"\n",
|
|
"Selected_features = ['Seconds','Latitude','Longitude']\n",
|
|
"\n",
|
|
"X = ufos[Selected_features]\n",
|
|
"y = ufos['Country']\n",
|
|
"\n",
|
|
"\n",
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stderr",
|
|
"text": [
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
|
|
" FutureWarning)\n",
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
|
|
" \"this warning.\", FutureWarning)\n",
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" 0 1.00 1.00 1.00 41\n",
|
|
" 1 1.00 0.02 0.05 250\n",
|
|
" 2 0.00 0.00 0.00 8\n",
|
|
" 3 0.94 1.00 0.97 131\n",
|
|
" 4 0.95 1.00 0.97 4743\n",
|
|
"\n",
|
|
" accuracy 0.95 5173\n",
|
|
" macro avg 0.78 0.60 0.60 5173\n",
|
|
"weighted avg 0.95 0.95 0.93 5173\n",
|
|
"\n",
|
|
"Predicted labels: [4 4 4 ... 3 4 4]\n",
|
|
"Accuracy: 0.9512855209742895\n",
|
|
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
|
|
" 'precision', 'predicted', average, warn_for)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.metrics import accuracy_score, classification_report \n",
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
"model = LogisticRegression()\n",
|
|
"model.fit(X_train, y_train)\n",
|
|
"predictions = model.predict(X_test)\n",
|
|
"\n",
|
|
"print(classification_report(y_test, predictions))\n",
|
|
"print('Predicted labels: ', predictions)\n",
|
|
"print('Accuracy: ', accuracy_score(y_test, predictions))\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"[3]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pickle\n",
|
|
"model_filename = 'ufo-model.pkl'\n",
|
|
"pickle.dump(model, open(model_filename,'wb'))\n",
|
|
"\n",
|
|
"model = pickle.load(open('ufo-model.pkl','rb'))\n",
|
|
"print(model.predict([[50,44,-12]]))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
]
|
|
} |