{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
   "display_name": "Python 3.7.0 64-bit ('3.7')"
  },
  "metadata": {
   "interpreter": {
    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "## Build a Web App using a Regression model to learn about UFO sighting"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "           datetime                  city state country     shape  \\\n",
       "0  10/10/1949 20:30            san marcos    tx      us  cylinder   \n",
       "1  10/10/1949 21:00          lackland afb    tx     NaN     light   \n",
       "2  10/10/1955 17:00  chester (uk/england)   NaN      gb    circle   \n",
       "3  10/10/1956 21:00                  edna    tx      us    circle   \n",
       "4  10/10/1960 20:00               kaneohe    hi      us     light   \n",
       "\n",
       "   duration (seconds) duration (hours/min)  \\\n",
       "0              2700.0           45 minutes   \n",
       "1              7200.0              1-2 hrs   \n",
       "2                20.0           20 seconds   \n",
       "3                20.0             1/2 hour   \n",
       "4               900.0           15 minutes   \n",
       "\n",
       "                                            comments date posted   latitude  \\\n",
       "0  This event took place in early fall around 194...   4/27/2004  29.883056   \n",
       "1  1949 Lackland AFB&#44 TX.  Lights racing acros...  12/16/2005  29.384210   \n",
       "2  Green/Orange circular disc over Chester&#44 En...   1/21/2008  53.200000   \n",
       "3  My older brother and twin sister were leaving ...   1/17/2004  28.978333   \n",
       "4  AS a Marine 1st Lt. flying an FJ4B fighter/att...   1/22/2004  21.418056   \n",
       "\n",
       "    longitude  \n",
       "0  -97.941111  \n",
       "1  -98.581082  \n",
       "2   -2.916667  \n",
       "3  -96.645833  \n",
       "4 -157.803611  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>datetime</th>\n      <th>city</th>\n      <th>state</th>\n      <th>country</th>\n      <th>shape</th>\n      <th>duration (seconds)</th>\n      <th>duration (hours/min)</th>\n      <th>comments</th>\n      <th>date posted</th>\n      <th>latitude</th>\n      <th>longitude</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>10/10/1949 20:30</td>\n      <td>san marcos</td>\n      <td>tx</td>\n      <td>us</td>\n      <td>cylinder</td>\n      <td>2700.0</td>\n      <td>45 minutes</td>\n      <td>This event took place in early fall around 194...</td>\n      <td>4/27/2004</td>\n      <td>29.883056</td>\n      <td>-97.941111</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>10/10/1949 21:00</td>\n      <td>lackland afb</td>\n      <td>tx</td>\n      <td>NaN</td>\n      <td>light</td>\n      <td>7200.0</td>\n      <td>1-2 hrs</td>\n      <td>1949 Lackland AFB&amp;#44 TX.  Lights racing acros...</td>\n      <td>12/16/2005</td>\n      <td>29.384210</td>\n      <td>-98.581082</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>10/10/1955 17:00</td>\n      <td>chester (uk/england)</td>\n      <td>NaN</td>\n      <td>gb</td>\n      <td>circle</td>\n      <td>20.0</td>\n      <td>20 seconds</td>\n      <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n      <td>1/21/2008</td>\n      <td>53.200000</td>\n      <td>-2.916667</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>10/10/1956 21:00</td>\n      <td>edna</td>\n      <td>tx</td>\n      <td>us</td>\n      <td>circle</td>\n      <td>20.0</td>\n      <td>1/2 hour</td>\n      <td>My older brother and twin sister were leaving ...</td>\n      <td>1/17/2004</td>\n      <td>28.978333</td>\n      <td>-96.645833</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>10/10/1960 20:00</td>\n      <td>kaneohe</td>\n      <td>hi</td>\n      <td>us</td>\n      <td>light</td>\n      <td>900.0</td>\n      <td>15 minutes</td>\n      <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n      <td>1/22/2004</td>\n      <td>21.418056</td>\n      <td>-157.803611</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 23
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "ufos = pd.read_csv('../data/ufos.csv')\n",
    "ufos.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
      ]
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "source": [
    "\n",
    "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
    "\n",
    "ufos.Country.unique()\n",
    "\n",
    "# 0 au, 1 ca, 2 de, 3 gb, 4 us"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n #   Column     Non-Null Count  Dtype  \n---  ------     --------------  -----  \n 0   Seconds    25863 non-null  float64\n 1   Country    25863 non-null  object \n 2   Latitude   25863 non-null  float64\n 3   Longitude  25863 non-null  float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n"
     ]
    }
   ],
   "source": [
    "ufos.dropna(inplace=True)\n",
    "\n",
    "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
    "\n",
    "ufos.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "    Seconds  Country   Latitude   Longitude\n",
       "2      20.0        3  53.200000   -2.916667\n",
       "3      20.0        4  28.978333  -96.645833\n",
       "14     30.0        4  35.823889  -80.253611\n",
       "23     60.0        4  45.582778 -122.352222\n",
       "24      3.0        3  51.783333   -0.783333"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Seconds</th>\n      <th>Country</th>\n      <th>Latitude</th>\n      <th>Longitude</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2</th>\n      <td>20.0</td>\n      <td>3</td>\n      <td>53.200000</td>\n      <td>-2.916667</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>20.0</td>\n      <td>4</td>\n      <td>28.978333</td>\n      <td>-96.645833</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>30.0</td>\n      <td>4</td>\n      <td>35.823889</td>\n      <td>-80.253611</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>60.0</td>\n      <td>4</td>\n      <td>45.582778</td>\n      <td>-122.352222</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>3.0</td>\n      <td>3</td>\n      <td>51.783333</td>\n      <td>-0.783333</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 26
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
    "\n",
    "ufos.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "Selected_features = ['Seconds','Latitude','Longitude']\n",
    "\n",
    "X = ufos[Selected_features]\n",
    "y = ufos['Country']\n",
    "\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
      "  \"this warning.\", FutureWarning)\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00        41\n",
      "           1       1.00      0.02      0.05       250\n",
      "           2       0.00      0.00      0.00         8\n",
      "           3       0.94      1.00      0.97       131\n",
      "           4       0.95      1.00      0.97      4743\n",
      "\n",
      "    accuracy                           0.95      5173\n",
      "   macro avg       0.78      0.60      0.60      5173\n",
      "weighted avg       0.95      0.95      0.93      5173\n",
      "\n",
      "Predicted labels:  [4 4 4 ... 3 4 4]\n",
      "Accuracy:  0.9512855209742895\n",
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report \n",
    "from sklearn.linear_model import LogisticRegression\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n",
    "predictions = model.predict(X_test)\n",
    "\n",
    "print(classification_report(y_test, predictions))\n",
    "print('Predicted labels: ', predictions)\n",
    "print('Accuracy: ', accuracy_score(y_test, predictions))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[3]\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "model_filename = 'ufo-model.pkl'\n",
    "pickle.dump(model, open(model_filename,'wb'))\n",
    "\n",
    "model = pickle.load(open('ufo-model.pkl','rb'))\n",
    "print(model.predict([[50,44,-12]]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}