{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
   "display_name": "Python 3.7.0 64-bit ('3.7')"
  },
  "metadata": {
   "interpreter": {
    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "## Build a Web App using a Regression model to learn about UFO sighting"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "           datetime                  city state country     shape  \\\n",
       "0  10/10/1949 20:30            san marcos    tx      us  cylinder   \n",
       "1  10/10/1949 21:00          lackland afb    tx     NaN     light   \n",
       "2  10/10/1955 17:00  chester (uk/england)   NaN      gb    circle   \n",
       "3  10/10/1956 21:00                  edna    tx      us    circle   \n",
       "4  10/10/1960 20:00               kaneohe    hi      us     light   \n",
       "\n",
       "   duration (seconds) duration (hours/min)  \\\n",
       "0              2700.0           45 minutes   \n",
       "1              7200.0              1-2 hrs   \n",
       "2                20.0           20 seconds   \n",
       "3                20.0             1/2 hour   \n",
       "4               900.0           15 minutes   \n",
       "\n",
       "                                            comments date posted   latitude  \\\n",
       "0  This event took place in early fall around 194...   4/27/2004  29.883056   \n",
       "1  1949 Lackland AFB, TX.  Lights racing acros...  12/16/2005  29.384210   \n",
       "2  Green/Orange circular disc over Chester, En...   1/21/2008  53.200000   \n",
       "3  My older brother and twin sister were leaving ...   1/17/2004  28.978333   \n",
       "4  AS a Marine 1st Lt. flying an FJ4B fighter/att...   1/22/2004  21.418056   \n",
       "\n",
       "    longitude  \n",
       "0  -97.941111  \n",
       "1  -98.581082  \n",
       "2   -2.916667  \n",
       "3  -96.645833  \n",
       "4 -157.803611  "
      ],
      "text/html": "
\n\n
\n  \n    \n       | \n      datetime | \n      city | \n      state | \n      country | \n      shape | \n      duration (seconds) | \n      duration (hours/min) | \n      comments | \n      date posted | \n      latitude | \n      longitude | \n    
\n  \n  \n    \n      | 0 | \n      10/10/1949 20:30 | \n      san marcos | \n      tx | \n      us | \n      cylinder | \n      2700.0 | \n      45 minutes | \n      This event took place in early fall around 194... | \n      4/27/2004 | \n      29.883056 | \n      -97.941111 | \n    
\n    \n      | 1 | \n      10/10/1949 21:00 | \n      lackland afb | \n      tx | \n      NaN | \n      light | \n      7200.0 | \n      1-2 hrs | \n      1949 Lackland AFB, TX.  Lights racing acros... | \n      12/16/2005 | \n      29.384210 | \n      -98.581082 | \n    
\n    \n      | 2 | \n      10/10/1955 17:00 | \n      chester (uk/england) | \n      NaN | \n      gb | \n      circle | \n      20.0 | \n      20 seconds | \n      Green/Orange circular disc over Chester, En... | \n      1/21/2008 | \n      53.200000 | \n      -2.916667 | \n    
\n    \n      | 3 | \n      10/10/1956 21:00 | \n      edna | \n      tx | \n      us | \n      circle | \n      20.0 | \n      1/2 hour | \n      My older brother and twin sister were leaving ... | \n      1/17/2004 | \n      28.978333 | \n      -96.645833 | \n    
\n    \n      | 4 | \n      10/10/1960 20:00 | \n      kaneohe | \n      hi | \n      us | \n      light | \n      900.0 | \n      15 minutes | \n      AS a Marine 1st Lt. flying an FJ4B fighter/att... | \n      1/22/2004 | \n      21.418056 | \n      -157.803611 | \n    
\n  \n
\n
 "
     },
     "metadata": {},
     "execution_count": 23
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "ufos = pd.read_csv('../data/ufos.csv')\n",
    "ufos.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
      ]
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "source": [
    "\n",
    "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
    "\n",
    "ufos.Country.unique()\n",
    "\n",
    "# 0 au, 1 ca, 2 de, 3 gb, 4 us"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\nInt64Index: 25863 entries, 2 to 80330\nData columns (total 4 columns):\n #   Column     Non-Null Count  Dtype  \n---  ------     --------------  -----  \n 0   Seconds    25863 non-null  float64\n 1   Country    25863 non-null  object \n 2   Latitude   25863 non-null  float64\n 3   Longitude  25863 non-null  float64\ndtypes: float64(3), object(1)\nmemory usage: 1010.3+ KB\n"
     ]
    }
   ],
   "source": [
    "ufos.dropna(inplace=True)\n",
    "\n",
    "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
    "\n",
    "ufos.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "    Seconds  Country   Latitude   Longitude\n",
       "2      20.0        3  53.200000   -2.916667\n",
       "3      20.0        4  28.978333  -96.645833\n",
       "14     30.0        4  35.823889  -80.253611\n",
       "23     60.0        4  45.582778 -122.352222\n",
       "24      3.0        3  51.783333   -0.783333"
      ],
      "text/html": "\n\n
\n  \n    \n       | \n      Seconds | \n      Country | \n      Latitude | \n      Longitude | \n    
\n  \n  \n    \n      | 2 | \n      20.0 | \n      3 | \n      53.200000 | \n      -2.916667 | \n    
\n    \n      | 3 | \n      20.0 | \n      4 | \n      28.978333 | \n      -96.645833 | \n    
\n    \n      | 14 | \n      30.0 | \n      4 | \n      35.823889 | \n      -80.253611 | \n    
\n    \n      | 23 | \n      60.0 | \n      4 | \n      45.582778 | \n      -122.352222 | \n    
\n    \n      | 24 | \n      3.0 | \n      3 | \n      51.783333 | \n      -0.783333 | \n    
\n  \n
\n
 "
     },
     "metadata": {},
     "execution_count": 26
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
    "\n",
    "ufos.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "Selected_features = ['Seconds','Latitude','Longitude']\n",
    "\n",
    "X = ufos[Selected_features]\n",
    "y = ufos['Country']\n",
    "\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
      "  \"this warning.\", FutureWarning)\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00        41\n",
      "           1       1.00      0.02      0.05       250\n",
      "           2       0.00      0.00      0.00         8\n",
      "           3       0.94      1.00      0.97       131\n",
      "           4       0.95      1.00      0.97      4743\n",
      "\n",
      "    accuracy                           0.95      5173\n",
      "   macro avg       0.78      0.60      0.60      5173\n",
      "weighted avg       0.95      0.95      0.93      5173\n",
      "\n",
      "Predicted labels:  [4 4 4 ... 3 4 4]\n",
      "Accuracy:  0.9512855209742895\n",
      "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/metrics/classification.py:1437: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report \n",
    "from sklearn.linear_model import LogisticRegression\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n",
    "predictions = model.predict(X_test)\n",
    "\n",
    "print(classification_report(y_test, predictions))\n",
    "print('Predicted labels: ', predictions)\n",
    "print('Accuracy: ', accuracy_score(y_test, predictions))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[3]\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "model_filename = 'ufo-model.pkl'\n",
    "pickle.dump(model, open(model_filename,'wb'))\n",
    "\n",
    "model = pickle.load(open('ufo-model.pkl','rb'))\n",
    "print(model.predict([[50,44,-12]]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}