{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
   "display_name": "Python 3.7.0 64-bit ('3.7')"
  },
  "metadata": {
   "interpreter": {
    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "## Build an API with two different models\n",
    "\n",
    "Linear Regression\n",
    "Classification"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   City Name Type       Package      Variety Sub Variety  Grade     Date  \\\n",
       "0  BALTIMORE  NaN  24 inch bins          NaN         NaN    NaN  4/29/17   \n",
       "1  BALTIMORE  NaN  24 inch bins          NaN         NaN    NaN   5/6/17   \n",
       "2  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  9/24/16   \n",
       "3  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  9/24/16   \n",
       "4  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  11/5/16   \n",
       "\n",
       "   Low Price  High Price  Mostly Low  ...  Unit of Sale Quality Condition  \\\n",
       "0      270.0       280.0       270.0  ...           NaN     NaN       NaN   \n",
       "1      270.0       280.0       270.0  ...           NaN     NaN       NaN   \n",
       "2      160.0       160.0       160.0  ...           NaN     NaN       NaN   \n",
       "3      160.0       160.0       160.0  ...           NaN     NaN       NaN   \n",
       "4       90.0       100.0        90.0  ...           NaN     NaN       NaN   \n",
       "\n",
       "  Appearance Storage  Crop Repack  Trans Mode  Unnamed: 24  Unnamed: 25  \n",
       "0        NaN     NaN   NaN      E         NaN          NaN          NaN  \n",
       "1        NaN     NaN   NaN      E         NaN          NaN          NaN  \n",
       "2        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
       "3        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
       "4        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
       "\n",
       "[5 rows x 26 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>City Name</th>\n      <th>Type</th>\n      <th>Package</th>\n      <th>Variety</th>\n      <th>Sub Variety</th>\n      <th>Grade</th>\n      <th>Date</th>\n      <th>Low Price</th>\n      <th>High Price</th>\n      <th>Mostly Low</th>\n      <th>...</th>\n      <th>Unit of Sale</th>\n      <th>Quality</th>\n      <th>Condition</th>\n      <th>Appearance</th>\n      <th>Storage</th>\n      <th>Crop</th>\n      <th>Repack</th>\n      <th>Trans Mode</th>\n      <th>Unnamed: 24</th>\n      <th>Unnamed: 25</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>4/29/17</td>\n      <td>270.0</td>\n      <td>280.0</td>\n      <td>270.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>E</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>5/6/17</td>\n      <td>270.0</td>\n      <td>280.0</td>\n      <td>270.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>E</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>9/24/16</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>9/24/16</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>11/5/16</td>\n      <td>90.0</td>\n      <td>100.0</td>\n      <td>90.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 22
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# Use the pumpkin data from Lesso\n",
    "\n",
    "pumpkins = pd.read_csv('../../../Regression/data/US-pumpkins.csv')\n",
    "\n",
    "pumpkins.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "    Package  Low Price  High Price      Price\n",
       "70        0          5           3  13.636364\n",
       "71        0         10           7  16.363636\n",
       "72        0         10           7  16.363636\n",
       "73        0          9           6  15.454545\n",
       "74        0          5           3  13.636364"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Package</th>\n      <th>Low Price</th>\n      <th>High Price</th>\n      <th>Price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>70</th>\n      <td>0</td>\n      <td>5</td>\n      <td>3</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>0</td>\n      <td>10</td>\n      <td>7</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>72</th>\n      <td>0</td>\n      <td>10</td>\n      <td>7</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>73</th>\n      <td>0</td>\n      <td>9</td>\n      <td>6</td>\n      <td>15.454545</td>\n    </tr>\n    <tr>\n      <th>74</th>\n      <td>0</td>\n      <td>5</td>\n      <td>3</td>\n      <td>13.636364</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 23
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n",
    "\n",
    "new_columns = ['Package', 'Low Price', 'High Price']\n",
    "\n",
    "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n",
    "\n",
    "## price is the average of low and high prices\n",
    "\n",
    "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n",
    "\n",
    "new_pumpkins = pd.DataFrame({ 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n",
    "\n",
    "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n",
    "\n",
    "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n",
    "\n",
    "new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n",
    "\n",
    "new_pumpkins.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 415 entries, 70 to 1742\nData columns (total 4 columns):\n #   Column      Non-Null Count  Dtype  \n---  ------      --------------  -----  \n 0   Package     415 non-null    int64  \n 1   Low Price   415 non-null    int64  \n 2   High Price  415 non-null    int64  \n 3   Price       415 non-null    float64\ndtypes: float64(1), int64(3)\nmemory usage: 16.2 KB\n"
     ]
    }
   ],
   "source": [
    "\n",
    "new_pumpkins.dropna(inplace=True)\n",
    "new_pumpkins.info()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "      Package      Price\n",
       "70          0  13.636364\n",
       "71          0  16.363636\n",
       "72          0  16.363636\n",
       "73          0  15.454545\n",
       "74          0  13.636364\n",
       "...       ...        ...\n",
       "1738        2  30.000000\n",
       "1739        2  28.750000\n",
       "1740        2  25.750000\n",
       "1741        2  24.000000\n",
       "1742        2  24.000000\n",
       "\n",
       "[415 rows x 2 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Package</th>\n      <th>Price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>70</th>\n      <td>0</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>0</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>72</th>\n      <td>0</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>73</th>\n      <td>0</td>\n      <td>15.454545</td>\n    </tr>\n    <tr>\n      <th>74</th>\n      <td>0</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>1738</th>\n      <td>2</td>\n      <td>30.000000</td>\n    </tr>\n    <tr>\n      <th>1739</th>\n      <td>2</td>\n      <td>28.750000</td>\n    </tr>\n    <tr>\n      <th>1740</th>\n      <td>2</td>\n      <td>25.750000</td>\n    </tr>\n    <tr>\n      <th>1741</th>\n      <td>2</td>\n      <td>24.000000</td>\n    </tr>\n    <tr>\n      <th>1742</th>\n      <td>2</td>\n      <td>24.000000</td>\n    </tr>\n  </tbody>\n</table>\n<p>415 rows × 2 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 25
    }
   ],
   "source": [
    "new_columns = ['Package', 'Price']\n",
    "lin_pumpkins = new_pumpkins.drop([c for c in new_pumpkins.columns if c not in new_columns], axis='columns')\n",
    "\n",
    "lin_pumpkins\n"
   ]
  },
  {
   "source": [
    "Set X and y arrays to correspond to Package and Price"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = lin_pumpkins.values[:, :1]\n",
    "y = lin_pumpkins.values[:, 1:2]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Model Accuracy:  0.3315342327998989\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
    "lin_reg = LinearRegression()\n",
    "lin_reg.fit(X_train,y_train)\n",
    "\n",
    "pred = lin_reg.predict(X_test)\n",
    "\n",
    "accuracy_score = lin_reg.score(X_train,y_train)\n",
    "print('Model Accuracy: ', accuracy_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[[33.627655]]\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "s = pickle.dumps(lin_reg)\n",
    "model_filename = 'lin-reg-model.pkl'\n",
    "# Open the file to save as pkl file\n",
    "pickle.dump(lin_reg, open(model_filename,'wb'))\n",
    "\n",
    "model = pickle.load(open('lin-reg-model.pkl','rb'))\n",
    "print(model.predict([[2.85]]))\n",
    "\n",
    "# Close the pickle instances\n",
    "# clf2 = pickle.loads(s)\n",
    "# clf2.predict([[2.75]])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}