build an api

5 years ago · 48b158eefb
parent d69bf5f5ee
commit 48b158eefb
8 changed files with 343 additions and 39 deletions
--- a/API/1-API/README.md
+++ b/API/1-API/README.md
@ -0,0 +1,13 @@
+# Build an API
+
+## [Pre-lecture quiz](link-to-quiz-app)
+
+✅ Knowledge Check - use this moment to stretch students' knowledge with open questions
+
+🚀 Challenge: Add a challenge for students to work on collaboratively in class to enhance the project
+
+## [Post-lecture quiz](link-to-quiz-app)
+
+## Review & Self Study
+
+**Assignment**: [Assignment Name](assignment.md)
--- a/API/1-API/assignment.md
+++ b/API/1-API/assignment.md
@ -0,0 +1 @@
+# Assignment
--- a/API/1-API/solution/lin-reg-model.pkl
+++ b/API/1-API/solution/lin-reg-model.pkl
--- a/API/1-API/solution/notebook.ipynb
+++ b/API/1-API/solution/notebook.ipynb
@ -0,0 +1,275 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.0"
+  },
+  "orig_nbformat": 2,
+  "kernelspec": {
+   "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
+   "display_name": "Python 3.7.0 64-bit ('3.7')"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "source": [
+    "## Build an API with two different models\n",
+    "\n",
+    "- Linear Regression\n",
+    "- Classification"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "   City Name Type       Package      Variety Sub Variety  Grade     Date  \\\n",
+       "0  BALTIMORE  NaN  24 inch bins          NaN         NaN    NaN  4/29/17   \n",
+       "1  BALTIMORE  NaN  24 inch bins          NaN         NaN    NaN   5/6/17   \n",
+       "2  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  9/24/16   \n",
+       "3  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  9/24/16   \n",
+       "4  BALTIMORE  NaN  24 inch bins  HOWDEN TYPE         NaN    NaN  11/5/16   \n",
+       "\n",
+       "   Low Price  High Price  Mostly Low  ...  Unit of Sale Quality Condition  \\\n",
+       "0      270.0       280.0       270.0  ...           NaN     NaN       NaN   \n",
+       "1      270.0       280.0       270.0  ...           NaN     NaN       NaN   \n",
+       "2      160.0       160.0       160.0  ...           NaN     NaN       NaN   \n",
+       "3      160.0       160.0       160.0  ...           NaN     NaN       NaN   \n",
+       "4       90.0       100.0        90.0  ...           NaN     NaN       NaN   \n",
+       "\n",
+       "  Appearance Storage  Crop Repack  Trans Mode  Unnamed: 24  Unnamed: 25  \n",
+       "0        NaN     NaN   NaN      E         NaN          NaN          NaN  \n",
+       "1        NaN     NaN   NaN      E         NaN          NaN          NaN  \n",
+       "2        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
+       "3        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
+       "4        NaN     NaN   NaN      N         NaN          NaN          NaN  \n",
+       "\n",
+       "[5 rows x 26 columns]"
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>City Name</th>\n      <th>Type</th>\n      <th>Package</th>\n      <th>Variety</th>\n      <th>Sub Variety</th>\n      <th>Grade</th>\n      <th>Date</th>\n      <th>Low Price</th>\n      <th>High Price</th>\n      <th>Mostly Low</th>\n      <th>...</th>\n      <th>Unit of Sale</th>\n      <th>Quality</th>\n      <th>Condition</th>\n      <th>Appearance</th>\n      <th>Storage</th>\n      <th>Crop</th>\n      <th>Repack</th>\n      <th>Trans Mode</th>\n      <th>Unnamed: 24</th>\n      <th>Unnamed: 25</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>4/29/17</td>\n      <td>270.0</td>\n      <td>280.0</td>\n      <td>270.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>E</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>5/6/17</td>\n      <td>270.0</td>\n      <td>280.0</td>\n      <td>270.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>E</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch 
bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>9/24/16</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>9/24/16</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>160.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>BALTIMORE</td>\n      <td>NaN</td>\n      <td>24 inch bins</td>\n      <td>HOWDEN TYPE</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>11/5/16</td>\n      <td>90.0</td>\n      <td>100.0</td>\n      <td>90.0</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
+     },
+     "metadata": {},
+     "execution_count": 22
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "\n",
+    "# Use the pumpkin data from the Regression lessons\n",
+    "\n",
+    "pumpkins = pd.read_csv('../../../Regression/data/US-pumpkins.csv')\n",
+    "\n",
+    "pumpkins.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "    Package  Low Price  High Price      Price\n",
+       "70        0          5           3  13.636364\n",
+       "71        0         10           7  16.363636\n",
+       "72        0         10           7  16.363636\n",
+       "73        0          9           6  15.454545\n",
+       "74        0          5           3  13.636364"
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Package</th>\n      <th>Low Price</th>\n      <th>High Price</th>\n      <th>Price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>70</th>\n      <td>0</td>\n      <td>5</td>\n      <td>3</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>0</td>\n      <td>10</td>\n      <td>7</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>72</th>\n      <td>0</td>\n      <td>10</td>\n      <td>7</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>73</th>\n      <td>0</td>\n      <td>9</td>\n      <td>6</td>\n      <td>15.454545</td>\n    </tr>\n    <tr>\n      <th>74</th>\n      <td>0</td>\n      <td>5</td>\n      <td>3</td>\n      <td>13.636364</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "metadata": {},
+     "execution_count": 23
+    }
+   ],
+   "source": [
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "\n",
+    "# Restrict the data to packages whose description mentions 'bushel'.\n",
+    "bushel_rows = pumpkins['Package'].str.contains('bushel', case=True, regex=True)\n",
+    "pumpkins = pumpkins[bushel_rows]\n",
+    "\n",
+    "# Discard every column except the package size and the two price bounds.\n",
+    "new_columns = ['Package', 'Low Price', 'High Price']\n",
+    "unused_columns = [name for name in pumpkins.columns if name not in new_columns]\n",
+    "pumpkins = pumpkins.drop(unused_columns, axis=1)\n",
+    "\n",
+    "# Price starts out as the midpoint of the low and high prices.\n",
+    "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n",
+    "\n",
+    "new_pumpkins = pd.DataFrame({\n",
+    "    'Package': pumpkins['Package'],\n",
+    "    'Low Price': pumpkins['Low Price'],\n",
+    "    'High Price': pumpkins['High Price'],\n",
+    "    'Price': price,\n",
+    "})\n",
+    "\n",
+    "# Rescale the fractional package sizes: '1 1/9' rows are divided by 1.1\n",
+    "# and '1/2' rows are doubled.\n",
+    "is_one_and_ninth = new_pumpkins['Package'].str.contains('1 1/9')\n",
+    "new_pumpkins.loc[is_one_and_ninth, 'Price'] = price/1.1\n",
+    "\n",
+    "is_half = new_pumpkins['Package'].str.contains('1/2')\n",
+    "new_pumpkins.loc[is_half, 'Price'] = price*2\n",
+    "\n",
+    "# Label-encode every column except the last one (Price).\n",
+    "new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n",
+    "\n",
+    "new_pumpkins.head()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 415 entries, 70 to 1742\nData columns (total 4 columns):\n #   Column      Non-Null Count  Dtype  \n---  ------      --------------  -----  \n 0   Package     415 non-null    int64  \n 1   Low Price   415 non-null    int64  \n 2   High Price  415 non-null    int64  \n 3   Price       415 non-null    float64\ndtypes: float64(1), int64(3)\nmemory usage: 16.2 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Drop rows with missing values by rebinding the name rather than mutating\n",
+    "# the frame in place, then print a summary of the remaining columns.\n",
+    "new_pumpkins = new_pumpkins.dropna()\n",
+    "new_pumpkins.info()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "      Package      Price\n",
+       "70          0  13.636364\n",
+       "71          0  16.363636\n",
+       "72          0  16.363636\n",
+       "73          0  15.454545\n",
+       "74          0  13.636364\n",
+       "...       ...        ...\n",
+       "1738        2  30.000000\n",
+       "1739        2  28.750000\n",
+       "1740        2  25.750000\n",
+       "1741        2  24.000000\n",
+       "1742        2  24.000000\n",
+       "\n",
+       "[415 rows x 2 columns]"
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Package</th>\n      <th>Price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>70</th>\n      <td>0</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>71</th>\n      <td>0</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>72</th>\n      <td>0</td>\n      <td>16.363636</td>\n    </tr>\n    <tr>\n      <th>73</th>\n      <td>0</td>\n      <td>15.454545</td>\n    </tr>\n    <tr>\n      <th>74</th>\n      <td>0</td>\n      <td>13.636364</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>1738</th>\n      <td>2</td>\n      <td>30.000000</td>\n    </tr>\n    <tr>\n      <th>1739</th>\n      <td>2</td>\n      <td>28.750000</td>\n    </tr>\n    <tr>\n      <th>1740</th>\n      <td>2</td>\n      <td>25.750000</td>\n    </tr>\n    <tr>\n      <th>1741</th>\n      <td>2</td>\n      <td>24.000000</td>\n    </tr>\n    <tr>\n      <th>1742</th>\n      <td>2</td>\n      <td>24.000000</td>\n    </tr>\n  </tbody>\n</table>\n<p>415 rows × 2 columns</p>\n</div>"
+     },
+     "metadata": {},
+     "execution_count": 25
+    }
+   ],
+   "source": [
+    "# Keep only the feature (Package) and the target (Price) for the regression.\n",
+    "new_columns = ['Package', 'Price']\n",
+    "extra_columns = [name for name in new_pumpkins.columns if name not in new_columns]\n",
+    "lin_pumpkins = new_pumpkins.drop(extra_columns, axis='columns')\n",
+    "\n",
+    "lin_pumpkins\n"
+   ]
+  },
+  {
+   "source": [
+    "Set X and y arrays to correspond to Package and Price"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Column 0 is Package (feature) and column 1 is Price (target);\n",
+    "# slicing instead of indexing keeps both arrays two-dimensional.\n",
+    "package_price = lin_pumpkins.values\n",
+    "X = package_price[:, :1]\n",
+    "y = package_price[:, 1:2]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Model Accuracy:  0.3315342327998989\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
+    "lin_reg = LinearRegression()\n",
+    "lin_reg.fit(X_train,y_train)\n",
+    "\n",
+    "# LinearRegression.score returns R^2, not a classification accuracy. This\n",
+    "# first figure is measured on the same rows the model was fitted on.\n",
+    "accuracy_score = lin_reg.score(X_train,y_train)\n",
+    "print('Model Accuracy: ', accuracy_score)\n",
+    "\n",
+    "# Also evaluate on the held-out rows, which the model has not seen,\n",
+    "# so the reported quality is not limited to the training data.\n",
+    "pred = lin_reg.predict(X_test)\n",
+    "print('Test R^2: ', r2_score(y_test, pred))\n",
+    "print('Test MSE: ', mean_squared_error(y_test, pred))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "[[33.627655]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pickle\n",
+    "\n",
+    "model_filename = 'lin-reg-model.pkl'\n",
+    "\n",
+    "# Persist the fitted regressor. The context manager closes the file handle,\n",
+    "# so the pickle is fully written to disk before it is read back below.\n",
+    "with open(model_filename, 'wb') as model_file:\n",
+    "    pickle.dump(lin_reg, model_file)\n",
+    "\n",
+    "# Round-trip check: reload the file that was just written and predict with it.\n",
+    "# Only unpickle files you created yourself; pickle.load can run arbitrary code.\n",
+    "with open(model_filename, 'rb') as model_file:\n",
+    "    model = pickle.load(model_file)\n",
+    "\n",
+    "print(model.predict([[2.85]]))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ]
+}
--- a/API/1-API/translations/README.es.md
+++ b/API/1-API/translations/README.es.md
--- a/API/README.md
+++ b/API/README.md
@ -0,0 +1,11 @@
+# Getting Started with APIs
+
+In this section of the curriculum, you will be introduced to building an API for your machine learning model.
+
+## Topics
+
+1. [Build an API for your model](1-API/README.md)
+
+## Credits
+
+"Build an API" was written with ♥️ by [Jen Looper](https://twitter.com/jenlooper)
--- a/API/translations/README.es.md
+++ b/API/translations/README.es.md
--- a/Regression/3-Linear/solution/notebook.ipynb
+++ b/Regression/3-Linear/solution/notebook.ipynb