edits for applied lesson

pull/34/head
Jen Looper 3 years ago
parent 6e2d1d53e0
commit f90ac5ae9d

@ -4,7 +4,7 @@ In this section of the curriculum, you will be introduced to an applied ML topic
## Lessons
1. [Build a Web App](3-Web-App/README.md)
1. [Build a Web App](1-Web-App/README.md)
## Credits

@ -0,0 +1,28 @@
<html>
<head> </head>
<body>
<!-- Load ONNX.js -->
<script src="https://cdn.jsdelivr.net/npm/onnxjs/dist/onnx.min.js"></script>
<!-- Code that consumes ONNX.js -->
<script>
// Number of ingredient feature columns in the cuisine training data. The input
// tensor must be this wide, and the width declared when the model was exported
// to ONNX has to agree with it.
const FEATURE_COUNT = 380;

/**
 * Build the input list for the ONNX session: a single row of FEATURE_COUNT
 * zeros (i.e. "no ingredient selected"), shaped [1, FEATURE_COUNT].
 * Replace the zeros with 1s at the indices of the chosen ingredients to get
 * a meaningful prediction.
 *
 * @returns {Array} one-element array holding the float32 input tensor
 */
function getInputs() {
  const features = new Float32Array(FEATURE_COUNT);
  return [new onnx.Tensor(features, 'float32', [1, FEATURE_COUNT])];
}

/**
 * Load the exported model, run it once on the generated input and log the
 * first output tensor. Rejections propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function runInference() {
  // create a session
  const myOnnxSession = new onnx.InferenceSession();
  // load the ONNX model file
  await myOnnxSession.loadModel('./model-kn.onnx');
  // generate model input
  const inferenceInputs = getInputs();
  console.log(inferenceInputs);
  // execute the model
  const output = await myOnnxSession.run(inferenceInputs);
  // consume the output (run() resolves to a Map; take its first value)
  const outputTensor = output.values().next().value;
  console.log(`model output tensor: ${outputTensor.data}.`);
}

if (typeof onnx === 'undefined') {
  // The CDN script above did not load, so there is nothing to run against.
  console.error('ONNX.js is not loaded; cannot run the model.');
} else {
  // Surface load/inference failures instead of leaving the promise floating.
  runInference().catch((err) => {
    console.error('ONNX inference failed:', err);
  });
}
</script>
</body>
</html>

@ -0,0 +1,56 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"## Build a web app"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pip install skl2onnx"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pip install onnxruntime"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd "
]
}
]
}

@ -0,0 +1,328 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting skl2onnx\n",
" Using cached skl2onnx-1.8.0-py2.py3-none-any.whl (230 kB)\n",
"Requirement already satisfied: six in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from skl2onnx) (1.12.0)\n",
"Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.4.1)\n",
"Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (3.8.0)\n",
"Requirement already satisfied: scikit-learn>=0.19 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (0.24.2)\n",
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from skl2onnx) (1.19.2)\n",
"Collecting onnx>=1.2.1\n",
" Downloading onnx-1.9.0-cp37-cp37m-macosx_10_12_x86_64.whl (12.0 MB)\n",
"\u001b[K |████████████████████████████████| 12.0 MB 6.6 MB/s \n",
"\u001b[?25hCollecting onnxconverter-common<1.9,>=1.6.1\n",
" Downloading onnxconverter_common-1.8.1-py2.py3-none-any.whl (77 kB)\n",
"\u001b[K |████████████████████████████████| 77 kB 8.2 MB/s \n",
"\u001b[?25hRequirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->skl2onnx) (45.1.0)\n",
"Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (0.16.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.19->skl2onnx) (2.1.0)\n",
"Collecting typing-extensions>=3.6.2.1\n",
" Downloading typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)\n",
"Installing collected packages: typing-extensions, onnx, onnxconverter-common, skl2onnx\n",
"Successfully installed onnx-1.9.0 onnxconverter-common-1.8.1 skl2onnx-1.8.0 typing-extensions-3.10.0.0\n",
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install skl2onnx"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting onnxruntime\n",
" Downloading onnxruntime-1.8.0-cp37-cp37m-macosx_10_12_x86_64.whl (5.0 MB)\n",
"\u001b[K |████████████████████████████████| 5.0 MB 3.1 MB/s \n",
"\u001b[?25hRequirement already satisfied: numpy>=1.16.6 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnxruntime) (1.19.2)\n",
"Requirement already satisfied: protobuf in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from onnxruntime) (3.8.0)\n",
"Collecting flatbuffers\n",
" Downloading flatbuffers-2.0-py2.py3-none-any.whl (26 kB)\n",
"Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from protobuf->onnxruntime) (45.1.0)\n",
"Requirement already satisfied: six>=1.9 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from protobuf->onnxruntime) (1.12.0)\n",
"Installing collected packages: flatbuffers, onnxruntime\n",
"Successfully installed flatbuffers-2.0 onnxruntime-1.8.0\n",
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install onnxruntime"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd \n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n",
"0 0 indian 0 0 0 0 0 \n",
"1 1 indian 1 0 0 0 0 \n",
"2 2 indian 0 0 0 0 0 \n",
"3 3 indian 0 0 0 0 0 \n",
"4 4 indian 0 0 0 0 0 \n",
"\n",
" apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"\n",
"[5 rows x 382 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>cuisine</th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>indian</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n 
<tr>\n <th>4</th>\n <td>4</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 382 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"data = pd.read_csv('../../data/cleaned_cuisine.csv')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" almond angelica anise anise_seed apple apple_brandy apricot \\\n",
"0 0 0 0 0 0 0 0 \n",
"1 1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 0 \n",
"\n",
" armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"\n",
"[5 rows x 380 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>artichoke</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n 
<th>4</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"X = data.iloc[:,2:]\n",
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" cuisine\n",
"0 indian\n",
"1 indian\n",
"2 indian\n",
"3 indian\n",
"4 indian"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cuisine</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>indian</td>\n </tr>\n <tr>\n <th>1</th>\n <td>indian</td>\n </tr>\n <tr>\n <th>2</th>\n <td>indian</td>\n </tr>\n <tr>\n <th>3</th>\n <td>indian</td>\n </tr>\n <tr>\n <th>4</th>\n <td>indian</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 11
}
],
"source": [
"y = data[['cuisine']]\n",
"y.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.naive_bayes import BernoulliNB,GaussianNB\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"KNeighborsClassifier()"
]
},
"metadata": {},
"execution_count": 14
}
],
"source": [
"# 5 types of model fitting\n",
"model_gaussian = GaussianNB()\n",
"model_gaussian.fit(X_train,y_train.values.ravel())\n",
"model_rfst = RandomForestClassifier()\n",
"model_rfst.fit(X_train,y_train.values.ravel())\n",
"model_nba = BernoulliNB(binarize = .5)\n",
"model_nba.fit(X_train,y_train.values.ravel())\n",
"model_dt = DecisionTreeClassifier()\n",
"model_dt.fit(X_train,y_train.values.ravel())\n",
"model_kn = KNeighborsClassifier()\n",
"model_kn.fit(X_train,y_train.values.ravel())\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model_kn.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n\n chinese 0.59 0.74 0.66 239\n indian 0.83 0.80 0.82 237\n japanese 0.68 0.77 0.72 252\n korean 0.93 0.65 0.76 220\n thai 0.77 0.71 0.74 251\n\n accuracy 0.73 1199\n macro avg 0.76 0.73 0.74 1199\nweighted avg 0.76 0.73 0.74 1199\n\n"
]
}
],
"source": [
"print(classification_report(y_test,y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from skl2onnx import convert_sklearn\n",
"from skl2onnx.common.data_types import FloatTensorType\n",
"\n",
"initial_type = [('float_input', FloatTensorType([None, 380]))]\n",
"options = {id(model_kn): {'nocl': True, 'zipmap': False}}\n",
"onx = convert_sklearn(model_kn, initial_types=initial_type,options=options)\n",
"with open(\"./model-kn.onnx\", \"wb\") as f:\n",
" f.write(onx.SerializeToString())\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}

@ -146,7 +146,6 @@ for i in range(1, 11):
> 🎓 Inertia: K-Means algorithms attempt to choose centroids to minimize 'inertia', "a measure of how internally coherent clusters are." [source](https://scikit-learn.org/stable/modules/clustering.html). The value is appended to the wcss variable on each iteration.
> 🎓 k-means++: In [Scikit-Learn](https://scikit-learn.org/stable/modules/clustering.html#k-means) you can use the 'k-means++' optimization, which "initializes the centroids to be (generally) distant from each other, leading to probably better results than random initialization."
### Elbow method
Previously, you surmised that, because you have targeted 3 song genres, you should choose 3 clusters. But is that the case? Use the 'elbow method' to make sure.
@ -159,6 +158,7 @@ plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
```
Use the `wcss` variable that you built in the previous step to create a chart showing where the 'bend' in the elbow is, which indicates the optimum number of clusters. Maybe it **is** 3!
![elbow method](images/elbow.png)
@ -176,6 +176,7 @@ plt.xlabel('popularity')
plt.ylabel('danceability')
plt.show()
```
Check the model's accuracy:
```python

Loading…
Cancel
Save