diff --git a/4-Classification/3-Classifiers-2/notebook copy.ipynb b/4-Classification/3-Classifiers-2/notebook copy.ipynb new file mode 100644 index 00000000..87e8202f --- /dev/null +++ b/4-Classification/3-Classifiers-2/notebook copy.ipynb @@ -0,0 +1,152 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Classification Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", + "0 0 indian 0 0 0 0 0 \n", + "1 1 indian 1 0 0 0 0 \n", + "2 2 indian 0 0 0 0 0 \n", + "3 3 indian 0 0 0 0 0 \n", + "4 4 indian 0 0 0 0 0 \n", + "\n", + " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 382 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "import pandas as pd\n", + "recipes_df = pd.read_csv(\"../data/cleaned_cuisine.csv\")\n", + "recipes_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 indian\n", + "1 indian\n", + "2 indian\n", + "3 indian\n", + "4 indian\n", + "Name: cuisine, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "recipes_label_df = recipes_df['cuisine']\n", + "recipes_label_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " almond angelica anise anise_seed apple apple_brandy apricot \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "\n", + " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n", + "0 0 0 0 ... 0 0 0 \n", + "1 0 0 0 ... 0 0 0 \n", + "2 0 0 0 ... 0 0 0 \n", + "3 0 0 0 ... 0 0 0 \n", + "4 0 0 0 ... 0 0 0 \n", + "\n", + " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 1 0 \n", + "\n", + "[5 rows x 380 columns]" + ], + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "recipes_feature_df = recipes_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", + "recipes_feature_df.head()" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.0 64-bit ('3.7')" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "metadata": { + "interpreter": { + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/4-Classification/3-Classifiers-2/notebook.ipynb b/4-Classification/3-Classifiers-2/notebook.ipynb deleted file mode 100644 index e69de29b..00000000 diff --git a/4-Classification/3-Classifiers-2/solution/notebook.ipynb b/4-Classification/3-Classifiers-2/solution/notebook.ipynb index 4cd14195..f41302e2 100644 --- a/4-Classification/3-Classifiers-2/solution/notebook.ipynb +++ b/4-Classification/3-Classifiers-2/solution/notebook.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -42,7 +42,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" }, "metadata": {}, - "execution_count": 1 + "execution_count": 15 } ], "source": [ @@ -53,20 +53,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.model_selection import train_test_split, cross_val_score\n", - "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", - "from sklearn.svm import SVC\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -82,7 +69,7 @@ ] }, "metadata": {}, - "execution_count": 3 + "execution_count": 16 } ], "source": [ @@ -92,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -125,7 +112,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" }, "metadata": {}, - "execution_count": 4 + "execution_count": 17 } ], "source": [ @@ -135,7 +122,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -151,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -178,17 +178,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 21, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Accuracy (train) for L1 logistic: 79.8% \n", - "Accuracy (train) for L2 logistic (Multinomial): 80.2% \n", - "Accuracy (train) for L2 logistic (OvR): 81.3% \n", - "Accuracy (train) for Linear SVC: 79.6% \n" + "Accuracy (train) for L1 logistic: 79.9% \n", + "Accuracy (train) for L2 logistic (Multinomial): 80.6% \n", + "Accuracy (train) for L2 logistic (OvR): 81.2% \n", + "Accuracy (train) for Linear SVC: 78.9% \n" ] } ], @@ -213,10 +213,10 @@ ], "metadata": { "interpreter": { - "hash": "dd61f40108e2a19f4ef0d3ebbc6b6eea57ab3c4bc13b15fe6f390d3d86442534" + "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" }, "kernelspec": { - "name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7", + "name": "python3", "display_name": "Python 3.7.0 64-bit ('3.7')" }, "language_info": {