From 4873a37d142b5bbd967914a0f84f80e29c6631a9 Mon Sep 17 00:00:00 2001 From: raygaeta Date: Wed, 24 Jan 2024 09:52:41 -0800 Subject: [PATCH] More Classifiers --- .../3-Classifiers-2/notebook.ipynb | 498 +++++++++++++++++- 1 file changed, 481 insertions(+), 17 deletions(-) diff --git a/4-Classification/3-Classifiers-2/notebook.ipynb b/4-Classification/3-Classifiers-2/notebook.ipynb index 4659a7b6..20ecc4e5 100644 --- a/4-Classification/3-Classifiers-2/notebook.ipynb +++ b/4-Classification/3-Classifiers-2/notebook.ipynb @@ -9,12 +9,179 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 38, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n", + "

5 rows × 382 columns

\n", + "
" + ], "text/plain": [ " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", "0 0 indian 0 0 0 0 0 \n", @@ -38,11 +205,11 @@ "4 0 0 0 0 0 1 0 \n", "\n", "[5 rows x 382 columns]" - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Unnamed: 0cuisinealmondangelicaaniseanise_seedappleapple_brandyapricotarmagnac...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00indian00000000...0000000000
11indian10000000...0000000000
22indian00000000...0000000000
33indian00000000...0000000000
44indian00000000...0000000010
\n

5 rows × 382 columns

\n
" + ] }, + "execution_count": 38, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ @@ -53,11 +220,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 39, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0 indian\n", @@ -68,8 +234,9 @@ "Name: cuisine, dtype: object" ] }, + "execution_count": 39, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], "source": [ @@ -79,12 +246,179 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 40, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n", + "

5 rows × 380 columns

\n", + "
" + ], "text/plain": [ " almond angelica anise anise_seed apple apple_brandy apricot \\\n", "0 0 0 0 0 0 0 0 \n", @@ -108,17 +442,147 @@ "4 0 0 0 0 0 1 0 \n", "\n", "[5 rows x 380 columns]" - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
almondangelicaaniseanise_seedappleapple_brandyapricotarmagnacartemisiaartichoke...whiskeywhite_breadwhite_winewhole_grain_wheat_flourwinewoodyamyeastyogurtzucchini
00000000000...0000000000
11000000000...0000000000
20000000000...0000000000
30000000000...0000000000
40000000000...0000000010
\n

5 rows × 380 columns

\n
" + ] }, + "execution_count": 40, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n", "cuisines_feature_df.head()" ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.svm import SVC\n", + "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n", + "from sklearn.model_selection import train_test_split, cross_val_score\n", + "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "C = 10\n", + "# Create different classifiers.\n", + "classifiers = {\n", + " 'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n", + " 'KNN classifier': KNeighborsClassifier(C),\n", + " 'SVC': SVC(),\n", + " 'RFST': RandomForestClassifier(n_estimators=100),\n", + " 'ADA': AdaBoostClassifier(n_estimators=100)\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy (train) for Linear SVC: 78.6% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.66 0.72 0.69 233\n", + " indian 0.88 0.86 0.87 236\n", + " japanese 0.80 0.73 0.76 250\n", + " korean 0.82 0.75 0.78 248\n", + " thai 0.79 0.87 0.82 232\n", + "\n", + " accuracy 0.79 1199\n", + " macro avg 0.79 0.79 0.79 1199\n", + "weighted avg 0.79 0.79 0.79 1199\n", + "\n", + "Accuracy (train) for KNN classifier: 73.0% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.67 0.69 0.68 233\n", + " indian 0.81 0.80 0.80 236\n", + " japanese 0.66 0.84 0.74 250\n", + " korean 0.92 0.53 0.68 248\n", + " thai 0.70 0.80 0.75 232\n", + "\n", + " accuracy 0.73 1199\n", + " macro avg 0.75 0.73 0.73 1199\n", + "weighted avg 0.75 0.73 0.73 1199\n", + "\n", + "Accuracy (train) for SVC: 81.9% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.73 0.75 0.74 233\n", + " indian 0.90 0.88 0.89 236\n", + " japanese 0.84 0.79 0.81 250\n", + " korean 0.87 0.79 0.83 248\n", + " thai 0.78 0.89 0.83 232\n", + "\n", + " accuracy 0.82 1199\n", + " macro avg 0.82 0.82 0.82 1199\n", + "weighted avg 0.82 0.82 0.82 1199\n", + "\n", + "Accuracy (train) for RFST: 84.8% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.77 0.79 0.78 233\n", + " indian 0.90 0.92 0.91 236\n", + " japanese 0.89 0.80 0.84 250\n", + " korean 0.87 0.82 0.84 248\n", + " thai 0.81 0.91 0.86 232\n", + "\n", + " accuracy 0.85 1199\n", + " macro avg 0.85 0.85 0.85 1199\n", + "weighted avg 0.85 0.85 0.85 1199\n", + "\n", + "Accuracy (train) for ADA: 69.9% \n", + " precision recall f1-score support\n", + "\n", + " chinese 0.62 0.48 0.54 233\n", + " indian 0.84 0.84 0.84 236\n", + " japanese 0.69 0.57 0.62 250\n", + " korean 0.66 0.81 0.73 248\n", + " thai 0.68 0.79 0.73 232\n", + "\n", + " accuracy 0.70 1199\n", + " macro avg 0.70 0.70 0.69 1199\n", + "weighted avg 0.70 0.70 0.69 1199\n", + "\n" + ] + } + ], + "source": [ + "n_classifiers = len(classifiers)\n", + "\n", + "for index, (name, classifier) in enumerate(classifiers.items()):\n", + " classifier.fit(X_train, np.ravel(y_train))\n", + "\n", + " y_pred = classifier.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " print(\"Accuracy (train) for %s: %0.1f%% \" % (name, accuracy * 100))\n", + " print(classification_report(y_test,y_pred))" + ] } ], "metadata": { @@ -126,8 +590,8 @@ "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.7.0 64-bit ('3.7')" + "display_name": "Python 3.7.0 64-bit ('3.7')", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -139,7 +603,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.12.0" }, "metadata": { "interpreter": { @@ -149,4 +613,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +}