ML Model Predict Cuisine Based on Ingridients

2 years ago · f6855e0c65
parent d33fba0977
commit f6855e0c65
1 changed files with 809 additions and 12 deletions
--- a/4-Classification/2-Classifiers-1/notebook.ipynb
+++ b/4-Classification/2-Classifiers-1/notebook.ipynb
@ -1,5 +1,811 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build Classification Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>cuisine</th>\n",
       "      <th>almond</th>\n",
       "      <th>angelica</th>\n",
       "      <th>anise</th>\n",
       "      <th>anise_seed</th>\n",
       "      <th>apple</th>\n",
       "      <th>apple_brandy</th>\n",
       "      <th>apricot</th>\n",
       "      <th>armagnac</th>\n",
       "      <th>...</th>\n",
       "      <th>whiskey</th>\n",
       "      <th>white_bread</th>\n",
       "      <th>white_wine</th>\n",
       "      <th>whole_grain_wheat_flour</th>\n",
       "      <th>wine</th>\n",
       "      <th>wood</th>\n",
       "      <th>yam</th>\n",
       "      <th>yeast</th>\n",
       "      <th>yogurt</th>\n",
       "      <th>zucchini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>indian</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>indian</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>indian</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>indian</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>indian</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 382 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0 cuisine  almond  angelica  anise  anise_seed  apple  \\\n",
       "0           0  indian       0         0      0           0      0   \n",
       "1           1  indian       1         0      0           0      0   \n",
       "2           2  indian       0         0      0           0      0   \n",
       "3           3  indian       0         0      0           0      0   \n",
       "4           4  indian       0         0      0           0      0   \n",
       "\n",
       "   apple_brandy  apricot  armagnac  ...  whiskey  white_bread  white_wine  \\\n",
       "0             0        0         0  ...        0            0           0   \n",
       "1             0        0         0  ...        0            0           0   \n",
       "2             0        0         0  ...        0            0           0   \n",
       "3             0        0         0  ...        0            0           0   \n",
       "4             0        0         0  ...        0            0           0   \n",
       "\n",
       "   whole_grain_wheat_flour  wine  wood  yam  yeast  yogurt  zucchini  \n",
       "0                        0     0     0    0      0       0         0  \n",
       "1                        0     0     0    0      0       0         0  \n",
       "2                        0     0     0    0      0       0         0  \n",
       "3                        0     0     0    0      0       0         0  \n",
       "4                        0     0     0    0      0       1         0  \n",
       "\n",
       "[5 rows x 382 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n",
    "cuisines_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split, cross_val_score\n",
    "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n",
    "from sklearn.svm import SVC\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    indian\n",
       "1    indian\n",
       "2    indian\n",
       "3    indian\n",
       "4    indian\n",
       "Name: cuisine, dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cuisines_label_df = cuisines_df['cuisine']\n",
    "cuisines_label_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>almond</th>\n",
       "      <th>angelica</th>\n",
       "      <th>anise</th>\n",
       "      <th>anise_seed</th>\n",
       "      <th>apple</th>\n",
       "      <th>apple_brandy</th>\n",
       "      <th>apricot</th>\n",
       "      <th>armagnac</th>\n",
       "      <th>artemisia</th>\n",
       "      <th>artichoke</th>\n",
       "      <th>...</th>\n",
       "      <th>whiskey</th>\n",
       "      <th>white_bread</th>\n",
       "      <th>white_wine</th>\n",
       "      <th>whole_grain_wheat_flour</th>\n",
       "      <th>wine</th>\n",
       "      <th>wood</th>\n",
       "      <th>yam</th>\n",
       "      <th>yeast</th>\n",
       "      <th>yogurt</th>\n",
       "      <th>zucchini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 380 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   almond  angelica  anise  anise_seed  apple  apple_brandy  apricot  \\\n",
       "0       0         0      0           0      0             0        0   \n",
       "1       1         0      0           0      0             0        0   \n",
       "2       0         0      0           0      0             0        0   \n",
       "3       0         0      0           0      0             0        0   \n",
       "4       0         0      0           0      0             0        0   \n",
       "\n",
       "   armagnac  artemisia  artichoke  ...  whiskey  white_bread  white_wine  \\\n",
       "0         0          0          0  ...        0            0           0   \n",
       "1         0          0          0  ...        0            0           0   \n",
       "2         0          0          0  ...        0            0           0   \n",
       "3         0          0          0  ...        0            0           0   \n",
       "4         0          0          0  ...        0            0           0   \n",
       "\n",
       "   whole_grain_wheat_flour  wine  wood  yam  yeast  yogurt  zucchini  \n",
       "0                        0     0     0    0      0       0         0  \n",
       "1                        0     0     0    0      0       0         0  \n",
       "2                        0     0     0    0      0       0         0  \n",
       "3                        0     0     0    0      0       0         0  \n",
       "4                        0     0     0    0      0       1         0  \n",
       "\n",
       "[5 rows x 380 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n",
    "cuisines_feature_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy is 0.7881567973311092\n"
     ]
    }
   ],
   "source": [
    "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n",
    "model = lr.fit(X_train, np.ravel(y_train))\n",
    "\n",
    "accuracy = model.score(X_test, y_test)\n",
    "print (\"Accuracy is {}\".format(accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ingredients: Index(['black_pepper', 'carrot', 'corn', 'onion', 'pineapple', 'pork',\n",
      "       'shiitake', 'soy_sauce', 'starch', 'vinegar'],\n",
      "      dtype='object')\n",
      "cuisine: chinese\n"
     ]
    }
   ],
   "source": [
    "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
    "print(f'cuisine: {y_test.iloc[50]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ray/Desktop/ML-For-Beginners/.venv/lib/python3.12/site-packages/sklearn/base.py:465: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>chinese</th>\n",
       "      <td>0.954918</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>korean</th>\n",
       "      <td>0.023422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>japanese</th>\n",
       "      <td>0.018495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>thai</th>\n",
       "      <td>0.002817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>indian</th>\n",
       "      <td>0.000348</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0\n",
       "chinese   0.954918\n",
       "korean    0.023422\n",
       "japanese  0.018495\n",
       "thai      0.002817\n",
       "indian    0.000348"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test= X_test.iloc[50].values.reshape(-1, 1).T\n",
    "proba = model.predict_proba(test)\n",
    "classes = model.classes_\n",
    "resultdf = pd.DataFrame(data=proba, columns=classes)\n",
    "\n",
    "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n",
    "topPrediction.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.74      0.74      0.74       234\n",
      "      indian       0.92      0.91      0.92       254\n",
      "    japanese       0.63      0.75      0.68       222\n",
      "      korean       0.80      0.78      0.79       235\n",
      "        thai       0.86      0.75      0.80       254\n",
      "\n",
      "    accuracy                           0.79      1199\n",
      "   macro avg       0.79      0.79      0.79      1199\n",
      "weighted avg       0.80      0.79      0.79      1199\n",
      "\n"
     ]
    }
   ],
   "source": [
    "y_pred = model.predict(X_test)\n",
    "print(classification_report(y_test,y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy is 0.8006672226855713\n"
     ]
    }
   ],
   "source": [
    "lr = LogisticRegression(multi_class='ovr',solver='liblinear')\n",
    "model = lr.fit(X_train, np.ravel(y_train))\n",
    "\n",
    "accuracy = model.score(X_test, y_test)\n",
    "print (\"Accuracy is {}\".format(accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ingredients: Index(['bell_pepper', 'carrot', 'egg', 'pork', 'shiitake', 'soy_sauce',\n",
      "       'starch', 'vegetable', 'vegetable_oil'],\n",
      "      dtype='object')\n",
      "cuisine: chinese\n"
     ]
    }
   ],
   "source": [
    "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
    "print(f'cuisine: {y_test.iloc[50]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ray/Desktop/ML-For-Beginners/.venv/lib/python3.12/site-packages/sklearn/base.py:465: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>chinese</th>\n",
       "      <td>0.928517</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>japanese</th>\n",
       "      <td>0.042258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>korean</th>\n",
       "      <td>0.026577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>thai</th>\n",
       "      <td>0.002424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>indian</th>\n",
       "      <td>0.000223</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 0\n",
       "chinese   0.928517\n",
       "japanese  0.042258\n",
       "korean    0.026577\n",
       "thai      0.002424\n",
       "indian    0.000223"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test= X_test.iloc[50].values.reshape(-1, 1).T\n",
    "proba = model.predict_proba(test)\n",
    "classes = model.classes_\n",
    "resultdf = pd.DataFrame(data=proba, columns=classes)\n",
    "\n",
    "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n",
    "topPrediction.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.74      0.69      0.71       243\n",
      "      indian       0.91      0.90      0.90       233\n",
      "    japanese       0.74      0.76      0.75       238\n",
      "      korean       0.82      0.81      0.81       238\n",
      "        thai       0.81      0.84      0.83       247\n",
      "\n",
      "    accuracy                           0.80      1199\n",
      "   macro avg       0.80      0.80      0.80      1199\n",
      "weighted avg       0.80      0.80      0.80      1199\n",
      "\n"
     ]
    }
   ],
   "source": [
    "y_pred = model.predict(X_test)\n",
    "print(classification_report(y_test,y_pred))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
@ -10,19 +816,10 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": 3
+   "version": "3.12.0"
  },
  "orig_nbformat": 2
 },
 "nbformat": 4,
- "nbformat_minor": 2,
+ "nbformat_minor": 2
 "cells": [
  {
   "source": [
    "# Build Classification Models"
   ],
   "cell_type": "markdown",
   "metadata": {}
  }
 ]
 }