ML-For-Beginners/translations/nl/4-Classification/3-Classifiers-2/solution/notebook.ipynb

{
 "cells": [
  {
   "source": [
    "# Bouw Meer Classificatiemodellen\n"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   Unnamed: 0 cuisine  almond  angelica  anise  anise_seed  apple  \\\n",
       "0           0  indian       0         0      0           0      0   \n",
       "1           1  indian       1         0      0           0      0   \n",
       "2           2  indian       0         0      0           0      0   \n",
       "3           3  indian       0         0      0           0      0   \n",
       "4           4  indian       0         0      0           0      0   \n",
       "\n",
       "   apple_brandy  apricot  armagnac  ...  whiskey  white_bread  white_wine  \\\n",
       "0             0        0         0  ...        0            0           0   \n",
       "1             0        0         0  ...        0            0           0   \n",
       "2             0        0         0  ...        0            0           0   \n",
       "3             0        0         0  ...        0            0           0   \n",
       "4             0        0         0  ...        0            0           0   \n",
       "\n",
       "   whole_grain_wheat_flour  wine  wood  yam  yeast  yogurt  zucchini  \n",
       "0                        0     0     0    0      0       0         0  \n",
       "1                        0     0     0    0      0       0         0  \n",
       "2                        0     0     0    0      0       0         0  \n",
       "3                        0     0     0    0      0       0         0  \n",
       "4                        0     0     0    0      0       1         0  \n",
       "\n",
       "[5 rows x 382 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Unnamed: 0</th>\n      <th>cuisine</th>\n      <th>almond</th>\n      <th>angelica</th>\n      <th>anise</th>\n      <th>anise_seed</th>\n      <th>apple</th>\n      <th>apple_brandy</th>\n      <th>apricot</th>\n      <th>armagnac</th>\n      <th>...</th>\n      <th>whiskey</th>\n      <th>white_bread</th>\n      <th>white_wine</th>\n      <th>whole_grain_wheat_flour</th>\n      <th>wine</th>\n      <th>wood</th>\n      <th>yam</th>\n      <th>yeast</th>\n      <th>yogurt</th>\n      <th>zucchini</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>indian</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>indian</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>indian</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3</td>\n      <td>indian</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4</td>\n      <td>indian</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 382 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 1
    }
   ],
   "source": [
    "import pandas as pd\n",
    "cuisines_df = pd.read_csv(\"../../data/cleaned_cuisines.csv\")\n",
    "cuisines_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0    indian\n",
       "1    indian\n",
       "2    indian\n",
       "3    indian\n",
       "4    indian\n",
       "Name: cuisine, dtype: object"
      ]
     },
     "metadata": {},
     "execution_count": 2
    }
   ],
   "source": [
    "cuisines_label_df = cuisines_df['cuisine']\n",
    "cuisines_label_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   almond  angelica  anise  anise_seed  apple  apple_brandy  apricot  \\\n",
       "0       0         0      0           0      0             0        0   \n",
       "1       1         0      0           0      0             0        0   \n",
       "2       0         0      0           0      0             0        0   \n",
       "3       0         0      0           0      0             0        0   \n",
       "4       0         0      0           0      0             0        0   \n",
       "\n",
       "   armagnac  artemisia  artichoke  ...  whiskey  white_bread  white_wine  \\\n",
       "0         0          0          0  ...        0            0           0   \n",
       "1         0          0          0  ...        0            0           0   \n",
       "2         0          0          0  ...        0            0           0   \n",
       "3         0          0          0  ...        0            0           0   \n",
       "4         0          0          0  ...        0            0           0   \n",
       "\n",
       "   whole_grain_wheat_flour  wine  wood  yam  yeast  yogurt  zucchini  \n",
       "0                        0     0     0    0      0       0         0  \n",
       "1                        0     0     0    0      0       0         0  \n",
       "2                        0     0     0    0      0       0         0  \n",
       "3                        0     0     0    0      0       0         0  \n",
       "4                        0     0     0    0      0       1         0  \n",
       "\n",
       "[5 rows x 380 columns]"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>almond</th>\n      <th>angelica</th>\n      <th>anise</th>\n      <th>anise_seed</th>\n      <th>apple</th>\n      <th>apple_brandy</th>\n      <th>apricot</th>\n      <th>armagnac</th>\n      <th>artemisia</th>\n      <th>artichoke</th>\n      <th>...</th>\n      <th>whiskey</th>\n      <th>white_bread</th>\n      <th>white_wine</th>\n      <th>whole_grain_wheat_flour</th>\n      <th>wine</th>\n      <th>wood</th>\n      <th>yam</th>\n      <th>yeast</th>\n      <th>yogurt</th>\n      <th>zucchini</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>...</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
     },
     "metadata": {},
     "execution_count": 3
    }
   ],
   "source": [
    "cuisines_features_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n",
    "cuisines_features_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Probeer verschillende classificatoren\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier\n",
    "from sklearn.model_selection import train_test_split, cross_val_score\n",
    "from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(cuisines_features_df, cuisines_label_df, test_size=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "C = 10\n",
    "# Create different classifiers.\n",
    "classifiers = {\n",
    "    'Linear SVC': SVC(kernel='linear', C=C, probability=True,random_state=0),\n",
    "    'KNN classifier': KNeighborsClassifier(C),\n",
    "    'SVC': SVC(),\n",
    "    'RFST': RandomForestClassifier(n_estimators=100),\n",
    "    'ADA': AdaBoostClassifier(n_estimators=100)\n",
    "    \n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Accuracy (train) for Linear SVC: 76.4% \n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.64      0.66      0.65       242\n",
      "      indian       0.91      0.86      0.89       236\n",
      "    japanese       0.72      0.73      0.73       245\n",
      "      korean       0.83      0.75      0.79       234\n",
      "        thai       0.75      0.82      0.78       242\n",
      "\n",
      "    accuracy                           0.76      1199\n",
      "   macro avg       0.77      0.76      0.77      1199\n",
      "weighted avg       0.77      0.76      0.77      1199\n",
      "\n",
      "Accuracy (train) for KNN classifier: 70.7% \n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.65      0.63      0.64       242\n",
      "      indian       0.84      0.81      0.82       236\n",
      "    japanese       0.60      0.81      0.69       245\n",
      "      korean       0.89      0.53      0.67       234\n",
      "        thai       0.69      0.75      0.72       242\n",
      "\n",
      "    accuracy                           0.71      1199\n",
      "   macro avg       0.73      0.71      0.71      1199\n",
      "weighted avg       0.73      0.71      0.71      1199\n",
      "\n",
      "Accuracy (train) for SVC: 80.1% \n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.71      0.69      0.70       242\n",
      "      indian       0.92      0.92      0.92       236\n",
      "    japanese       0.77      0.78      0.77       245\n",
      "      korean       0.87      0.77      0.82       234\n",
      "        thai       0.75      0.86      0.80       242\n",
      "\n",
      "    accuracy                           0.80      1199\n",
      "   macro avg       0.80      0.80      0.80      1199\n",
      "weighted avg       0.80      0.80      0.80      1199\n",
      "\n",
      "Accuracy (train) for RFST: 82.8% \n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.80      0.75      0.77       242\n",
      "      indian       0.90      0.91      0.90       236\n",
      "    japanese       0.82      0.78      0.80       245\n",
      "      korean       0.85      0.82      0.83       234\n",
      "        thai       0.78      0.89      0.83       242\n",
      "\n",
      "    accuracy                           0.83      1199\n",
      "   macro avg       0.83      0.83      0.83      1199\n",
      "weighted avg       0.83      0.83      0.83      1199\n",
      "\n",
      "Accuracy (train) for ADA: 71.1% \n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "     chinese       0.60      0.57      0.58       242\n",
      "      indian       0.87      0.84      0.86       236\n",
      "    japanese       0.71      0.60      0.65       245\n",
      "      korean       0.68      0.78      0.72       234\n",
      "        thai       0.70      0.78      0.74       242\n",
      "\n",
      "    accuracy                           0.71      1199\n",
      "   macro avg       0.71      0.71      0.71      1199\n",
      "weighted avg       0.71      0.71      0.71      1199\n",
      "\n"
     ]
    }
   ],
   "source": [
    "n_classifiers = len(classifiers)\n",
    "\n",
    "for index, (name, classifier) in enumerate(classifiers.items()):\n",
    "    classifier.fit(X_train, np.ravel(y_train))\n",
    "\n",
    "    y_pred = classifier.predict(X_test)\n",
    "    accuracy = accuracy_score(y_test, y_pred)\n",
    "    print(\"Accuracy (train) for %s: %0.1f%% \" % (name, accuracy * 100))\n",
    "    print(classification_report(y_test,y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n\n<!-- CO-OP TRANSLATOR DISCLAIMER START -->\n**Disclaimer**:  \nDit document is vertaald met behulp van de AI-vertalingsdienst [Co-op Translator](https://github.com/Azure/co-op-translator). Hoewel wij streven naar nauwkeurigheid, dient u er rekening mee te houden dat automatische vertalingen fouten of onnauwkeurigheden kunnen bevatten. Het oorspronkelijke document in de oorspronkelijke taal dient als de gezaghebbende bron te worden beschouwd. Voor kritieke informatie wordt een professionele menselijke vertaling aanbevolen. Wij zijn niet aansprakelijk voor enige misverstanden of verkeerde interpretaties die voortvloeien uit het gebruik van deze vertaling.\n<!-- CO-OP TRANSLATOR DISCLAIMER END -->\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.7.0 64-bit ('3.7')"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "metadata": {
   "interpreter": {
    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}