You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ML-For-Beginners/4-Classification/1-Introduction/solution/notebook.ipynb

820 lines
283 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n",
"Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n",
"Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n",
"Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n",
"Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n",
"Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n",
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install imblearn"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib as mpl\n",
"import numpy as np\n",
"from imblearn.over_sampling import SMOTE"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('../../data/recipes.csv')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n",
"0 65 indian 0 0 0 0 0 \n",
"1 66 indian 1 0 0 0 0 \n",
"2 67 indian 0 0 0 0 0 \n",
"3 68 indian 0 0 0 0 0 \n",
"4 69 indian 0 0 0 0 0 \n",
"\n",
" apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"\n",
"[5 rows x 385 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>cuisine</th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>65</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>66</td>\n <td>indian</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>67</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>68</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>69</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 385 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 2448 entries, 0 to 2447\nColumns: 385 entries, Unnamed: 0 to zucchini\ndtypes: int64(384), object(1)\nmemory usage: 7.2+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"korean 799\n",
"indian 598\n",
"chinese 442\n",
"japanese 320\n",
"thai 289\n",
"Name: cuisine, dtype: int64"
]
},
"metadata": {},
"execution_count": 9
}
],
"source": [
"df.cuisine.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#df.keys().values"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa6d133c7b8>"
]
},
"metadata": {},
"execution_count": 11
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 401.435938 248.518125\" width=\"401.435938pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 401.435938 248.518125 \nL 401.435938 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 59.435938 224.64 \nL 394.235938 224.64 \nL 394.235938 7.2 \nL 59.435938 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p0ca7a4ae3c)\" d=\"M 59.435938 213.768 \nL 378.29308 213.768 \nL 378.29308 192.024 \nL 59.435938 192.024 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p0ca7a4ae3c)\" d=\"M 59.435938 170.28 \nL 298.079957 170.28 \nL 298.079957 148.536 \nL 59.435938 148.536 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p0ca7a4ae3c)\" d=\"M 59.435938 126.792 \nL 235.824995 126.792 \nL 235.824995 105.048 \nL 59.435938 105.048 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p0ca7a4ae3c)\" d=\"M 59.435938 83.304 \nL 187.138423 83.304 \nL 187.138423 61.56 \nL 59.435938 61.56 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p0ca7a4ae3c)\" d=\"M 59.435938 39.816 \nL 174.767244 39.816 \nL 174.767244 18.072 \nL 59.435938 18.072 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"maed64254fb\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"59.435938\" xlink:href=\"#maed64254fb\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(56.254688 239.238437)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-48\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_2\">\n <g id=\"line2d_2\">\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"99.342964\" xlink:href=\"#maed64254fb\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_2\">\n <!-- 100 -->\n <defs>\n <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n </defs>\n <g transform=\"translate(89.799214 239.238437)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-49\"/>\n <use x=\"63.623047\" xlink:href=\"#DejaVuSans-48\"/>\n <use x=\"127.246094\" xlink:href=\"#DejaVuSans-48\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_3\">\n <g id=\"line2d_3\">\n
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAASY0lEQVR4nO3df7TldV3v8eerGZkRRoeAiXtE5UgNIkUCjlwQIzAiC7NscdcSbcmsfkxl5SXX0juuyzK9d3UvlXnpplajma0kMtCUhluImNcr8msGBmb4pZaTQCFQOYom0fi+f+zPkd14hpnzOWefvYfzfKy113z35/vde7/22fvMa3++3733SVUhSVKPbxt3AEnSgcsSkSR1s0QkSd0sEUlSN0tEktRt+bgDLKYjjjiipqenxx1Dkg4oW7dufbiq1sy2bkmVyPT0NFu2bBl3DEk6oCT5u72tc3eWJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqduS+sT69vt3Mb3xqnHH0ALZefG5444gLXnORCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktRtIkokyaFJXtuWz0yyeY6X/29Jzh5NOknS3kxEiQCHAq/tvXBVvbmqPraAeSRJ+2FSSuRi4DuTbAN+E1iV5Iokdye5NEkAkrw5yc1JdiTZNDT+viTnjTG/JC1Jk1IiG4G/qaoTgTcAJwEXAscDxwCnt+3eUVUvrKrvAZ4KvGxfV5xkQ5ItSbbs/tqu0aSXpCVqUkpkTzdV1X1V9Q1gGzDdxs9KcmOS7cBLgO/e1xVV1aaqWldV65YdvHp0iSVpCZrUL2B8dGh5N7A8yUrgXcC6qro3yVuAleMIJ0kamJSZyFeAp+1jm5nCeDjJKsBjIJI0ZhMxE6mqf0xyXZIdwL8AX5xlmy8leTewA3gAuHmRY0qS9jARJQJQVa/ay/gvDS1fBFw0yzbrR5dMkrQ3k7I7S5J0ALJEJEndLBFJUjdLRJLUzRKRJHWbmHdnLYYTjlrNlovPHXcMSXrScCYiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6rZ83AEW0/b7dzG98apxx9CY7Lz43HFHkJ50nIlIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG77VSJJPj3qIJKkA89+lUhVvWjUQSRJB579nYk8kmRVkmuT3JJke5Ifa+umk9yd5NIkdyW5IsnBbd2bk9ycZEeSTUnSxj+R5NeT3JTkM0m+r40vS/Kb7TK3J/m5Nj6V5JNJtrXrmtn+nCTXt0yXJ1k1ih+SJGl2czkm8nXgFVV1MnAW8FszpQA8F3hXVT0P+DLw2jb+jqp6YVV9D/BU4GVD17e8qk4BLgR+tY39NLCrql4IvBD42STPAV4FXF1VJwLPB7YlOQK4CDi7ZdoCvH4ud16SND9z+dqTAP8jyRnAN4CjgCPbunur6rq2/H7gdcDbgLOSvBE4GDgMuAP4i7bdh9q/W4HptnwO8L1JzmvnVwNrgZuB9yZ5CvDhqtqW5PuB44HrWpcdBFz/LaGTDcAGgGVPXzOHuytJ2pe5lMirgTXAC6rqsSQ7gZVtXe2xbSVZCbwLWFdV9yZ5y9D2AI+2f3cP5Qjwy1V19Z433srrXOB9Sd4O/DNwTVWd/0Shq2oTsAlgxdTaPXNKkuZhLruzVgMPtgI5Czh6aN2zk5zWll8FfIrHC+PhdqziPPbtauAX2oyDJMcmOSTJ0cAXq+rdwHuAk4EbgNOTfFfb9pAkx87h/kiS5ml/ZyIFXAr8RZLtDI4/3D20/h7gF5O8F7gT+N2q+lqSdwM7gAcY7JLal/cw2LV1Szve8hDw48CZwBuSPAY8Arymqh5Ksh64LMmKdvmLgM/s532SJM1Tqp54D0+Sw4FbqurovayfBja3g+cTbcXU2pq64JJxx9CY+FXwUp8kW6tq3WzrnnB3VpJnMDhY/bZRBJMkHdiecHdWVf098ITHGapqJzDxsxBJ0sLzu7MkSd0sEUlSN0tEktRtLh82POCdcNRqtvgOHUlaMM5EJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd0sEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1s0QkSd2WjzvAYtp+/y6mN1417hhSt50XnzvuCNK/40xEktTNEpEkdbNEJEndLBFJUjdLRJLUzRKRJHWzRCRJ3Ra0RJK8L8l5s4w/I8kVC3lbkqTxW5QPG1bV3wPfUi6SpAPbvGYiSV6T5PYktyX54zZ8RpJPJ/nbmVlJkukkO9ry+iQfSvJXST6b5DeGru+cJNcnuSXJ5UlWtfGLk9zZbuttbWxNkg8mubmdTp/PfZEkzV33TCTJdwMXAS+qqoeTHAa8HZgCXgwcB1wJzLYb60TgJOBR4J4kvwP8S7u+s6vqq0n+C/D6JO8EXgEcV1WV5NB2Hb8N/K+q+lSSZwNXA8+bJecGYAPAsqev6b27kqRZzGd31kuAy6vqYYCq+qckAB+uqm8AdyY5ci+XvbaqdgEkuRM4GjgUOB64rl3PQcD1wC7g68AfJNkMbG7XcTZwfNsW4OlJVlXVI8M3VFWbgE0AK6bW1jzuryRpD6M4JvLo0HL2Y5vdLUeAa6rq/D03TnIK8AMMjqv8EoMC+zbg1Kr6+kKEliTN3XyOiXwc+E9JDgdou7Pm4wbg9CTf1a7vkCTHtuMiq6vq/wC/Ajy/bf9R4JdnLpzkxHneviRpjrpnIlV1R5JfA/5vkt3ArfMJUlUPJVkPXJZkRRu+CPgK8JEkKxnMVl7f1r0OeGeS2xncj08CPz+fDJKkuUnV0jlMsGJqbU1dcMm4Y0jd/HsiGockW6tq3Wzr/MS6JKmbJSJJ6maJSJK6WSKSpG6WiCSp26J8AeOkOOGo1Wzx3S2StGCciUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6LR93gMW0/f5dTG+8atwxJM3RzovPHXcE7YUzEUlSN0tEktTNEpEkdbNEJEndLBFJUjdLRJLUbWQlkuTTc9z+zCSb2/LLk2wcTTJJ0kIZ2edEqupF87jslcCVCxhHkjQCo5yJPNL+PTPJJ5JckeTuJJcmSVv30jZ2C/ATQ5ddn+QdbflHk9yY5NYkH0tyZBt/S5L3tuv+2ySvG9V9kSTNbrGOiZwEXAgcDxwDnJ5kJfBu4EeBFwD/YS+X/RRwalWdBPwp8MahdccBPwScAvxqkqeMJr4kaTaL9bUnN1XVfQBJtgHTwCPA56vqs238/cCGWS77TOADSaaAg4DPD627qqoeBR5N8iBwJHDf8IWTbJi53mVPX7OQ90mSlrzFmok8OrS8m7mV1+8A76iqE4CfA1bO5XqralNVrauqdcsOXj2Hm5Uk7cs43+J7NzCd5Dvb+fP3st1q4P62fMHIU0mS9tvYSqSqvs5gN9NV7cD6g3vZ9C3A5Um2Ag8vUjxJ0n5IVY07w6JZMbW2pi64ZNwxJM2RXwU/Xkm2VtW62db5iXVJUjdLRJLUzRKRJHWzRCRJ3SwRSVK3xfrE+kQ44ajVbPFdHpK0YJyJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrpZIpKkbpaIJKmbJSJJ6maJSJK6WSKSpG6WiCSpmyUiSepmiUiSulkikqRulogkqZslIknqZolIkrotH3eAxbT9/l1Mb7xq3DEkaVHtvPjckV23MxFJUjdLRJLUzRKRJHWzRCRJ3SwRSVI3S0SS1M0SkSR1W9ASSTKdZMdCXqckaXJNxEwkyZL60KMkPVm
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"#display classes in bar graph\r\n",
"df.cuisine.value_counts().plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"thai df: (289, 385)\njapanese df: (320, 385)\nchinese df: (442, 385)\nindian df: (598, 385)\nkorean df: (799, 385)\n"
]
}
],
"source": [
"# ingrediant counts by class count\r\n",
"# filter to thai food, display ingredients graph\r\n",
"\r\n",
"thai_df = df[(df.cuisine == \"thai\")]\r\n",
"japanese_df = df[(df.cuisine == \"japanese\")]\r\n",
"chinese_df = df[(df.cuisine == \"chinese\")]\r\n",
"indian_df = df[(df.cuisine == \"indian\")]\r\n",
"korean_df = df[(df.cuisine == \"korean\")]\r\n",
"\r\n",
"print(f'thai df: {thai_df.shape}')\r\n",
"print(f'japanese df: {japanese_df.shape}')\r\n",
"print(f'chinese df: {chinese_df.shape}')\r\n",
"print(f'indian df: {indian_df.shape}')\r\n",
"print(f'korean df: {korean_df.shape}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What are the top ingredients by class"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def create_ingredient_df(df):\r\n",
" #transpose df, drop cuisine and unnamed rows, sum the row to get total for ingredient and add value header to new df\r\n",
" ingredient_df = df.T.drop(['cuisine','Unnamed: 0']).sum(axis=1).to_frame('value')\r\n",
" # drop ingredients that have a 0 sum\r\n",
" ingredient_df = ingredient_df[(ingredient_df.T != 0).any()]\r\n",
" # sort df\r\n",
" ingredient_df = ingredient_df.sort_values(by='value', ascending=False, inplace=False)\r\n",
" return ingredient_df\r\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa6b0d16c18>"
]
},
"metadata": {},
"execution_count": 14
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 422.776562 248.518125\" width=\"422.776562pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 422.776562 248.518125 \nL 422.776562 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 80.776563 224.64 \nL 415.576563 224.64 \nL 415.576563 7.2 \nL 80.776563 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 219.204 \nL 399.633705 219.204 \nL 399.633705 208.332 \nL 80.776563 208.332 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 197.46 \nL 362.771608 197.46 \nL 362.771608 186.588 \nL 80.776563 186.588 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 175.716 \nL 331.438825 175.716 \nL 331.438825 164.844 \nL 80.776563 164.844 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 153.972 \nL 303.792252 153.972 \nL 303.792252 143.1 \nL 80.776563 143.1 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 132.228 \nL 290.890518 132.228 \nL 290.890518 121.356 \nL 80.776563 121.356 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 110.484 \nL 285.361203 110.484 \nL 285.361203 99.612 \nL 80.776563 99.612 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 88.74 \nL 276.145679 88.74 \nL 276.145679 77.868 \nL 80.776563 77.868 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 66.996 \nL 274.302574 66.996 \nL 274.302574 56.124 \nL 80.776563 56.124 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 45.252 \nL 268.773259 45.252 \nL 268.773259 34.38 \nL 80.776563 34.38 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#p861f312b41)\" d=\"M 80.776563 23.508 \nL 259.557735 23.508 \nL 259.557735 12.636 \nL 80.776563 12.636 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m4feadc1503\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"80.776563\" xlink:href=\"#m4feadc1503\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(77.595
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAeAklEQVR4nO3de7RVdb338fcHRFAhTPBCeNnogzckQJcdb11ES01QSxGLUtNHjl1EM2twhmbUo+PY5dhN07DHII+nuKSPFKe0FPEasTcCG1TUEk8QCmjuJIRg+33+WL+ti82+AWvvORfr8xpjjT3Xb/7mnN85x2J/+M0595qKCMzMzPKiW9YFmJmZlXIwmZlZrjiYzMwsVxxMZmaWKw4mMzPLlV2yLmBn0L9//6ipqcm6DDOzitG/f3/uv//++yPi9ObzHExlUFNTQ21tbdZlmJlVFEn9W2r3qTwzM8sVB5OZmeWKg8nMzHLF15jMzLrApk2bWLFiBRs2bMi6lC7Xq1cv9t9/f3r06NGh/g6mMqhf2UDNxNlZl2G2Q5bfdGbWJezUVqxYQZ8+faipqUFS1uV0mYjg1VdfZcWKFQwaNKhDy/hUnplZF9iwYQP9+vWrqlACkES/fv22aaRYEcEkaYqk81pof4+kme0su7y1WxLNzLpStYVSk23d74o+lRcRfwW2CiwzM6tcuQwmSRcC1wABLAYagQ9IuhrYD/hKRMyUVAP8OiKOktQd+CZwOvAWcEdE/LBknbsB9wD3RMQdkj4FTAB2BeYBn4uIRknrgO8Do4A3gbMj4pWu2G8zqx7lvi5d7muEvXv3Zt26dWVdZ0fl7lSepCHAdcDIiBgGXJlmDQBOohgYN7Ww6HigBhgeEe8F7i6Z1xv4FfDzFEpHAGOBEyNiOMXgG5f67gH8IW37EeCyVuocL6lWUm3j+obt3l8zM9tS7oIJGAnMiIi1ABHxWmr/fxHxVkQ8DezbwnKnAj+OiM3NlgO4D/hpRPwsvT8FOAaYL2lhen9wmvdP4Ndpuo5i2G0lIiZHRCEiCt1377sdu2lm1nUmTpzIrbfe+vb7SZMmccMNN3DKKadw9NFHM3ToUO67776tlnv44YcZNWrU2++/8IUvMGXKFADq6ur44Ac/yDHHHMNpp53GqlWrylJrHoOpNRtLprf1CuLjwOl65wqcgKkRMTy9DouISWnepnjnefON5PR0p5nZthg7dizTp09/+/306dO56KKLuPfee1mwYAFz5szhS1/6Eu/8+mvbpk2buOKKK5g5cyZ1dXVccsklXHvttWWpNY+/dB8C7pV0c0S8KmmvDi73O+BfJc2JiM2S9ioZNV2fXrcCnwMeBO6T9N2IWJ220SciXir3zpiZ5cGIESNYvXo1f/3rX1mzZg3vfve72W+//fjiF7/II488Qrdu3Vi5ciWvvPIK++23X7vrW7ZsGUuWLOHDH/4wAI2NjQwYMKAsteYumCJiqaQbgbmSGoGnOrjoT4BDgcWSNgF3ALeUzL8SuFPStyLiK5KuAx6Q1A3YBHwecDCZ2U5rzJgxzJw5k5dffpmxY8dy9913s2bNGurq6ujRowc1NTVb/b3RLrvswltvvfX2+6b5EcGQIUN48skny15n7oIJICKmAlPbmN87/VwOHJWmNwNXp1dp35qSt58paZ8GTGtt3Wl6JtDm30mZmVWKsWPHctlll7F27Vrmzp3L9OnT2WeffejRowdz5szhpZe2/r/5QQcdxNNPP83GjRt58803efDBBznppJM47LDDWLNmDU8++STHH388mzZt4rnnnmPIkCE7XGcug6nSDB3Yl1p/nYuZbYMsvgJqyJAhvPHGGwwcOJABAwYwbtw4Ro8ezdChQykUChx++OFbLXPAAQdw/vnnc9RRRzFo0CBGjBgBwK677srMmTOZMGECDQ0NbN68mauuuqoswaSOXuiy1hUKhfCDAs2sLc888wxHHHFE1mVkpqX9l1QXEYXmfSvprjwzM6sCDiYzM8sVB5OZWRep1ksn27rfDiYzsy7Qq1cvXn311aoLp6bnMfXq1avDy/iuPDOzLrD//vuzYsUK1qxZk3UpXa7pCbYd5WAyM+sCPXr06PATXKudT+WZmVmuOJjMzCxXHExmZpYrvsZUBvUrG8r+NEqzvMriq3SsunjEZGZmuVJ1wSTpvyXtmXUdZmbWsqo6lZeeYDsqIt5qt7OZmWVipx8xSaqRtEzSz4AlQKOk/mnehZIWS1ok6a7UtrekX0qan14nZlm/mVm1qZYR02Dgooj4g6TlAJKGANcBJ0TE2pJHuH8f+G5EPCbpQOB+YKvvqpc0HhgP0P1de3fBLpiZVYdqCaaXIuIPzdpGAjMiYi1ARLyW2k8Fjiye9QPgXZJ6R8S60oUjYjIwGaDngMHV9eVXZmadqFqC6R/b0LcbcFxEbGi3p5mZld1Of42pDQ8BYyT1Ayg5lfcAcEVTJ0nDM6jNzKxqVW0wRcRS4EZgrqRFwM1p1gSgkG6KeBq4PKsazcyqkart2SCdoVAoRG1tbdZlmJlVFEl1EVFo3l61IyYzM8snB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Uq1fLt4p6pf2UDNxNlZl2GWmeU3nZl1CbYT8YjJzMxyxcFkZma5UjHBJOksSRPLtK5Jkq4px7rMzKy8KuIak6RdImIWMCvD7W/OYttmZtWmy0dMki5MD+FbJOkuSTWSHkptD0o6MPWbIul2SfOAb0m6WNItad5oSfMkPSXp95L2Te2TJN0p6WFJf5Y0oWS710p6TtJjwGEl7YdI+q2kOkmPSjq8pe134SEyM6tqXTpikjQEuA44ISLWpseZTwWmRsRUSZcAPwDOSYvsn/o2Srq4ZFWPAcdFREj638BXgC+leYcDJwN9gGWSbgPeC1wADKe4zwuAutR/MnB5RDwv6V+AHwEjm2+/hX0ZD4wH6P6uvXfksJiZWYmuPpU3EpgREWsBIuI1SccDH0/z72LL0cmMlkKBYmBMkzQA2BV4sWTe7IjYCGyUtBrYF3g/cG9ErAeQNCv97A2cAMyQ1LR8zw5sn4iYTDHU6DlgsB8DbGZWJnm/xvSPVtp/CNwcEbMkfQiYVDJvY8l0I23vYzfg9YgYvo3bNzOzTtLV15geAsZI6geQTuU9QfE0G8A44NEOrKcvsDJNX9SB/o8A50jaTVIfYDRARPwdeFHSmFSPJA3r6M6YmVn5demIKSKWSroRmCupEXgKuAL4qaQvA2uAz3RgVZMonn77G8WwG9TOdhdImgYsAlYD80tmjwNuk3Qd0AP4RepnZmYZUIQvj+yoQqEQtbW1WZdhZlZRJNVFRKF5e8X8ga2ZmVUHB5OZmeWKg8nMzHLFwWRmZrniYDIzs1xxMJmZWa44mMzMLFccTGZmlisOJjMzyxUHk5mZ5Urev128ItSvbKBm4uysyzDL3PKbzsy6BNsJeMRkZma54mAyM7NcyTSYJJ0j6cgO9Jsi6bwW2j8k6ddlrKcg6Qdp+mJJt5Rr3WZm1jFZj5jOAdoNpq4SEbURMSHrOszMqlmbwSTpJkmfL3k/SdI1kr4sab6kxZK+XjL/q5KWSXpM0s8lXZPaD5H0W0l1kh6VdLikE4CzgG9LWpj6XJbWu0jSLyXtXlLOqZJqJT0naVQLte4h6U5Jf5T0lKSz29ivXpJ+Kqk+9T05tXd4BCZpfKqntnF9Q0cWMTOzDmhvxDQNOL/k/fkUnzI7GHgfMBw4RtIHJB0LnAsMA84ASh/+NBm4IiKOAa4BfhQRTwCzgC9HxPCI+BNwT0QcGxHDgGeAS0vWUZO2eSZwu6RezWq9FngoIt4HnEwx8PZoZb8+D0REDAU+AUxtYX1tiojJEVGIiEL33ftuy6JmZtaGNm8Xj4inJO0j6T3A3sDfgKHARyg+Fh2gN8Wg6gPcFxEbgA2SfgUgqTdwAsVHoTetumcrmzxK0g3Anmm995fMmx4RbwHPS/ozcHizZT8CnNU0SgN6AQdSDLjmTgJ+mPbxWUkvAYe2dSz
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"thai_ingredient_df = create_ingredient_df(thai_df)\r\n",
"thai_ingredient_df.head(10).plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa690ff1dd8>"
]
},
"metadata": {},
"execution_count": 15
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 422.776562 248.518125\" width=\"422.776562pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 422.776562 248.518125 \nL 422.776562 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 80.776563 224.64 \nL 415.576563 224.64 \nL 415.576563 7.2 \nL 80.776563 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 219.204 \nL 399.633705 219.204 \nL 399.633705 208.332 \nL 80.776563 208.332 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 197.46 \nL 329.555212 197.46 \nL 329.555212 186.588 \nL 80.776563 186.588 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 175.716 \nL 287.508117 175.716 \nL 287.508117 164.844 \nL 80.776563 164.844 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 153.972 \nL 276.996343 153.972 \nL 276.996343 143.1 \nL 80.776563 143.1 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 132.228 \nL 236.701209 132.228 \nL 236.701209 121.356 \nL 80.776563 121.356 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 110.484 \nL 234.949247 110.484 \nL 234.949247 99.612 \nL 80.776563 99.612 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 88.74 \nL 229.69336 88.74 \nL 229.69336 77.868 \nL 80.776563 77.868 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 66.996 \nL 224.437473 66.996 \nL 224.437473 56.124 \nL 80.776563 56.124 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 45.252 \nL 205.165887 45.252 \nL 205.165887 34.38 \nL 80.776563 34.38 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#p253e0f3736)\" d=\"M 80.776563 23.508 \nL 192.902151 23.508 \nL 192.902151 12.636 \nL 80.776563 12.636 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"mf9f0508e34\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"80.776563\" xlink:href=\"#mf9f0508e34\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(77.59531
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAdqUlEQVR4nO3de5xXdb3v8dcbREhBvICJeBkvKAocbmN57eb9btsLFpmkD3nUMT1mdg4dtWNnu/fW3NXOtHQ6JewyE1DTorwcNDVF5TfcBi9gKZ5AVLAaRcQQP+eP9Z36McwMg/5m1vrN7/18PObBWt91+X3WmoE337XWrK8iAjMzs6LolXcBZmZm5RxMZmZWKA4mMzMrFAeTmZkVioPJzMwKZau8C+gJBg0aFHV1dXmXYWZWNQYNGsR99913X0Qc13qZg6kC6urqKJVKeZdhZlZVJA1qq92X8szMrFAcTGZmVigOJjMzKxTfYzIz6wbr169n+fLlrFu3Lu9Sul2/fv3Ybbfd6NOnT6fWdzBVQNOKZuqmzMq7DHufll1zYt4lWA1Yvnw5AwYMoK6uDkl5l9NtIoLXX3+d5cuXs9dee3VqG1/KMzPrBuvWrWOnnXaqqVACkMROO+20RT3FmgomSb+RtH3edZhZbaq1UGqxpcddU5fyIuKEvGswM7OO9ahgkvQ14J2IuF7Sd4HREfEpSZ8CzgcOA+qB/sBvgd8DhwIrgFMj4m1J+wA3AoOBtcAFEfFcDodjZj1Ype9LV/peaf/+/VmzZk1F99lZPe1S3qPAEWm6HugvqU9qe6TVusOAGyNiBPBX4PTU3gBcFBHjgcuAH7T1QZImSypJKm1Y21zhwzAzq109LZgagfGStgPeAeaQBdQRZKFV7sWIWFC2XZ2k/mQ9qBmSFgA3A0Pa+qCIaIiI+oio773NwC44FDOzypkyZQo33njj3+evuuoqrr76ao488kjGjRvHqFGjuPvuuzfZ7ne/+x0nnXTS3+e//OUvM3XqVAAaGxv5+Mc/zvjx4zn22GNZuXJlRWrtUcEUEeuBF4FJwONkYfRJYF/g2Varv1M2vYHssmYv4K8RMabs64AuL9zMrItNmDCB6dOn/31++vTpnHvuudx1113MmzePhx56iK9+9atERKf2t379ei666CJmzpxJY2Mj5513HpdffnlFau1R95iSR8kuwZ0HNAHfARojIjb3ZEhEvCHpRUlnRsQMZRv8l4hY2OVVm5l1obFjx/Laa6/x8ssvs2rVKnbYYQd22WUXvvKVr/DII4/Qq1cvVqxYwauvvsouu+yy2f0tWbKExYsXc/TRRwOwYcMGhgxp8wLTFuupwXQ5MCci3pK0jk0v43VkIvBDSVcAfYBfAA4mM6t6Z555JjNnzuSVV15hwoQJ3HrrraxatYrGxkb69OlDXV3dJr9vtNVWW/Hee+/9fb5leUQwYsQI5syZU/E6e1wwRcRsskBpmd+vbLouTa4GRpa1/3vZ9IvAJuODmJlVuwkTJnDBBRewevVqHn74YaZPn87OO+9Mnz59eOihh3jppZc22WbPPffkmWee4Z133uHtt99m9uzZHH744ey///6sWrWKOXPmcMghh7B+/XqWLl3KiBEjPnCdPS6Y8jBq6EBKfq2NmW2BPF6FNWLECN58802GDh3KkCFDmDhxIieffDKjRo2ivr6e4cOHb7LN7rvvzllnncXIkSPZa6+9GDt2LABbb701M2fO5OKLL6a5uZl3332XSy65pCLBpM7e6LL21dfXhwcKNLOOPPvssxxwQO0+S9XW8UtqjIj61uv2qKfyzMys+jmYzMysUBxMZmbdpFZvnWzpcTuYzMy6Qb9+/Xj99ddrLpxaxmPq169fp7fxU3lmZt1gt912Y/ny5axatSrvUrpdywi2neVgMjPrBn369On0CK61zpfyzMysUBxMZmZWKA4mMzMrFN9jqoCmFc0VH43SiiuPV8mY1RL3mMzMrFBqNpgkTZJ0Q5r+oqTP512TmZnV6KU8SRsdd0TclFctZma2saoOJklXAp8DVgF/AhqBZmAysDXwB+CciFgraSqwDhgLPAYsKtvPVcCaiPh3SfsCNwGDyYZcPzMi/thdx2RmVuuq9lKepIOA04HRwPFAy6vT74yIgyJiNPAscH7ZZrsBh0bEpR3s+lbgxrT9ocDKdj5/sqSSpNKGtc0f8GjMzKxFNfeYDgPujoh1wDpJv0rtIyVdDWwP9AfuK9tmRkRsaG+HkgYAQyPiLoC07zZFRAPQANB3yLDaevmVmVkXqtoeUwemAl+OiFHAN4HyNwe+lUtFZmbWadUcTI8BJ0vqJ6k/cFJqHwCslNQHmLglO4yIN4Hlkk4DkNRX0jaVLNrMzDpWtcEUEXOBe8geYvgt0ET24MOVwJNkwfXc+9j1OcDFkhYBjwO7VKRgMzPrFFXz2CCS+kfEmtSreQSYHBHzuruO+vr6KJVK3f2xZmZVTVJjRNS3bq/mhx8AGiQdSHYfaVoeoWRmZpVV1cEUEZ/NuwYzM6usqr3HZGZmPZODyczMCsXBZGZmheJgMjOzQnEwmZlZoTiYzMysUBxMZmZWKA4mMzMrlKr+BduiaFrRTN2UWXmXYVVo2TUn5l2CWeG4x2RmZoXiYDIzs0Kp+mCS9L8lHZV3HWZmVhlVf48pIr7R1Z8hqXdHQ7KbmVnlVFWPSdKVkpZI+r2k2yRdJmmqpDPS8mWSvilpnqQmScNT+2BJD0h6WtL/kfSSpEFp2eckPSVpgaSbJfVO7WskfVvSQuCQ3A7azKzGVE0wSToIOB0YDRwPbDK4VLI6IsYBPwQuS23/C3gwIkYAM4E90j4PACYAh0XEGGAD/xiOfVvgyYgYHRG/b6OeyZJKkkob1jZX5BjNzKy6LuUdBtwdEeuAdZJ+1c56d6Y/G4F/StOHA58GiIh7Jf0ltR8JjAfmSgL4EPBaWrYBuKO9YiKiAWgA6DtkWPUOA2xmVjDVFEyd9U76cwObPz6RjXz79TaWrfN9JTOz7lc1l/KAx4CTJfWT1B84aQu3PQtA0jHADql9NnCGpJ3Tsh0l7VnBms3MbAtVTY8pIuZKugdYBLwKNAGdvbnzTeA2SecAc4BXgDcjYrWkK4D7JfUC1gMXAi9V/ADMzKxTFFE9t0ck9Y+INZK2AR4BJkfEvE5s1xfYEBHvSjoE+GF62KEi6uvro1QqVWp3ZmY1QVJjRGzyIFvV9JiSBkkHAv3I7g1tNpSSPYDpqVf0N+CCrirQzMw+mKoKpoj47Pvc7nlgbIXLMTOzLlBNDz+YmVkNcDCZmVmhOJjMzKxQHExmZlYoDiYzMysUB5OZmRWKg8nMzArFwWRmZoVSVb9gW1RNK5qpmzIr7zKsB1l2zYl5l2CWG/eYzMysUHpcMEmqk7Q4TX9C0q/T9CmSpuRbnZmZbU7NXMqLiHuAe/Kuw8zMOla4HpOkbSXNkrRQ0mJJEyQdJOnx1PaUpAGpZ/SopHnp69DN7HeSpBvSdJ2kByUtkjRb0h6pfaqk69NnvSDpjO44ZjMz+4ci9piOA16OiBMBJA0E5gMT0mCB2wFvA68BR0fEOknDgNuATcb1aMf3yYbNmCbpPOB64LS0bAhwODCcrIc1s60dSJoMTAbovd3gLT9KMzNrU+F6TGQj0x4t6VpJR5CNpbQyIuYCRMQbEfEu0Af4kaQmYAZw4BZ8xiHAz9P0T8mCqMUvI+K9iHgG+HB7O4iIhoioj4j63tsM3IKPNjOzjhSuxxQRSyWNA04ArgYebGfVr5ANsT6aLGDXVaiEd8qmVaF9mplZJxWuxyRpV2BtRPwMuA74KDBE0kFp+QBJWwEDyXpS7wHnAL234GMeB85O0xOBRytVv5mZfTCF6zEBo4DrJL0HrAe+RNZz+b6kD5HdXzoK+AFwh6TPA/cCb23BZ1wE3CLpa8Aq4AsVrN/MzD4ARUTeNVS9vkOGxZBz/yPvMqwH8ZsfrBZIaoyITR5aK2KPqeqMGjqQkv8hMTOriMLdYzIzs9rmYDIzs0JxMJmZWaE4mMzMrFAcTGZmVigOJjMzKxQHk5m
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"japanese_ingredient_df = create_ingredient_df(japanese_df)\r\n",
"japanese_ingredient_df.head(10).plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa660176b70>"
]
},
"metadata": {},
"execution_count": 16
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 422.776562 248.518125\" width=\"422.776562pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 422.776562 248.518125 \nL 422.776562 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 80.776563 224.64 \nL 415.576563 224.64 \nL 415.576563 7.2 \nL 80.776563 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 219.204 \nL 399.633705 219.204 \nL 399.633705 208.332 \nL 80.776563 208.332 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 197.46 \nL 329.12734 197.46 \nL 329.12734 186.588 \nL 80.776563 186.588 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 175.716 \nL 327.022673 175.716 \nL 327.022673 164.844 \nL 80.776563 164.844 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 153.972 \nL 304.923663 153.972 \nL 304.923663 143.1 \nL 80.776563 143.1 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 132.228 \nL 264.934978 132.228 \nL 264.934978 121.356 \nL 80.776563 121.356 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 110.484 \nL 255.463974 110.484 \nL 255.463974 99.612 \nL 80.776563 99.612 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 88.74 \nL 253.359306 88.74 \nL 253.359306 77.868 \nL 80.776563 77.868 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 66.996 \nL 213.370622 66.996 \nL 213.370622 56.124 \nL 80.776563 56.124 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 45.252 \nL 204.951951 45.252 \nL 204.951951 34.38 \nL 80.776563 34.38 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#pf2447088c0)\" d=\"M 80.776563 23.508 \nL 202.847284 23.508 \nL 202.847284 12.636 \nL 80.776563 12.636 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m21dcd55c82\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"80.776563\" xlink:href=\"#m21dcd55c82\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(77.59531
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfeElEQVR4nO3deZwV1Z338c8XbCEKYlSMiEurISECsl1NULO5xD2aCYgT4hLzkkcn4pgEnyGPJoMTM+OSbTRGJRmFUUcF1GjCqHGUuBL1NggNKmgiPmGJ4tZiEILwmz/qtN603U033O5bl/6+X69+dd1Tp079Thfxl3Oqbh1FBGZmZnnRrdIBmJmZlXJiMjOzXHFiMjOzXHFiMjOzXHFiMjOzXNmm0gFsDXbZZZeora2tdBhmZlWlrq7u1Yjo27TciakMamtrKRaLlQ7DzKyqSHqpuXJP5ZmZWa44MZmZWa44MZmZWa74HpOZWSdYv349y5YtY+3atZUOpdP17NmTPfbYg5qamjbVd2Iqg/rlDdROmlXpMKyDLb30uEqHYFVs2bJl9O7dm9raWiRVOpxOExG89tprLFu2jH322adNx3gqz8ysE6xdu5add965SyUlAEnsvPPO7RopOjGZmXWSrpaUGrW3305MZmaWK7m4xyTpNGAiEMACYDpwEbAt8BowDlgFLAYOjohVkroBS4BRqZlrgb3S9vkR8Zikyals3/T7pxFxpaRa4B7gUeBgYDlwYkS8I2k/4GqgL7AGOCsinuu43ptZV1Tu+9Llvgfaq1cv3n777bK22VYVHzFJGkSWhA6LiKHAP5IljE9FxHDgVuD/RsRG4CayJAVwBDA/IlYB/w78JCIOBL4M/LLkFAOBo4CDgH+W1PhYyADg6ogYBLyZjgOYAkyIiJFkyfLnLcQ9XlJRUnHDmoYt/juYmVmm4okJOAyYERGvAkTE68AewH2S6oELgEGp7vXAaWn7TOCGtH0E8DNJTwN3AztI6pX2zYqIdan9V4CPpPIXI+LptF0H1KZjDgZmpLauA/o1F3RETImIQkQUum/XZwv/BGZmHWvSpElcffXV732ePHkyl1xyCYcffjgjRoxgyJAh3HXXXR847ne/+x3HH3/8e5/PPfdcpk6dCkBdXR2f/exnGTlyJEcddRQrV64sS6x5SEzNuQr4WUQMAf4P0BMgIv4EvCzpMLIR0D2pfjeyEdaw9NM/IhrHoOtK2t3A+9OXzZV3A94saWdYRHyiIzpoZtaZxo4dy/Tp09/7PH36dE4//XTuvPNO5s6dy+zZs/n2t79NRLSpvfXr1zNhwgRmzpxJXV0dZ555JhdeeGFZYs3DPaYHgTsl/TgiXpO0E9CH7L4PwOlN6v+SbErvxojYkMp+C0wArgCQNKxkNNRmEfGWpBcljYmIGcoeJTkgIuZvRr/MzHJj+PDhvPLKK6xYsYJVq1bx4Q9/mN12241vfvObPPzww3Tr1o3ly5fz8ssvs9tuu22yvcWLF7Nw4UKOPPJIADZs2EC/fs1OMLVbxRNTRCyS9APgIUkbgHnAZLLptDfIElfpt7LuJpvCu6Gk7DzgakkLyPr0MHD2ZoY0DrhG0kVADdk9LicmM6t6Y8aMYebMmfz5z39m7Nix3HzzzaxatYq6ujpqamqora39wPeNttlmGzZu3Pje58b9EcGgQYOYM2dO2eOseGICiIhpwLQmxR+c7MwMJXvo4b0n5dL9o7HNtDu5yefBJR8Hl5T/sGT7ReDotsZuZlYtxo4dy1lnncWrr77KQw89xPTp09l1112pqalh9uzZvPTSB1eh2HvvvXnmmWdYt24d77zzDg888ACHHnooH//4x1m1ahVz5sxh1KhRrF+/niVLljBo0KBmztw+uUhMbSVpEnAO7z+ZlwtD+veh6NfVmFk7VOIVV4MGDWL16tX079+ffv36MW7cOE444QSGDBlCoVBg4MCBHzhmzz335OSTT2bw4MHss88+DB8+HIBtt92WmTNnct5559HQ0MC7777L+eefX5bEpLbe6LKWFQqF8EKBZtaaZ599lk98ous+S9Vc/yXVRUShad28PpVnZmZdlBOTmZnlihOTmVkn6aq3TtrbbycmM7NO0LNnT1577bUul5wa12Pq2bNnm4+pqqfyzMyq1R577MGyZctYtWpVpUPpdI0r2LaVE5OZWSeoqalp8wquXZ2n8szMLFecmMzMLFecmMzMLFd8j6kM6pc3lH01SsuvSrxKxqwr8YjJzMxypcslJkn/LWnHSsdhZmbN61JTeWnhv+MjYuMmK5uZWUVs9SMmSbWSFkv6T2AhsEHSLmnfaZIWSJov6cZU1lfS7ZKeSj+HVDJ+M7OupquMmAYAp0fE7yUtBZA0CLgIODgiXk1LugP8O/CTiHhU0l7AfcAH3lUvaTwwHqD7Dn07oQtmZl1DV0lML0XE75uUHQbMSKvfEhGvp/IjgP2zWT8AdpDUKyLeLj04IqYAUwB69BvQtV5+ZWbWgbpKYvpLO+p2Az4VEWs3WdPMzMpuq7/H1IoHgTGSdgYomcr7LTChsZKkYRWIzcysy+qyiSkiFgE/AB6SNB/4cdp1HlBID0U8A5xdqRjNzLoidbW1QTpCoVCIYrFY6TDMzKqKpLqIKDQt77IjJjMzyycnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzyxUnJjMzy5Wu8nbxDlW/vIHaSbMqHYblxNJLj6t0CGZVzSMmMzPLFScmMzPLlapITJJ2lzSz0nGYmVnHq4rEFBErImJ0Jc4tyffhzMw6Ue4Sk6RLJX2j5PNkSRMlLUyfz5B0h6R7JT0v6fKSul+QNEfSXEkzJPVK5cdKek5SnaQrJf0mlR+U6s+T9Likj5ec425JDwIPdOofwMysi8tdYgJuA04u+Xwy8ESTOsOAscAQYKykPSXtAlwEHBERI4Ai8C1JPYHrgGMiYiTQt6Sd54BPR8Rw4HvAv5bsGwGMjojPNhekpPGSipKKG9Y0bG5fzcysidxNU0XEPEm7StqdLIm8AfypSbUHIqIBIC1/vjewI7A/8JgkgG2BOcBA4I8R8WI69hZgfNruA0yTNAAIoKbkHPdHxOutxDkFmALQo98ALwNsZlYmuUtMyQxgNLAb2QiqqXUl2xvI+iGyZPL3pRUlDWvlPN8HZkfElyTVAr8r2feXdkdtZmZbLI9TeZAlo1PIktOMNh7ze+AQSR8FkLS9pI8Bi4F9U+KBbAqwUR9gedo+Y8tCNjOzcshlYoqIRUBvYHlErGzjMavIksstkhaQpvEi4h3gH4B7JdUBq4HGm0KXA/8maR75HT2amXUpitj6b49I6hURbyu7+XQ18HxE/KRc7RcKhSgWi+VqzsysS5BUFxGFpuW5HDF1gLMkPQ0sIpu+u67C8ZiZWQu6xPRVGh2VbYRkZmYdp6uMmMzMrEo4MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa44MZmZWa50iS/YdrT65Q3UTppV6TDM2mTppcdVOgSzVnnEZGZmuVL1iUnS+ZK2K1Nbn2tcdt3MzCqj6hMTcD7QrsQkqXsHxWJmZluoqhJTWvxvlqT5khZK+mdgd2C2pNmpzjWSipIWSbq45Nilki6TNBcYI+mjkv4ntTVX0n6pai9JMyU9J+nmtFSGmZl1kmp7+OFoYEVEHAcgqQ/wNeDzEfFqqnNhRLyeRkUPSDogIhakfa9FxIh07BPApRFxp6SeZEl6T2A4MAhYATwGHAI82jQQSeOB8QDdd+jbMb01M+uCqmrEBNQDR6aRz6cjoqGZOienUdE8sgSzf8m+2wAk9Qb6R8SdABGxNiLWpDpPRsSyiNgIPA3UNhdIREyJiEJEFLpv16csnTMzsyobMUXEEkkjgGOBSyQ9ULpf0j7ARODAiHhD0lSgZ0mVv7ThNOtKtjdQZX8jM7NqV1UjJkm7A2si4ibgCmAEsBronarsQJZ8GiR9BDimuXYiYjWwTNJJqd0e5Xqyz8zMtky1jQaGAFdI2gisB84BRgH
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"chinese_ingredient_df = create_ingredient_df(chinese_df)\r\n",
"chinese_ingredient_df.head(10).plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa6c160d160>"
]
},
"metadata": {},
"execution_count": 17
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 422.776562 248.518125\" width=\"422.776562pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 422.776562 248.518125 \nL 422.776562 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 80.776563 224.64 \nL 415.576563 224.64 \nL 415.576563 7.2 \nL 80.776563 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 219.204 \nL 399.633705 219.204 \nL 399.633705 208.332 \nL 80.776563 208.332 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 197.46 \nL 349.287841 197.46 \nL 349.287841 186.588 \nL 80.776563 186.588 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 175.716 \nL 343.988276 175.716 \nL 343.988276 164.844 \nL 80.776563 164.844 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 153.972 \nL 333.389147 153.972 \nL 333.389147 143.1 \nL 80.776563 143.1 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 132.228 \nL 331.622625 132.228 \nL 331.622625 121.356 \nL 80.776563 121.356 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 110.484 \nL 324.556539 110.484 \nL 324.556539 99.612 \nL 80.776563 99.612 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 88.74 \nL 301.591758 88.74 \nL 301.591758 77.868 \nL 80.776563 77.868 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 66.996 \nL 300.708498 66.996 \nL 300.708498 56.124 \nL 80.776563 56.124 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 45.252 \nL 244.179807 45.252 \nL 244.179807 34.38 \nL 80.776563 34.38 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#p326df5e6c6)\" d=\"M 80.776563 23.508 \nL 232.697417 23.508 \nL 232.697417 12.636 \nL 80.776563 12.636 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m4031011a7f\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"80.776563\" xlink:href=\"#m4031011a7f\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(77.595
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaYAAAD4CAYAAACngkIwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfTElEQVR4nO3de5RXdb3/8ecLHEEdQkU0fngZNBRFBGH0eMtMTSs17QjiL0tMj/ws01ylHUrr4MnWsU4Xu1iGHsPMo1zSBcVKLe+aF2ZAbhJqggWSgOUkGoTj+/fH/ox8HecK35m9v8zrsdZ3zd6f/dmf/d6f9WXefPbesz+KCMzMzIqiV94BmJmZlXJiMjOzQnFiMjOzQnFiMjOzQnFiMjOzQtku7wC2BbvttlvU1NTkHYaZWUWpr69fFxEDm5c7MZVBTU0NdXV1eYdhZlZRJL3YUrkv5ZmZWaE4MZmZWaE4MZmZWaH4HpOZWTfYtGkTK1euZMOGDXmH0u369u3LnnvuSVVVVYfqOzGVwaJVDdRMmpN3GLaNWnHtKXmHYGWwcuVK+vXrR01NDZLyDqfbRASvvPIKK1euZMiQIR3ax5fyzMy6wYYNGxgwYECPSkoAkhgwYECnRopdnpgkXSppqaTbuvpY5SBphaTd8o7DzLY9PS0pNenseXfHpbzPAidGxMpuONbbJG0XEW925zHNzGzrdWliknQDsC/wG0l3APsBBwNVwOSImCXpPOBjwI5p+10R8aW0//qIqE7LY4FTI+I8SfsBtwE7AbOAyyKiWtJxwNeBvwHDgP0lfRK4FNgeeBL4bEQ0SjoJuBroA/wR+HRErC+JfQfgTuDOiLixyzrJzHqkct+XLve9yOrqatavX99+xS7QpZfyIuIi4CXgg2RJ5P6IODyt/7eknVLVUcB4YAQwXtJe7TT9feD7ETECaD4SGw18PiL2l3RgavfoiBgFNALnpEt1V5GN5EYDdcAXStqoBn4F3N5aUpI0UVKdpLrGNxra7wwzM+uQ7nz44SRgkqSngQeBvsDeadt9EdEQERuAZ4B92mnrSGBGWv7fZtueiojlafkEYAwwNx33BLIR3BHAQcBjqXxCs2POAn4WET9vLYCImBIRtRFR23vH/u2Ea2aWr0mTJnH99de/vT558mSuueYaTjjhBEaPHs2IESOYNWvWu/Z78MEHOfXUU99e/9znPsfUqVMBqK+v5wMf+ABjxozh5JNPZvXq1WWJtTsTk4AzI2JU+uwdEUvTto0l9RrZfImxdN73vh08zuvNjnlLyTEPiIjJqfy3JeUHRcQFJfs9BnxYPfVOpZltc8aPH8/06dPfXp8+fToTJkzgrrvuYt68eTzwwAN88YtfJCLaaGWzTZs2cckllzBz5kzq6+s5//zzufLKK8sSa3cmpnuAS5p+2Us6tAP7vCzpQEm9gI+XlD8BnJmWz25j//uAsZJ2T8fcVdI+af+jJb0vle8kaf+S/b5Gdp/q+uYNmplVokMPPZQ1a9bw0ksvsWDBAnbZZRfe+9738pWvfIVDDjmEE088kVWrVvHyyy93qL1ly5axePFiPvShDzFq1CiuueYaVq4szzNu3fkHtl8HrgMWpkSzHDi17V2YBPwaWEt2H6g6lV8G/ELSlcDdQIs3eSLiGUlXAfemY24CLo6IJ9JDF7dL6pOqXwU8W7L754GbJX2r6WEMM7NKNm7cOGbOnMlf/vIXxo8fz2233cbatWupr6+nqqqKmpqad/290Xbbbcdbb7319nrT9ohg+PDhPP7442WPs8sTU0TUlKz+vxa2TwWmlqyfWrI8E5jZQrOrgCMiIiSdDRyQ6j9Idv+qtP1pwLQWjns/cFg78X66hWObmVWk8ePHc+GFF7Ju3Toeeughpk+fzu67705VVRUPPPAAL7747lko9tlnH5555hk2btzIP/7xD+677z6OOeYYDjjgANauXcvjjz/OkUceyaZNm3j22WcZPnz4VsdZqa8kGgP8KF0WfBU4P89gRgzuT51fG2NmnZDHq6aGDx/Oa6+9xuDBgxk0aBDnnHMOp512GiNGjKC2tpZhw4a9a5+99tqLs846i4MPPpghQ4Zw6KHZXZjtt9+emTNncumll9LQ0MCbb77JZZddVpbEpI7e6LLW1dbWhicKNLO2LF26lAMPPDDvMHLT0vlLqo+I2uZ1/a48MzMrFCcmMzMrFCcmM7Nu0lNvnXT2vJ2YzMy6Qd++fXnllVd6XHJqmo+pb9+OviOhcp/KMzOrKHvuuScrV65k7dq1eYfS7ZpmsO0oJyYzs25QVVXV4RlcezpfyjMzs0JxYjIzs0JxYjIzs0LxPaYyWLSqoeyzUZptiTxec2NWbh4xmZlZoTgxmZlZoTgxtUMZ95OZWTcp9C9cSTWS/iDpNklLJc2UtKOkMZIeklQv6R5Jg1L9ByV9X9LTkhZLOjyVT5Z0q6THJT0n6cKSY1whaa6khZKuLjnuMkk/BxYDe+Vx/mZmPVElPPxwAHBBRDwm6WbgYrJp1k+PiLWSxgPfYPOcTDtGxChJxwI3Awen8kOAI4CdgPmS5qRtQ4HDAQGz035/SuUTIuKJloKSNBGYCND7PQPLfc5mZj1WJSSmP0fEY2n5F8BXyBLKb7N5AukNrC6pfztARDws6T2Sdk7lsyLiH8A/JD1AloyOAU4C5qc61WQJ6U/Ai60lpdT+FGAKQJ9BQ3vWy6/MzLpQJSSm5r/0XwOWRMSRHawfbZQL+K+I+GnpBkk1wOudjtTMzLZaoe8xJXtLakpCnwCeAAY2lUmqklQ6l+/4VH4M0BARDan8dEl9JQ0AjgPmAvcA50uqTvsMlrR7l5+RmZm1qhJGTMuAi9P9pWeAH5IllB9I6k92DtcBS1L9DZLmA1Vsvu8EsBB4ANgN+HpEvAS8JOlA4PF0WXA98EmgscvPyszMWqQizw2SLqn9OiIObqdqU/0Hgcsjoq5Z+WRgfUR8u8whAlBbWxt1dXXtVzQzs7dJqo+I2ubllXApz8zMepBCX8qLiBVsfty7I/WPa6V8cnkiMjOzruYRk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFYoTk5mZFUqh/8C2Uixa1UDNpDl5h2HWI6y49pS8Q7Au5hGTmZkVihOTmZkVSq6JSdIZkg7qQL2pksa2UH6cpF+XMZ5aST9Iy+dJ+lG52jYzs47Je8R0BtBuYuouEVEXEZfmHYeZWU/WZmKSdK2ki0vWJ0u6XNIVkuZKWijp6pLtX5W0TNKjkm6XdHkq30/S3ZLqJT0iaZiko4CPAf8t6elU58LU7gJJv5S0Y0k4J0qqk/SspFNbiHUnSTdLekrSfEmnt3FefSX9TNKiVPeDqbysIzAzM+u89kZM04CzStbPAtYCQ4HDgVHAGEnHSjoMOBMYCXwEKJ38aQpwSUSMAS4HfhwRvwdmA1dExKiI+CNwZ0QcFhEjgaXABSVt1KRjngLcIKlvs1ivBO6PiMOBD5IlvJ1aOa+LgYiIEcD/BW5pob02SZqYEmVd4xsN7e9gZmYd0ubj4hExX9Lukv4PMBD4GzACOAmYn6pVkyWqfsCsiNhANr35rwAkVQNHATPS9OUAfVo55MGSrgF2Tu3eU7JtekS8BTwn6QVgWLN9TwI+1jRKA/oCe5MluOaOIZuinYj4g6QXgf3b6ovmImIKWcKlz6ChxZ0G2MyswnTk75hmAGOB95KNoPYB/isiflpaSdJlrezfC3g1IkZ14FhTgTMiYoGk84DjSrY1/+XffF3AmRGxrAPHMTOzgurIww/TgLPJktMMslHM+WkkhKTBknYHHgNOS/dvqoFTASLi78BySeNSfUkamdp+jWyk1aQfsFpSFXBOszjGSeolaT9gX6B5AroHuERpWCbp0DbO6ZGm9iXtTzayckIzMyuAdhNTRCwhSxirImJ1RNwL/C/wuKRFwEygX0TMJbtntBD4DbAIaLr5cg5wgaQFwBKg6cGEO4Ar0gMI+wFfBZ4kS3J/aBbKn4CnUtsXpUuGpb4OVAELJS1J6635MdArxT8NOC8
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"indian_ingredient_df = create_ingredient_df(indian_df)\r\n",
"indian_ingredient_df.head(10).plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa6b0e92e80>"
]
},
"metadata": {},
"execution_count": 18
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 466.64375 248.518125\" width=\"466.64375pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 466.64375 248.518125 \nL 466.64375 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 124.64375 224.64 \nL 459.44375 224.64 \nL 459.44375 7.2 \nL 124.64375 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 219.204 \nL 443.500893 219.204 \nL 443.500893 208.332 \nL 124.64375 208.332 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 197.46 \nL 407.697019 197.46 \nL 407.697019 186.588 \nL 124.64375 186.588 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 175.716 \nL 403.64375 175.716 \nL 403.64375 164.844 \nL 124.64375 164.844 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_6\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 153.972 \nL 390.808399 153.972 \nL 390.808399 143.1 \nL 124.64375 143.1 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_7\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 132.228 \nL 358.382249 132.228 \nL 358.382249 121.356 \nL 124.64375 121.356 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_8\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 110.484 \nL 318.525106 110.484 \nL 318.525106 99.612 \nL 124.64375 99.612 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_9\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 88.74 \nL 310.418568 88.74 \nL 310.418568 77.868 \nL 124.64375 77.868 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_10\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 66.996 \nL 290.152225 66.996 \nL 290.152225 56.124 \nL 124.64375 56.124 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_11\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 45.252 \nL 284.072321 45.252 \nL 284.072321 34.38 \nL 124.64375 34.38 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"patch_12\">\n <path clip-path=\"url(#p37a9dfbd7c)\" d=\"M 124.64375 23.508 \nL 277.316873 23.508 \nL 277.316873 12.636 \nL 124.64375 12.636 \nz\n\" style=\"fill:#1f77b4;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m517d68fb4a\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"124.64375\" xlink:href=\"#m517d68fb4a\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(121.4625 239.2
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdIAAAD4CAYAAABYIGfSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5xXVb3/8dcbHCEFp1SyCbQho0hAQAYTNPV4KfOSdhTpRF7SXxz9HbFOWnHSftFJfz/NfuZdo1JILQXKS3KCTPBOyowIwyWsRI8gKd5GkSCEz/ljr5Ev48www56ZLzPf9/PxmMfs79prr/XZCx58WGvv796KCMzMzGz7dCt2AGZmZp2ZE6mZmVkOTqRmZmY5OJGamZnl4ERqZmaWw07FDsA61p577hmVlZXFDsPMrFOpqal5JSL6NLbPibTEVFZWUl1dXewwzMw6FUnPN7XPS7tmZmY5OJGamZnl4ERqZmaWg6+RmpnZe2zcuJGVK1eyfv36YofSoXr27Em/fv0oKytr8TFOpCWmdlUdlRNnFjsM68Keu+y4YodgbWDlypX07t2byspKJBU7nA4REbz66qusXLmS/v37t/g4L+2amdl7rF+/nj322KNkkiiAJPbYY49Wz8KdSDuYpJMk7deCelMkndJI+eGS7muf6MzMtiilJFpve87ZibTjnQRsM5GamVnn4GukDUi6DHghIq5PnycBawEBpwI9gLsi4ntp/3eBLwNrgBeAmoj4kaR9geuBPsA64KvA7sDngcMkXQycDBwBjAd2Bv4CnBYR61I4R0maCOwGfCMitpqJStoVuBYYDJQBkyLinjYfFDMreW19b0VbX0vv1asXa9eubdM2W8oz0ve6kyxh1juVLEkOAA4EhgEjJB0qaSRZMhwKfA6oKjhuMjAhIkYAFwI3RMTjwL3ANyNiWET8FfhNRIyMiKHAMuDsgjYqU5/HATdJ6tkg1ouAORFxIPBPwBUpuW5F0nhJ1ZKqN62r244hMTOzpjiRNhARC4APSvqwpKHA68AQ4DPAAuApYCBZYj0YuCci1kfEW8BvAST1AkYD0yU9DfwEqGiiy8GSHpFUC4wDBhXsmxYRmyPiz8Czqd9CnwEmpj4eBHoC+zRyTpMjoioiqrrvUt7KETEz63gTJ07k+uuvf/fzpEmTuOSSSzjyyCM54IADGDJkCPfc894FuAcffJDjjz/+3c/nnXceU6ZMAaCmpobDDjuMESNG8NnPfpbVq1e3SaxOpI2bDpwCjCWboQr4f2kWOSwiPhYRP2/m+G7AGwX1h0XEJ5uoOwU4LyKGAN8nS4b1okHdhp8FnFzQxz4Rsaxlp2hmtuMaO3Ys06ZNe/fztGnTOOOMM7jrrrt46qmnmDt3LhdccAERDf9ZbNzGjRuZMGECM2bMoKamhrPOOouLLrqoTWJ1Im3cncAXyZLpdGA2cFaaaSKpr6QPAo8BJ0jqmfYdDxARbwIrJI1J9ZVmtwBvAb0L+uoNrJZURjYjLTRGUrd0vfWjwPIG+2cDE5RuM5M0vA3O3cys6IYPH87LL7/Miy++yMKFC/nABz7Ahz70Ib7zne+w//77c9RRR7Fq1SpeeumlFrW3fPlyFi9ezNFHH82wYcO45JJLWLlyZZvE6puNGhERSyT1BlZFxGqyRPdJYF7KWWuBL0fEfEn3AouAl4BaoP4i5DjgxnRTURlwB7Aw/f6ppPPJEvV3gSfIrsM+wdZJ9r+BJ8luNjonItY3uDX7B8BVwCJJ3YAVpGRuZtbZjRkzhhkzZvC3v/2NsWPHcvvtt7NmzRpqamooKyujsrLyPd/53Gmnndi8efO7n+v3RwSDBg1i3rx5bR6nE2kT0lJr4eergasbqfqjiJgkaRfgYaAm1V8BHNNIu4+x9ddfbkw/Deud2URcD5JdDyUi/g786zZPxsysExo7dixf/epXeeWVV3jooYeYNm0aH/zgBykrK2Pu3Lk8//x732z2kY98hKVLl7Jhwwb+/ve/88ADD3DIIYfwiU98gjVr1jBv3jxGjRrFxo0beeaZZxg0aFAjPbeOE2l+k9MDFnoCUyPiqWIH1Jwhfcup9iPczKyVivHox0GDBvHWW2/Rt29fKioqGDduHCeccAJDhgyhqqqKgQMb3n8Je++9N6eeeiqDBw+mf//+DB+eXfHaeeedmTFjBueffz51dXW88847fP3rX2+TRKqWXqi1rqGqqir8Ym8z25Zly5bxyU82dY9k19bYuUuqiYiqxur7ZiMzM7McnEjNzMxycCI1M7NGleKlv+05ZydSMzN7j549e/Lqq6+WVDKtfx9pz54Nn8baPN+1a2Zm79GvXz9WrlzJmjVrih1Kh+rZsyf9+vVr1TFOpGZm9h5lZWX079+/2GF0Cl7aNTMzy8GJ1MzMLAcnUjMzsxx8jbTE1K6qa/M33Zu1VDEeM2fW3jwjNTMzy8GJ1MzMLIeiJFJJJ6U3prT2uLXtEU9XIWmKpFOKHYeZWSlpVSJVpi2S70ls/U5OMzOzTmmbSVFSpaTlkn4BLAZOk1QrabGkywvq3SipWtISSd8vKL9M0lJJiyT9SNJo4PPAFZKelrRv+pklqUbSI5IGpmP7S5qX+rtkG3FWSHo4tblY0qdT+WdSG09Jmi6pV2NxpbITJD0haYGkP0jaK5VPkjQ1xfa8pH+W9MMU1yxJZaneCEkPpfOYLamimXjPL+j/jlS2q6SbJT2ZYjgxlXeXdIWk+an+v6ZySbou/fn8AfhgE32NT3821ZvW1TX7521mZq3T0rt2BwBnAP8N/BEYAbwO/F7SSRFxN3BRRLwmqTvwgKT9gVXAF4CBERGS3h8Rb0i6F7gvImYASHoAOCci/izpU8ANwBHA1cCNEfELSf+2jRi/BMyOiEtTDLtI2hO4GDgqIt6W9G3gG5KubxhXauNR4KBU9r+AbwEXpH37Av9ENpOeB5wcEd+SdBdwnKSZwLXAiRGxRtJY4FLgrCbinQj0j4gNBf1fBMyJiLNS2ZMpQY4D6iJipKQewGOSfg8MBz6RYtoLWArc3LCjiJgMTAboUTGgdB6caWbWAVqaSJ+PiD+mGdKDEbEGQNLtwKHA3cCpksanNivI/nFfCqwHfi7pPuC+hg2nGeJoYLqk+uIe6ffBwMlp+1bgcpo2H7g5zQ7vjoinJR2W4ngstb0zWRKsayKufsCdaSa5M7CioP3fRcRGSbVAd2BWKq8FKskS2mDg/tRXd2B1M/EuAm6XdDfZ+AF8Bvi8pAvT557APql8f225/llO9p+bQ4FfRcQm4EVJc5rpz8zM2kFLE+nbze2U1B+4EBgZEa9LmgL0jIh3JB0IHAmcApxHNtMs1A14IyKGNdF8i2ZQEfGwpEOB44Apkq4kmzXfHxH/0kjMjcV1LXBlRNwr6XBgUsEhG1I/myVtjC2vRNhMNo4ClkTEqJbEm+I8FDgBuEjSkNTGyRGxvEGsAiZExOwG5ce2sC8zM2snrb1x6EngMEl7puXTfwEeAnYjS7Z16bri5+Dd2WZ5RPwX8O/A0NTOW0BvgIh4E1ghaUw6RpLq6z0GfDFtj2suMEkfAV6KiJ8CPwMOIFuGPljSx1KdXSV9vJm4ysmWoyFbym6N5UAfSaNSX2WSBjURazdg74iYC3w79dsLmA1MSIkTScPTIbOBcwuuxX5c0q7Aw8DYdA21gmzp2czMOlCrnmwUEaslTQTmks2eZkbEPQCSFgB/Al4gS4CQJct7JPVM9b+Ryu8AfirpfLIZ4TjgRkkXA2Vp/0Lga8Av07XNe7YR3uHANyVtBNYCp6drlWcCv0rXFiG7ZvpWE3FNIltifh2YA7T41QcR8Y+09HqNpHKysb0KWNJI9e7AbamegGvSteMfpGMWpWS7Ajie7D8GlcBTKcmuIbvz+S6ymfRSsuvX87YV55C+5VT76TJmZm1GpfTSVoOqqqqorq4udhhmZp2KpJqIqGpsn59sZGZmlkOne2h9uinn1gbFGyLiU8WIZ1vSV20OblB8dUTcUox4zMysbXW6RBoRtUBTd/jucCJiW99/NTOzTsxLu2ZmZjk4kZq
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"korean_ingredient_df = create_ingredient_df(korean_df)\r\n",
"korean_ingredient_df.head(10).plot.barh()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# add categorical labels to food items - calculated columns to improve accuracy"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# drop rice, garlic and ginger"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" almond angelica anise anise_seed apple apple_brandy apricot \\\n",
"0 0 0 0 0 0 0 0 \n",
"1 1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 0 \n",
"\n",
" armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"\n",
"[5 rows x 380 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>artichoke</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 21
}
],
"source": [
"# set x and y features\r\n",
"# dropping common ingredients to improve accuracy\r\n",
"#feature_df= df.drop(['cuisine','Unnamed: 0'], axis=1)\r\n",
"feature_df= df.drop(['cuisine','Unnamed: 0','rice','garlic','ginger'], axis=1)\r\n",
"labels_df = df.cuisine #.unique()\r\n",
"feature_df.head()\r\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# balance data with SMOTE oversamplling to the highest class. Read more here: https://imbalanced-learn.org/dev/references/generated/imblearn.over_sampling.SMOTE.html\r\n",
"oversample = SMOTE()\r\n",
"transformed_feature_df, transformed_label_df = oversample.fit_resample(feature_df, labels_df)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"new label count: indian 799\nkorean 799\nchinese 799\njapanese 799\nthai 799\nName: cuisine, dtype: int64\nold label count: korean 799\nindian 598\nchinese 442\njapanese 320\nthai 289\nName: cuisine, dtype: int64\n"
]
}
],
"source": [
"print(f'new label count: {transformed_label_df.value_counts()}')\r\n",
"print(f'old label count: {df.cuisine.value_counts()}')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" almond angelica anise anise_seed apple apple_brandy apricot \\\n",
"0 0 0 0 0 0 0 0 \n",
"1 1 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 0 \n",
"\n",
" armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"\n",
"[5 rows x 380 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>artichoke</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 24
}
],
"source": [
"transformed_feature_df.head()\r\n",
"#y.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" cuisine almond angelica anise anise_seed apple apple_brandy \\\n",
"0 indian 0 0 0 0 0 0 \n",
"1 indian 1 0 0 0 0 0 \n",
"2 indian 0 0 0 0 0 0 \n",
"3 indian 0 0 0 0 0 0 \n",
"4 indian 0 0 0 0 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"3990 thai 0 0 0 0 0 0 \n",
"3991 thai 0 0 0 0 0 0 \n",
"3992 thai 0 0 0 0 0 0 \n",
"3993 thai 0 0 0 0 0 0 \n",
"3994 thai 0 0 0 0 0 0 \n",
"\n",
" apricot armagnac artemisia ... whiskey white_bread white_wine \\\n",
"0 0 0 0 ... 0 0 0 \n",
"1 0 0 0 ... 0 0 0 \n",
"2 0 0 0 ... 0 0 0 \n",
"3 0 0 0 ... 0 0 0 \n",
"4 0 0 0 ... 0 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"3990 0 0 0 ... 0 0 0 \n",
"3991 0 0 0 ... 0 0 0 \n",
"3992 0 0 0 ... 0 0 0 \n",
"3993 0 0 0 ... 0 0 0 \n",
"3994 0 0 0 ... 0 0 0 \n",
"\n",
" whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
"0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 \n",
"2 0 0 0 0 0 0 0 \n",
"3 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 1 0 \n",
"... ... ... ... ... ... ... ... \n",
"3990 0 0 0 0 0 0 0 \n",
"3991 0 0 0 0 0 0 0 \n",
"3992 0 0 0 0 0 0 0 \n",
"3993 0 0 0 0 0 0 0 \n",
"3994 0 0 0 0 0 0 0 \n",
"\n",
"[3995 rows x 381 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cuisine</th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>indian</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3990</th>\n <td>thai</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3991</th>\n <td>thai</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\
},
"metadata": {},
"execution_count": 25
}
],
"source": [
"# export transformed data to new csv for classification\r\n",
"transformed_df = pd.concat([transformed_label_df,transformed_feature_df],axis=1, join='outer')\r\n",
"transformed_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build classification model"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\r\n",
"from sklearn.model_selection import train_test_split\r\n",
"from sklearn.metrics import precision_recall_curve\r\n",
"from sklearn.linear_model import LogisticRegression\r\n",
"from sklearn.model_selection import cross_val_score\r\n",
"from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report\r\n",
"from sklearn.svm import SVC\r\n",
"from sklearn.gaussian_process import GaussianProcessClassifier"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(transformed_feature_df, transformed_label_df, test_size=0.3)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy is 0.8031693077564637\n"
]
}
],
"source": [
"lr = LogisticRegression(multi_class='ovr',solver='lbfgs')\r\n",
"model = lr.fit(X_train, y_train)\r\n",
"\r\n",
"accuracy = model.score(X_test, y_test)\r\n",
"print (\"Accuracy is {}\".format(accuracy))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"ingredients: Index(['black_pepper', 'cayenne', 'clam', 'roasted_sesame_seed', 'scallion',\n 'sesame_oil', 'soy_sauce'],\n dtype='object')\ncusine: korean\n"
]
}
],
"source": [
"# test an item\r\n",
"print(f'ingredients: {X_test.iloc[20][X_test.iloc[20]!=0].keys()}')\r\n",
"print(f'cusine: {y_test.iloc[20]}')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0\n",
"korean 0.956777\n",
"chinese 0.037421\n",
"thai 0.004163\n",
"indian 0.000874\n",
"japanese 0.000765"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>korean</th>\n <td>0.956777</td>\n </tr>\n <tr>\n <th>chinese</th>\n <td>0.037421</td>\n </tr>\n <tr>\n <th>thai</th>\n <td>0.004163</td>\n </tr>\n <tr>\n <th>indian</th>\n <td>0.000874</td>\n </tr>\n <tr>\n <th>japanese</th>\n <td>0.000765</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 30
}
],
"source": [
"#rehsape to 2d array and transpose\r\n",
"test= X_test.iloc[20].values.reshape(-1, 1).T\r\n",
"# predict with score\r\n",
"proba = model.predict_proba(test)\r\n",
"classes = model.classes_\r\n",
"# create df with classes and scores\r\n",
"resultdf = pd.DataFrame(data=proba, columns=classes)\r\n",
"\r\n",
"# create df to show results\r\n",
"topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\r\n",
"topPrediction.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n\n chinese 0.72 0.67 0.70 233\n indian 0.92 0.91 0.91 242\n japanese 0.79 0.78 0.78 264\n korean 0.81 0.79 0.80 237\n thai 0.77 0.87 0.82 223\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n"
]
}
],
"source": [
"print(classification_report(y_test,y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Try different classifiers"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"\r\n",
"C = 10\r\n",
"# Create different classifiers.\r\n",
"classifiers = {\r\n",
" 'L1 logistic': LogisticRegression(C=C, penalty='l1',\r\n",
" solver='saga',\r\n",
" multi_class='multinomial',\r\n",
" max_iter=10000),\r\n",
" 'L2 logistic (Multinomial)': LogisticRegression(C=C, penalty='l2',\r\n",
" solver='saga',\r\n",
" multi_class='multinomial',\r\n",
" max_iter=10000),\r\n",
" 'L2 logistic (OvR)': LogisticRegression(C=C, penalty='l2',\r\n",
" solver='saga',\r\n",
" multi_class='ovr',\r\n",
" max_iter=10000),\r\n",
" 'Linear SVC': SVC(kernel='linear', C=C, probability=True,\r\n",
" random_state=0)\r\n",
"}\r\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy (train) for L1 logistic: 78.0% \n",
"Accuracy (train) for L2 logistic (Multinomial): 78.7% \n",
"Accuracy (train) for L2 logistic (OvR): 79.6% \n",
"Accuracy (train) for Linear SVC: 77.4% \n"
]
}
],
"source": [
"n_classifiers = len(classifiers)\r\n",
"\r\n",
"for index, (name, classifier) in enumerate(classifiers.items()):\r\n",
" classifier.fit(X_train, y_train)\r\n",
"\r\n",
" y_pred = classifier.predict(X_test)\r\n",
" accuracy = accuracy_score(y_test, y_pred)\r\n",
" print(\"Accuracy (train) for %s: %0.1f%% \" % (name, accuracy * 100))\r\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "dd61f40108e2a19f4ef0d3ebbc6b6eea57ab3c4bc13b15fe6f390d3d86442534"
},
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}