{ "cells": [ { "source": [ "# Delicious Asian and Indian Cuisines \n" ], "cell_type": "markdown", "metadata": {} }, { "source": [ "Install imbalanced-learn (imported as `imblearn`), which provides SMOTE. It is a scikit-learn-compatible package that helps handle imbalanced data when performing classification. (https://imbalanced-learn.org/stable/)" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: imblearn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.0)\n", "Requirement already satisfied: imbalanced-learn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imblearn) (0.8.0)\n", "Requirement already satisfied: numpy>=1.13.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.19.2)\n", "Requirement already satisfied: scipy>=0.19.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (1.4.1)\n", "Requirement already satisfied: scikit-learn>=0.24 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.24.2)\n", "Requirement already satisfied: joblib>=0.11 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from imbalanced-learn->imblearn) (0.16.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)\n", "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } 
], "source": [ "%pip install imbalanced-learn" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import numpy as np\n", "from imblearn.over_sampling import SMOTE" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../../data/cuisines.csv')" ] }, { "source": [ "This dataset has 385 columns: a saved index column (`Unnamed: 0`), the `cuisine` label, and one indicator column per ingredient showing whether a recipe uses that ingredient." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n", "0 65 indian 0 0 0 0 0 \n", "1 66 indian 1 0 0 0 0 \n", "2 67 indian 0 0 0 0 0 \n", "3 68 indian 0 0 0 0 0 \n", "4 69 indian 0 0 0 0 0 \n", "\n", " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n", "0 0 0 0 ... 0 0 0 \n", "1 0 0 0 ... 0 0 0 \n", "2 0 0 0 ... 0 0 0 \n", "3 0 0 0 ... 0 0 0 \n", "4 0 0 0 ... 0 0 0 \n", "\n", " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n", "0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 1 0 \n", "\n", "[5 rows x 385 columns]" ], "text/html": "
\n | Unnamed: 0 | \ncuisine | \nalmond | \nangelica | \nanise | \nanise_seed | \napple | \napple_brandy | \napricot | \narmagnac | \n... | \nwhiskey | \nwhite_bread | \nwhite_wine | \nwhole_grain_wheat_flour | \nwine | \nwood | \nyam | \nyeast | \nyogurt | \nzucchini | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n65 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \n66 | \nindian | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \n67 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3 | \n68 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \n69 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n
5 rows × 385 columns
\n\n | almond | \nangelica | \nanise | \nanise_seed | \napple | \napple_brandy | \napricot | \narmagnac | \nartemisia | \nartichoke | \n... | \nwhiskey | \nwhite_bread | \nwhite_wine | \nwhole_grain_wheat_flour | \nwine | \nwood | \nyam | \nyeast | \nyogurt | \nzucchini | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n
5 rows × 380 columns
\n\n | almond | \nangelica | \nanise | \nanise_seed | \napple | \napple_brandy | \napricot | \narmagnac | \nartemisia | \nartichoke | \n... | \nwhiskey | \nwhite_bread | \nwhite_wine | \nwhole_grain_wheat_flour | \nwine | \nwood | \nyam | \nyeast | \nyogurt | \nzucchini | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n
5 rows × 380 columns
\n\n | cuisine | \nalmond | \nangelica | \nanise | \nanise_seed | \napple | \napple_brandy | \napricot | \narmagnac | \nartemisia | \n... | \nwhiskey | \nwhite_bread | \nwhite_wine | \nwhole_grain_wheat_flour | \nwine | \nwood | \nyam | \nyeast | \nyogurt | \nzucchini | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \nindian | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \nindian | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
3990 | \nthai | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3991 | \nthai | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3992 | \nthai | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3993 | \nthai | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3994 | \nthai | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
3995 rows × 381 columns
\n