From a5f5ad4a12fce5e8408f8a0e197421421eba0b44 Mon Sep 17 00:00:00 2001
From: Paskal Sunari <60564521+PaskalSunari@users.noreply.github.com>
Date: Thu, 12 Jun 2025 10:23:24 +0000
Subject: [PATCH 1/2] classifir 1
---
.../1-Introduction/notebook.ipynb | 2 +-
.../2-Classifiers-1/notebook.ipynb | 552 +++++++++++++++++-
2 files changed, 541 insertions(+), 13 deletions(-)
diff --git a/4-Classification/1-Introduction/notebook.ipynb b/4-Classification/1-Introduction/notebook.ipynb
index fd862c35..f85e06e6 100644
--- a/4-Classification/1-Introduction/notebook.ipynb
+++ b/4-Classification/1-Introduction/notebook.ipynb
@@ -747,7 +747,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.13.3"
+ "version": "3.11.4"
},
"orig_nbformat": 2
},
diff --git a/4-Classification/2-Classifiers-1/notebook.ipynb b/4-Classification/2-Classifiers-1/notebook.ipynb
index 30778dee..2b51e68c 100644
--- a/4-Classification/2-Classifiers-1/notebook.ipynb
+++ b/4-Classification/2-Classifiers-1/notebook.ipynb
@@ -1,5 +1,542 @@
{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Build Classification Models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## predict a national cuisine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " cuisine | \n",
+ " almond | \n",
+ " angelica | \n",
+ " anise | \n",
+ " anise_seed | \n",
+ " apple | \n",
+ " apple_brandy | \n",
+ " apricot | \n",
+ " armagnac | \n",
+ " ... | \n",
+ " whiskey | \n",
+ " white_bread | \n",
+ " white_wine | \n",
+ " whole_grain_wheat_flour | \n",
+ " wine | \n",
+ " wood | \n",
+ " yam | \n",
+ " yeast | \n",
+ " yogurt | \n",
+ " zucchini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " indian | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 382 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n",
+ "0 0 indian 0 0 0 0 0 \n",
+ "1 1 indian 1 0 0 0 0 \n",
+ "2 2 indian 0 0 0 0 0 \n",
+ "3 3 indian 0 0 0 0 0 \n",
+ "4 4 indian 0 0 0 0 0 \n",
+ "\n",
+ " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n",
+ "0 0 0 0 ... 0 0 0 \n",
+ "1 0 0 0 ... 0 0 0 \n",
+ "2 0 0 0 ... 0 0 0 \n",
+ "3 0 0 0 ... 0 0 0 \n",
+ "4 0 0 0 ... 0 0 0 \n",
+ "\n",
+ " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 1 0 \n",
+ "\n",
+ "[5 rows x 382 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n",
+ "cuisines_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.model_selection import train_test_split, cross_val_score\n",
+ "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_curve, precision_score\n",
+ "from sklearn.svm import SVC\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 indian\n",
+ "1 indian\n",
+ "2 indian\n",
+ "3 indian\n",
+ "4 indian\n",
+ "Name: cuisine, dtype: object"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cuisines_label_df = cuisines_df['cuisine']\n",
+ "cuisines_label_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " almond | \n",
+ " angelica | \n",
+ " anise | \n",
+ " anise_seed | \n",
+ " apple | \n",
+ " apple_brandy | \n",
+ " apricot | \n",
+ " armagnac | \n",
+ " artemisia | \n",
+ " artichoke | \n",
+ " ... | \n",
+ " whiskey | \n",
+ " white_bread | \n",
+ " white_wine | \n",
+ " whole_grain_wheat_flour | \n",
+ " wine | \n",
+ " wood | \n",
+ " yam | \n",
+ " yeast | \n",
+ " yogurt | \n",
+ " zucchini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 380 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " almond angelica anise anise_seed apple apple_brandy apricot \\\n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 1 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 0 0 \n",
+ "\n",
+ " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n",
+ "0 0 0 0 ... 0 0 0 \n",
+ "1 0 0 0 ... 0 0 0 \n",
+ "2 0 0 0 ... 0 0 0 \n",
+ "3 0 0 0 ... 0 0 0 \n",
+ "4 0 0 0 ... 0 0 0 \n",
+ "\n",
+ " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 1 0 \n",
+ "\n",
+ "[5 rows x 380 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cusisines_features_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n",
+ "cusisines_features_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(cusisines_features_df, cuisines_label_df, test_size=0.3, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Logistic Regression Accuracy: 0.79\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspaces/ML-For-Beginners/.venv/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "lr = LogisticRegression(multi_class = 'ovr', solver='liblinear')\n",
+ "model = lr.fit(X_train, np.ravel(y_train))\n",
+ "accuracy = model.score(X_test, y_test)\n",
+ "print(f\"Logistic Regression Accuracy: {accuracy:.2f}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ingredients: Index(['fish', 'lime_juice', 'shallot'], dtype='object')\n",
+ "cuisine: thai\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
+ "print(f'cuisine: {y_test.iloc[50]}')"
+ ]
+ }
+ ],
"metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -10,19 +547,10 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": 3
+ "version": "3.11.4"
},
"orig_nbformat": 2
},
"nbformat": 4,
- "nbformat_minor": 2,
- "cells": [
- {
- "source": [
- "# Build Classification Models"
- ],
- "cell_type": "markdown",
- "metadata": {}
- }
- ]
-}
\ No newline at end of file
+ "nbformat_minor": 2
+}
From 46ec25562ba90e1ea7748e32a3bebdb50040950b Mon Sep 17 00:00:00 2001
From: Paskal Sunari <60564521+PaskalSunari@users.noreply.github.com>
Date: Thu, 12 Jun 2025 17:25:59 +0000
Subject: [PATCH 2/2] claffification 1 finished
---
.../2-Classifiers-1/notebook.ipynb | 135 ++++++++++++++++--
1 file changed, 125 insertions(+), 10 deletions(-)
diff --git a/4-Classification/2-Classifiers-1/notebook.ipynb b/4-Classification/2-Classifiers-1/notebook.ipynb
index 2b51e68c..342f51c8 100644
--- a/4-Classification/2-Classifiers-1/notebook.ipynb
+++ b/4-Classification/2-Classifiers-1/notebook.ipynb
@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -214,7 +214,7 @@
"[5 rows x 382 columns]"
]
},
- "execution_count": 6,
+ "execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@@ -227,7 +227,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -240,7 +240,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -254,7 +254,7 @@
"Name: cuisine, dtype: object"
]
},
- "execution_count": 9,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -266,7 +266,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -464,7 +464,7 @@
"[5 rows x 380 columns]"
]
},
- "execution_count": 11,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -476,7 +476,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -485,7 +485,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -513,7 +513,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -529,6 +529,121 @@
"print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
"print(f'cuisine: {y_test.iloc[50]}')"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspaces/ML-For-Beginners/.venv/lib/python3.11/site-packages/sklearn/utils/validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " thai | \n",
+ " 0.839568 | \n",
+ "
\n",
+ " \n",
+ " japanese | \n",
+ " 0.134624 | \n",
+ "
\n",
+ " \n",
+ " chinese | \n",
+ " 0.014531 | \n",
+ "
\n",
+ " \n",
+ " korean | \n",
+ " 0.008383 | \n",
+ "
\n",
+ " \n",
+ " indian | \n",
+ " 0.002894 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0\n",
+ "thai 0.839568\n",
+ "japanese 0.134624\n",
+ "chinese 0.014531\n",
+ "korean 0.008383\n",
+ "indian 0.002894"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test= X_test.iloc[50].values.reshape(-1, 1).T\n",
+ "proba = model.predict_proba(test)\n",
+ "classes = model.classes_\n",
+ "resultdf = pd.DataFrame(data=proba, columns=classes)\n",
+ "\n",
+ "topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n",
+ "topPrediction.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " chinese 0.69 0.69 0.69 236\n",
+ " indian 0.91 0.91 0.91 245\n",
+ " japanese 0.73 0.72 0.73 231\n",
+ " korean 0.81 0.76 0.78 242\n",
+ " thai 0.78 0.84 0.81 245\n",
+ "\n",
+ " accuracy 0.79 1199\n",
+ " macro avg 0.79 0.78 0.78 1199\n",
+ "weighted avg 0.79 0.79 0.79 1199\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "y_pred = model.predict(X_test)\n",
+ "print(classification_report(y_test, y_pred))"
+ ]
}
],
"metadata": {