From a5f5ad4a12fce5e8408f8a0e197421421eba0b44 Mon Sep 17 00:00:00 2001
From: Paskal Sunari <60564521+PaskalSunari@users.noreply.github.com>
Date: Thu, 12 Jun 2025 10:23:24 +0000
Subject: [PATCH] classifir 1
---
.../1-Introduction/notebook.ipynb | 2 +-
.../2-Classifiers-1/notebook.ipynb | 552 +++++++++++++++++-
2 files changed, 541 insertions(+), 13 deletions(-)
diff --git a/4-Classification/1-Introduction/notebook.ipynb b/4-Classification/1-Introduction/notebook.ipynb
index fd862c35..f85e06e6 100644
--- a/4-Classification/1-Introduction/notebook.ipynb
+++ b/4-Classification/1-Introduction/notebook.ipynb
@@ -747,7 +747,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.13.3"
+ "version": "3.11.4"
},
"orig_nbformat": 2
},
diff --git a/4-Classification/2-Classifiers-1/notebook.ipynb b/4-Classification/2-Classifiers-1/notebook.ipynb
index 30778dee..2b51e68c 100644
--- a/4-Classification/2-Classifiers-1/notebook.ipynb
+++ b/4-Classification/2-Classifiers-1/notebook.ipynb
@@ -1,5 +1,542 @@
{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Build Classification Models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## predict a national cuisine"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " cuisine | \n",
+ " almond | \n",
+ " angelica | \n",
+ " anise | \n",
+ " anise_seed | \n",
+ " apple | \n",
+ " apple_brandy | \n",
+ " apricot | \n",
+ " armagnac | \n",
+ " ... | \n",
+ " whiskey | \n",
+ " white_bread | \n",
+ " white_wine | \n",
+ " whole_grain_wheat_flour | \n",
+ " wine | \n",
+ " wood | \n",
+ " yam | \n",
+ " yeast | \n",
+ " yogurt | \n",
+ " zucchini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " indian | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " indian | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 382 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 cuisine almond angelica anise anise_seed apple \\\n",
+ "0 0 indian 0 0 0 0 0 \n",
+ "1 1 indian 1 0 0 0 0 \n",
+ "2 2 indian 0 0 0 0 0 \n",
+ "3 3 indian 0 0 0 0 0 \n",
+ "4 4 indian 0 0 0 0 0 \n",
+ "\n",
+ " apple_brandy apricot armagnac ... whiskey white_bread white_wine \\\n",
+ "0 0 0 0 ... 0 0 0 \n",
+ "1 0 0 0 ... 0 0 0 \n",
+ "2 0 0 0 ... 0 0 0 \n",
+ "3 0 0 0 ... 0 0 0 \n",
+ "4 0 0 0 ... 0 0 0 \n",
+ "\n",
+ " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 1 0 \n",
+ "\n",
+ "[5 rows x 382 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "cuisines_df = pd.read_csv(\"../data/cleaned_cuisines.csv\")\n",
+ "cuisines_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.model_selection import train_test_split, cross_val_score\n",
+ "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_curve, precision_score\n",
+ "from sklearn.svm import SVC\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 indian\n",
+ "1 indian\n",
+ "2 indian\n",
+ "3 indian\n",
+ "4 indian\n",
+ "Name: cuisine, dtype: object"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cuisines_label_df = cuisines_df['cuisine']\n",
+ "cuisines_label_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " almond | \n",
+ " angelica | \n",
+ " anise | \n",
+ " anise_seed | \n",
+ " apple | \n",
+ " apple_brandy | \n",
+ " apricot | \n",
+ " armagnac | \n",
+ " artemisia | \n",
+ " artichoke | \n",
+ " ... | \n",
+ " whiskey | \n",
+ " white_bread | \n",
+ " white_wine | \n",
+ " whole_grain_wheat_flour | \n",
+ " wine | \n",
+ " wood | \n",
+ " yam | \n",
+ " yeast | \n",
+ " yogurt | \n",
+ " zucchini | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 380 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " almond angelica anise anise_seed apple apple_brandy apricot \\\n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 1 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 0 0 \n",
+ "\n",
+ " armagnac artemisia artichoke ... whiskey white_bread white_wine \\\n",
+ "0 0 0 0 ... 0 0 0 \n",
+ "1 0 0 0 ... 0 0 0 \n",
+ "2 0 0 0 ... 0 0 0 \n",
+ "3 0 0 0 ... 0 0 0 \n",
+ "4 0 0 0 ... 0 0 0 \n",
+ "\n",
+ " whole_grain_wheat_flour wine wood yam yeast yogurt zucchini \n",
+ "0 0 0 0 0 0 0 0 \n",
+ "1 0 0 0 0 0 0 0 \n",
+ "2 0 0 0 0 0 0 0 \n",
+ "3 0 0 0 0 0 0 0 \n",
+ "4 0 0 0 0 0 1 0 \n",
+ "\n",
+ "[5 rows x 380 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cusisines_features_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)\n",
+ "cusisines_features_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(cusisines_features_df, cuisines_label_df, test_size=0.3, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Logistic Regression Accuracy: 0.79\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/workspaces/ML-For-Beginners/.venv/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:1256: FutureWarning: 'multi_class' was deprecated in version 1.5 and will be removed in 1.7. Use OneVsRestClassifier(LogisticRegression(..)) instead. Leave it to its default value to avoid this warning.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "lr = LogisticRegression(multi_class = 'ovr', solver='liblinear')\n",
+ "model = lr.fit(X_train, np.ravel(y_train))\n",
+ "accuracy = model.score(X_test, y_test)\n",
+ "print(f\"Logistic Regression Accuracy: {accuracy:.2f}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ingredients: Index(['fish', 'lime_juice', 'shallot'], dtype='object')\n",
+ "cuisine: thai\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
+ "print(f'cuisine: {y_test.iloc[50]}')"
+ ]
+ }
+ ],
"metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -10,19 +547,10 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": 3
+ "version": "3.11.4"
},
"orig_nbformat": 2
},
"nbformat": 4,
- "nbformat_minor": 2,
- "cells": [
- {
- "source": [
- "# Build Classification Models"
- ],
- "cell_type": "markdown",
- "metadata": {}
- }
- ]
-}
\ No newline at end of file
+ "nbformat_minor": 2
+}