|
|
@ -1,29 +1,15 @@
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"# Build Classification Model"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 25,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
|
|
"# Build Classification Models"
|
|
|
|
"from sklearn.model_selection import train_test_split, cross_val_score\n",
|
|
|
|
],
|
|
|
|
"from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n",
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"from sklearn.svm import SVC\n",
|
|
|
|
"metadata": {}
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
|
|
|
"import numpy as np"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 26,
|
|
|
|
"execution_count": 12,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -56,17 +42,31 @@
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>cuisine</th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>indian</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 382 columns</p>\n</div>"
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>cuisine</th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>indian</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>indian</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 382 columns</p>\n</div>"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"execution_count": 26
|
|
|
|
"execution_count": 12
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"recipes_df = pd.read_csv(\"../../data/cleaned_cuisine.csv\")\n",
|
|
|
|
"recipes_df = pd.read_csv(\"../../data/cleaned_cuisine.csv\")\n",
|
|
|
|
"recipes_df.head()"
|
|
|
|
"recipes_df.head()"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 27,
|
|
|
|
"execution_count": 13,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"from sklearn.linear_model import LogisticRegression\n",
|
|
|
|
|
|
|
|
"from sklearn.model_selection import train_test_split, cross_val_score\n",
|
|
|
|
|
|
|
|
"from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve\n",
|
|
|
|
|
|
|
|
"from sklearn.svm import SVC\n",
|
|
|
|
|
|
|
|
"import numpy as np"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 14,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -82,7 +82,7 @@
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"execution_count": 27
|
|
|
|
"execution_count": 14
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -92,7 +92,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 28,
|
|
|
|
"execution_count": 15,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -125,7 +125,7 @@
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>artichoke</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>almond</th>\n <th>angelica</th>\n <th>anise</th>\n <th>anise_seed</th>\n <th>apple</th>\n <th>apple_brandy</th>\n <th>apricot</th>\n <th>armagnac</th>\n <th>artemisia</th>\n <th>artichoke</th>\n <th>...</th>\n <th>whiskey</th>\n <th>white_bread</th>\n <th>white_wine</th>\n <th>whole_grain_wheat_flour</th>\n <th>wine</th>\n <th>wood</th>\n <th>yam</th>\n <th>yeast</th>\n <th>yogurt</th>\n <th>zucchini</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 380 columns</p>\n</div>"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"execution_count": 28
|
|
|
|
"execution_count": 15
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -135,7 +135,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 29,
|
|
|
|
"execution_count": 16,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -144,14 +144,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 30,
|
|
|
|
"execution_count": 17,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"Accuracy is 0.810675562969141\n"
|
|
|
|
"Accuracy is 0.8023352793994996\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -165,26 +165,26 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 31,
|
|
|
|
"execution_count": 23,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"ingredients: Index(['bean', 'coriander', 'cumin', 'fenugreek', 'pepper', 'turmeric',\n 'vegetable_oil'],\n dtype='object')\ncusine: thai\n"
|
|
|
|
"ingredients: Index(['cilantro', 'onion', 'pea', 'potato', 'tomato', 'vegetable_oil'], dtype='object')\ncuisine: indian\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"# test an item\n",
|
|
|
|
"# test an item\n",
|
|
|
|
"print(f'ingredients: {X_test.iloc[20][X_test.iloc[20]!=0].keys()}')\n",
|
|
|
|
"print(f'ingredients: {X_test.iloc[50][X_test.iloc[50]!=0].keys()}')\n",
|
|
|
|
"print(f'cuisine: {y_test.iloc[20]}')"
|
|
|
|
"print(f'cuisine: {y_test.iloc[50]}')"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 32,
|
|
|
|
"execution_count": 24,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -192,42 +192,42 @@
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"text/plain": [
|
|
|
|
" 0\n",
|
|
|
|
" 0\n",
|
|
|
|
"indian 0.530435\n",
|
|
|
|
"indian 0.715851\n",
|
|
|
|
"thai 0.344293\n",
|
|
|
|
"chinese 0.229475\n",
|
|
|
|
"japanese 0.108792\n",
|
|
|
|
"japanese 0.029763\n",
|
|
|
|
"chinese 0.015001\n",
|
|
|
|
"korean 0.017277\n",
|
|
|
|
"korean 0.001480"
|
|
|
|
"thai 0.007634"
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>indian</th>\n <td>0.530435</td>\n </tr>\n <tr>\n <th>thai</th>\n <td>0.344293</td>\n </tr>\n <tr>\n <th>japanese</th>\n <td>0.108792</td>\n </tr>\n <tr>\n <th>chinese</th>\n <td>0.015001</td>\n </tr>\n <tr>\n <th>korean</th>\n <td>0.001480</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
|
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>indian</th>\n <td>0.715851</td>\n </tr>\n <tr>\n <th>chinese</th>\n <td>0.229475</td>\n </tr>\n <tr>\n <th>japanese</th>\n <td>0.029763</td>\n </tr>\n <tr>\n <th>korean</th>\n <td>0.017277</td>\n </tr>\n <tr>\n <th>thai</th>\n <td>0.007634</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"execution_count": 32
|
|
|
|
"execution_count": 24
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"#rehsape to 2d array and transpose\r\n",
|
|
|
|
"#rehsape to 2d array and transpose\n",
|
|
|
|
"test= X_test.iloc[20].values.reshape(-1, 1).T\r\n",
|
|
|
|
"test= X_test.iloc[50].values.reshape(-1, 1).T\n",
|
|
|
|
"# predict with score\r\n",
|
|
|
|
"# predict with score\n",
|
|
|
|
"proba = model.predict_proba(test)\r\n",
|
|
|
|
"proba = model.predict_proba(test)\n",
|
|
|
|
"classes = model.classes_\r\n",
|
|
|
|
"classes = model.classes_\n",
|
|
|
|
"# create df with classes and scores\r\n",
|
|
|
|
"# create df with classes and scores\n",
|
|
|
|
"resultdf = pd.DataFrame(data=proba, columns=classes)\r\n",
|
|
|
|
"resultdf = pd.DataFrame(data=proba, columns=classes)\n",
|
|
|
|
"\r\n",
|
|
|
|
"\n",
|
|
|
|
"# create df to show results\r\n",
|
|
|
|
"# create df to show results\n",
|
|
|
|
"topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\r\n",
|
|
|
|
"topPrediction = resultdf.T.sort_values(by=[0], ascending = [False])\n",
|
|
|
|
"topPrediction.head()"
|
|
|
|
"topPrediction.head()"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 33,
|
|
|
|
"execution_count": 20,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
" precision recall f1-score support\n\n chinese 0.75 0.67 0.70 231\n indian 0.91 0.90 0.90 255\n japanese 0.77 0.82 0.79 260\n korean 0.83 0.83 0.83 220\n thai 0.79 0.83 0.81 233\n\n accuracy 0.81 1199\n macro avg 0.81 0.81 0.81 1199\nweighted avg 0.81 0.81 0.81 1199\n\n"
|
|
|
|
" precision recall f1-score support\n\n chinese 0.73 0.71 0.72 229\n indian 0.91 0.93 0.92 254\n japanese 0.70 0.75 0.72 220\n korean 0.86 0.76 0.81 242\n thai 0.79 0.85 0.82 254\n\n accuracy 0.80 1199\n macro avg 0.80 0.80 0.80 1199\nweighted avg 0.80 0.80 0.80 1199\n\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -245,7 +245,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 34,
|
|
|
|
"execution_count": 21,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -272,17 +272,17 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 35,
|
|
|
|
"execution_count": 22,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"Accuracy (train) for L1 logistic: 79.4% \n",
|
|
|
|
"Accuracy (train) for L1 logistic: 79.5% \n",
|
|
|
|
"Accuracy (train) for L2 logistic (Multinomial): 79.2% \n",
|
|
|
|
"Accuracy (train) for L2 logistic (Multinomial): 80.1% \n",
|
|
|
|
"Accuracy (train) for L2 logistic (OvR): 80.2% \n",
|
|
|
|
"Accuracy (train) for L2 logistic (OvR): 80.7% \n",
|
|
|
|
"Accuracy (train) for Linear SVC: 79.1% \n"
|
|
|
|
"Accuracy (train) for Linear SVC: 78.5% \n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
|