Merge pull request #8 from PaskalSunari/ML-Patch-2

regression
pull/872/head
Paskal Sunari 4 months ago committed by GitHub
commit 70b556c60c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -239,7 +239,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n" " day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
] ]
}, },
@ -397,7 +397,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.collections.PathCollection at 0x1ae64410440>" "<matplotlib.collections.PathCollection at 0x227dc6b56a0>"
] ]
}, },
"execution_count": 4, "execution_count": 4,
@ -428,7 +428,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.collections.PathCollection at 0x1ae644ea490>" "<matplotlib.collections.PathCollection at 0x227de8d4190>"
] ]
}, },
"execution_count": 5, "execution_count": 5,
@ -460,7 +460,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n" " day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
] ]
} }
@ -471,7 +471,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -490,7 +490,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -514,7 +514,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -523,7 +523,7 @@
"<Axes: xlabel='Variety'>" "<Axes: xlabel='Variety'>"
] ]
}, },
"execution_count": 15, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },
@ -544,7 +544,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -553,7 +553,7 @@
"<Axes: xlabel='DayOfYear', ylabel='Price'>" "<Axes: xlabel='DayOfYear', ylabel='Price'>"
] ]
}, },
"execution_count": 18, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },
@ -575,7 +575,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -595,7 +595,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -623,7 +623,7 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3144308612.py:1: SettingWithCopyWarning: \n", "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3144308612.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n", "A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n", "\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@ -638,7 +638,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -667,7 +667,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -687,7 +687,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -705,16 +705,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.collections.PathCollection at 0x1ae7381f9d0>" "<matplotlib.collections.PathCollection at 0x227dfdc7b10>"
] ]
}, },
"execution_count": 31, "execution_count": 16,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },
@ -735,16 +735,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 20,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<matplotlib.collections.PathCollection at 0x1ae73ae9310>" "<matplotlib.collections.PathCollection at 0x227dfe95bd0>"
] ]
}, },
"execution_count": 35, "execution_count": 20,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },
@ -760,7 +760,7 @@
} }
], ],
"source": [ "source": [
"plt.scatter(X_test, pred, color='red', label='Predicted Prices')git config --global --add safe.directory D:/AI/MachineLearning/ML-For-Beginners" "plt.scatter(X_test, pred, color='red', label='Predicted Prices')"
] ]
}, },
{ {
@ -772,18 +772,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 21,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "NameError", "name": "stdout",
"evalue": "name 'LinearRegression' is not defined", "output_type": "stream",
"output_type": "error", "text": [
"traceback": [ "Score on test set: 0.12719946902474621\n"
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PolynomialFeatures\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpipeline\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m make_pipeline\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m pipeline = make_pipeline(PolynomialFeatures(\u001b[32m2\u001b[39m), \u001b[43mLinearRegression\u001b[49m())\n\u001b[32m 6\u001b[39m pipeline.fit(X_train,y_train)\n\u001b[32m 7\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mScore on test set:\u001b[39m\u001b[33m\"\u001b[39m, pipeline.score(X_test, y_test))\n",
"\u001b[31mNameError\u001b[39m: name 'LinearRegression' is not defined"
] ]
} }
], ],
@ -795,10 +791,269 @@
"\n", "\n",
"pipeline.fit(X_train,y_train)\n", "pipeline.fit(X_train,y_train)\n",
"print(\"Score on test set:\", pipeline.score(X_test, y_test))\n", "print(\"Score on test set:\", pipeline.score(X_test, y_test))\n",
"pred = pipeline.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>FAIRYTALE</th>\n",
" <th>MINIATURE</th>\n",
" <th>MIXED HEIRLOOM VARIETIES</th>\n",
" <th>PIE TYPE</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1738</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1739</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1740</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1741</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1742</th>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>415 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n",
"70 False False False True\n",
"71 False False False True\n",
"72 False False False True\n",
"73 False False False True\n",
"74 False False False True\n",
"... ... ... ... ...\n",
"1738 False True False False\n",
"1739 False True False False\n",
"1740 False True False False\n",
"1741 False True False False\n",
"1742 False True False False\n",
"\n",
"[415 rows x 4 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.get_dummies(new_pumpkins['Variety'])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"x = pd.get_dummies(new_pumpkins['Variety'])\n",
"y = new_pumpkins['Price']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean error: 2.23 (8.28%)\n",
"Model determination: 0.9653029999448537\n"
]
}
],
"source": [
"x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
" .join(new_pumpkins['Month']) \\\n",
" .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
" .join(pd.get_dummies (new_pumpkins['Package']))\n",
"y = new_pumpkins['Price']\n",
"X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
"pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n",
"pipeline.fit(X_train, y_train)\n",
"pred = pipeline.predict(X_test)\n", "pred = pipeline.predict(X_test)\n",
"\n", "mse = np.sqrt(mean_squared_error(y_test, pred))\n",
"#Lets start the fork" "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n",
"score = pipeline.score(X_train, y_train)\n",
"print('Model determination: ', score)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE Month \\\n",
"70 False False False True 9 \n",
"71 False False False True 9 \n",
"72 False False False True 10 \n",
"73 False False False True 10 \n",
"74 False False False True 10 \n",
"... ... ... ... ... ... \n",
"1738 False True False False 9 \n",
"1739 False True False False 9 \n",
"1740 False True False False 9 \n",
"1741 False True False False 9 \n",
"1742 False True False False 9 \n",
"\n",
" ATLANTA BALTIMORE BOSTON CHICAGO COLUMBIA DETROIT NEW YORK \\\n",
"70 False True False False False False False \n",
"71 False True False False False False False \n",
"72 False True False False False False False \n",
"73 False True False False False False False \n",
"74 False True False False False False False \n",
"... ... ... ... ... ... ... ... \n",
"1738 False False False False False False False \n",
"1739 False False False False False False False \n",
"1740 False False False False False False False \n",
"1741 False False False False False False False \n",
"1742 False False False False False False False \n",
"\n",
" PHILADELPHIA SAN FRANCISCO ST. LOUIS 1 1/9 bushel cartons \\\n",
"70 False False False True \n",
"71 False False False True \n",
"72 False False False True \n",
"73 False False False True \n",
"74 False False False True \n",
"... ... ... ... ... \n",
"1738 False False True False \n",
"1739 False False True False \n",
"1740 False False True False \n",
"1741 False False True False \n",
"1742 False False True False \n",
"\n",
" 1 1/9 bushel crates 1/2 bushel cartons bushel baskets bushel cartons \n",
"70 False False False False \n",
"71 False False False False \n",
"72 False False False False \n",
"73 False False False False \n",
"74 False False False False \n",
"... ... ... ... ... \n",
"1738 False True False False \n",
"1739 False True False False \n",
"1740 False True False False \n",
"1741 False True False False \n",
"1742 False True False False \n",
"\n",
"[415 rows x 20 columns]\n"
]
}
],
"source": [
"x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
" .join(new_pumpkins['Month']) \\\n",
" .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
" .join(pd.get_dummies(new_pumpkins['Package']))\n",
"print(x)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

Loading…
Cancel
Save