From d7f03199f834efe3735808e15c001aae3e19fec5 Mon Sep 17 00:00:00 2001 From: Paskal Date: Sat, 7 Jun 2025 15:23:31 +0545 Subject: [PATCH] regression --- 2-Regression/3-Linear/notebook.ipynb | 323 ++++++++++++++++++++++++--- 1 file changed, 289 insertions(+), 34 deletions(-) diff --git a/2-Regression/3-Linear/notebook.ipynb b/2-Regression/3-Linear/notebook.ipynb index 68b55550..e3cd14ce 100644 --- a/2-Regression/3-Linear/notebook.ipynb +++ b/2-Regression/3-Linear/notebook.ipynb @@ -239,7 +239,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n" ] }, @@ -397,7 +397,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -428,7 +428,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 5, @@ -460,7 +460,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n" ] } @@ -471,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -490,7 +490,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -514,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -523,7 +523,7 @@ "" ] }, - "execution_count": 15, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" }, @@ -544,7 +544,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -553,7 +553,7 @@ "" ] }, - "execution_count": 18, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, @@ -575,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -595,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -623,7 +623,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3144308612.py:1: SettingWithCopyWarning: \n", + "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3144308612.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", @@ -638,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -667,7 +667,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -687,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -705,16 +705,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 31, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, @@ -735,16 +735,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 35, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, @@ -760,7 +760,7 @@ } ], "source": [ - "plt.scatter(X_test, pred, color='red', label='Predicted Prices')git config --global --add safe.directory D:/AI/MachineLearning/ML-For-Beginners" + "plt.scatter(X_test, pred, color='red', label='Predicted Prices')" ] }, { @@ -772,18 +772,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'LinearRegression' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PolynomialFeatures\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpipeline\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m make_pipeline\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m pipeline = make_pipeline(PolynomialFeatures(\u001b[32m2\u001b[39m), \u001b[43mLinearRegression\u001b[49m())\n\u001b[32m 6\u001b[39m pipeline.fit(X_train,y_train)\n\u001b[32m 7\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mScore on test set:\u001b[39m\u001b[33m\"\u001b[39m, pipeline.score(X_test, y_test))\n", - "\u001b[31mNameError\u001b[39m: name 'LinearRegression' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "Score on test set: 0.12719946902474621\n" ] } ], @@ -795,10 +791,269 @@ "\n", "pipeline.fit(X_train,y_train)\n", "print(\"Score on test set:\", pipeline.score(X_test, y_test))\n", + "pred = pipeline.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FAIRYTALEMINIATUREMIXED HEIRLOOM VARIETIESPIE TYPE
70FalseFalseFalseTrue
71FalseFalseFalseTrue
72FalseFalseFalseTrue
73FalseFalseFalseTrue
74FalseFalseFalseTrue
...............
1738FalseTrueFalseFalse
1739FalseTrueFalseFalse
1740FalseTrueFalseFalse
1741FalseTrueFalseFalse
1742FalseTrueFalseFalse
\n", + "

415 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE\n", + "70 False False False True\n", + "71 False False False True\n", + "72 False False False True\n", + "73 False False False True\n", + "74 False False False True\n", + "... ... ... ... ...\n", + "1738 False True False False\n", + "1739 False True False False\n", + "1740 False True False False\n", + "1741 False True False False\n", + "1742 False True False False\n", + "\n", + "[415 rows x 4 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(new_pumpkins['Variety'])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "x = pd.get_dummies(new_pumpkins['Variety'])\n", + "y = new_pumpkins['Price']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean error: 2.23 (8.28%)\n", + "Model determination: 0.9653029999448537\n" + ] + } + ], + "source": [ + "x = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies (new_pumpkins['Package']))\n", + "y = new_pumpkins['Price']\n", + "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n", + "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n", + "pipeline.fit(X_train, y_train)\n", "pred = pipeline.predict(X_test)\n", - "\n", - "#Lets start the fork" + "mse = np.sqrt(mean_squared_error(y_test, pred))\n", + "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n", + "score = pipeline.score(X_train, y_train)\n", + "print('Model determination: ', score)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " FAIRYTALE MINIATURE MIXED HEIRLOOM VARIETIES PIE TYPE Month \\\n", + "70 False False False True 9 \n", + "71 False False False True 9 \n", + "72 False False False True 10 \n", + "73 False False False True 10 \n", + "74 False False False True 10 \n", + "... ... ... ... ... ... \n", + "1738 False True False False 9 \n", + "1739 False True False False 9 \n", + "1740 False True False False 9 \n", + "1741 False True False False 9 \n", + "1742 False True False False 9 \n", + "\n", + " ATLANTA BALTIMORE BOSTON CHICAGO COLUMBIA DETROIT NEW YORK \\\n", + "70 False True False False False False False \n", + "71 False True False False False False False \n", + "72 False True False False False False False \n", + "73 False True False False False False False \n", + "74 False True False False False False False \n", + "... ... ... ... ... ... ... ... \n", + "1738 False False False False False False False \n", + "1739 False False False False False False False \n", + "1740 False False False False False False False \n", + "1741 False False False False False False False \n", + "1742 False False False False False False False \n", + "\n", + " PHILADELPHIA SAN FRANCISCO ST. LOUIS 1 1/9 bushel cartons \\\n", + "70 False False False True \n", + "71 False False False True \n", + "72 False False False True \n", + "73 False False False True \n", + "74 False False False True \n", + "... ... ... ... ... \n", + "1738 False False True False \n", + "1739 False False True False \n", + "1740 False False True False \n", + "1741 False False True False \n", + "1742 False False True False \n", + "\n", + " 1 1/9 bushel crates 1/2 bushel cartons bushel baskets bushel cartons \n", + "70 False False False False \n", + "71 False False False False \n", + "72 False False False False \n", + "73 False False False False \n", + "74 False False False False \n", + "... ... ... ... ... \n", + "1738 False True False False \n", + "1739 False True False False \n", + "1740 False True False False \n", + "1741 False True False False \n", + "1742 False True False False \n", + "\n", + "[415 rows x 20 columns]\n" + ] + } + ], + "source": [ + "x = pd.get_dummies(new_pumpkins['Variety']) \\\n", + " .join(new_pumpkins['Month']) \\\n", + " .join(pd.get_dummies(new_pumpkins['City'])) \\\n", + " .join(pd.get_dummies(new_pumpkins['Package']))\n", + "print(x)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {