From d7f03199f834efe3735808e15c001aae3e19fec5 Mon Sep 17 00:00:00 2001
From: Paskal <paskalsunari@gmail.com>
Date: Sat, 7 Jun 2025 15:23:31 +0545
Subject: [PATCH] Re-run linear regression notebook and add polynomial model cells

---
 2-Regression/3-Linear/notebook.ipynb | 323 ++++++++++++++++++++++++---
 1 file changed, 289 insertions(+), 34 deletions(-)

diff --git a/2-Regression/3-Linear/notebook.ipynb b/2-Regression/3-Linear/notebook.ipynb
index 68b55550..e3cd14ce 100644
--- a/2-Regression/3-Linear/notebook.ipynb
+++ b/2-Regression/3-Linear/notebook.ipynb
@@ -239,7 +239,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
       "  day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
      ]
     },
@@ -397,7 +397,7 @@
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae64410440>"
+       "<matplotlib.collections.PathCollection at 0x227dc6b56a0>"
       ]
      },
      "execution_count": 4,
@@ -428,7 +428,7 @@
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae644ea490>"
+       "<matplotlib.collections.PathCollection at 0x227de8d4190>"
       ]
      },
      "execution_count": 5,
@@ -460,7 +460,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
       "  day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
      ]
     }
@@ -471,7 +471,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -490,7 +490,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -514,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -523,7 +523,7 @@
        "<Axes: xlabel='Variety'>"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -544,7 +544,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -553,7 +553,7 @@
        "<Axes: xlabel='DayOfYear', ylabel='Price'>"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -575,7 +575,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -595,7 +595,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -623,7 +623,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3144308612.py:1: SettingWithCopyWarning: \n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3144308612.py:1: SettingWithCopyWarning: \n",
       "A value is trying to be set on a copy of a slice from a DataFrame\n",
       "\n",
       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -638,7 +638,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -667,7 +667,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -687,7 +687,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -705,16 +705,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae7381f9d0>"
+       "<matplotlib.collections.PathCollection at 0x227dfdc7b10>"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -735,16 +735,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae73ae9310>"
+       "<matplotlib.collections.PathCollection at 0x227dfe95bd0>"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -760,7 +760,7 @@
     }
    ],
    "source": [
-    "plt.scatter(X_test, pred, color='red', label='Predicted Prices')git config --global --add safe.directory D:/AI/MachineLearning/ML-For-Beginners"
+    "plt.scatter(X_test, pred, color='red', label='Predicted Prices')"
    ]
   },
   {
@@ -772,18 +772,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
-     "ename": "NameError",
-     "evalue": "name 'LinearRegression' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PolynomialFeatures\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpipeline\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m make_pipeline\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m pipeline = make_pipeline(PolynomialFeatures(\u001b[32m2\u001b[39m), \u001b[43mLinearRegression\u001b[49m())\n\u001b[32m      6\u001b[39m pipeline.fit(X_train,y_train)\n\u001b[32m      7\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mScore on test set:\u001b[39m\u001b[33m\"\u001b[39m, pipeline.score(X_test, y_test))\n",
-      "\u001b[31mNameError\u001b[39m: name 'LinearRegression' is not defined"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Score on test set: 0.12719946902474621\n"
      ]
     }
    ],
@@ -795,10 +791,269 @@
     "\n",
     "pipeline.fit(X_train,y_train)\n",
     "print(\"Score on test set:\", pipeline.score(X_test, y_test))\n",
+    "pred = pipeline.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>FAIRYTALE</th>\n",
+       "      <th>MINIATURE</th>\n",
+       "      <th>MIXED HEIRLOOM VARIETIES</th>\n",
+       "      <th>PIE TYPE</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1738</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1739</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1740</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1741</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1742</th>\n",
+       "      <td>False</td>\n",
+       "      <td>True</td>\n",
+       "      <td>False</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>415 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      FAIRYTALE  MINIATURE  MIXED HEIRLOOM VARIETIES  PIE TYPE\n",
+       "70        False      False                     False      True\n",
+       "71        False      False                     False      True\n",
+       "72        False      False                     False      True\n",
+       "73        False      False                     False      True\n",
+       "74        False      False                     False      True\n",
+       "...         ...        ...                       ...       ...\n",
+       "1738      False       True                     False     False\n",
+       "1739      False       True                     False     False\n",
+       "1740      False       True                     False     False\n",
+       "1741      False       True                     False     False\n",
+       "1742      False       True                     False     False\n",
+       "\n",
+       "[415 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.get_dummies(new_pumpkins['Variety'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = pd.get_dummies(new_pumpkins['Variety'])\n",
+    "y = new_pumpkins['Price']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean error: 2.23 (8.28%)\n",
+      "Model determination:  0.9653029999448537\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
+    "        .join(new_pumpkins['Month']) \\\n",
+    "        .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
+    "        .join(pd.get_dummies(new_pumpkins['Package']))\n",
+    "y = new_pumpkins['Price']\n",
+    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
+    "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n",
+    "pipeline.fit(X_train, y_train)\n",
     "pred = pipeline.predict(X_test)\n",
-    "\n",
-    "#Lets start the fork"
+    "mse = np.sqrt(mean_squared_error(y_test, pred))\n",
+    "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n",
+    "score = pipeline.score(X_train, y_train)\n",
+    "print('Model determination: ', score)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "      FAIRYTALE  MINIATURE  MIXED HEIRLOOM VARIETIES  PIE TYPE  Month  \\\n",
+      "70        False      False                     False      True      9   \n",
+      "71        False      False                     False      True      9   \n",
+      "72        False      False                     False      True     10   \n",
+      "73        False      False                     False      True     10   \n",
+      "74        False      False                     False      True     10   \n",
+      "...         ...        ...                       ...       ...    ...   \n",
+      "1738      False       True                     False     False      9   \n",
+      "1739      False       True                     False     False      9   \n",
+      "1740      False       True                     False     False      9   \n",
+      "1741      False       True                     False     False      9   \n",
+      "1742      False       True                     False     False      9   \n",
+      "\n",
+      "      ATLANTA  BALTIMORE  BOSTON  CHICAGO  COLUMBIA  DETROIT  NEW YORK  \\\n",
+      "70      False       True   False    False     False    False     False   \n",
+      "71      False       True   False    False     False    False     False   \n",
+      "72      False       True   False    False     False    False     False   \n",
+      "73      False       True   False    False     False    False     False   \n",
+      "74      False       True   False    False     False    False     False   \n",
+      "...       ...        ...     ...      ...       ...      ...       ...   \n",
+      "1738    False      False   False    False     False    False     False   \n",
+      "1739    False      False   False    False     False    False     False   \n",
+      "1740    False      False   False    False     False    False     False   \n",
+      "1741    False      False   False    False     False    False     False   \n",
+      "1742    False      False   False    False     False    False     False   \n",
+      "\n",
+      "      PHILADELPHIA  SAN FRANCISCO  ST. LOUIS  1 1/9 bushel cartons  \\\n",
+      "70           False          False      False                  True   \n",
+      "71           False          False      False                  True   \n",
+      "72           False          False      False                  True   \n",
+      "73           False          False      False                  True   \n",
+      "74           False          False      False                  True   \n",
+      "...            ...            ...        ...                   ...   \n",
+      "1738         False          False       True                 False   \n",
+      "1739         False          False       True                 False   \n",
+      "1740         False          False       True                 False   \n",
+      "1741         False          False       True                 False   \n",
+      "1742         False          False       True                 False   \n",
+      "\n",
+      "      1 1/9 bushel crates  1/2 bushel cartons  bushel baskets  bushel cartons  \n",
+      "70                  False               False           False           False  \n",
+      "71                  False               False           False           False  \n",
+      "72                  False               False           False           False  \n",
+      "73                  False               False           False           False  \n",
+      "74                  False               False           False           False  \n",
+      "...                   ...                 ...             ...             ...  \n",
+      "1738                False                True           False           False  \n",
+      "1739                False                True           False           False  \n",
+      "1740                False                True           False           False  \n",
+      "1741                False                True           False           False  \n",
+      "1742                False                True           False           False  \n",
+      "\n",
+      "[415 rows x 20 columns]\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
+    "        .join(new_pumpkins['Month']) \\\n",
+    "        .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
+    "        .join(pd.get_dummies(new_pumpkins['Package']))\n",
+    "print(x)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {