Merge pull request #8 from PaskalSunari/ML-Patch-2

regression
4 months ago · 70b556c60c
parent 328e041d57 d7f03199f8
commit 70b556c60c
1 changed files with 289 additions and 34 deletions
--- a/2-Regression/3-Linear/notebook.ipynb
+++ b/2-Regression/3-Linear/notebook.ipynb
@ -239,7 +239,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\2637987050.py:9: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
     ]
    },
@ -397,7 +397,7 @@
    {
     "data": {
      "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae64410440>"
+       "<matplotlib.collections.PathCollection at 0x227dc6b56a0>"
      ]
     },
     "execution_count": 4,
@ -428,7 +428,7 @@
    {
     "data": {
      "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae644ea490>"
+       "<matplotlib.collections.PathCollection at 0x227de8d4190>"
      ]
     },
     "execution_count": 5,
@ -460,7 +460,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3995499251.py:1: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
      "  day_of_year = pd.to_datetime(pumpkins['Date']).apply(lambda dt: (dt-datetime(dt.year,1,1)).days)\n"
     ]
    }
@ -471,7 +471,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
@ -490,7 +490,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
@ -514,7 +514,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@ -523,7 +523,7 @@
       "<Axes: xlabel='Variety'>"
      ]
     },
-     "execution_count": 15,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
@ -544,7 +544,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
@ -553,7 +553,7 @@
       "<Axes: xlabel='DayOfYear', ylabel='Price'>"
      ]
     },
-     "execution_count": 18,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    },
@ -575,7 +575,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
@ -595,7 +595,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@ -623,7 +623,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_7488\\3144308612.py:1: SettingWithCopyWarning: \n",
+      "C:\\Users\\Paskal Sunari\\AppData\\Local\\Temp\\ipykernel_11800\\3144308612.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@ -638,7 +638,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@ -667,7 +667,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
@ -687,7 +687,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@ -705,16 +705,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae7381f9d0>"
+       "<matplotlib.collections.PathCollection at 0x227dfdc7b10>"
      ]
     },
-     "execution_count": 31,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
@ -735,16 +735,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "<matplotlib.collections.PathCollection at 0x1ae73ae9310>"
+       "<matplotlib.collections.PathCollection at 0x227dfe95bd0>"
      ]
     },
-     "execution_count": 35,
+     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
@ -760,7 +760,7 @@
    }
   ],
   "source": [
-    "plt.scatter(X_test, pred, color='red', label='Predicted Prices')git config --global --add safe.directory D:/AI/MachineLearning/ML-For-Beginners"
+    "plt.scatter(X_test, pred, color='red', label='Predicted Prices')"
   ]
  },
  {
@ -772,18 +772,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
-     "ename": "NameError",
+     "name": "stdout",
-     "evalue": "name 'LinearRegression' is not defined",
+     "output_type": "stream",
-     "output_type": "error",
+     "text": [
-     "traceback": [
+      "Score on test set: 0.12719946902474621\n"
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mNameError\u001b[39m                                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m PolynomialFeatures\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpipeline\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m make_pipeline\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m pipeline = make_pipeline(PolynomialFeatures(\u001b[32m2\u001b[39m), \u001b[43mLinearRegression\u001b[49m())\n\u001b[32m      6\u001b[39m pipeline.fit(X_train,y_train)\n\u001b[32m      7\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mScore on test set:\u001b[39m\u001b[33m\"\u001b[39m, pipeline.score(X_test, y_test))\n",
-      "\u001b[31mNameError\u001b[39m: name 'LinearRegression' is not defined"
     ]
    }
   ],
@ -795,10 +791,269 @@
    "\n",
    "pipeline.fit(X_train,y_train)\n",
    "print(\"Score on test set:\", pipeline.score(X_test, y_test))\n",
    "pred = pipeline.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FAIRYTALE</th>\n",
       "      <th>MINIATURE</th>\n",
       "      <th>MIXED HEIRLOOM VARIETIES</th>\n",
       "      <th>PIE TYPE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1738</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1739</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1740</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1741</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1742</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>415 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      FAIRYTALE  MINIATURE  MIXED HEIRLOOM VARIETIES  PIE TYPE\n",
       "70        False      False                     False      True\n",
       "71        False      False                     False      True\n",
       "72        False      False                     False      True\n",
       "73        False      False                     False      True\n",
       "74        False      False                     False      True\n",
       "...         ...        ...                       ...       ...\n",
       "1738      False       True                     False     False\n",
       "1739      False       True                     False     False\n",
       "1740      False       True                     False     False\n",
       "1741      False       True                     False     False\n",
       "1742      False       True                     False     False\n",
       "\n",
       "[415 rows x 4 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.get_dummies(new_pumpkins['Variety'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = pd.get_dummies(new_pumpkins['Variety'])\n",
    "y = new_pumpkins['Price']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean error: 2.23 (8.28%)\n",
      "Model determination:  0.9653029999448537\n"
     ]
    }
   ],
   "source": [
    "x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
    "        .join(new_pumpkins['Month']) \\\n",
    "        .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
    "        .join(pd.get_dummies    (new_pumpkins['Package']))\n",
    "y = new_pumpkins['Price']\n",
    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)\n",
    "pipeline = make_pipeline(PolynomialFeatures(2), LinearRegression())\n",
    "pipeline.fit(X_train, y_train)\n",
    "pred = pipeline.predict(X_test)\n",
-    "\n",
+    "mse = np.sqrt(mean_squared_error(y_test, pred))\n",
-    "#Lets start the fork"
+    "print(f'Mean error: {mse:3.3} ({mse/np.mean(pred)*100:3.3}%)')\n",
    "score = pipeline.score(X_train, y_train)\n",
    "print('Model determination: ', score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      FAIRYTALE  MINIATURE  MIXED HEIRLOOM VARIETIES  PIE TYPE  Month  \\\n",
      "70        False      False                     False      True      9   \n",
      "71        False      False                     False      True      9   \n",
      "72        False      False                     False      True     10   \n",
      "73        False      False                     False      True     10   \n",
      "74        False      False                     False      True     10   \n",
      "...         ...        ...                       ...       ...    ...   \n",
      "1738      False       True                     False     False      9   \n",
      "1739      False       True                     False     False      9   \n",
      "1740      False       True                     False     False      9   \n",
      "1741      False       True                     False     False      9   \n",
      "1742      False       True                     False     False      9   \n",
      "\n",
      "      ATLANTA  BALTIMORE  BOSTON  CHICAGO  COLUMBIA  DETROIT  NEW YORK  \\\n",
      "70      False       True   False    False     False    False     False   \n",
      "71      False       True   False    False     False    False     False   \n",
      "72      False       True   False    False     False    False     False   \n",
      "73      False       True   False    False     False    False     False   \n",
      "74      False       True   False    False     False    False     False   \n",
      "...       ...        ...     ...      ...       ...      ...       ...   \n",
      "1738    False      False   False    False     False    False     False   \n",
      "1739    False      False   False    False     False    False     False   \n",
      "1740    False      False   False    False     False    False     False   \n",
      "1741    False      False   False    False     False    False     False   \n",
      "1742    False      False   False    False     False    False     False   \n",
      "\n",
      "      PHILADELPHIA  SAN FRANCISCO  ST. LOUIS  1 1/9 bushel cartons  \\\n",
      "70           False          False      False                  True   \n",
      "71           False          False      False                  True   \n",
      "72           False          False      False                  True   \n",
      "73           False          False      False                  True   \n",
      "74           False          False      False                  True   \n",
      "...            ...            ...        ...                   ...   \n",
      "1738         False          False       True                 False   \n",
      "1739         False          False       True                 False   \n",
      "1740         False          False       True                 False   \n",
      "1741         False          False       True                 False   \n",
      "1742         False          False       True                 False   \n",
      "\n",
      "      1 1/9 bushel crates  1/2 bushel cartons  bushel baskets  bushel cartons  \n",
      "70                  False               False           False           False  \n",
      "71                  False               False           False           False  \n",
      "72                  False               False           False           False  \n",
      "73                  False               False           False           False  \n",
      "74                  False               False           False           False  \n",
      "...                   ...                 ...             ...             ...  \n",
      "1738                False                True           False           False  \n",
      "1739                False                True           False           False  \n",
      "1740                False                True           False           False  \n",
      "1741                False                True           False           False  \n",
      "1742                False                True           False           False  \n",
      "\n",
      "[415 rows x 20 columns]\n"
     ]
    }
   ],
   "source": [
    "x = pd.get_dummies(new_pumpkins['Variety']) \\\n",
    "        .join(new_pumpkins['Month']) \\\n",
    "        .join(pd.get_dummies(new_pumpkins['City'])) \\\n",
    "        .join(pd.get_dummies(new_pumpkins['Package']))\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {