diff --git a/2-Regression/3-Linear/README.md b/2-Regression/3-Linear/README.md
index 63596de4..d213a7b4 100644
--- a/2-Regression/3-Linear/README.md
+++ b/2-Regression/3-Linear/README.md
@@ -103,7 +103,14 @@ This suggests that there should be some correlation, and we can try training lin
-It looks like there are different clusters of prices corresponding to different pumpkin varieties. To confirm this hypothesis, let's plot each pumpkin category using a different color. By passing an `ax` parameter to the `scatter` plotting function we can plot all points on the same graph:
+Let's see if there is a correlation using the `corr` function:
+
+```python
+print(new_pumpkins['Month'].corr(new_pumpkins['Price']))
+print(new_pumpkins['DayOfYear'].corr(new_pumpkins['Price']))
+```
+
+It looks like the correlation is pretty small, -0.15 by `Month` and -0.17 by `DayOfYear`, but there could be another important relationship. The scatter plots above suggest that there are different clusters of prices corresponding to different pumpkin varieties. To confirm this hypothesis, let's plot each pumpkin category using a different color. By passing an `ax` parameter to the `scatter` plotting function we can plot all points on the same graph:
```python
ax=None
@@ -115,7 +122,15 @@ for i,var in enumerate(new_pumpkins['Variety'].unique()):
-Our investigation suggests that variety has more effect on the overall price than the actual selling date. So let us focus for the moment only on one pumpkin variety, and see what effect the date has on the price:
+Our investigation suggests that variety has more effect on the overall price than the actual selling date. We can see this with a bar graph of the mean price for each variety:
+
+```python
+new_pumpkins.groupby('Variety')['Price'].mean().plot(kind='bar')
+```
+
+
+
+Let us focus for the moment only on one pumpkin variety, the 'pie type', and see what effect the date has on the price:
```python
pie_pumpkins = new_pumpkins[new_pumpkins['Variety']=='PIE TYPE']
diff --git a/2-Regression/3-Linear/notebook.ipynb b/2-Regression/3-Linear/notebook.ipynb
index 2da56e5b..b01f1ee8 100644
--- a/2-Regression/3-Linear/notebook.ipynb
+++ b/2-Regression/3-Linear/notebook.ipynb
@@ -16,209 +16,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
City Name
\n",
- "
Type
\n",
- "
Package
\n",
- "
Variety
\n",
- "
Sub Variety
\n",
- "
Grade
\n",
- "
Date
\n",
- "
Low Price
\n",
- "
High Price
\n",
- "
Mostly Low
\n",
- "
...
\n",
- "
Unit of Sale
\n",
- "
Quality
\n",
- "
Condition
\n",
- "
Appearance
\n",
- "
Storage
\n",
- "
Crop
\n",
- "
Repack
\n",
- "
Trans Mode
\n",
- "
Unnamed: 24
\n",
- "
Unnamed: 25
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
BALTIMORE
\n",
- "
NaN
\n",
- "
24 inch bins
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
4/29/17
\n",
- "
270.0
\n",
- "
280.0
\n",
- "
270.0
\n",
- "
...
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
E
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
\n",
- "
\n",
- "
1
\n",
- "
BALTIMORE
\n",
- "
NaN
\n",
- "
24 inch bins
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
5/6/17
\n",
- "
270.0
\n",
- "
280.0
\n",
- "
270.0
\n",
- "
...
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
E
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
\n",
- "
\n",
- "
2
\n",
- "
BALTIMORE
\n",
- "
NaN
\n",
- "
24 inch bins
\n",
- "
HOWDEN TYPE
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
9/24/16
\n",
- "
160.0
\n",
- "
160.0
\n",
- "
160.0
\n",
- "
...
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
N
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
\n",
- "
\n",
- "
3
\n",
- "
BALTIMORE
\n",
- "
NaN
\n",
- "
24 inch bins
\n",
- "
HOWDEN TYPE
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
9/24/16
\n",
- "
160.0
\n",
- "
160.0
\n",
- "
160.0
\n",
- "
...
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
N
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
\n",
- "
\n",
- "
4
\n",
- "
BALTIMORE
\n",
- "
NaN
\n",
- "
24 inch bins
\n",
- "
HOWDEN TYPE
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
11/5/16
\n",
- "
90.0
\n",
- "
100.0
\n",
- "
90.0
\n",
- "
...
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
N
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
NaN
\n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 26 columns
\n",
- "
"
- ],
- "text/plain": [
- " City Name Type Package Variety Sub Variety Grade Date \\\n",
- "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n",
- "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n",
- "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
- "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
- "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n",
- "\n",
- " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n",
- "0 270.0 280.0 270.0 ... NaN NaN NaN \n",
- "1 270.0 280.0 270.0 ... NaN NaN NaN \n",
- "2 160.0 160.0 160.0 ... NaN NaN NaN \n",
- "3 160.0 160.0 160.0 ... NaN NaN NaN \n",
- "4 90.0 100.0 90.0 ... NaN NaN NaN \n",
- "\n",
- " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n",
- "0 NaN NaN NaN E NaN NaN NaN \n",
- "1 NaN NaN NaN E NaN NaN NaN \n",
- "2 NaN NaN NaN N NaN NaN NaN \n",
- "3 NaN NaN NaN N NaN NaN NaN \n",
- "4 NaN NaN NaN N NaN NaN NaN \n",
- "\n",
- "[5 rows x 26 columns]"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
@@ -232,115 +32,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "