{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python3", "display_name": "Python 3", "language": "python" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "## Pumpkin Pricing Per Bushel, by City\n", "\n", "Load up required libraries and dataset. Convert the data to a dataframe containing a subset of the data: \n", "\n", "- Only get pumpkins priced by the bushel\n", "- Convert the date to a month\n", "- Calculate the price to be an average of high and low prices\n", "- Convert the price to reflect the pricing by bushel quantity" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " City Name Type Package Variety Sub Variety Grade Date \\\n", "0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n", "1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n", "2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", "3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n", "4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n", "\n", " Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n", "0 270.0 280.0 270.0 ... NaN NaN NaN \n", "1 270.0 280.0 270.0 ... NaN NaN NaN \n", "2 160.0 160.0 160.0 ... NaN NaN NaN \n", "3 160.0 160.0 160.0 ... NaN NaN NaN \n", "4 90.0 100.0 90.0 ... NaN NaN NaN \n", "\n", " Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n", "0 NaN NaN NaN E NaN NaN NaN \n", "1 NaN NaN NaN E NaN NaN NaN \n", "2 NaN NaN NaN N NaN NaN NaN \n", "3 NaN NaN NaN N NaN NaN NaN \n", "4 NaN NaN NaN N NaN NaN NaN \n", "\n", "[5 rows x 26 columns]" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
City NameTypePackageVarietySub VarietyGradeDateLow PriceHigh PriceMostly Low...Unit of SaleQualityConditionAppearanceStorageCropRepackTrans ModeUnnamed: 24Unnamed: 25
0BALTIMORENaN24 inch binsNaNNaNNaN4/29/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
1BALTIMORENaN24 inch binsNaNNaNNaN5/6/17270.0280.0270.0...NaNNaNNaNNaNNaNNaNENaNNaNNaN
2BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
3BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN9/24/16160.0160.0160.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
4BALTIMORENaN24 inch binsHOWDEN TYPENaNNaN11/5/1690.0100.090.0...NaNNaNNaNNaNNaNNaNNNaNNaNNaN
\n

5 rows × 26 columns

\n
" }, "metadata": {}, "execution_count": 18 } ], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "pumpkins = pd.read_csv('../../data/US-pumpkins.csv')\n", "\n", "pumpkins.head()\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Month Variety City Package Low Price High Price Price\n", "70 1 3 1 0 5 3 13.636364\n", "71 1 3 1 0 10 7 16.363636\n", "72 2 3 1 0 10 7 16.363636\n", "73 2 3 1 0 9 6 15.454545\n", "74 2 3 1 0 5 3 13.636364" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
MonthVarietyCityPackageLow PriceHigh PricePrice
7013105313.636364
71131010716.363636
72231010716.363636
7323109615.454545
7423105313.636364
\n
" }, "metadata": {}, "execution_count": 19 } ], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "pumpkins = pumpkins[pumpkins['Package'].str.contains('bushel', case=True, regex=True)]\n", "\n", "new_columns = ['Package', 'Variety', 'City Name', 'Month', 'Low Price', 'High Price', 'Date']\n", "\n", "pumpkins = pumpkins.drop([c for c in pumpkins.columns if c not in new_columns], axis=1)\n", "\n", "price = (pumpkins['Low Price'] + pumpkins['High Price']) / 2\n", "\n", "month = pd.DatetimeIndex(pumpkins['Date']).month\n", "\n", "new_pumpkins = pd.DataFrame({'Month': month, 'Variety': pumpkins['Variety'], 'City': pumpkins['City Name'], 'Package': pumpkins['Package'], 'Low Price': pumpkins['Low Price'],'High Price': pumpkins['High Price'], 'Price': price})\n", "\n", "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1 1/9'), 'Price'] = price/1.1\n", "\n", "new_pumpkins.loc[new_pumpkins['Package'].str.contains('1/2'), 'Price'] = price*2\n", "\n", "new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n", "new_pumpkins.iloc[:, 0:-1] = new_pumpkins.iloc[:, 0:-1].apply(LabelEncoder().fit_transform)\n", "\n", "\n", "new_pumpkins.head()\n" ] }, { "source": [ "A basic scatterplot reminds us that we only have month data from August through December. We probably need more data to be able to draw conclusions in a linear fashion." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 20 }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAULUlEQVR4nO3dcYyU9Z3H8c+H7SobtbdyLNy6QOkRQq9X2qWZIA3JhdZ6GDR1NedVUj1yZ4p3qYmNDT2ozam5NpqjVnPJxQtWU+7q2TMpRWNpOUI1TU1LuygCHnLUhlpxA1s9qva2Fpfv/TEPdBlmmefZnWfneXbfr2Qy83znGeb7+AsfHp95nufniBAAoHymtboBAMDYEOAAUFIEOACUFAEOACVFgANASb1rIr9s5syZMX/+/In8SgAovd27d/8qIrpq6xMa4PPnz1d/f/9EfiUAlJ7tX9SrcwgFAEqKAAeAkiLAAaCkCHAAKCkCHABKakLPQsHksfW5I9q4/aBePT6kSzo7tG7lIvUt6Wl1W8CUQoAjs63PHdGGLfs0dGJYknTk+JA2bNknSYQ4MIE4hILMNm4/eDq8Txk6MayN2w+2qCNgaiLAkdmrx4cy1QHkgwBHZpd0dmSqA8gHAY7M1q1cpPZpPqPWPs1at3JRizoCpiYCHGPjBssAcpcqwG0ftr3P9h7b/Ulthu0dtg8lzxfn2yqKYuP2gzoxfOZcqieGgx8xgQmWZQ/8oxHRGxGVZHm9pJ0RsVDSzmQZUwA/YgLFMJ5DKFdL2py83iypb/ztoAz4ERMohrQBHpL+y/Zu22uT2uyIGJCk5HlWHg2ieNatXKSO9rYzah3tbfyICUywtFdiLo+IV23PkrTD9otpvyAJ/LWSNG/evDG0iKI5dbUll9IDreWIaLzWyA/Yd0p6S9KnJa2IiAHb3ZKejohz7oJVKpVgRh4AyMb27hG/P57W8BCK7QtsX3TqtaQ/l7Rf0hOS1iSrrZH0ePPaBQA0kuYQymxJ37Z9av3/iIjv2f6ppMds3yTpZUnX5dcmAKBWwwCPiJ9L+lCd+muSLsujKQBAY1yJCQAlxf3AMSafevBHeual108vL18wQ498+iMt7AiYetgDR2a14S1Jz7z0uj714I9a1BEwNRHgyKw2vBvVAeSDAAeAkiLAAaCkCHBktnzBjEx1APkgwJHZdZV5qpmQR9NcrQOYOAQ4Mtu4/aBO1txC52SICR2ACUaAIzMmdACKgQBHZm21x08a1AHkgwBHZu/UHj9pUAeQDwIcAEqKAAeAkiLAAaCkCHBk1uZRfsQcpQ4gH6kD3Hab7edsP5ks32n7iO09yWNVfm2iSFZfOjdTHUA+suyB3yrpQE3tvojoTR7bmtgXCqzynhl1r8SsvIdL6YGJlCrAbc+RdKWkr+XbDsqAKzGBYki7B36/pM9LOllTv8X2XtsP27643gdtr7Xdb7t/cHBwPL2iILgSEyiGhgFu+ypJxyJid81bD0haIKlX0oCke+t9PiI2RUQlIipdXV3j7RcFcElnR6Y6gHyk2QNfLukTtg9L+qakj9n+RkQcjYjhiDgp6UFJS3PsEwWybuUidbS3nVHraG/TupWLWtQRMDU1DPCI2BARcyJivqTrJX0/Im6w3T1itWsk7c+pRxRM35Ie3X3tYvV0dsiSejo7dPe1i9W3pKfVrQFTynhmpf8n272SQtJhSTc3pSOUQt+SHgIbaLFMAR4RT0t6Onl9Yw79AABS4kpMACgpAhwASooAB4CSIsABoKQIcAAoKQIcAEpqPOeBYwqbv/47Z9UO33NlCzoBpi72wJFZvfA+Vx1APghwACgpAhwASooAB4CSIsABoKQIcGQ22tkmnIUCTCxOI8SYENZA67EHDgAllXoP3HabpH5JRyLiKtszJP2npPmqTujwlxHxv3k0ieLhQh6g9bLsgd8q6cCI5fWSdkbEQkk7k2VMAVzIAxRDqgC3PUfSlZK+NqJ8taTNyevNkvqa2xoA4FzS7oHfL+nzkk6OqM2OiAFJSp5n1fug7bW2+233Dw4OjqtZAMDvNQxw21dJOhYRu8fyBRGxKSIqEVHp6uoayx8BAKgjzY+YyyV9wvYqSdMlvdv2NyQdtd0dEQO2uyUdy7NRAMCZGu6BR8SGiJgTEfMlXS/p+xFxg6QnJK1JVlsj6fHcukShcCEPUAzjuZDnHkmP2b5J0suSrmtOSygDwhpovUwBHhFPS3o6ef2apMua3xIAIA2uxASAkuJeKBgTrsQEWo89cGTGlZhAMRDgAFBSBDgAlBQBDgAlRYADQEkR4MiMKzGBYiDAMSb3f7JXPZ0dsqSezg7d/8neVrcETDmcB47Mtj53RBu27NPQiWFJ0pHjQ9qwZZ8kqW9JTytbA6YU9sCR2cbtB0+H9ylDJ4a1cfvBFnUETE0EODJ79fhQpjqAfBDgyCwy1gHkgwAHgJIiwAGgpNLMiTnd9k9sP2/7Bdt3JfU7bR+xvSd5rMq/XQDAKWlOI3xb0sci4i3b7ZJ+aPu7yXv3RcRX8msPADCahgEeESHprWSxPXnwexUAtFiqY+C222zvUXXm+R0RsSt56xbbe20/bPvi3LoEAJwlVYBHxHBE9EqaI2mp7Q9IekDSAkm9kgYk3Vvvs7bX2u633T84ONiktgEAmc5CiYjjqk5qfEVEHE2C/aSkByUtHeUzmyKiEhGVrq6ucTcMAKhKcxZKl+3O5HWHpI9LetF294jVrpG0P58WAQD1pDkLpVvSZtttqgb+YxHxpO1/t92r6g+ahyXdnF+bAIBaac5C2StpSZ36jbl0BABIhSsxAaCkCHAAKCkCHABKigAHgJIiwAGgpAhwACgpAhwASooAR2bLF8zIVAeQDwIcmT3z0uuZ6gDyQYADQEkR4ABQUmluZtVS77t9m347/PsJgKa3WS9+mek3W+n8d03T2++crFsHMHEK/TeuNrwl6bfDoffdvq1FHUFS3fA+Vx1APgod4LXh3agOAFNJoQMcADA6AhwASirNlGrTbf/E9vO2X7B9V1KfYXuH7UPJc9NnpZ/e5kx1AJhK0uyBvy3pYxHxIVVnoL/C9jJJ6yXtjIiFknYmy0314pdXnRXWnIUCAFVpplQLSW8li+3JIyRdLWlFUt+s6mz1f9/sBglrAKgv1TFw222290g6JmlHROySNDsiBiQpeZ41ymfX2u633T84ONisvgFgyksV4BExHBG9kuZIWmr7A2m/ICI2RUQlIipdXV1j7RMAUCPTWSgRcVzVQyVXSDpqu1uSkudjTe8OADCqNGehdNnuTF53SPq4pBclPSFpTbLaGkmP59UkiuXwPVdmqgPIR5o98G5JT9neK+mnqh4Df1LSPZIut31I0uXJMqaAL27dl6kOIB9pzkLZK2lJnfprki7LoykU26O7fjlq/Ut9iye4G2Dq4kpMZDYc9e9FM1odQD4IcGTW5vpXwo5WB5APAhyZsQcOFAMBDgAlRYADQEkR4ABQUgQ4AJQUAY7MOAsFKAYCHJmtvnRupjqAfDS8EhOodepqy0d3/VLDEWqztfrSuVyFCUwwxwSeu1upVKK/v3/Cvg8AJgPbuyOiUlvnEAoAlBSHUDAmX9y6j0MoQIsR4Mjsi1v36Rs/fvn08nDE6WVCHJg4HEJBZue6nSyAiZNmRp65tp+yfcD2C7ZvTep32j5ie0/yYPr4KYKbWQHFkOYQyjuSPhcRz9q+SNJu2zuS9+6LiK/k1x6KaJqlk3WyehrX8QATKs2MPAOSBpLXb9o+IKkn78ZQXOe/a5qGTpysWwcwcTL9jbM9X9Xp1XYlpVts77X9sO2LR/nMWtv9tvsHBwfH1SyK4bd1wvtcdQD5SB3gti+U9C1Jn42INyQ9IGmBpF5V99Dvrfe5iNgUEZWIqHR1dTWhZbTaJZ0dmeoA8pEqwG23qxrej0TEFkmKiKMRMRwRJyU9KGlpfm2iSNatXJSpDiAfac5CsaSHJB2IiK+OqHePWO0aSfub3x6K6F+eOpSpDiAfac5CWS7pRkn7bO9Jal+QtNp2r6SQdFjSzbl0iMI5dOw3meoA8pHmLJQfSqp3gti25rcDAEiL874AoKQIcGS2cNYFmeoA8kGAI7Mdt604K6wXzrpAO25b0ZqGgCmKuxFiTAhroPXYAweAkiLAAaCkOIQCADnKc/YqAhwAcpL37FUcQgGAnOQ9exUBDgA5yXv2KgIcAHLS5vrTVI1Wz4oAB4CcrL50bqZ6VvyICQA5OfVDZV5nobAHDgA5euTHL58+5j0coUdGnJUyXgQ4AOTkveu/o9qfKyOpN0OaGXnm2n7K9gHbL9i+NanPsL3D9qHkue6kxgAwVY12rklzzkFJtwf+jqTPRcSfSFom6TO23y9pvaSdEbFQ0s5kGQAwQRoGeEQMRMSzyes3JR2Q1CPpakmbk9U2S+rLq0kAwNkyHQO3PV/SEkm7JM2OiAGpGvKSZjW7OQDA6FIHuO0LJX1L0mcj4o0Mn1tru992/+Dg4Fh6BADUkSrAbberGt6PRMSWpHzUdnfyfrekY/U+GxGbIqISEZWurq5m9AwAULqzUCzpIUkHIuKrI956QtKa5PUaSY83vz0AKK/lC2ZkqmeV5krM5ZJulLTP9p6k9gVJ90h6zPZNkl6WdF1TOgKASeKZl17PVM+qYYBHxA8ljXbnlcua0gUAIDPuhQJMEnnO/IJiIsCBSSDvmV9QTNwLBZgE8p75BcVEgAOTQN4zv6CYCHBgEsh75hcUEwEOTAJ5z/yCsXn3+W2Z6lkR4MAk8KW+xbph2bzTe9xttm5YNo8fMFts9h9Mz1TPirNQgEniS32LCeyCOXTsN5nqWbEHDgAlxR44xuSDd3xPb7w9fHr53ee3ae9dV7SwI2DqYQ8cmdWGtyS98fawPnjH91rUETA1EeDIrDa8G9UB5IMAB4CSIsABoKQIcGQ2va3+1X2j1QHkgwBHZn94Uf2LEEarA8hHminVHrZ9zPb+EbU7bR+xvSd5rMq3TRTJq8eHMtUB5CPNHvjXJdU7wfe+iOhNHtua2xaK7JLOjkx1APloGOAR8QNJzZnADZPCupWL1NF+5s14OtrbtG7lohZ1BExN4zkGfovtvckhlotHW8n2Wtv9tvsHBwfH8XUoir4lPbr72sXq6eyQJfV0dujuaxerb0lPq1sDppSxXkr/gKR/lBTJ872S/qbeihGxSdImSapUKtxdfpLoW9JDYAMtNqY98Ig4GhHDEXFS0oOSlja3LQBAI2MKcNvdIxavkbR/tHUBAPloeAjF9qOSVkiaafsVSXdIWmG7V9VDKIcl3ZxjjwCAOhoGeESsrlN+KIdeAAAZcCUmAJQUAQ4AJUWAA0BJEeAAUFIEOADkZOGsCzLVsyLAASAnO25bcVZYL5x1gXbctqIpfz4BDgA5+sxHF55x36DPfHRh0/7ssd4LBQDQwNbnjmjDln0aOlGd8PvI8SFt2LJPkppyLyH2wAEgJxu3Hzwd3qcMnRjWxu0Hm/LnE+AAkJO8Z68iwAEgJ3nPXkWAA0BORpulqlmzVxHgAJCTu7f9d6Z6VgQ4AOTk6Ju/y1TPigAHgJJqGODJpMXHbO8fUZthe4ftQ8nzqJMaAwDykWYP/OuSrqiprZe0MyIWStqZLAMARph90XmZ6lk1DPCI+IGk12vKV0vanLzeLKmvKd0AwCSy6/bLzwrr2Redp123X96UP3+sl9LPjogBSYqIAduzRlvR9lpJayVp3rx5Y/w6ACinZoV1Pbn/iBkRmyKiEhGVrq6uvL8OAKaMsQb4UdvdkpQ8H2teSwCANMYa4E9IWpO8XiPp8ea0AwBIK81phI9K+pGkRbZfsX2TpHskXW77kKTLk2UAwARq+CNmRKwe5a3LmtwLACADR8TEfZk9KOkXY/z4TEm/amI7rcS2FM9k2Q6JbSmq8WzLeyLirLNAJjTAx8N2f0RUWt1HM7AtxTNZtkNiW4oqj23hXigAUFIEOACUVJkCfFOrG2gitqV4Jst2SGxLUTV9W0pzDBwAcKYy7YEDAEYgwAGgpAoX4LavsH3Q9s9sn3WfcVf9c/L+XtsfbkWfaaTYlhW2f217T/L4h1b02Ui9ST1q3i/FmKTYjlKMhyTZnmv7KdsHbL9g+9Y665RlXNJsS+HHxvZ02z+x/XyyHXfVWae5YxIRhXlIapP0kqQ/lnSepOclvb9mnVWSvivJkpZJ2tXqvsexLSskPdnqXlNsy59J+rCk/aO8X5YxabQdpRiPpNduSR9OXl8k6X9K/HclzbYUfmyS/84XJq/bJe2StCzPMSnaHvhSST+LiJ9HxO8kfVPVySNGulrSv0XVjyV1nrozYsGk2ZZSiPqTeoxUijFJsR2lEREDEfFs8vpNSQck9dSsVpZxSbMthZf8d34rWWxPHrVniTR1TIoW4D2Sfjli+RWdPZBp1imCtH1+JPlfru/a/tOJaa3pyjImaZRuPGzPl7RE1T2+kUo3LufYFqkEY2O7zfYeVW+xvSMich2Tsc7IkxfXqdX+C5ZmnSJI0+ezqt7j4C3bqyRtlbQw986aryxj0kjpxsP2hZK+JemzEfFG7dt1PlLYcWmwLaUYm4gYltRru1PSt21/ICJG/ubS1DEp2h74K5LmjlieI+nVMaxTBA37jIg3Tv0vV0Rsk9Rue+bEtdg0ZRmTcyrbeNhuVzXwHomILXVWKc24NNqWso1NRByX9LTOnhC+qWNStAD/qaSFtt9r+zxJ16s6ecRIT0j6q+TX3GWSfh3J/JwF03BbbP+RbSevl6o6Hq9NeKfjV5YxOacyjUfS50OSDkTEV0dZrRTjkmZbyjA2truSPW/Z7pD0cUkv1qzW1DEp1CGUiHjH9i2Stqt6FsfDEfGC7b9N3v9XSdtU/SX3Z5L+T9Jft6rfc0m5LX8h6e9svyNpSNL1kfxUXSSuTuqxQtJM269IukPVH2hKNSYptqMU45FYLulGSfuSY66S9AVJ86RyjYvSbUsZxqZb0mbbbar+A/NYRDyZZ35xKT0AlFTRDqEAAFIiwAGgpAhwACgpAhwASooAB4CSIsABoKQIcAAoqf8HQKptRCdCEaMAAAAASUVORK5CYII=\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "import matplotlib.pyplot as plt\n", "plt.scatter('Variety','Price',data=new_pumpkins)\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-0.8634790400214403\n" ] } ], "source": [ "print(new_pumpkins['Variety'].corr(new_pumpkins['Price']))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\nInt64Index: 415 entries, 70 to 1742\nData columns (total 7 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 Month 415 non-null int64 \n 1 Variety 415 non-null int64 \n 2 City 415 non-null int64 \n 3 Package 415 non-null int64 \n 4 Low Price 415 non-null int64 \n 5 High Price 415 non-null int64 \n 6 Price 415 non-null float64\ndtypes: float64(1), int64(6)\nmemory usage: 25.9 KB\n" ] } ], "source": [ "\n", "new_pumpkins.dropna(inplace=True)\n", "new_pumpkins.info()\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Variety Price\n", "70 3 13.636364\n", "71 3 16.363636\n", "72 3 16.363636\n", "73 3 15.454545\n", "74 3 13.636364\n", "... ... ...\n", "1738 1 30.000000\n", "1739 1 28.750000\n", "1740 1 25.750000\n", "1741 1 24.000000\n", "1742 1 24.000000\n", "\n", "[415 rows x 2 columns]" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
VarietyPrice
70313.636364
71316.363636
72316.363636
73315.454545
74313.636364
.........
1738130.000000
1739128.750000
1740125.750000
1741124.000000
1742124.000000
\n

415 rows × 2 columns

\n
" }, "metadata": {}, "execution_count": 23 } ], "source": [ "# create new dataframe \n", "new_columns = ['Variety', 'Price']\n", "ml_pumpkins = new_pumpkins.drop([c for c in new_pumpkins.columns if c not in new_columns], axis='columns')\n", "\n", "ml_pumpkins\n" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "X = ml_pumpkins.values[:, :1]\n", "y = ml_pumpkins.values[:, 1:2]\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Model Accuracy: 0.7327987875929955\nCoefficients: [[-8.54296764]]\nMean squared error: 23.443815358076087\nCoefficient of determination: 0.7802537224707632\n" ] } ], "source": [ "from sklearn.linear_model import LinearRegression\n", "from sklearn.metrics import r2_score, mean_absolute_error\n", "from sklearn.model_selection import train_test_split\n", "\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "lin_reg = LinearRegression()\n", "lin_reg.fit(X_train,y_train)\n", "\n", "pred = lin_reg.predict(X_test)\n", "\n", "accuracy_score = lin_reg.score(X_train,y_train)\n", "print('Model Accuracy: ', accuracy_score)\n", "\n", "# The coefficients\n", "print('Coefficients: ', lin_reg.coef_)\n", "# The mean squared error\n", "print('Mean squared error: ',\n", " mean_squared_error(y_test, pred))\n", "# The coefficient of determination: 1 is perfect prediction\n", "print('Coefficient of determination: ',\n", " r2_score(y_test, pred)) " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAADrCAYAAABXYUzjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAANg0lEQVR4nO3dMYgcZR/H8f/s7gUyFm8uxhcSdGdsYiFYhLUQu9yphaCgIsKCdpOwnWg3EBCZQmxsvNOxiBZTiAqCVnKHjaSQO4JNJFEkuyGv8r5JPJGs8S578xZ6685mN3fP3PPss7Pz/cAW92effR4sfj75zzwzTpqmAgCYvIrtBQBAWRHAAGAJAQwAlhDAAGAJAQwAlhDAAGBJTeXLR44cSX3fN7QUAJhN6+vr19I0vW+4rhTAvu/L2tqavlUBQAk4jtMeVacFAQCWEMAAYAkBDACWEMAAYAkBDACWGA/gJEnE932pVCri+74kSWJ6SgAoBKXb0FQlSSJBEEi32xURkXa7LUEQiIhIs9k0OTUATD2jO+AwDPvhu6Pb7UoYhianBYBCMBrAnU5HqQ4AZWI0gOv1ulIdAMrEaABHUSSu62ZqrutKFEUmpwWAQjAawM1mU+I4Fs/zxHEc8TxP4jjmAhwAiIij8lLORqOR8jAeAFDjOM56mqaN4ToHMQDAEgIYACwhgAHAEgIYACwhgAHAEgIYACwhgAHAEgIYACwhgAHAEgIYACwhgAHAEgIYACwhgAHAEgIYACwhgAHAEgIYACwxHsBJkojv+1KpVMT3fUmSxPSUAFAINZM/niSJBEHQfzV9u92WIAhERHgtEYDSM7oDDsOwH747ut2uhGFocloAKASjAdzpdJTqAFAmRgP48OHDSnUAKBPuggAAS4wG8I0bN5TqAFAmRgO4Xq8r1QGgTIwGcBRF4rpupua6rkRRZHJaACgEowHcbDYljmPxPE8cxxHP8ySOY+4BBgARcdI03fOXG41Gura2ZnA5ADB7HMdZT9O0MVznLggAsIQABgBLCGAAsIQABgBLCGAAsIQABgBLCGAAsIQABgBLCGAAsIQABgBLCGAAsMR4AC8uLorjOP3P4uKi6SkBoBCMBvDi4qKsrq5maqurq4QwAIjhAB4O393qAFAm9IABwBICGAAsMRrACwsLSnUAKBOjAbyysnJH2C4sLMjKyorJaQGgEGqmJyBsAWA0esAAYAkBDACWGA/gVqsltVpNHMeRWq0mrVbL9JQAUAhGe8CtVkuWl5f7f/d6vf7fS0tLJqcGgKnnpGm65y83Go10bW1tz9+v1WrS6/XuqFerVbl9+/aefwcAisxxnPU0TRvDdaMtiFHhe7c6AJSJ0QCuVqtKdQAoE6MBHASBUh0AysToRbidC21xHEuv15NqtSpBEHABDgDE8EU4AICli3AAgPGMB3CSJOL7vlQqFfF9X5IkMT0lABSC0R5wkiQSBIF0u10REWm32/0LcM1m0+TUADD1jO6AwzDsh++ObrcrYRianBYACsFoAHc6HaU6AJSJ0QCu1+tKdQAoE6MBHEWRuK6bqbmuK1EUmZwWAArBaAA3m02J41g8zxPHccTzPInjmAtwACAcxAAA4ziIAQBThgAGAEuMB3C1WhXHcfofHkUJAH8x/jzg7e3tTG17e5sQBgAxHMDD4btbHQDKhB4wAFhCAAOAJUYDuFIZ/fPj6gBQJsbfijwctpVKhbciA4AYfh6wCK+gB4Bx6AUAgCUEMABYYjyA5+fnMyfh5ufnTU8JAIVgNIDn5+dlY2MjU9vY2CCEAUAMB/Bw+O5WB4AymUAP+KiIpAMfAIDIBG5DE/nP0N9/hbDj/P0XmQygpIzugA8dOrTrdxznn88HH5hcDQBMF6MBfPPmTRH5156/HwTZQAaAWWa0BbG1tSUiWyKyk6Zq/YbhEKZdAWCWTPgghtP/5AnTwd3xxx/rXhsATJbVk3Bp+s/np5/Uxr70Eu0KAMVmNIDn5ub2XH/wwWwgqxoMYwIZQBEYDeDNzc07wnZubk42Nzd3HTsYxvsN5JUV9fEAYJrxFsTm5qakadr/7CV8RxkM4+++Uxv7xBPsjgFMnwkcxNDvkUeyu2LVUOXuCgDTYCYeR6mzXbG+rn99ADDKTATwsMEwPndObWyjQbsCwGQYD+AkScT3falUKuL7viRJYnrKjMce4+4KANPJaAAnSSJBEEi73ZY0TaXdbksQBBMP4UE62xU//KB/fQDKw2gAh2Eo3W43U+t2uxKGoclplQyG8Zdfqo09fpzdMYD8jN4F0el0lOq2Pf00d1cAmByjO+B0TAKNq08bne2Kn3/Wvz4AxTaTd0GYMhjGH32kNvbYMdoVALII4Jxefpm7KwDsDwGsic52Be8sBcphap6GNmsGw/jtt9XGzs+zOwbKYGqfhjZLXn+ddgWAOxlvQZw9e1Y8zxPHccTzPDl79qzpKaeeznbFrVv61wdgMkp3Em4aDYbxa6+pjT14kN0xUFSOyj25jUYjXVtb2/P3fd+Xdrt9R93zPLl8+fKef6fM9hOqhw6J/PqrvrUAyMdxnPU0TRvDdaM74KKdhJtG+2lXbGxkd8dbW2bWCCAfowF8zz33KNWxu8EwfuUVtbEHDtCuAKaJ0QC+efOmUh1qPvxQ38W8Eye0Lw/ALow+jKfoz4IomsH/rGkqUlH43+v589ld8fY2u2TANKM74Gq1qlSHPo6T3R2fPKk2vlKhXQGYZjSAgyBQqsOc1VV97YpnntG/PqCMjLYglpaWREQkjmPp9XpSrVYlCIJ+HfYMhvD2tojKP0q++CK7K6ajBORj9D5gFNNDD4lcupR/PIEMZFm5DxjFdPGivnZFq6V/fcCsIICxq8Ew/vNPtbHLy1zMA8YhgKHkwIFsINcUryLwZDfgH8YDOEkS8X1fKpWK+L7Pg3hmzNaWvnbFm2/qXx8wzXgaGrQaDOPff1cbe+YMu2OUC09Dw8TsN1S5uwJFxdPQYJ3OB9HHsf71AZNmNIDr9bpSHeUyGMbXr6uNPXWKdgWKz2gAR1Ekrutmaq7rShRFJqdFAR0+zHvzUD5GA7jZbEocx5l3wsVxLM1m0+S0mAE62xWffaZ/fYAOHEVG4Vy5IrKfLhYX8zBp4y7CGX0YD2DCAw9kQ1S15TD8fQIZtnAQA4Wns13x9df61weMw0EMzJzBMP7+e7WxJ09yMQ+Tw0EMlAqHQWADBzEA0duuOH9e//pQLhzEQKkNhvG336qNPXGCdgX2h4MYwN8efZTDIJgsDmIAY+hsV/z4o/71ofg4iAHk8NVXIk89lX88F/PKhYMYgEZPPslhEOwfryQCNNDZrvjlF/3rw3QigAEDBsNY9WFAR49yMa8saEEAhj33HO0KjMYOGJgwne2K337Tvz5MDgEMWDYYxu+9pzb20CHaFUVGAANT5NQpfbvjalX/+qAXAQxMsf20K7a3s4F865aZNSI/AhgokMEwfuMNtbEHD9KumDYEMFBQZ87oa1fcf7/+9c0C0y+U4DY0YEYMh7DKLvfq1ez3t7ZEaiVPh50XSnS7XRGR/gslRETb82zYAQMzanB33GqpjZ2bo10RhmE/fHd0u10Jw1DbHAQwUALvvquvXfH44/rXN41Gvc3nbvU8Sv6PDKCcBkM4TUUqCluxc+eyu+Kduy2gjh0wUHKOk90dP/us2vhKhXZFXgQwgIzPP9fXrnjxRf3rm5TqmJMs4+p5EMAA7mowjHs9tbGffFLc3fHOHQ97redBAAPYs0olG8iqF+SK9N68S5cuKdXzIIAB5PbNN/raFa++qn99+7G6uqpUz4MABqDNYBhvbamNfeed4uyOdSGAUWimj4oiv1otG8iqx52L1K7IiwBGYe0cFW2325Kmaf+oKCE8na5c0deueOst/esbtrCwoFTPg9fSo7B83x95KsnzPLl8+fLkF4Tc/vhDxHXzjzf1mqbFxcVMz3dhYUFWVlaUf4fX0mPmdDodpTqm18GD0/nevDxhq4IWBAqrXq8r1VEcOt+b9/77+tenCwGMwoqiSNyhf7e6ritRFFlaEUwZDOMbN9TGnj49vRfzCGAUVrPZlDiOxfM8cRxHPM+TOI61PasV02l+Xt/u2HYgcxEOwEzZT6h++qnI88/rW8uOcRfh2AEDmCmDu+OrV9XGvvDCZHfHBDCAmXXsmM52xb/l4Ycf1ro+AhhAaezv7or/yoULV7WGMAEMoLQGw/jixb2M+E0uXLigbX7jAcxZfQBFcPz4/toVeRg9CTeJ1zoDgBnDV+H071eN7oAn8VpnAJiMbe2/aDSAOasPAOMZDWDO6gPAeEYDmLP6ADCe0QDmrD4AjMezIABghCNHjsj169fvqN97771y7do1pd/iWRAAMGUIYAAYYdTu9271PAhgALCEAAYASwhgALCEAAYASwhgALCEAAaAEZwx7yQaV8+DAAaAEU6fPq1Uz8Po84ABoKiWlpZERCSOY+n1elKtViUIgn5dB44iA4BhHEUGgClDAAOAJQQwAFhCAAOAJQQwAIzRarWkVquJ4zhSq9Wk1Wpp/X1uQwOAEVqtliwvL/f/7vV6/b913YrGbWgAMEKtVpNer3dHvVqtyu3bt5V+i9vQAEDBqPC9Wz0PAhgARqhWq0r1PAhgABghCAKleh5chAOAEXgWBADMAC7CAcCUIYABwBICGAAsIYABwBICGAAsUboLwnGc/4lI29xyAGAmeWma3jdcVApgAIA+tCAAwBICGAAsIYABwBICGAAsIYABwBICGAAsIYABwBICGAAsIYABwJL/A9LCp6YpkCnOAAAAAElFTkSuQmCC\n" }, "metadata": {} } ], "source": [ "\n", "plt.scatter(X_test, y_test, color='black')\n", "plt.plot(X_test, pred, color='blue', linewidth=3)\n", "\n", "plt.xticks(())\n", "plt.yticks(())\n", "\n", "plt.show()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }