You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ML-For-Beginners/Regression/4-Logistic/solution/notebook.ipynb

350 lines
103 KiB

{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"## Logistic Regression - Lesson 4\n",
"\n",
"Load the required libraries and the dataset, then convert the data to a DataFrame containing a subset of the columns."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" City Name Type Package Variety Sub Variety Grade Date \\\n",
"0 BALTIMORE NaN 24 inch bins NaN NaN NaN 4/29/17 \n",
"1 BALTIMORE NaN 24 inch bins NaN NaN NaN 5/6/17 \n",
"2 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"3 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 9/24/16 \n",
"4 BALTIMORE NaN 24 inch bins HOWDEN TYPE NaN NaN 11/5/16 \n",
"\n",
" Low Price High Price Mostly Low ... Unit of Sale Quality Condition \\\n",
"0 270.0 280.0 270.0 ... NaN NaN NaN \n",
"1 270.0 280.0 270.0 ... NaN NaN NaN \n",
"2 160.0 160.0 160.0 ... NaN NaN NaN \n",
"3 160.0 160.0 160.0 ... NaN NaN NaN \n",
"4 90.0 100.0 90.0 ... NaN NaN NaN \n",
"\n",
" Appearance Storage Crop Repack Trans Mode Unnamed: 24 Unnamed: 25 \n",
"0 NaN NaN NaN E NaN NaN NaN \n",
"1 NaN NaN NaN E NaN NaN NaN \n",
"2 NaN NaN NaN N NaN NaN NaN \n",
"3 NaN NaN NaN N NaN NaN NaN \n",
"4 NaN NaN NaN N NaN NaN NaN \n",
"\n",
"[5 rows x 26 columns]"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>City Name</th>\n <th>Type</th>\n <th>Package</th>\n <th>Variety</th>\n <th>Sub Variety</th>\n <th>Grade</th>\n <th>Date</th>\n <th>Low Price</th>\n <th>High Price</th>\n <th>Mostly Low</th>\n <th>...</th>\n <th>Unit of Sale</th>\n <th>Quality</th>\n <th>Condition</th>\n <th>Appearance</th>\n <th>Storage</th>\n <th>Crop</th>\n <th>Repack</th>\n <th>Trans Mode</th>\n <th>Unnamed: 24</th>\n <th>Unnamed: 25</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>4/29/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>5/6/17</td>\n <td>270.0</td>\n <td>280.0</td>\n <td>270.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>E</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n 
<td>NaN</td>\n <td>9/24/16</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>160.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>BALTIMORE</td>\n <td>NaN</td>\n <td>24 inch bins</td>\n <td>HOWDEN TYPE</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>11/5/16</td>\n <td>90.0</td>\n <td>100.0</td>\n <td>90.0</td>\n <td>...</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>N</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
},
"metadata": {},
"execution_count": 1
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"\n",
"# Read the US pumpkins CSV and preview the first five rows.\n",
"csv_path = '../../data/US-pumpkins.csv'\n",
"pumpkins = pd.read_csv(csv_path)\n",
"\n",
"pumpkins.head()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"# Columns used in this lesson; every other column is discarded.\n",
"new_columns = ['Color','Origin','Item Size','Variety','City Name','Package']\n",
"\n",
"# Keep the selected columns in their original order, drop incomplete rows,\n",
"# then label-encode each column to integer codes.\n",
"new_pumpkins = (\n",
"    pumpkins.loc[:, pumpkins.columns.isin(new_columns)]\n",
"    .dropna()\n",
"    .apply(LabelEncoder().fit_transform)\n",
")"
]
},
{
"source": [
"Check the data shape, size, and quality"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# info is a method: it must be called to print the column dtypes and non-null counts.\n",
"new_pumpkins.info()"
]
},
{
"source": [
"Using Seaborn, create scatterplots to explore how the columns relate to each other, in particular Item Size and Color."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"\n",
"# Grid of pairwise scatterplots across every column of new_pumpkins.\n",
"g = sns.PairGrid(new_pumpkins)\n",
"g.map(sns.scatterplot)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Swarm plot of Item Size for each Color class.\n",
"sns.swarmplot(x=\"Color\", y=\"Item Size\", data=new_pumpkins)"
]
},
{
"cell_type": "code",
"execution_count": 10,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
4 years ago
"<seaborn.axisgrid.FacetGrid at 0x7f95c8484130>"
]
},
"metadata": {},
4 years ago
"execution_count": 10
4 years ago
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 360x360 with 1 Axes>",
4 years ago
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"352.15625pt\" version=\"1.1\" viewBox=\"0 0 352.470312 352.15625\" width=\"352.470312pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 352.15625 \nL 352.470312 352.15625 \nL 352.470312 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 42.620313 314.6 \nL 345.270312 314.6 \nL 345.270312 7.2 \nL 42.620313 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"PolyCollection_1\">\n <defs>\n <path d=\"M 118.559246 -51.528977 \nL 118.006379 -51.528977 \nL 117.893814 -54.24032 \nL 117.751719 -56.951662 \nL 117.578902 -59.663004 \nL 117.376321 -62.374347 \nL 117.147126 -65.085689 \nL 116.896104 -67.797032 \nL 116.628406 -70.508374 \nL 116.347624 -73.219716 \nL 116.053494 -75.931059 \nL 115.739719 -78.642401 \nL 115.392567 -81.353744 \nL 114.990898 -84.065086 \nL 114.508116 -86.776428 \nL 113.916167 -89.487771 \nL 113.191174 -92.199113 \nL 112.319736 -94.910455 \nL 111.304448 -97.621798 \nL 110.167004 -100.33314 \nL 108.947462 -103.044483 \nL 107.698895 -105.755825 \nL 106.477692 -108.467167 \nL 105.331008 -111.17851 \nL 104.283921 -113.889852 \nL 103.329502 -116.601195 \nL 102.42491 -119.312537 \nL 101.495673 -122.023879 \nL 100.448632 -124.735222 \nL 99.191944 -127.446564 \nL 97.658479 -130.157907 \nL 95.827549 -132.869249 \nL 93.739574 -135.580591 \nL 91.499242 -138.291934 \nL 89.264968 -141.003276 \nL 87.225469 -143.714618 \nL 85.567453 -146.425961 \nL 84.440878 -149.137303 \nL 83.92935 -151.848646 \nL 84.0325 -154.559988 \nL 84.664779 -157.27133 \nL 85.67147 
-159.982673 \nL 86.858771 -162.694015 \nL 88.03151 -165.405358 \nL 89.030275 -168.1167 \nL 89.759898 -170.828042 \nL 90.203287 -173.539385 \nL 90.418026 -176.250727 \nL 90.517143 -178.96207 \nL 90.638947 -181.673412 \nL 90.913149 -184.384754 \nL 91.430989 -187.096097 \nL 92.225838 -189.807439 \nL 93.268 -192.518781 \nL 94.473998 -195.230124 \nL 95.727232 -197.941466 \nL 96.904465 -200.652809 \nL 97.901524 -203.364151 \nL 98.652167 -206.075493 \nL 99.136048 -208.786836 \nL 99.374519 -211.498178 \nL 99.416054 -214.209521 \nL 99.315548 -216.920863 \nL 99.113208 -219.632205 \nL 98.81877 -222.343548 \nL 98.405465 -225.05489 \nL 97.815772 -227.766233 \nL 96.978011 -230.477575 \nL 95.830061 -233.188917 \nL 94.344406 -235.90026 \nL 92.548003 -238.611602 \nL 90.53127 -241.322944 \nL 88.442765 -244.034287 \nL 86.469391 -246.745629 \nL 84.805522 -249.456972 \nL 83.617365 -252.168314 \nL 83.010473 -254.879656 \nL 83.007993 -257.590999 \nL 83.545117 -260.302341 \nL 84.481563 -263.013684 \nL 85.629819 -265.725026 \nL 86.793201 -268.436368 \nL 87.805466 -271.147711 \nL 88.563446 -273.859053 \nL 89.045782 -276.570396 \nL 89.314135 -279.281738 \nL 89.497208 -281.99308 \nL 89.761683 -284.704423 \nL 90.276852 -287.415765 \nL 91.180636 -290.127107 \nL 92.55388 -292.83845 \nL 94.407447 -295.549792 \nL 96.683453 -298.261135 \nL 99.268866 -300.972477 \nL 102.01728 -303.683819 \nL 104.773559 -306.395162 \nL 107.396276 -309.106504 \nL 109.774197 -311.817847 \nL 111.834972 -314.529189 \nL 113.546157 -317.240531 \nL 114.910134 -319.951874 \nL 121.655491 -319.951874 \nL 121.655491 -319.951874 \nL 123.019468 -317.240531 \nL 124.730653 -314.529189 \nL 126.791428 -311.817847 \nL 129.169349 -309.106504 \nL 131.792066 -306.395162 \nL 134.548345 -303.683819 \nL 137.296759 -300.972477 \nL 139.882172 -298.261135 \nL 142.158178 -295.549792 \nL 144.011745 -292.83845 \nL 145.384989 -290.127107 \nL 146.288773 -287.415765 \nL 146.803942 -284.704423 \nL 147.068417 -281.99308 \nL 147.25149 -279.281738 \nL 
147.519843 -276.570396 \nL 148.002179 -273.859053 \nL 148.760159 -271.14
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd3xUVcL/8c+ZPpPeCQRCgNCLlJViRUThEQsI1t3fuuuKDcWKZbFgWcvu2lgfC+q6q65rwfZYWFwbukrvoRNCCBDSk0mZfn5/JEEXKQmZmTMzOe/XKy9hEu79Isk3N+eee46QUqJpmqaFn0F1AE3TtM5KF7CmaZoiuoA1TdMU0QWsaZqmiC5gTdM0RUyqA7THpEmT5KJFi1TH0DRNay9xuBej6gq4oqJCdQRN07SgiaoC1jRNiyW6gDVN0xTRBaxpmqaILmBN0zRFdAFrmqYpogtY0zRNEV3AmqZpiugC1jRNU0QXsKZpmiK6gDVN0xTRBaxpmqaILmBN0zRFomo1NE3T1HG73dw0+0YqK39cFKtXr9488uhjCHHYxb60Y9AFrGlam2zbto3NW7YyNM1DkjnAgSYjS5dVUllZSXp6uup4UUkPQWia1ibbt28H4Mr+9Vw1sIGLejcCzcWsHR9dwJqmtcmWLVtIskKyRQLQI96HEM2va8dHWQELIfoJIdb+5K1OCHGTqjyaph3dhnVryU900zrcazNBj/gAGzZsUBssiikbA5ZSbgVOABBCGIG9wPuq8miadmTl5eXsP1DG+D6+/3q9b5KbJZsK8Pl8mEz6llJ7RcoQxARgp5Ryt+ogmqb93OrVqwHon+L9r9f7J/twuz1s2rRJRayoFykFfAnw5uHeIYSYKYRYKYRYWV5eHuZYmqYBLF++nEQr9Ij3/9frA1O8GETz+7X2U17AQggLcB7wzuHeL6V8UUo5Sko5KiMjI7zhNE3D5/OxYvkyBie7MRwy3TfOLOmT5GPpD9+rCRfllBcwMBlYLaU8oDqIpmk/t379euqc9YzM8Bz2/SPS3ezYWci+ffvCnCz6RUIBX8oRhh80TVNvyZIlWI2CoWmHL+BftBTzkiVLwhkrJii9bSmEcAATgatV5ogFXq+Xu+66m3379x/2/XabjYceepDs7OwwJ9Oimdfr5asvv2BYmgur8fAfk2EP0CvRz78/X8wll1wS3oBRTukVsJSyUUqZJqWsVZkjFnz22WesXLmC4nrB7ibzz94KdxXx6quvqo6pRZmlS5dSW+fklC7uo37cyV2a2LGz8ODTclrb6Il7McDj8fC3v79GID6Tpvwz4TALo8jiZSxevJhf/epX5OTkKEipRaPPPv2UZBsMTvUe9ePGZHn4x47mC4H8/PwwpYt+kTAGrHXQBx98QGVFOa6uww9bvgCe7KFgMPLiiwvCnE6LVvv27eOHpT9walYjxmM0RbxZcmKGm0WffUpjY2N4AsYAXcBRrrS0lJdffgVfUnf8iV2P+HHSbMfVZShLlnzD99/rKUPasb3//vsYgAk5rjZ9/MTuLhqbXCxatCi0wWKILuAoJqXkySefwuPz48ode8Sr31aeLkOQjhT+/MST+ipFOyqn08knH/8fv8h0k2KVbfozvRN99Eny885b/8Tn8x37D2i6gKPZhx9+yLJlS2nqOgJpjT/2HzAYaewxjsqKcp588kmkbNsXltb5LFy4kMYmF1Nym9r156b0aGD/gTK++OKLECWLLbqAo9SWLVuY/5e/4EvKwZs1sM1/LpCQhbvrcD7//HM++eSTECbUolV9fT0L332HEemenz16fCzD0730SAjw2t//pq+C20AXcBSqqanh3vvux2+00dTr1GMOPRzK0/UE/EndeOqpp/VartrP/POf/8RZ38D5Pds/TCUEnJ/bQMneffzrX/8KQbrYogs4yrjdbu6++/eUl5fT0Ot0MNnafxAhaMo7FZ/Ryp133sX+Iz
y8oXU+FRUVvPP2W4zJcpOX2L6r31ajMjz0TvLzyssv4XK17QZeZ6ULOIoEAgEeevhhNm0qoDHvNALxmcd9LGm205A/kdr6RubMuQOn0xnEpFq0eumll/D5vEzvdfw3aYWAS3rXU1lVzVtvvRXEdLFHF3CUkFLyzDPP8O2SJbi6n4gvtWeHjxmwp9DQ+wxK9u7lzrvuoqmpfTdctNiyceNGFi1axOTuTWTaAx06Vr9kH6Mz3bzx+uv6J6yj0AUcBaSUPPfcc3zwwQd4ugzG22Vw0I7tT8ymMe9UCgoKuOvuu3G7j/7IqRabfD4fTz35BGl2jmvs93AuzW9ESC/zn3lGz7g5Al3AUeCVV17h7bffxpM5AHfOL4J+fF9qHk09T2HtmjXMnXsPHs/hV73SYtdbb73Fjp2FXNbbecRFd9or1RpgWs8Gvv/hB7766qvgHDTG6AKOYFJKXnnlFV577TU86X1x9xjT7hkPbeVL74Or50msWLGcuffco6+EO5Hdu3fz6l9f4RcZbn6RGdxvvmfluOiV6Ofpp56kpqYmqMeOBbqAI5SUkgULFvD3v/+9uXx7nhSy8m3lzeiHK3ccy5ct4/dz5+oS7gR8Ph+PPPIHrAY/v+7XEPTjGw1w1QAnDfVO/vynP+mhiEPoAo5AUkqef/55/vGPf+DJ6BeW8m3lzexPU8+TWbliBXfddZeeRhTjXnnlFbZs2coVfZ0kWkJTjt3i/Ezv1cC3333Hxx9/HJJzRCtdwBFGSsmzzz7LW2+9hSezP+7ccWEr31a+jL405Z3C6tWrufPOO/XsiBi1evVq3nzzH5ze1cWJQR56ONSk7i4Gp3r5y/xnKCoqCum5ooku4AgipWT+/Pm8++67eDIH4u5x7AV2QsWXnk9T3qmsXbeOO+7QJRxrysvLeWDe/WQ7JJfnB3/o4VAGATMHOLEKL/feM1cvBtVCF3CEaJ3n+9577+HJGoS7x2hl5dvKl96HprxTWb9hPXPm3KGHI2KEx+PhvnvvwdXg5IbBtUGb9XAsyVbJdQNr2VNSwqOPPqrHg9EFHBGklLz44ou8//77zeXb/UTl5dvKl9abprzT2LBxA7+fO1dPUYtyUkqefvppNm3ewlX96+gWd3yPGx+vgSk+LundwJIlS3jjjTfCeu5IpAs4Arz22mu8+eabeDL6R1T5tvKl9aIp9yRWrVzJ/fPm6VWuothbb73FJ598wrm5jUGfctZWk7q7GJPl5qWXXuLrr79WkiFS6AJW7OOPP+aVV17Bm9YbdxsWVVfFl9EXV48xfP+f//DEE0/oHx+j0LfffssLLzzPiZluLuylbkxfCPhd/3ryk/z84eGHKSgoUJZFNV3ACq1Zs4YnnngSf1I3XHmnRGz5tvJmDcSdPYxPP/2Ut99+W3UcrR3Wr1/Pgw88QK9EPzMH1GNQ/KlmMcLsIbUkmz3cfded7N69W20gRXQBK1JSUsLcuffgtybQ2Gs8iOj4p/B0G4E3pSfPPf+83lsuSmzfvp0777iDNIuHm4fUYgnTTbdjSbRIbhtag3Q7ue3WWygrK1MdKeyi46s+xvh8PuY98ACNHh8Nfc4Ek0V1pLYTAlfeqQQcaTz88B+oqKhQnUg7ij179jDn9tuwySZuH1YTsoctjlcXR4DbhtZQX1vJbbfeQnV1tepIYaULWIE33niD7du20dhjLNKWqDpO+xlNNPY6jUaXi8cff1yPB0eoPXv2cNPsG/E31TJnWDXpto4tMRkqPRP83DKkltJ9Jdx80+xOVcK6gMNsx44d/O1vf8eb2gtfap7qOMdN2pJo6jaK5cuX8+mnn6qOox2itXy9DdXcOayGrnGRWb6t+iX7uGVoLftKijtVCSstYCFEshDiXSHEFiHEZiHEWJV5wmHBgpeQRlPzNvJRzps5gEBCFi+9/IpeuCeCFBYWctONNxws35x2bqypysCUH0v4phtvoLy8XHWkkFN9Bfw0sEhK2R8YBmxWnCekNm3axLJlS3
FlDgaTVXWcjhMCV9cRVFdV6kVWIkRBQQGzb7yBQFNNVJVvq4EpPm4bWkvZ/hJmXX8dJSUlqiOFlLICFkIkAqcCLwN
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"# Violin plot of the Item Size distribution for each Color class.\n",
"sns.catplot(data=new_pumpkins, x=\"Color\", y=\"Item Size\", kind=\"violin\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Predict Color from the remaining encoded columns.\n",
"Selected_features = ['Origin','Item Size','Variety','City Name','Package']\n",
"X, y = new_pumpkins[Selected_features], new_pumpkins['Color']\n",
"\n",
"# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.2, random_state=0\n",
")"
]
},
{
"cell_type": "code",
4 years ago
"execution_count": 14,
"metadata": {},
"outputs": [
4 years ago
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.83 0.98 0.90 166\n",
" 1 0.00 0.00 0.00 33\n",
"\n",
" accuracy 0.81 199\n",
" macro avg 0.42 0.49 0.45 199\n",
"weighted avg 0.69 0.81 0.75 199\n",
"\n",
"Predicted labels: [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 1 0 0 0 0 0 0 0 0]\n",
"Accuracy: 0.8140703517587939\n",
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n"
4 years ago
]
4 years ago
}
],
4 years ago
"source": [
"from sklearn.metrics import accuracy_score, classification_report\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Pin the solver explicitly: 'liblinear' was the default before scikit-learn 0.22,\n",
"# so this keeps results stable across versions and avoids the default-solver FutureWarning.\n",
"model = LogisticRegression(solver='liblinear')\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"\n",
"# Per-class precision/recall/F1, the raw predictions, and the overall accuracy on the test set.\n",
"print(classification_report(y_test, predictions))\n",
"print('Predicted labels: ', predictions)\n",
"print('Accuracy: ', accuracy_score(y_test, predictions))"
4 years ago
]
},
{
"cell_type": "code",
4 years ago
"execution_count": 16,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
4 years ago
"<matplotlib.axes._subplots.AxesSubplot at 0x7f95892612b0>"
]
},
"metadata": {},
4 years ago
"execution_count": 16
},
{
"output_type": "display_data",
"data": {
4 years ago
"text/plain": "<Figure size 432x288 with 1 Axes>",
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 372.103125 248.518125\" width=\"372.103125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 248.518125 \nL 372.103125 248.518125 \nL 372.103125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 30.103125 224.64 \nL 364.903125 224.64 \nL 364.903125 7.2 \nL 30.103125 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"PolyCollection_1\">\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 61.82295 202.776198 \nL 61.82295 214.756364 \nL 69.157013 190.796033 \nL 76.491077 178.815868 \nL 76.491077 142.875372 \nL 76.491077 142.875372 \nL 69.157013 178.815868 \nL 61.82295 202.776198 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 89.325688 124.905124 \nL 89.325688 130.895207 \nL 92.99272 124.905124 \nL 92.99272 118.915041 \nL 92.99272 118.915041 \nL 89.325688 124.905124 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 124.16249 82.974545 \nL 124.16249 88.964628 \nL 124.16249 82.974545 \nL 124.16249 82.974545 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 146.16468 76.984463 \nL 146.16468 82.974545 \nL 151.665228 76.984463 \nL 151.665228 65.004298 \nL 151.665228 65.004298 \nL 146.16468 76.984463 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n 
<path clip-path=\"url(#pd4614883dc)\" d=\"M 184.668514 53.024132 \nL 184.668514 65.004298 \nL 184.668514 53.024132 \nL 184.668514 53.024132 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 232.339927 41.043967 \nL 232.339927 53.024132 \nL 232.339927 41.043967 \nL 232.339927 41.043967 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 314.848141 35.053884 \nL 314.848141 41.043967 \nL 314.848141 35.053884 \nL 314.848141 35.053884 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n <path clip-path=\"url(#pd4614883dc)\" d=\"M 333.1833 23.073719 \nL 333.1833 35.053884 \nL 333.1833 23.073719 \nL 333.1833 23.073719 \nz\n\" style=\"fill:#ff7f0e;fill-opacity:0.2;stroke:#ff7f0e;stroke-opacity:0.2;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"mb4db33e09d\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"45.321307\" xlink:href=\"#mb4db33e09d\" y=\"224.64\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0.0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3xW5d3H8c+VkE0SCCFAEkJYYYYZluJAQQEVRLB1D2qRtvbp08cKuOqs4uhwU7RYrba2kgjIVARRGYKoZEEgBEhCIJvscY/r+ePEGjGQO3An5x6/9+vFK/fJObnzOwS+HM65rt+ltNYIIYRwfz5mFyCEEMI5JNCFEMJDSKALIYSHkEAXQggPIYEuhBAeopNZ3zgyMlLHx8eb9e2FEMIt7d27t0Rr3b2lfaYFenx8PF999ZVZ314IIdySUurYmfbJLRchhPAQEuhCCOEhJNCFEMJDSKALIYSHkEAXQggP0WqgK6VWKKWKlFLpZ9ivlFIvKqWylVKpSqkxzi9TCCFEaxy5Qv87MP0s+2cAA5t+LQBeO/+yhBBCtFWr49C11p8ppeLPcshs4G1t9OHdpZTqopTqpbU+4aQahRDCtWV8AIWZrR5m05qvc8uJGHwJ/SfNcnoZzphYFAPkNdvOb/rcjwJdKbUA4yqeuLg4J3xrIYQwWc6n8P4dTRvqjIfppr1jNXzZ2Oiygd7SGbS4aobWejmwHCApKUlW1hBCuDdrA6y7F7r2hV/uAr/AHx1Sb7Hx0pZDLNuWQ9dgf56YPYwZib3apRxnBHo+0LvZdixQ4IT3FUII17b9RSjNhluSWwzzr46WsSg5lZziGq4fG8tDVw0lPNiv3cpxRqCvAe5RSr0HTAAq5P65EMLjlR2Bz5+HodfCgKk/2FXdYOW5jQd4e9cxosODeHv+eC5OaLGfllO1GuhKqX8BlwKRSql84BHAD0BrvQxYD8wEsoFa4M72KlYIIVyC1rBhEfh0gulP/2DXtoPFPJCSRkFFHbdPiue+KwcREtAxfRAdGeVyYyv7NfArp1UkhBCu7sBaOPQRXPkUhEUDcKq2kSfW7if563z6dw/h/bsnkRQf0aFlmdY+Vwgh3FLlCdj1GnQbAOPvBmBD2gkeXp1BeW0j90wZwD2XDSDQz7fDS5NAF0KItijJgtydcNG9FNVY+f3qfWzMOMnwmDDemj+OYdHhppUmgS6EEG2Ruwu0nY/tY7n3T9uot9pZPH0wP7+oL518zW2PJYEuhBBtUJv9BfW+3Viw2cq4+EiWzk2kX/fOZpcFSKALIYRDbHbNu19kMS9vNx9xMY/PTuTmCX3w8Tnz7NCOJoEuhBCtyC6qYnFyGhF5H3ObfwNTZ99B5Oh4s8v6EQl0IYQ4A4vNzl+3HebFT7KZ4f81zwctQwd2J3L41Na/2AQS6EII0YK0/AoWJaey/0QFL8Zs4ZrSFaiokTDlgRan+bsCCXQhhGim3mLjL5sP8frnOUQHa3YOfJdeeesh8XqY9RIUHzC7xDOSQBdCiCZf5pSyJCWNIyU13D3Cn0UVj+Gblw5TH4MLfwPKdR6AtkQCXQjh9arqLTy7MYt/7DpG74gg1szyZcQXC8HWCDf9BxKuMLtEh0igCyG8R+p/oPCHyyMfK6vl84PFxDTYeLdPOONjA/Db/DZ07QM3/Au6J5hUbNtJoAshvEPqfyDl5+DrD8oHDVhtdnrYNdcrhV+AwqdUQSkw8Aq49lUI6vLj9+ncs6Mrd5gEuhDC81nqYPMj0G0geuEXrNtfxiOrM6hosPDLKQP41ZT++HRysJlWWPusNuQMEuhCCM9WXwGbH4PKAsrnvc+if6XzcWYhI2LDeeeuCQzpFWZ2hU4jgS6E8EwF38BXKyBtJVhqOdxjOteu1DRai3lg5mDmX2h+My1nk0AXQniOxlpITzaCvOBr8AumeuC1PF18Ae8ei2BC3zCemTuC+MgQsyttFxLoQr
iLgm8h7X2zq3BdDZWQudq4xdJ9MPbpz/Bu3SSe2nICXx/FH6aEcOO0iS7VTMvZJNCFcGWVJ4yHcPUV8M+fQG0p+AaYXZVr8ukEA6dB0nwOBiayKDmNb/PyuWxwFH+YM5xe6hR4cJiDBLoQrq36pBHoW56E6iJYsBWiR5tdlctqtNp57dPDvLz1C0ID/XjhhlHMGhmNUgoIMru8dieBLoSrK/gG9rwB4+6SMD+LfXmnWJycyoGTVcwaGc0j1wylW2fv+t+MBLoQrsxug/X3QXAkXPaQ2dW4pLpGG3/efJA3Ps8hKjSQN25LYurQHmaXZQoJdCFc2YG1xmiN695oedail9t5uJT7U1I5WlrLjePjuH/mYMIC/cwuyzQS6EK4quoi2L0c+l4MifPMrsalVNZbWLrhAP/8Mpc+3YL5588ncEH/SLPLMp0EuhCu6qOHwdoAM//o8m1bO9In+wt58IN0iqrqWXBxP347NYEgfwen7Xs4CXQhXNGRzyH1PRh1i1t1+2tPpdUNPPZhJmv2FTCoRyjLbh3LqN5yG6o5CXQhXI21EdbdC13iYMwtZldjOq01a/YV8NiHmVTVW/jt1AR+cWl//Dt51rR9Z5BAF8LV7HwJSrKMhRU6uebalR3lREUdD32QzicHihjZuwvPzh3BoJ6hZpflsiTQhXAl5Udh23Mw+GpIuNIYg+6F7HbNe3vyeHr9fix2Ow9dNYQ7L+yLr4fP9DxfEujCe9SWwc5XoLHa7ErOLO9LUD4w4xmzKzHN0ZIalqSksiunjAv6d+Pp6xLp080zm2k5mwS68Hzf9UPZsMhopRrowv2vlS/MWArhsca2C6+O42xWm50V24/wx48O4u/rw9LrEvnpuN5N0/aFIxwKdKXUdOAFwBd4Q2u99LT94cA7QFzTez6vtX7TybUKcW6qTxr3pNPeh0sWw5QHzK7IcS68Oo4zHThZyeKVqezLr2DqkB48ee1weoZ79/ODc9FqoCulfIFXgGlAPrBHKbVGa53Z7LBfAZla62uUUt2BLKXUu1rrxnapWoi2sDXCut9B13iY/FuzqxHNNFhtvLL1MK9uzSY8yI+XbhzN1SN6yVX5OXLkCn08kK21zgFQSr0HzAaaB7oGQpXxU+gMlAFWJ9cqxLnZ928oPQQ3J4Of53fccxff5JazODmVg4XVzBkdw8NXDyUixN/sstyaI4EeA+Q1284HJpx2zMvAGqAACAV+qrW2n/5GSqkFwAKAuLi4c6lXiLYpOwLf/AOGzoaBU82uRgC1jVb++NFBVmw/Qs+wQFbckcRlg72zmZazORLoLf3fR5+2fSXwLXAZ0B/4WCn1uda68gdfpPVyYDlAUlLS6e8hhHNpbTwI9fGFK582uxoB7MguYUlKGrlltdwyMY7F0wcT6sXNtJzNkUDPB3o3247FuBJv7k5gqdZaA9lKqSPAYGC3U6oU4lwcWAuHPoKJv4DwGLOr8WoVdRaeXr+f9/bk0TcyhH8vmMiEft3MLsvjOBLoe4CBSqm+wHHgBuCm047JBS4HPldK9QAGATnOLFSINmmohg2LIWoYDJ9rdjVe7aOMkzy0Kp2S6gbuvsRophXoJ8202kOrga61tiql7gE2YQxbXKG1zlBKLWzavwx4Avi7UioN4xbNYq11STvWLcTZbXsGKo/DvBXGWpOiw5VUN/DomgzWpp5gcM9Q3rg9iRGx0kyrPTn0J11rvR5Yf9rnljV7XQBc4dzShDhHhZmw61UYfSvETfTa6fNm0Vqz6tvjPPZhJrUNNu6dlsDCS/vj5yvNtNqbXLoIz2K3w7r/g4AwmPa42dV4nYJTdTz4QRpbs4oZHWc00xrYQ5ppdRQJdNF+LPXg6w8+Z7gys1lh75tQtN9537O2BHJ3wqyXITjCee8rzspu17y7O5el6/dj1/DINUO5bVK8NNPqYBLoon3Y7VCYYSzOENDCFVphBqz+lXE7JCjCaEjlLMPnwaibv9/2on4oZsgprmZJchq7j5
YxeUAkT1+XSO+IYLPL8koS6KJ91JVBTdGPV9uxNsIXf4LPnofAcOOh5bDr2neJNS/ph9LRrDY7b3xxhD9/fJCATj4
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"from sklearn.metrics import roc_curve, roc_auc_score\n",
"\n",
"# Column 1 of predict_proba holds the predicted probability of class 1.\n",
"y_scores = model.predict_proba(X_test)\n",
"# calculate ROC curve\n",
"fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n",
"# Diagonal reference line first, then the model's ROC curve.\n",
"# x and y are passed by keyword: recent seaborn releases no longer accept them positionally.\n",
"sns.lineplot(x=[0, 1], y=[0, 1])\n",
"sns.lineplot(x=fpr, y=tpr)"
]
},
{
"source": [
"# Area under the ROC curve, computed from the class-1 probabilities.\n",
"auc = roc_auc_score(y_test, y_scores[:, 1])\n",
"print(auc)"
],
"cell_type": "code",
"metadata": {},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.6976998904709748\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
]
}