You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

508 lines
22 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "markdown",
"source": [
"# 🍄 Mushroom Proportions"
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"Import the mushroom dataset"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 27,
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"mushrooms = pd.read_csv('../../data/mushrooms.csv')\n",
"mushrooms.head()"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" class cap-shape cap-surface cap-color bruises odor \\\n",
"0 Poisonous Convex Smooth Brown Bruises Pungent \n",
"1 Edible Convex Smooth Yellow Bruises Almond \n",
"2 Edible Bell Smooth White Bruises Anise \n",
"3 Poisonous Convex Scaly White Bruises Pungent \n",
"4 Edible Convex Smooth Green No Bruises None \n",
"\n",
" gill-attachment gill-spacing gill-size gill-color ... \\\n",
"0 Free Close Narrow Black ... \n",
"1 Free Close Broad Black ... \n",
"2 Free Close Broad Brown ... \n",
"3 Free Close Narrow Brown ... \n",
"4 Free Crowded Broad Black ... \n",
"\n",
" stalk-surface-below-ring stalk-color-above-ring stalk-color-below-ring \\\n",
"0 Smooth White White \n",
"1 Smooth White White \n",
"2 Smooth White White \n",
"3 Smooth White White \n",
"4 Smooth White White \n",
"\n",
" veil-type veil-color ring-number ring-type spore-print-color population \\\n",
"0 Partial White One Pendant Black Scattered \n",
"1 Partial White One Pendant Brown Numerous \n",
"2 Partial White One Pendant Brown Numerous \n",
"3 Partial White One Pendant Black Scattered \n",
"4 Partial White One Evanescent Brown Abundant \n",
"\n",
" habitat \n",
"0 Urban \n",
"1 Grasses \n",
"2 Meadows \n",
"3 Urban \n",
"4 Grasses \n",
"\n",
"[5 rows x 23 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>class</th>\n",
" <th>cap-shape</th>\n",
" <th>cap-surface</th>\n",
" <th>cap-color</th>\n",
" <th>bruises</th>\n",
" <th>odor</th>\n",
" <th>gill-attachment</th>\n",
" <th>gill-spacing</th>\n",
" <th>gill-size</th>\n",
" <th>gill-color</th>\n",
" <th>...</th>\n",
" <th>stalk-surface-below-ring</th>\n",
" <th>stalk-color-above-ring</th>\n",
" <th>stalk-color-below-ring</th>\n",
" <th>veil-type</th>\n",
" <th>veil-color</th>\n",
" <th>ring-number</th>\n",
" <th>ring-type</th>\n",
" <th>spore-print-color</th>\n",
" <th>population</th>\n",
" <th>habitat</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Poisonous</td>\n",
" <td>Convex</td>\n",
" <td>Smooth</td>\n",
" <td>Brown</td>\n",
" <td>Bruises</td>\n",
" <td>Pungent</td>\n",
" <td>Free</td>\n",
" <td>Close</td>\n",
" <td>Narrow</td>\n",
" <td>Black</td>\n",
" <td>...</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>White</td>\n",
" <td>Partial</td>\n",
" <td>White</td>\n",
" <td>One</td>\n",
" <td>Pendant</td>\n",
" <td>Black</td>\n",
" <td>Scattered</td>\n",
" <td>Urban</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Edible</td>\n",
" <td>Convex</td>\n",
" <td>Smooth</td>\n",
" <td>Yellow</td>\n",
" <td>Bruises</td>\n",
" <td>Almond</td>\n",
" <td>Free</td>\n",
" <td>Close</td>\n",
" <td>Broad</td>\n",
" <td>Black</td>\n",
" <td>...</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>White</td>\n",
" <td>Partial</td>\n",
" <td>White</td>\n",
" <td>One</td>\n",
" <td>Pendant</td>\n",
" <td>Brown</td>\n",
" <td>Numerous</td>\n",
" <td>Grasses</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Edible</td>\n",
" <td>Bell</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>Bruises</td>\n",
" <td>Anise</td>\n",
" <td>Free</td>\n",
" <td>Close</td>\n",
" <td>Broad</td>\n",
" <td>Brown</td>\n",
" <td>...</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>White</td>\n",
" <td>Partial</td>\n",
" <td>White</td>\n",
" <td>One</td>\n",
" <td>Pendant</td>\n",
" <td>Brown</td>\n",
" <td>Numerous</td>\n",
" <td>Meadows</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Poisonous</td>\n",
" <td>Convex</td>\n",
" <td>Scaly</td>\n",
" <td>White</td>\n",
" <td>Bruises</td>\n",
" <td>Pungent</td>\n",
" <td>Free</td>\n",
" <td>Close</td>\n",
" <td>Narrow</td>\n",
" <td>Brown</td>\n",
" <td>...</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>White</td>\n",
" <td>Partial</td>\n",
" <td>White</td>\n",
" <td>One</td>\n",
" <td>Pendant</td>\n",
" <td>Black</td>\n",
" <td>Scattered</td>\n",
" <td>Urban</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Edible</td>\n",
" <td>Convex</td>\n",
" <td>Smooth</td>\n",
" <td>Green</td>\n",
" <td>No Bruises</td>\n",
" <td>None</td>\n",
" <td>Free</td>\n",
" <td>Crowded</td>\n",
" <td>Broad</td>\n",
" <td>Black</td>\n",
" <td>...</td>\n",
" <td>Smooth</td>\n",
" <td>White</td>\n",
" <td>White</td>\n",
" <td>Partial</td>\n",
" <td>White</td>\n",
" <td>One</td>\n",
" <td>Evanescent</td>\n",
" <td>Brown</td>\n",
" <td>Abundant</td>\n",
" <td>Grasses</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 23 columns</p>\n",
"</div>"
]
},
"metadata": {},
"execution_count": 27
}
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"Create a pie chart displaying the proportion of Poisonous vs. Edible mushrooms"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 28,
"source": [
"print(mushrooms.select_dtypes([\"object\"]).columns)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Index(['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',\n",
" 'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',\n",
" 'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',\n",
" 'stalk-surface-below-ring', 'stalk-color-above-ring',\n",
" 'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',\n",
" 'ring-type', 'spore-print-color', 'population', 'habitat'],\n",
" dtype='object')\n"
]
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 29,
"source": [
"cols = mushrooms.select_dtypes([\"object\"]).columns\n",
"mushrooms[cols] = mushrooms[cols].astype('category')"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 30,
"source": [
"edibleclass=mushrooms.groupby(['class']).count()\n",
"edibleclass"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" cap-shape cap-surface cap-color bruises odor gill-attachment \\\n",
"class \n",
"Edible 4208 4208 4208 4208 4208 4208 \n",
"Poisonous 3916 3916 3916 3916 3916 3916 \n",
"\n",
" gill-spacing gill-size gill-color stalk-shape ... \\\n",
"class ... \n",
"Edible 4208 4208 4208 4208 ... \n",
"Poisonous 3916 3916 3916 3916 ... \n",
"\n",
" stalk-surface-below-ring stalk-color-above-ring \\\n",
"class \n",
"Edible 4208 4208 \n",
"Poisonous 3916 3916 \n",
"\n",
" stalk-color-below-ring veil-type veil-color ring-number \\\n",
"class \n",
"Edible 4208 4208 4208 4208 \n",
"Poisonous 3916 3916 3916 3916 \n",
"\n",
" ring-type spore-print-color population habitat \n",
"class \n",
"Edible 4208 4208 4208 4208 \n",
"Poisonous 3916 3916 3916 3916 \n",
"\n",
"[2 rows x 22 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cap-shape</th>\n",
" <th>cap-surface</th>\n",
" <th>cap-color</th>\n",
" <th>bruises</th>\n",
" <th>odor</th>\n",
" <th>gill-attachment</th>\n",
" <th>gill-spacing</th>\n",
" <th>gill-size</th>\n",
" <th>gill-color</th>\n",
" <th>stalk-shape</th>\n",
" <th>...</th>\n",
" <th>stalk-surface-below-ring</th>\n",
" <th>stalk-color-above-ring</th>\n",
" <th>stalk-color-below-ring</th>\n",
" <th>veil-type</th>\n",
" <th>veil-color</th>\n",
" <th>ring-number</th>\n",
" <th>ring-type</th>\n",
" <th>spore-print-color</th>\n",
" <th>population</th>\n",
" <th>habitat</th>\n",
" </tr>\n",
" <tr>\n",
" <th>class</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Edible</th>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>...</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" <td>4208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Poisonous</th>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>...</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" <td>3916</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 22 columns</p>\n",
"</div>"
]
},
"metadata": {},
"execution_count": 30
}
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 33,
"source": [
"labels=['Edible','Poisonous']\n",
"plt.pie(edibleclass['population'],labels=labels,autopct='%.1f %%')\n",
"plt.title('Edible?')\n",
"plt.show()"
],
"outputs": [
{
"output_type": "error",
"ename": "KeyError",
"evalue": "'class'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2890\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2891\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2892\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'class'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-33-04c1f5ff0aea>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0medibleclass\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'class'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpie\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0medibleclass\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'population'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mautopct\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'%.1f %%'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtitle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Edible?'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2900\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2901\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2902\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2903\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2904\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2891\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2892\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2893\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2894\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2895\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'class'"
]
}
],
"metadata": {}
}
],
"metadata": {
"orig_nbformat": 4,
"language_info": {
"name": "python",
"version": "3.7.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
},
"nbformat": 4,
"nbformat_minor": 2
}