You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ML-For-Beginners/5-Clustering/2-K-Means/solution/notebook.ipynb

541 lines
520 KiB

4 years ago
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# Nigerian Music scraped from Spotify - an analysis"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 10,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n",
"Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n",
4 years ago
"Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n",
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n",
4 years ago
"Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n",
4 years ago
"Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n",
"Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n",
"Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n",
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
4 years ago
]
}
],
"source": [
"# Use the %pip magic explicitly (instead of relying on automagic) so pip runs within the current kernel\n",
"%pip install seaborn"
]
},
4 years ago
{
"source": [
"Start where we finished in the last lesson, with data imported and filtered."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" name album \\\n",
"0 Sparky Mandy & The Jungle \n",
"1 shuga rush EVERYTHING YOU HEARD IS TRUE \n",
"2 LITT! LITT! \n",
"3 Confident / Feeling Cool Enjoy Your Life \n",
"4 wanted you rare. \n",
"\n",
" artist artist_top_genre release_date length popularity \\\n",
"0 Cruel Santino alternative r&b 2019 144000 48 \n",
"1 Odunsi (The Engine) afropop 2020 89488 30 \n",
"2 AYLØ indie r&b 2018 207758 40 \n",
"3 Lady Donli nigerian pop 2019 175135 14 \n",
"4 Odunsi (The Engine) afropop 2018 152049 25 \n",
"\n",
" danceability acousticness energy instrumentalness liveness loudness \\\n",
"0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n",
"1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n",
"2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n",
"3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n",
"4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n",
"\n",
" speechiness tempo time_signature \n",
"0 0.0829 133.015 5 \n",
"1 0.3600 129.993 3 \n",
"2 0.0424 130.005 4 \n",
"3 0.1130 111.087 4 \n",
"4 0.0447 105.115 4 "
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>album</th>\n <th>artist</th>\n <th>artist_top_genre</th>\n <th>release_date</th>\n <th>length</th>\n <th>popularity</th>\n <th>danceability</th>\n <th>acousticness</th>\n <th>energy</th>\n <th>instrumentalness</th>\n <th>liveness</th>\n <th>loudness</th>\n <th>speechiness</th>\n <th>tempo</th>\n <th>time_signature</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Sparky</td>\n <td>Mandy &amp; The Jungle</td>\n <td>Cruel Santino</td>\n <td>alternative r&amp;b</td>\n <td>2019</td>\n <td>144000</td>\n <td>48</td>\n <td>0.666</td>\n <td>0.8510</td>\n <td>0.420</td>\n <td>0.534000</td>\n <td>0.1100</td>\n <td>-6.699</td>\n <td>0.0829</td>\n <td>133.015</td>\n <td>5</td>\n </tr>\n <tr>\n <th>1</th>\n <td>shuga rush</td>\n <td>EVERYTHING YOU HEARD IS TRUE</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2020</td>\n <td>89488</td>\n <td>30</td>\n <td>0.710</td>\n <td>0.0822</td>\n <td>0.683</td>\n <td>0.000169</td>\n <td>0.1010</td>\n <td>-5.640</td>\n <td>0.3600</td>\n <td>129.993</td>\n <td>3</td>\n </tr>\n <tr>\n <th>2</th>\n <td>LITT!</td>\n <td>LITT!</td>\n <td>AYLØ</td>\n <td>indie r&amp;b</td>\n <td>2018</td>\n <td>207758</td>\n <td>40</td>\n <td>0.836</td>\n <td>0.2720</td>\n <td>0.564</td>\n <td>0.000537</td>\n <td>0.1100</td>\n <td>-7.127</td>\n <td>0.0424</td>\n <td>130.005</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Confident / Feeling Cool</td>\n <td>Enjoy Your Life</td>\n <td>Lady Donli</td>\n <td>nigerian pop</td>\n <td>2019</td>\n <td>175135</td>\n <td>14</td>\n <td>0.894</td>\n <td>0.7980</td>\n <td>0.611</td>\n <td>0.000187</td>\n <td>0.0964</td>\n <td>-4.961</td>\n <td>0.1130</td>\n 
<td>111.087</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>wanted you</td>\n <td>rare.</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2018</td>\n <td>152049</td>\n <td>25</td>\n <td>0.702</td>\n <td>0.1160</td>\n <td>0.833</td>\n <td>0.910000</td>\n <td>0.3480</td>\n <td>-6.044</td>\n <td>0.0447</td>\n <td>105.115</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
4 years ago
"execution_count": 11
}
],
4 years ago
"source": [
"# Plotting and data-frame libraries used by the cells below\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"# Load the song data and preview the first rows\n",
"df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n",
"df.head()"
4 years ago
]
},
4 years ago
{
"source": [
"We will focus only on 3 genres. Maybe we can get 3 clusters built!"
],
"cell_type": "markdown",
"metadata": {}
},
4 years ago
{
"cell_type": "code",
4 years ago
"execution_count": 12,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Top genres')"
]
4 years ago
},
"metadata": {},
4 years ago
"execution_count": 12
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 720x504 with 1 Axes>",
4 years ago
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"474.468454pt\" version=\"1.1\" viewBox=\"0 0 598.4875 474.468454\" width=\"598.4875pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 474.468454 \nL 598.4875 474.468454 \nL 598.4875 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 33.2875 402.838125 \nL 591.2875 402.838125 \nL 591.2875 22.318125 \nL 33.2875 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p3897eb18a2)\" d=\"M 51.8875 402.838125 \nL 200.6875 402.838125 \nL 200.6875 40.438125 \nL 51.8875 40.438125 \nz\n\" style=\"fill:#3274a1;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p3897eb18a2)\" d=\"M 237.8875 402.838125 \nL 386.6875 402.838125 \nL 386.6875 295.525504 \nL 237.8875 295.525504 \nz\n\" style=\"fill:#e1812c;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p3897eb18a2)\" d=\"M 423.8875 402.838125 \nL 572.6875 402.838125 \nL 572.6875 369.412882 \nL 423.8875 369.412882 \nz\n\" style=\"fill:#3a923a;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m25e6aaa0af\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"126.2875\" xlink:href=\"#m25e6aaa0af\" y=\"402.838125\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- afro dancehall -->\n <defs>\n <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 
18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n <path d=\"M 37.109375 75.984375 \nL 37.109375 68.5 \nL 28.515625 68.5 \nQ 23.6875 68.5 21.796875 66.546875 \nQ 19.921875 64.59375 19.921875 59.515625 \nL 19.921875 54.6875 \nL 34.71875 54.6875 \nL 34.71875 47.703125 \nL 19.921875 47.703125 \nL 19.921875 0 \nL 10.890625 0 \nL 10.890625 47.703125 \nL 2.296875 47.703125 \nL 2.296875 54.6875 \nL 10.890625 54.6875 \nL 10.890625 58.5 \nQ 10.890625 67.625 15.140625 71.796875 \nQ 19.390625 75.984375 28.609375 75.984375 \nz\n\" id=\"DejaVuSans-102\"/>\n <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n <path d=\"M 30.609375 48.390625 \nQ 23.390625 48.390625 19.1875 42.75 \nQ 14.984375 37.109375 14.984375 27.296875 \nQ 14.984375 17.4
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI
4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXV
xVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5Sea
},
"metadata": {
"needs_background": "light"
}
4 years ago
}
],
"source": [
"# Keep only the three genres of interest\n",
"df = df[df['artist_top_genre'].isin(['afro dancehall', 'afropop', 'nigerian pop'])]\n",
"# Keep only songs whose popularity is above 0\n",
"df = df[df['popularity'] > 0]\n",
"\n",
"# Count the remaining songs per genre and show the counts as a bar chart\n",
"top = df['artist_top_genre'].value_counts()\n",
"plt.figure(figsize=(10, 7))\n",
"sns.barplot(x=top.index, y=top.values)\n",
"plt.xticks(rotation=45)\n",
"plt.title('Top genres', color='blue')"
]
},
{
"cell_type": "code",
4 years ago
"execution_count": 13,
"metadata": {},
"outputs": [
{
4 years ago
"output_type": "execute_result",
"data": {
"text/plain": [
" name album \\\n",
"1 shuga rush EVERYTHING YOU HEARD IS TRUE \n",
"3 Confident / Feeling Cool Enjoy Your Life \n",
"4 wanted you rare. \n",
"5 Kasala Pioneers \n",
"6 Pull Up Everything Pretty \n",
"\n",
" artist artist_top_genre release_date length popularity \\\n",
"1 Odunsi (The Engine) afropop 2020 89488 30 \n",
"3 Lady Donli nigerian pop 2019 175135 14 \n",
"4 Odunsi (The Engine) afropop 2018 152049 25 \n",
"5 DRB Lasgidi nigerian pop 2020 184800 26 \n",
"6 prettyboydo nigerian pop 2018 202648 29 \n",
"\n",
" danceability acousticness energy instrumentalness liveness loudness \\\n",
"1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n",
"3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n",
"4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n",
"5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n",
"6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n",
"\n",
" speechiness tempo time_signature \n",
"1 0.3600 129.993 3 \n",
"3 0.1130 111.087 4 \n",
"4 0.0447 105.115 4 \n",
"5 0.1970 100.103 4 \n",
"6 0.1990 95.842 4 "
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>album</th>\n <th>artist</th>\n <th>artist_top_genre</th>\n <th>release_date</th>\n <th>length</th>\n <th>popularity</th>\n <th>danceability</th>\n <th>acousticness</th>\n <th>energy</th>\n <th>instrumentalness</th>\n <th>liveness</th>\n <th>loudness</th>\n <th>speechiness</th>\n <th>tempo</th>\n <th>time_signature</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1</th>\n <td>shuga rush</td>\n <td>EVERYTHING YOU HEARD IS TRUE</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2020</td>\n <td>89488</td>\n <td>30</td>\n <td>0.710</td>\n <td>0.0822</td>\n <td>0.683</td>\n <td>0.000169</td>\n <td>0.1010</td>\n <td>-5.640</td>\n <td>0.3600</td>\n <td>129.993</td>\n <td>3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Confident / Feeling Cool</td>\n <td>Enjoy Your Life</td>\n <td>Lady Donli</td>\n <td>nigerian pop</td>\n <td>2019</td>\n <td>175135</td>\n <td>14</td>\n <td>0.894</td>\n <td>0.7980</td>\n <td>0.611</td>\n <td>0.000187</td>\n <td>0.0964</td>\n <td>-4.961</td>\n <td>0.1130</td>\n <td>111.087</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>wanted you</td>\n <td>rare.</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2018</td>\n <td>152049</td>\n <td>25</td>\n <td>0.702</td>\n <td>0.1160</td>\n <td>0.833</td>\n <td>0.910000</td>\n <td>0.3480</td>\n <td>-6.044</td>\n <td>0.0447</td>\n <td>105.115</td>\n <td>4</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Kasala</td>\n <td>Pioneers</td>\n <td>DRB Lasgidi</td>\n <td>nigerian pop</td>\n <td>2020</td>\n <td>184800</td>\n <td>26</td>\n <td>0.803</td>\n <td>0.1270</td>\n <td>0.525</td>\n <td>0.000007</td>\n <td>0.1290</td>\n <td>-10.034</td>\n <td>0.1970</td>\n 
<td>100.103</td>\n <td>4</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Pull Up</td>\n <td>Everything Pretty</td>\n <td>prettyboydo</td>\n <td>nigerian pop</td>\n <td>2018</td>\n <td>202648</td>\n <td>29</td>\n <td>0.818</td>\n <td>0.4520</td>\n <td>0.587</td>\n <td>0.004490</td>\n <td>0.5900</td>\n <td>-9.840</td>\n <td>0.1990</td>\n <td>95.842</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
4 years ago
"execution_count": 13
}
],
"source": [
4 years ago
"df.head()"
]
},
{
"source": [
"How clean is this data? Check for outliers using box plots. We will concentrate on columns with fewer outliers (although you could clean out the outliers). Box plots show the range of the data and will help us choose which columns to use. Note that box plots do not show variance, an important element of good clusterable data (https://stats.stackexchange.com/questions/91536/deduce-variance-from-boxplot)."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 14,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
4 years ago
"<matplotlib.axes._subplots.AxesSubplot at 0x7fbc18790a20>"
4 years ago
]
},
"metadata": {},
4 years ago
"execution_count": 14
4 years ago
},
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 4000x4000 with 12 Axes>",
4 years ago
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"1132.234375pt\" version=\"1.1\" viewBox=\"0 0 1136.475526 1132.234375\" width=\"1136.475526pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 1132.234375 \nL 1136.475526 1132.234375 \nL 1136.475526 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 10.7 243.547826 \nL 338.935294 243.547826 \nL 338.935294 7.2 \nL 10.7 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p2eb0fe637d)\" d=\"M 79.496791 30.834783 \nL 79.496791 219.913043 \nL 190.359091 219.913043 \nL 190.359091 30.834783 \nL 79.496791 30.834783 \nz\n\" style=\"fill:#3274a1;stroke:#3f3f3f;stroke-linejoin:miter;stroke-width:1.5;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m68c942961f\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"21.475401\" xlink:href=\"#m68c942961f\" y=\"243.547826\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 0 -->\n <defs>\n <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 
56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n </defs>\n <g transform=\"translate(18.294151 258.146264)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-48\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_2\">\n <g id=\"line2d_2\">\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"62.919251\" xlink:href=\"#m68c942961f\" y=\"243.547826\"/>\n </g>\n </g>\n <g id=\"text_2\">\n <!-- 10 -->\n <defs>\n <path d=\"M 12.40625 8.296875 \nL 28.515625 8.296875 \nL 28.515625 63.921875 \nL 10.984375 60.40625 \nL 10.984375 69.390625 \nL 28.421875 72.90625 \nL 38.28125 72.90625 \nL 38.28125 8.296875 \nL 54.390625 8.296875 \nL 54.390625 0 \nL 12.40625 0 \nz\n\" id=\"DejaVuSans-49\"/>\n </defs>\n <g transform=\"translate(56.556751 258.146264)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-49\"/>\n <use x=\"63.623047\" xlink:href=\"#DejaVuSans-48\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_3\">\n <g id=\"line2d_3\">\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"104.363102\" xlink:href=\"#m68c942961f\" y=\"243.547826\"/>\n </g>\n </g>\n <g id=\"text_3\">\n <!-- 20 -->\n <defs>\n <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875
4 years ago
"image/png": "iVBORw0KGgoAAAANSUhEUgAADFQAAAxLCAYAAAAjUVg8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAewgAAHsIBbtB1PgAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzdsU5bZxiA4R/XFalaEEsHS1xCOpbroL3Qluug3Zp7YGAzqdRIwe7QMBAlEBrsY/w+z2QfWfgbLMv/d/SKg/V6PQAAAAAAAAAAAAAAAEpmUw8AAAAAAAAAAAAAAACwbYIKAAAAAAAAAAAAAAAgR1ABAAAAAAAAAAAAAADkCCoAAAAAAAAAAAAAAIAcQQUAAAAAAAAAAAAAAJAjqAAAAAAAAAAAAAAAAHIEFQAAAAAAAAAAAAAAQI6gAgAAAAAAAAAAAAAAyBFUAAAAAAAAAAAAAAAAOYIKAAAAAAAAAAAAAAAgR1ABAAAAAAAAAAAAAADkCCoAAAAAAAAAAAAAAIAcQQUAAAAAAAAAAAAAAJAjqAAAAAAAAAAAAAAAAHIEFQAAAAAAAAAAAAAAQI6gAgAAAAAAAAAAAAAAyJlv+w0PDg4Oxxg/fXh6Pca43fYMAAAA7Lxvxhg/fnj813q9fjflMPBc7EUAAAD4QnYj7B17EQAAAL7QVvciWw8qxn+H4z8meF8AAABepp/HGH9OPQQ8E3sRAAAAnspuhH1hLwIAAMBTbXwvMtvkHwcAAAAAAAAAAAAAANhFU/yHiuu7B5eXl2OxWEwwAgAAALvs6upqnJ2d3T29fui18MLYiwAAAPAouxH2lL0IAAAAj9r2XmSKoOL27sFisRinp6cTjAAAAMALcvv4S+DFsBcBAADgqexG2Bf2IgAAADzVxvcis02/AQAAAAAAAAAAAAAAwK4RVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAAAAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5AgqAAAAAAAAAAAAAACAHEEFAAAAAAAAAAAAAACQI6gAAAAAAA
AAAAAAAAByBBUAAAAAAAAAAAAAAECOoAIAAAAAAAAAAAAAAMgRVAAAAAAAAAAAAAAAADmCCgAAAAAAAAAAAAAAIEdQAQAAAAAAAAAAAAAA5MynHgAKVqvVWC6XU49BwGq1Gjc3N/euHR0djdlMP8fmHB8f+4wBAMAn2AfwMed2do0zPQAAAAB8PfcDdoc9/PbYL7NPBBWwBcvlcpyfn089BsBGXFxcjJOTk6nHAACAnWMfAOw6Z3oAAAAA+HruB1Bkv8w+kQYBAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOTMpx4Aqv5+/etYzw+nHoN98/6f8cOb3+9devv6lzHmryYaiH1z8P7d+P7Nb1OPAQAAL5Z9QJxzOxNypgcAAACA7XE/YCL28Bthv8y+E1TARNbzw7H+9rupx2DPHHzq4vyVzxoAAMCOsA9oc24HAAAAAGhwP2Aa9vDA/zGbegAAAAAAAAAAAAAAAIBtE1QAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAAAAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggoAAAAAAAAAAAAAACBHUAEAAAAAAAAAAAAAAOQIKgAAAAAAAAAAAAAAgBxBBQAAAAAAAA
AAAAAAkCOoAAAAAAAAAAAAAAAAcgQVAAAAAAAAAAAAAABAjqACAAAAAAAAAAAAAADIEVQAAAAAAAAAAAAAAAA5ggo
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"# Draw one box plot per column on a 4x3 grid to compare ranges and spot outliers\n",
"boxplot_columns = [\n",
"    'popularity', 'acousticness', 'energy',\n",
"    'instrumentalness', 'liveness', 'loudness',\n",
"    'speechiness', 'tempo', 'time_signature',\n",
"    'danceability', 'length', 'release_date',\n",
"]\n",
"\n",
"fig, axes = plt.subplots(4, 3, figsize=(20, 20), dpi=200)\n",
"# axes.flat walks the grid row by row, matching the order of the column list\n",
"for ax, column in zip(axes.flat, boxplot_columns):\n",
"    sns.boxplot(x=column, data=df, ax=ax)"
]
},
{
"source": [
"Choose several columns with similar ranges. Make sure to include the artist_top_genre column to keep our genres straight. "
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 15,
4 years ago
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
"\n",
"le = LabelEncoder()\n",
"\n",
"# Take an explicit copy of the selected columns so the encoding below\n",
"# modifies X only and can never write through to df\n",
"feature_columns = ['artist_top_genre', 'popularity', 'danceability', 'acousticness', 'loudness', 'energy']\n",
"X = df.loc[:, feature_columns].copy()\n",
"\n",
"# Encode the genre names as integers; y holds the same integer labels\n",
"X['artist_top_genre'] = le.fit_transform(X['artist_top_genre'])\n",
"y = le.transform(df['artist_top_genre'])\n",
"\n",
"# Feature scaling is currently disabled; uncomment both lines to standardize X\n",
"# (note that fit_transform returns a NumPy array rather than a DataFrame)\n",
"# scaler = StandardScaler()\n",
"# X = scaler.fit_transform(X)"
4 years ago
]
},
{
"source": [
"A drawback of K-Means clustering is that you must tell it how many clusters to build. We know there are three song types, so let's focus on 3."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 16,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 0, 2, 1, 1, 0, 1, 0, 0,\n",
" 0, 1, 0, 2, 0, 0, 2, 2, 1, 1, 0, 2, 2, 2, 2, 1, 1, 0, 2, 0, 2, 0,\n",
" 2, 0, 0, 1, 1, 2, 1, 0, 0, 2, 2, 2, 2, 1, 1, 0, 1, 2, 2, 1, 2, 2,\n",
" 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 0, 2, 1, 1, 1, 2, 2, 2,\n",
" 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 0,\n",
" 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 2, 1, 2,\n",
" 1, 2, 2, 2, 0, 2, 1, 1, 1, 2, 1, 0, 1, 2, 2, 1, 1, 1, 0, 1, 2, 2,\n",
" 2, 1, 1, 0, 1, 2, 1, 1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2,\n",
" 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 1, 2, 0, 2, 2, 0, 2, 2, 1, 1, 0,\n",
" 1, 1, 0, 0, 1, 0, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 0,\n",
" 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2,\n",
" 1, 1, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2,\n",
" 1, 2, 1, 2, 1, 1, 1, 0, 2, 2, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1],\n",
" dtype=int32)"
]
},
"metadata": {},
4 years ago
"execution_count": 16
4 years ago
}
],
"source": [
"\n",
"from sklearn.cluster import KMeans\n",
"\n",
"nclusters = 3 \n",
"seed = 0\n",
"\n",
4 years ago
"km = KMeans(n_clusters=nclusters, random_state=seed)\n",
"km.fit(X)\n",
"\n",
"# Predict the cluster for each data point\n",
"\n",
"y_cluster_kmeans = km.predict(X)\n",
"y_cluster_kmeans"
]
},
{
"source": [
"Those numbers don't mean much to us, so let's get a 'silhouette score' to see the accuracy. Our score is in the middle."
4 years ago
],
4 years ago
"cell_type": "markdown",
"metadata": {}
4 years ago
},
{
"cell_type": "code",
4 years ago
"execution_count": 17,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.5466747351275563"
]
},
"metadata": {},
4 years ago
"execution_count": 17
4 years ago
}
],
"source": [
"from sklearn import metrics\n",
"score = metrics.silhouette_score(X, y_cluster_kmeans)\n",
"score"
]
},
{
"source": [
"Import KMeans and build a model"
4 years ago
],
"cell_type": "markdown",
"metadata": {}
4 years ago
},
{
"cell_type": "code",
4 years ago
"execution_count": 19,
4 years ago
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans\n",
"wcss = []\n",
"\n",
"for i in range(1, 11):\n",
" kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)\n",
" kmeans.fit(X)\n",
" wcss.append(kmeans.inertia_)"
4 years ago
]
},
4 years ago
{
"source": [
"Use that model to decide, using the Elbow Method, the best number of clusters to build"
],
"cell_type": "markdown",
"metadata": {}
},
4 years ago
{
"cell_type": "code",
4 years ago
"execution_count": 20,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n FutureWarning\n"
]
},
4 years ago
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 720x360 with 1 Axes>",
4 years ago
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"331.674375pt\" version=\"1.1\" viewBox=\"0 0 624.890625 331.674375\" width=\"624.890625pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 331.674375 \nL 624.890625 331.674375 \nL 624.890625 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 59.690625 294.118125 \nL 617.690625 294.118125 \nL 617.690625 22.318125 \nL 59.690625 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m5626e93e8a\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"141.417898\" xlink:href=\"#m5626e93e8a\" y=\"294.118125\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- 2 -->\n <defs>\n <path d=\"M 19.1875 8.296875 \nL 53.609375 8.296875 \nL 53.609375 0 \nL 7.328125 0 \nL 7.328125 8.296875 \nQ 12.9375 14.109375 22.625 23.890625 \nQ 32.328125 33.6875 34.8125 36.53125 \nQ 39.546875 41.84375 41.421875 45.53125 \nQ 43.3125 49.21875 43.3125 52.78125 \nQ 43.3125 58.59375 39.234375 62.25 \nQ 35.15625 65.921875 28.609375 65.921875 \nQ 23.96875 65.921875 18.8125 64.3125 \nQ 13.671875 62.703125 7.8125 59.421875 \nL 7.8125 69.390625 \nQ 13.765625 71.78125 18.9375 73 \nQ 24.125 74.21875 28.421875 74.21875 \nQ 39.75 74.21875 46.484375 68.546875 \nQ 53.21875 62.890625 53.21875 53.421875 \nQ 53.21875 48.921875 51.53125 44.890625 \nQ 49.859375 40.875 45.40625 35.40625 \nQ 44.1875 33.984375 37.640625 27.21875 \nQ 
31.109375 20.453125 19.1875 8.296875 \nz\n\" id=\"DejaVuSans-50\"/>\n </defs>\n <g transform=\"translate(138.236648 308.716563)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-50\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_2\">\n <g id=\"line2d_2\">\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"254.14517\" xlink:href=\"#m5626e93e8a\" y=\"294.118125\"/>\n </g>\n </g>\n <g id=\"text_2\">\n <!-- 4 -->\n <defs>\n <path d=\"M 37.796875 64.3125 \nL 12.890625 25.390625 \nL 37.796875 25.390625 \nz\nM 35.203125 72.90625 \nL 47.609375 72.90625 \nL 47.609375 25.390625 \nL 58.015625 25.390625 \nL 58.015625 17.1875 \nL 47.609375 17.1875 \nL 47.609375 0 \nL 37.796875 0 \nL 37.796875 17.1875 \nL 4.890625 17.1875 \nL 4.890625 26.703125 \nz\n\" id=\"DejaVuSans-52\"/>\n </defs>\n <g transform=\"translate(250.96392 308.716563)scale(0.1 -0.1)\">\n <use xlink:href=\"#DejaVuSans-52\"/>\n </g>\n </g>\n </g>\n <g id=\"xtick_3\">\n <g id=\"line2d_3\">\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"366.872443\" xlink:href=\"#m5626e93e8a\" y=\"294.118125\"/>\n </g>\n </g>\n <g id=\"text_3\">\n <!-- 6 -->\n <defs>\n <path d=\"M 33.015625 40.375 \nQ 26.375 40.375 22.484375 35.828125 \nQ 18.609375 31.296875 18.609375 23.390625 \nQ 18.609375 15.53125 22.484375 10.953125 \nQ 26.375 6.390625 33.015625 6.390625 \nQ 39.65625 6.390625 43.53125 10.953125 \nQ 47.40625 15.53125 47.40625 23.390625 \nQ 47.40625 31.296875 43.53125 35.828125 \nQ 39.65625 40.375 33.015625 40.375 \nz\nM 52.59375 71.296875 \nL 52.59375 62.3125 \nQ 48.875 64.0625 45.09375 64.984375 \nQ 41.3125 65.921875 37.59375 65.921875 \nQ 27.828125 65.921875 22.671875 59.328125 \nQ 17.53125 52.734375 16.796875 39.40625 \nQ 19.671875 43.65625 24.015625 45.921875 \nQ 28.375 48.1875 33.59375 48.1875 \nQ 44.578125 48.1875 50.953125 41.515625 \nQ 57.328125 34.859375 57.328125 23.3906
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAFNCAYAAACAH1JNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de5hdZX33//d3coIQgSQEUkJMQE4iVMABgyIp4SyHhD3wCLWK/VEpLSii9Kn2oG3110ptK1JbWypWeMoD8kNIEJCDnEU5hIMEpEIKBMIpkUA4J4R8f3+sNWYnmWRmktmz9t7zfl3XvvZa91p7z3c5l5kP973WfUdmIkmSpNbRUXUBkiRJ6h8DnCRJUosxwEmSJLUYA5wkSVKLMcBJkiS1GAOcJElSizHASdI6RMSnIuKndfsZETtWWZMkgQFOkoiIJyPizYh4re717arrkqR1McBJUuHozBxT9zq96oIkaV0McJLUPx+NiMcj4tcR8Y2I6ACIiI6I+IuIWBARiyLiwojYojx2QUR8odyeVA7FnlbuvycilnR/jyT1hf9gSFL/HAt0AnsDM4H/p2z/VPk6ENgBGAN0D8PeCvxOuT0deBw4oG7/9sxc2diyJbUTA5wkFWZHxMt1r0+v47yzM3NJZj4FnAOcWLZ/HPinzHw8M18DvgScEBHDKQLc/mUv2wHA3wMfLj83vTwuSX1mgJOkwqzM3LLu9R/rOO/puu0FwLbl9rblfv2x4cA2mfk/wOvAnsBHgKuAZyNiFwxwkjaAAU6S+mdy3fa7gWfL7WeBKWscWwG8UO7fChwHjMzMZ8r9k4CxwAONLFhS+zHASVL//ElEjI2IycAZwA/K9ouBMyNi+4gYA/wt8IPMXFEevxU4Hbit3L+l3P9pZr4zaNVLagvDqy5AkprEjyKiPkjdAMzp4bw5wL3AFsD3gfPL9u9RDKPeBmwCXAd8pu5ztwLvYlWA+ykwum5fkvosMrPqGiRJktQPDqFKkiS1GAOcJElSizHASZIktRgDnCRJUosxwEmSJLWYITeNyFZbbZVTp06tugxJkqRe3Xvvvb/OzAlrtg+5ADd16lTmzp1bdRmSJEm9iogFPbU7hCpJktRiDHCSJEktxgAnSZLUYgxwkiRJLcYAJ0mS1GIMcJIkSS3GACdJktRihtw8cA21ciUsWgTLlsGoUbD11tBhRpYkSQPLdDFQVq6EefNg2jSYOrV4nzevaJckSRpABriBsmgRzJwJC8oJkxcsKPYXLaq2LkmS1HYMcANl2bJV4a3bggVFuyRJ0gAywA2UUaNgypTV26ZMKdolSZIGkAFuoGy9NcyZsyrETZlS7G+9dbV1SZKktuNTqAOlowP22APuvBMefxzeeqvY9ylUSZI0wEwXA6mjAyZOhGuugUMPhSVLqq5IkiS1IQNcI9Rq8M47cOWVVVciSZLakAGuEfbaq5gL7vLLq65EkiS1IQNcI0QUvXA33ACvvFJ1NZIkqc0Y4BqlqwuWL4err666EkmS1GYMcI0ybVrxQIPDqJIkaYAZ4BqlowOOPbZ4IvWNN6quRpIktREDXCN1dRXh7frrq65EkiS1EQNcIx1wAIwbBz/8YdWVSJKkNmKAa6QRI2DmTPjRj4oHGiRJkgaAAa7RajVYuhRuuqnqSiRJUpswwDXawQfDu97l06iSJGnAGOAabZNN4MgjYfbsYnktSZKkjWSAGwy1GixeDD/9adWVSJKkNmCAGwxHHFH0xDmMKkmSBoABbjCMGQOHHVYEuJUrq65GkiS1OAPcYOnqgoULYe7cqiuRJEktzgA3WI46CoYPd1JfSZK00Qxwg2XsWDjooGIYNbPqaiRJUgszwA2mWg3mz4eHHqq6EkmS1MIMcINp5kyIcBhVkiRtFAPcYNpmG/jIR5xORJIkbRQD3GCr1WDePHjssaorkSRJLcoAN9hqteLdXjhJkrSBGhrgIuLMiH
g4Ih6KiIsjYpOI2D4i7oqI+RHxg4gYWZ47qtyfXx6fWvc9XyrbfxURh9W1H162zY+ILzbyWgbM5Mmwzz7eBydJkjZYwwJcREwCPgt0ZubuwDDgBOBs4JuZuSPwEnBy+ZGTgZfK9m+W5xERu5Wfex9wOPCvETEsIoYB/wIcAewGnFie2/y6uuCee+Cpp6quRJIktaBGD6EOBzaNiOHAaOA5YAZwWXn8AmBWuT2z3Kc8flBERNl+SWYuy8wngPnAvuVrfmY+npnLgUvKc5vfsccW77NnV1uHJElqSQ0LcJn5DPAPwFMUwW0pcC/wcmauKE9bCEwqtycBT5efXVGeP76+fY3PrKu9+e28M+y+u8OokiRpgzRyCHUsRY/Y9sC2wGYUQ6CDLiJOiYi5ETF38eLFVZSwtq4uuP12eOGFqiuRJEktppFDqAcDT2Tm4sx8G7gc+DCwZTmkCrAd8Ey5/QwwGaA8vgXwYn37Gp9ZV/taMvO8zOzMzM4JEyYMxLVtvFqtWFJrzpyqK5EkSS2mkQHuKWBaRIwu72U7CPglcDNwXHnOSUB3grmy3Kc8flNmZtl+QvmU6vbATsDdwD3ATuVTrSMpHnS4soHXM7D22AN23NHpRCRJUr818h64uygeRrgPmFf+rPOAPwU+HxHzKe5xO7/8yPnA+LL988AXy+95GLiUIvxdC5yWme+U98mdDlwHPAJcWp7bGiKKXrgbb4SXXqq6GkmS1EKi6OQaOjo7O3Pu3LlVl1G4+2744AfhwgvhE5+ouhpJktRkIuLezOxcs92VGKrU2QnbbecwqiRJ6hcDXJU6Ooo54a69Fl57repqJElSizDAVa2rC956qwhxkiRJfWCAq9r++8OECU7qK0mS+swAV7Vhw2DWLLjqqqInTpIkqRcGuGZQqxX3wP3kJ1VXIkmSWoABrhnMmAFbbOHTqJIkqU8McM1g5Eg4+uhiWa0VK6quRpIkNTkDXLOo1WDJErj11qorkSRJTc4A1ywOOwxGj3YYVZIk9coA1yxGj4YjjoArroCVK6uuRpIkNTEDXDPp6oLnnoM776y6EkmS1MQMcM3kyCOLBxqc1FeSJK2HAa6ZbL45HHJIcR9cZtXVSJKkJmWAaza1Gjz5JDzwQNWVSJKkJmWAazbHHFMsr+UwqiRJWgcDXLPZaiuYPt3pRCRJ0joZ4JpRrQaPPFK8JEmS1mCAa0bHHlu82wsnSZJ6YIBrRttuC/vt531wkiSpRwa4ZtXVBfffD088UXUlkiSpyRjgmlX3MOoVV1RbhyRJajoGuGa1ww6w554Oo0qSpLUY4JpZVxf87GfF+qiSJEklA1wzq9WKd4dRJUlSHQNcM9ttN9h1V6cTkSRJqzHANbtaDW65BV58sepKJElSkzDANbuuLnjnHbjyyqorkSRJTcIA1+z22gumTHEYVZIk/YYBrtlFFMOo118Pr7xSdTWSJKkJGOBaQVcXLF8O11xTdSWSJKkJGOBawX77wcSJTuorSZIAA1xr6Ogolta65hp4882qq5EkSRUzwLWKWg3eeAOuu67qSiRJUsUMcK1i+nQYN86nUSVJkgGuZYwYAcccU8wHt3x51dVIkqQKGeBaSa0GS5fCzTdXXYkkSaqQAa6VHHIIjBnjMKokSUOcAa6VbLIJHHkkzJ5dLK8lSZKGJANcq+nqgkWL4I47qq5EkiRVxADXao44ouiJc1JfSZKGLANcqxkzBg47rLgPLrPqaiRJUgUMcK2oVoOFC+Gee6quRJIkVcAA14qOPhqGD/dpVEmShigDXCsaOxZmzCjug3MYVZKkIccA16pqNZg/Hx56qOpKJEnSIDPAtapZsyDCYVRJkoaghga4iNgyIi6LiP+OiEciYr+IGBcRN0TEY+X72PLciIhzI2J+RDwYEXvXfc9J5fmPRcRJde0fiIh55WfOjYho5PU0lW22gf33dzoRSZKGoEb3wH0LuDYzdwXeDzwCfBG4MTN3Am4s9wGOAHYqX6cA3w
GIiHHAV4APAvsCX+kOfeU5n6773OENvp7m0tUF8+bBY49VXYkkSRpEDQtwEbEFcABwPkBmLs/Ml4GZwAXlaRcAs8r
4 years ago
},
"metadata": {
"needs_background": "light"
}
4 years ago
}
],
"source": [
"plt.figure(figsize=(10,5))\n",
"sns.lineplot(range(1, 11), wcss,marker='o',color='red')\n",
4 years ago
"plt.title('Elbow')\n",
"plt.xlabel('Number of clusters')\n",
"plt.ylabel('WCSS')\n",
"plt.show()"
4 years ago
]
},
4 years ago
{
"source": [
4 years ago
"Looks like 3 is a good number after all. Fit the model again and create a scatterplot of your clusters. They do group in bunches, but they are pretty close together."
4 years ago
],
4 years ago
"cell_type": "code",
"metadata": {},
"execution_count": null,
"outputs": []
4 years ago
},
4 years ago
{
"cell_type": "code",
4 years ago
"execution_count": 21,
4 years ago
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<Figure size 432x288 with 1 Axes>",
4 years ago
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"265.728679pt\" version=\"1.1\" viewBox=\"0 0 385.78125 265.728679\" width=\"385.78125pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 265.728679 \nL 385.78125 265.728679 \nL 385.78125 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 43.78125 228.172429 \nL 378.58125 228.172429 \nL 378.58125 10.732429 \nL 43.78125 10.732429 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"PathCollection_1\">\n <defs>\n <path d=\"M 0 3 \nC 0.795609 3 1.55874 2.683901 2.12132 2.12132 \nC 2.683901 1.55874 3 0.795609 3 0 \nC 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 \nC 1.55874 -2.683901 0.795609 -3 0 -3 \nC -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 \nC -2.683901 -1.55874 -3 -0.795609 -3 0 \nC -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 \nC -1.55874 2.683901 -0.795609 3 0 3 \nz\n\" id=\"C0_0_cb5f23d3e7\"/>\n </defs>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"181.597704\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"90.221967\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#21918c;stroke:#21918c;\" x=\"113.978172\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"39.956499\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"160.4666\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"92.407422\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"164.692821\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"64.816051\"/>\n </g>\n <g 
clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"177.371484\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"60.718323\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"168.919042\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"63.450142\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"206.955029\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"53.06923\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"202.728808\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"103.881061\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"152.014159\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"124.916067\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#21918c;stroke:#21918c;\" x=\"113.978172\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"95.685605\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"177.371484\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"113.715609\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"198.502588\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"89.402421\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#21918c;stroke:#21918c;\" x=\"126.656834\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"103.061516\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"206.955029\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"41.049227\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#fde725;stroke:#fde725;\" x=\"244.991016\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"87.216966\"/>\n </g>\n <g clip-path=\"url(#p9fb0d779e7)\">\n <use style=\"fill:#440154;stroke:#440154;\" x=\"206.955029\" xlink:href=\"#C0_0_cb5f23d3e7\" y=\"76.562872\"/>\n </g>\n <g clip-pat
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydd3gUVduH7zOzNZWE0BKqVAHpAgpSFBTBil2wI2Lvfnasr72j2MWCBVGQpqgIAtKl914SSoD0bJ853x+zCdnsbFhCAop7X5eXZHZ2zskmOc85T/k9QkpJjBgxYsT476Ic7wnEiBEjRozjS8wQxIgRI8Z/nJghiBEjRoz/ODFDECNGjBj/cWKGIEaMGDH+48QMQYwYMWL8x6k2QyCE+FQIkS2EWB3hdSGEeFsIsVkIsVII0am65hIjRowYMSJTnSeCMcCACl4/F2ge/G84MLoa5xIjRowYMSJQbYZASjkbyKnglguBL6TBAqCGEKJedc0nRowYMWKYYzmOY2cAu8p8nRm8tqeiN6WlpcnGjRtX47RixIgR48Tj77//PiClrGX22vE0BFEjhBiO4T6iYcOGLFmy5DjPKEaMGDH+XQghdkR67XhmDWUBDcp8XT94LQwp5YdSyi5Syi61apkatBgxYsSIUUmOpyGYBFwbzB7qDuRLKSt0C8WIESNGjKqn2lxDQohvgD5AmhAiExgJWAGklO8D04CBwGbABdxQXXOJESNGjBiRqTZDIKW86jCvS+D26ho/RowYMWJER6yyOEaMGDH+48QMQYwYMWL8x4kZghgxYsT4jxMzBDFixIjxH+dfUVAWI0aM/xYysAu8vxtf2PsjLPWP74ROcGKGIEaMGP8o9OLPofBVQBoXCl9HJj6IEn/tcZ3XiUzMNRQjRox/DDKwM2gEvIAv+J8XCl8xTgkxqoWYIYgRI8Y/B+9vgG7ygg7eX4/1bP4zxFxDMWIcJQd25zD5velsWbGdFqc24/wRZ5NSO/l4T+tfijzeE/hPEjMEMU54pJRIKVGUqj8Ab125g3t7PYHf68fvDbBsxiomvDWVd+b/j/ot0qt8vBMee38ofMvkBcV4LUa1EHMNHWN25uex/sB+NN3s+BujKinOL+bl60cx0Hk1A2xXcn/fkezaYCpwW2neuvVDXAVu/N4AAD6Pn+I8F+/dM6ZKx/mvICyNIOEewI6xT7UY/064B2FpeETPklJH+jciAxHVl2MEEYbkz7+HLl26yH9jP4LMgnxumfIT2/JyUYXAplp49ewB9G180vGe2gmJlJI7uj3CtpU78PuMRVoIQXxyHGM2vk1yWtJRj6FpGufar0Lq4X9DVruFae5vjnqM/yoysB08wZiA42yEpfGRvd87D5n/AEgXSB3U+oiUdxGWJlU+138LQoi/pZRdzF6LnQiOAbqUXP3jODYcPIAnEKDY7yfX4+b2aZPZlpd7vKfHrvx83l44n2dnz2Tuzh382zYHZqxbuImd6zJLjQAYxsHn9fPLp39UyRiKomCxmntX7U57lYxRFUj57zt9CktjRMJw478jNQLaHmTuraAfMAwBHtC2IHOGIqW/Wub7bydmCI4Bi7IyyXW70cstsAFd5+tVK47TrAymbtzAOWPH8O7iBXy2fCkjpv7ELVN+qhbXVXF+MZuXb6Mwt6jKn12ezA27Ta/73D62rqwaV4EQgn5De2G1W0Ou2xxWBtx0ZpWMcTTorono2b2Q+1qhZ/dEd/1wvKd0TJCu8UCg/FXDKHjnHo8p/eOJBYuPAftdxabXA7pOVmHBMZ7NIVx+Pw/9/gueQCDk2rzMnUzfspmBzVtUyTi6rvPhg18yefR0LDYLAV+As4b24u73bka1qFUyRgl5+/NZ+vsq8g8UoJu4bOxxNpp3qjp33K1vXMfuLXtZv2gTqkVF82u069OGG569ssrGqAy66ycoeBLwBC9kQ8Ez6IASd8nxnFr1o+8FTH
b+UjdOCTHCiBmCY0CnuukETHbYTouV3g0bH/sJBVmUlYlqkknj8vv5acPaKjMEP7w+hSkf/IbP48fnMf5A/xg7h6TUBIa9OLRKxgCY+M40PnzoKyw2FYEg4AugWo3FGUBRBPY4O+fc0LfKxnQmOHn1j6fYtnonmRt206hNAxq2yjC9t8TlJoSosvEjUvwmpUagFDcUvQUnuCEQttOQ7qkY/a7KooOt0/GY0j+emGvoGJCRlMRlrdvitBxyIdhVlXqJCVzY6uTjNi+LokRM27apVbdHGP/6ZLwub8g1r9vHpPemV1k8YsuK7Xz88Fj8Xj/uQg+uQje6piMAZ6IDq91Ct0GdeXfRiySmJFTJmGVp0rYhZ1zS3dQI7N2ezaPnPs8A25UMdF7NC0PfoiCnsMrnEIIWoeurvveEiAFViONssDTEyDwqwQmOAQhL0+M1q380sRPBMeLpPmfRqV4GX6xchsvnY2DzltzQoRMOi/Xwb64mumbUR1XCd6dOi5XLW7etsnEixQQ8xR50Ta8S99D0MTPxe8PdATaHjQc+u50zBnc76jEqg6vQzZ3dHqHgYCG6LtE1ndnjF7B15Q4+WP5qtdQ2AKDWB21n+HUl/dicSMohpRf0IlBSEKJ6959C2CD1W6TrS/BMBuyIuCHgvKhax/03EzMExwghBBe1OpmLjuMJoDw2VeWD8y7ipkk/AqAFC6+uansKPRs2qrJxWnRuypp5G8KuN2iVUWUxAneRxzQmIKXEU1zeRXLsmPHVbDwub8jcAr4A+7bvZ8WsNXQ885TqGTjhfsj/P0LdQw5IuK9Kh5FSgn8J0vMHiHiE84KQfH8pfciC58A9AZCgJCMTn0BxDqjSeZRHKHGIhFsg4ZZqHedEoVpNsxBigBBigxBisxDiYZPXGwkhZgghVgohZgkhYlqzx5iuGfVZcNMInjuzP4/27M3PQ67j8V59q3TXeOsb1+OIs6METx9CCOxxNu5456YqG+OMwd1xxIenbGoBjc7921XZOEfKttU78RR7w65rms6u9eaZTVWB4jwXkl8CtSGgGv9PfgEl7oIqG0NKicx/CJk7DFyfQPF7yAOD0F0TD92T/zi4J1IqIqfvh/yHkL5FVTaPGEdPtZ0IhBAq8C7QH8gEFgshJkkp15a57VXgCynl50KIM4EXgGuqa04nGlJKxq5awUdLl5DncXNqen0e7tmLZqk1j+g58TYbF7asvpNKy1Ob8c7CFxj7/A9sXrqVRm0aMOSxS6o0e6fLOe3pfHZ7/v51BZ5iL0KAzWnjuqevILVuSpWNEwktoFGYW0RiSkLIKadp+8Y44u1hxkBVFRq1rt59j+I8F5znVt8AvjmGSJx0By8EjP8KnkA6zgQkeKZhKIiWxYMsGo1I7Vp9c4txRFSna6grsFlKuRVACPEtcCFQ1hC0BkrOqjOBifzLkVJG3E3vys/npw3rKPL5OLPJSZyannFUO+8X587mq1XLcQfTP2du38rCrEymXX0tDZL/WaJnjds04LGv76m25yuKwlWPXMzmpdvwuo0UwabtG9NvaK9KPa+in+OcHxYw7tVJ5O8voMs57YlLdDJ59K/4fQFsDitDn7yMS+4ZhBCCM6/uyedPjcPn8aNrRuaY1WYho3k92vVuXblv9h+CdE8NFmyVQ1jA9xeozUBYQZY3BJjHL2IcN6rTEGQAZQXEM4HyEbsVwGDgLeBiIFEIUVNKebDsTUKI4cBwgIYNj0xv5FixKCuTZ/78g3UH9pNkt3NTx87c2qVbaXrmTxvW8cjvv6JJHb+u8+XK5XTLqE+yw8HO/Dy612/A9R06USsuPqrx8j0evli5DK+mlV6TgCfg5/2/F/H8mcdWoCtvfz5/TViE3xug23mdqNekzlE/c/nM1Ux4exp5+wvocVFXzrulP3GJzpAxJ749jaUzVpNat4ZxGiiTnbRh8Rbu7/sUn6x5I2qDu2XFdt654xPWzt+AI87OucPO4qYXhmALFo19+ez3fP
fyT3iDO/wpH/wWIjHh9/r5/IlviUt0MHBYP5wJTkYtfIF37/qUxb8sQ7Wo9LmyByNevfa4BG2rFGEBBOapZxawNDB
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"from sklearn.cluster import KMeans\n",
4 years ago
"kmeans = KMeans(n_clusters = 3)\n",
"kmeans.fit(X)\n",
"labels = kmeans.predict(X)\n",
"plt.scatter(df['popularity'],df['danceability'],c = labels)\n",
"plt.xlabel('popularity')\n",
4 years ago
"plt.ylabel('danceability')\n",
4 years ago
"plt.show()"
]
},
4 years ago
{
"source": [
"This model's accuracy is not bad, but not great. It may be that the data may not lend itself well to K-Means Clustering. You might try a different method."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
4 years ago
"execution_count": 811,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
4 years ago
"Result: 109 out of 286 samples were correctly labeled.\nAccuracy score: 0.38\n"
]
}
],
"source": [
"labels = kmeans.labels_\n",
"\n",
"correct_labels = sum(y == labels)\n",
"\n",
"print(\"Result: %d out of %d samples were correctly labeled.\" % (correct_labels, y.size))\n",
"\n",
"print('Accuracy score: {0:0.2f}'. format(correct_labels/float(y.size)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
4 years ago
}
]
}