You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
332 lines
59 KiB
332 lines
59 KiB
4 years ago
|
{
|
||
|
"metadata": {
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.7.0"
|
||
|
},
|
||
|
"orig_nbformat": 2,
|
||
|
"kernelspec": {
|
||
|
"name": "python3",
|
||
|
"display_name": "Python 3.7.0 64-bit ('3.7')"
|
||
|
},
|
||
|
"metadata": {
|
||
|
"interpreter": {
|
||
|
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
|
||
|
}
|
||
|
},
|
||
|
"interpreter": {
|
||
|
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2,
|
||
|
"cells": [
|
||
|
{
|
||
|
"source": [
|
||
|
"# Nigerian Music scraped from Spotify - an analysis"
|
||
|
],
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 104,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n",
|
||
|
"Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n",
|
||
|
"Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n",
|
||
|
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n",
|
||
|
"Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n",
|
||
|
"Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n",
|
||
|
"Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n",
|
||
|
"Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n",
|
||
|
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n",
|
||
|
"Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
|
||
|
"Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n",
|
||
|
"Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n",
|
||
|
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
|
||
|
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%pip install seaborn"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"source": [
|
||
|
"Start where we finished in the last lesson, with data imported and filtered."
|
||
|
],
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 105,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
" name album \\\n",
|
||
|
"0 Sparky Mandy & The Jungle \n",
|
||
|
"1 shuga rush EVERYTHING YOU HEARD IS TRUE \n",
|
||
|
"2 LITT! LITT! \n",
|
||
|
"3 Confident / Feeling Cool Enjoy Your Life \n",
|
||
|
"4 wanted you rare. \n",
|
||
|
"\n",
|
||
|
" artist artist_top_genre release_date length popularity \\\n",
|
||
|
"0 Cruel Santino alternative r&b 2019 144000 48 \n",
|
||
|
"1 Odunsi (The Engine) afropop 2020 89488 30 \n",
|
||
|
"2 AYLØ indie r&b 2018 207758 40 \n",
|
||
|
"3 Lady Donli nigerian pop 2019 175135 14 \n",
|
||
|
"4 Odunsi (The Engine) afropop 2018 152049 25 \n",
|
||
|
"\n",
|
||
|
" danceability acousticness energy instrumentalness liveness loudness \\\n",
|
||
|
"0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n",
|
||
|
"1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n",
|
||
|
"2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n",
|
||
|
"3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n",
|
||
|
"4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n",
|
||
|
"\n",
|
||
|
" speechiness tempo time_signature \n",
|
||
|
"0 0.0829 133.015 5 \n",
|
||
|
"1 0.3600 129.993 3 \n",
|
||
|
"2 0.0424 130.005 4 \n",
|
||
|
"3 0.1130 111.087 4 \n",
|
||
|
"4 0.0447 105.115 4 "
|
||
|
],
|
||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>album</th>\n <th>artist</th>\n <th>artist_top_genre</th>\n <th>release_date</th>\n <th>length</th>\n <th>popularity</th>\n <th>danceability</th>\n <th>acousticness</th>\n <th>energy</th>\n <th>instrumentalness</th>\n <th>liveness</th>\n <th>loudness</th>\n <th>speechiness</th>\n <th>tempo</th>\n <th>time_signature</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Sparky</td>\n <td>Mandy & The Jungle</td>\n <td>Cruel Santino</td>\n <td>alternative r&b</td>\n <td>2019</td>\n <td>144000</td>\n <td>48</td>\n <td>0.666</td>\n <td>0.8510</td>\n <td>0.420</td>\n <td>0.534000</td>\n <td>0.1100</td>\n <td>-6.699</td>\n <td>0.0829</td>\n <td>133.015</td>\n <td>5</td>\n </tr>\n <tr>\n <th>1</th>\n <td>shuga rush</td>\n <td>EVERYTHING YOU HEARD IS TRUE</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2020</td>\n <td>89488</td>\n <td>30</td>\n <td>0.710</td>\n <td>0.0822</td>\n <td>0.683</td>\n <td>0.000169</td>\n <td>0.1010</td>\n <td>-5.640</td>\n <td>0.3600</td>\n <td>129.993</td>\n <td>3</td>\n </tr>\n <tr>\n <th>2</th>\n <td>LITT!</td>\n <td>LITT!</td>\n <td>AYLØ</td>\n <td>indie r&b</td>\n <td>2018</td>\n <td>207758</td>\n <td>40</td>\n <td>0.836</td>\n <td>0.2720</td>\n <td>0.564</td>\n <td>0.000537</td>\n <td>0.1100</td>\n <td>-7.127</td>\n <td>0.0424</td>\n <td>130.005</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Confident / Feeling Cool</td>\n <td>Enjoy Your Life</td>\n <td>Lady Donli</td>\n <td>nigerian pop</td>\n <td>2019</td>\n <td>175135</td>\n <td>14</td>\n <td>0.894</td>\n <td>0.7980</td>\n <td>0.611</td>\n <td>0.000187</td>\n <td>0.0964</td>\n <td>-4.961</td>\n <td>0.1130</td>\n 
<td>111.087</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>wanted you</td>\n <td>rare.</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2018</td>\n <td>152049</td>\n <td>25</td>\n <td>0.702</td>\n <td>0.1160</td>\n <td>0.833</td>\n <td>0.910000</td>\n <td>0.3480</td>\n <td>-6.044</td>\n <td>0.0447</td>\n <td>105.115</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 105
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"\n",
|
||
|
"# Load the song table from CSV and preview its first rows.\n",
|
||
|
"df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n",
|
||
|
"df.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"source": [
|
||
|
"We will focus only on 3 genres. Maybe we can get 3 clusters built!"
|
||
|
],
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 106,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"Text(0.5, 1.0, 'Top genres')"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 106
|
||
|
},
|
||
|
{
|
||
|
"output_type": "display_data",
|
||
|
"data": {
|
||
|
"text/plain": "<Figure size 720x504 with 1 Axes>",
|
||
|
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"474.468454pt\" version=\"1.1\" viewBox=\"0 0 598.4875 474.468454\" width=\"598.4875pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <defs>\n <style type=\"text/css\">\n*{stroke-linecap:butt;stroke-linejoin:round;white-space:pre;}\n </style>\n </defs>\n <g id=\"figure_1\">\n <g id=\"patch_1\">\n <path d=\"M 0 474.468454 \nL 598.4875 474.468454 \nL 598.4875 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n </g>\n <g id=\"axes_1\">\n <g id=\"patch_2\">\n <path d=\"M 33.2875 402.838125 \nL 591.2875 402.838125 \nL 591.2875 22.318125 \nL 33.2875 22.318125 \nz\n\" style=\"fill:#ffffff;\"/>\n </g>\n <g id=\"patch_3\">\n <path clip-path=\"url(#p0704da7660)\" d=\"M 51.8875 402.838125 \nL 200.6875 402.838125 \nL 200.6875 40.438125 \nL 51.8875 40.438125 \nz\n\" style=\"fill:#3274a1;\"/>\n </g>\n <g id=\"patch_4\">\n <path clip-path=\"url(#p0704da7660)\" d=\"M 237.8875 402.838125 \nL 386.6875 402.838125 \nL 386.6875 295.525504 \nL 237.8875 295.525504 \nz\n\" style=\"fill:#e1812c;\"/>\n </g>\n <g id=\"patch_5\">\n <path clip-path=\"url(#p0704da7660)\" d=\"M 423.8875 402.838125 \nL 572.6875 402.838125 \nL 572.6875 369.412882 \nL 423.8875 369.412882 \nz\n\" style=\"fill:#3a923a;\"/>\n </g>\n <g id=\"matplotlib.axis_1\">\n <g id=\"xtick_1\">\n <g id=\"line2d_1\">\n <defs>\n <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m85c8933980\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n </defs>\n <g>\n <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"126.2875\" xlink:href=\"#m85c8933980\" y=\"402.838125\"/>\n </g>\n </g>\n <g id=\"text_1\">\n <!-- afro dancehall -->\n <defs>\n <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 
18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n <path d=\"M 37.109375 75.984375 \nL 37.109375 68.5 \nL 28.515625 68.5 \nQ 23.6875 68.5 21.796875 66.546875 \nQ 19.921875 64.59375 19.921875 59.515625 \nL 19.921875 54.6875 \nL 34.71875 54.6875 \nL 34.71875 47.703125 \nL 19.921875 47.703125 \nL 19.921875 0 \nL 10.890625 0 \nL 10.890625 47.703125 \nL 2.296875 47.703125 \nL 2.296875 54.6875 \nL 10.890625 54.6875 \nL 10.890625 58.5 \nQ 10.890625 67.625 15.140625 71.796875 \nQ 19.390625 75.984375 28.609375 75.984375 \nz\n\" id=\"DejaVuSans-102\"/>\n <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n <path d=\"M 30.609375 48.390625 \nQ 23.390625 48.390625 19.1875 42.75 \nQ 14.984375 37.109375 14.984375 27.296875 \nQ 14.984375 17.4
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI
4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXV
xVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5Sea
|
||
|
},
|
||
|
"metadata": {
|
||
|
"needs_background": "light"
|
||
|
}
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Keep only the three genres of interest.\n",
|
||
|
"df = df[df['artist_top_genre'].isin(['afro dancehall', 'afropop', 'nigerian pop'])]\n",
|
||
|
"# Keep only tracks with a strictly positive popularity score.\n",
|
||
|
"df = df[df['popularity'] > 0]\n",
|
||
|
"\n",
|
||
|
"# Number of remaining tracks per genre, drawn as a bar chart.\n",
|
||
|
"top = df['artist_top_genre'].value_counts()\n",
|
||
|
"plt.figure(figsize=(10,7))\n",
|
||
|
"sns.barplot(x=top.index,y=top.values)\n",
|
||
|
"plt.xticks(rotation=45)\n",
|
||
|
"plt.title('Top genres',color = 'blue')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 107,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
" name album \\\n",
|
||
|
"1 shuga rush EVERYTHING YOU HEARD IS TRUE \n",
|
||
|
"3 Confident / Feeling Cool Enjoy Your Life \n",
|
||
|
"4 wanted you rare. \n",
|
||
|
"5 Kasala Pioneers \n",
|
||
|
"6 Pull Up Everything Pretty \n",
|
||
|
"\n",
|
||
|
" artist artist_top_genre release_date length popularity \\\n",
|
||
|
"1 Odunsi (The Engine) afropop 2020 89488 30 \n",
|
||
|
"3 Lady Donli nigerian pop 2019 175135 14 \n",
|
||
|
"4 Odunsi (The Engine) afropop 2018 152049 25 \n",
|
||
|
"5 DRB Lasgidi nigerian pop 2020 184800 26 \n",
|
||
|
"6 prettyboydo nigerian pop 2018 202648 29 \n",
|
||
|
"\n",
|
||
|
" danceability acousticness energy instrumentalness liveness loudness \\\n",
|
||
|
"1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n",
|
||
|
"3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n",
|
||
|
"4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n",
|
||
|
"5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n",
|
||
|
"6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n",
|
||
|
"\n",
|
||
|
" speechiness tempo time_signature \n",
|
||
|
"1 0.3600 129.993 3 \n",
|
||
|
"3 0.1130 111.087 4 \n",
|
||
|
"4 0.0447 105.115 4 \n",
|
||
|
"5 0.1970 100.103 4 \n",
|
||
|
"6 0.1990 95.842 4 "
|
||
|
],
|
||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>album</th>\n <th>artist</th>\n <th>artist_top_genre</th>\n <th>release_date</th>\n <th>length</th>\n <th>popularity</th>\n <th>danceability</th>\n <th>acousticness</th>\n <th>energy</th>\n <th>instrumentalness</th>\n <th>liveness</th>\n <th>loudness</th>\n <th>speechiness</th>\n <th>tempo</th>\n <th>time_signature</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>1</th>\n <td>shuga rush</td>\n <td>EVERYTHING YOU HEARD IS TRUE</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2020</td>\n <td>89488</td>\n <td>30</td>\n <td>0.710</td>\n <td>0.0822</td>\n <td>0.683</td>\n <td>0.000169</td>\n <td>0.1010</td>\n <td>-5.640</td>\n <td>0.3600</td>\n <td>129.993</td>\n <td>3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Confident / Feeling Cool</td>\n <td>Enjoy Your Life</td>\n <td>Lady Donli</td>\n <td>nigerian pop</td>\n <td>2019</td>\n <td>175135</td>\n <td>14</td>\n <td>0.894</td>\n <td>0.7980</td>\n <td>0.611</td>\n <td>0.000187</td>\n <td>0.0964</td>\n <td>-4.961</td>\n <td>0.1130</td>\n <td>111.087</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>wanted you</td>\n <td>rare.</td>\n <td>Odunsi (The Engine)</td>\n <td>afropop</td>\n <td>2018</td>\n <td>152049</td>\n <td>25</td>\n <td>0.702</td>\n <td>0.1160</td>\n <td>0.833</td>\n <td>0.910000</td>\n <td>0.3480</td>\n <td>-6.044</td>\n <td>0.0447</td>\n <td>105.115</td>\n <td>4</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Kasala</td>\n <td>Pioneers</td>\n <td>DRB Lasgidi</td>\n <td>nigerian pop</td>\n <td>2020</td>\n <td>184800</td>\n <td>26</td>\n <td>0.803</td>\n <td>0.1270</td>\n <td>0.525</td>\n <td>0.000007</td>\n <td>0.1290</td>\n <td>-10.034</td>\n <td>0.1970</td>\n 
<td>100.103</td>\n <td>4</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Pull Up</td>\n <td>Everything Pretty</td>\n <td>prettyboydo</td>\n <td>nigerian pop</td>\n <td>2018</td>\n <td>202648</td>\n <td>29</td>\n <td>0.818</td>\n <td>0.4520</td>\n <td>0.587</td>\n <td>0.004490</td>\n <td>0.5900</td>\n <td>-9.840</td>\n <td>0.1990</td>\n <td>95.842</td>\n <td>4</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 107
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 108,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from sklearn.preprocessing import StandardScaler\n",
|
||
|
"\n",
|
||
|
"# Not used by any active code in this notebook; only a commented-out\n",
|
||
|
"# fit_transform call in a later cell refers to it.\n",
|
||
|
"scaler = StandardScaler()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from sklearn.svm import SVC\n",
|
||
|
"from sklearn.semi_supervised import LabelSpreading\n",
|
||
|
"from sklearn.semi_supervised import SelfTrainingClassifier\n",
|
||
|
"from sklearn import datasets\n",
|
||
|
"\n",
|
||
|
"# Features: two audio descriptors per track.\n",
|
||
|
"X = df[['danceability','acousticness']].values\n",
|
||
|
"# Target: the genre, encoded as integer class codes (0, 1, 2 for three genres).\n",
|
||
|
"# The estimators below are classifiers, so the target must be discrete; a continuous\n",
|
||
|
"# column such as 'energy' makes fit() raise ValueError: Unknown label type: 'continuous'.\n",
|
||
|
"y = pd.factorize(df['artist_top_genre'])[0]\n",
|
||
|
"\n",
|
||
|
"# X = scaler.fit_transform(X)\n",
|
||
|
"\n",
|
||
|
"# step size in the mesh\n",
|
||
|
"h = .02\n",
|
||
|
"\n",
|
||
|
"rng = np.random.RandomState(0)\n",
|
||
|
"y_rand = rng.rand(y.shape[0])\n",
|
||
|
"# -1 marks a sample as unlabeled. Note that the masks below hide roughly 30% and 50%\n",
|
||
|
"# of the labels, so the remaining labeled share is about 70% and 50% respectively.\n",
|
||
|
"y_30 = np.copy(y)\n",
|
||
|
"y_30[y_rand < 0.3] = -1\n",
|
||
|
"y_50 = np.copy(y)\n",
|
||
|
"y_50[y_rand < 0.5] = -1\n",
|
||
|
"\n",
|
||
|
"# Each entry is a tuple: (fitted model, labels it was trained on, plot title).\n",
|
||
|
"ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n",
|
||
|
"ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n",
|
||
|
"ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n",
|
||
|
"\n",
|
||
|
"# the base classifier for self-training is identical to the SVC\n",
|
||
|
"base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n",
|
||
|
"st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n",
|
||
|
"        y_30, 'Self-training 30% data')\n",
|
||
|
"st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n",
|
||
|
"        y_50, 'Self-training 50% data')\n",
|
||
|
"\n",
|
||
|
"rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n",
|
||
|
"\n",
|
||
|
"# create a mesh to plot in\n",
|
||
|
"x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
|
||
|
"y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
|
||
|
"xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
|
||
|
"                     np.arange(y_min, y_max, h))\n",
|
||
|
"\n",
|
||
|
"# One RGB color per class code; -1 (unlabeled) is drawn in white.\n",
|
||
|
"color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n",
|
||
|
"\n",
|
||
|
"classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n",
|
||
|
"for i, (clf, y_train, title) in enumerate(classifiers):\n",
|
||
|
"    # Plot the decision boundary. For that, we will assign a color to each\n",
|
||
|
"    # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
|
||
|
"    plt.subplot(3, 2, i + 1)\n",
|
||
|
"    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
|
||
|
"\n",
|
||
|
"    # Put the result into a color plot\n",
|
||
|
"    Z = Z.reshape(xx.shape)\n",
|
||
|
"    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n",
|
||
|
"    plt.axis('off')\n",
|
||
|
"\n",
|
||
|
"    # Plot also the training points, colored by the label used for training\n",
|
||
|
"    colors = [color_map[label] for label in y_train]\n",
|
||
|
"    plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n",
|
||
|
"\n",
|
||
|
"    plt.title(title)\n",
|
||
|
"\n",
|
||
|
"plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
]
|
||
|
}
|