{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" }, "orig_nbformat": 2, "kernelspec": { "name": "python3", "display_name": "Python 3.7.0 64-bit ('3.7')" }, "metadata": { "interpreter": { "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" } }, "interpreter": { "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "# Nigerian Music scraped from Spotify - an analysis" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n", "Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n", "Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n", "Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n", "Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n", "Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n", "Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n", "Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n", "Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n", "\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n", "You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "pip install seaborn" ] }, { "source": [ "Start where we finished in the last lesson, with data imported and filtered." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name album \\\n", "0 Sparky Mandy & The Jungle \n", "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", "2 LITT! LITT! \n", "3 Confident / Feeling Cool Enjoy Your Life \n", "4 wanted you rare. \n", "\n", " artist artist_top_genre release_date length popularity \\\n", "0 Cruel Santino alternative r&b 2019 144000 48 \n", "1 Odunsi (The Engine) afropop 2020 89488 30 \n", "2 AYLØ indie r&b 2018 207758 40 \n", "3 Lady Donli nigerian pop 2019 175135 14 \n", "4 Odunsi (The Engine) afropop 2018 152049 25 \n", "\n", " danceability acousticness energy instrumentalness liveness loudness \\\n", "0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n", "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", "2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n", "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", "\n", " speechiness tempo time_signature \n", "0 0.0829 133.015 5 \n", "1 0.3600 129.993 3 \n", "2 0.0424 130.005 4 \n", "3 0.1130 111.087 4 \n", "4 0.0447 105.115 4 " ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
0SparkyMandy & The JungleCruel Santinoalternative r&b2019144000480.6660.85100.4200.5340000.1100-6.6990.0829133.0155
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
2LITT!LITT!AYLØindie r&b2018207758400.8360.27200.5640.0005370.1100-7.1270.0424130.0054
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
\n
" }, "metadata": {}, "execution_count": 105 } ], "source": [ "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "\n", "df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n", "df.head()" ] }, { "source": [ "We will focus only on 3 genres. Maybe we can get 3 clusters built!" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Text(0.5, 1.0, 'Top genres')" ] }, "metadata": {}, "execution_count": 106 }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAHbCAYAAAAJY9SEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3de7ymc73/8dfbjNROhUwINR0msjvInk07hZLILofaiSJKTQfS+biT2NXu3O6oKL+0f6WURG0dpIOdnTJkO5UMEdNgoaQIw2f/cV1Td2ONGbO+y32vNa/n47Ee676/13Vf9yetWet9f09XqgpJkiRN3GrDLkCSJGm6MFhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiZFwh8Hvm5PuGng+fOHXZ8kTYa4QaikyZZwKfDiKr437FomImFmFYuHXYek0WWPlaShSLhXwicSFiVckfD+hNX7YzslLEg4NOG6hF8nPOdOrjUn4X8Sbkj4dsKnEz4zcPxJCT9N+H3CWQlbDxw7PeGQ/vsfEk5KWLs/tmnC4oSXJFwOnLQC13tJwqV9LZfcWd2Sph+DlaRhORR4DPBo4B+A7YA3DhyfDdwDWB94CXB0wkOWvkhCgGOBHwD3B94D7D1wfDbwdeBfgXWAtwFfXxKees8Dng9sAKwFvGrg2AxgK2ATYNc7u15/zfcD21dxH+CJwHl35T+KpKnNYCVpWJ4PHFLFNVVcBbwT2Gfg+GLg0Cpu6YcQvwf8yzjXmQNsChzWn/tD4FsDx/cFvlbF96q4vYqTgAuApw2cc2QVF1fxJ+CrwOZLvcfbq7ixiptW8HqPSrhnFb+t4hd36b+KpCnNYCXpbtf3Mq0PXDbQfBmw4cDzsSr+vNTxB45zuQf259480Hb5wOMHA3v3w3a/T/g9MHepa1058PhGYM2B57dX8dsVuV4Vv6MLjAcBVyacmPDwcWqWNE0ZrCTd7aooujDz4IHmBwELB56vm3DPpY4PBpwlFgGzEtYYaNt44PHlwGeqWGvg695VfHhFy13q+Z1er4r/qmJ7uuD2G+DwFXwfSdOAwUrSsBwDHJJw/4QH0M1Z+v8Dx1cHDk64R8JTgB2A48a5zq+AC4G3JayesA2w08Dxo4HnJGyfMKOfNL99wvorWfcyr5ewYcI/J/wdcDPwR+D2lXwfSVOQwUrSsLydbm7S+cDZwGnA+waOX0o3z+pK4CjghVVcsvRF+t6v5wJPBX4HvBX4Cl2woX/Ns+kmy19DN6T4Klby999yrjcDeHNf87XAPwIHrsz7SJqa3MdK0shJ2An4eNXKzU9KOAE4vYp/b1uZJN05e6wkTXkJWyXMTlgt4Zl0Q4EnDLsuSauemcMuQJIa2Ihu/tXadJPLX1TFBcMtSdKqyKFASZKkRhwKlCRJamQkhgLXXXfdmj179rDLkCRJWq4zzzzzmqqaNd6xkQhWs2fPZv78+cMuQ5IkabmSXLasYw4FSpIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1MnPYBbT2D2/4/LBL0DRz5vtfMOwSJElThD1WkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskGyf5QZILkpyf5FV9+zpJTk5yUf997b49ST6aZEGSc5JsMdn/IyRJkkbBivRYLQZeV1WbAY8HDkiyGfBm4JSqmgOc0j8HeDowp/+aBxzevGpJkqQRtNxgVVWLquqs/vENwC+ADYFdgaP7044Gdusf7wp8vjqnA2sl2aB55ZIkSSPmLs2xSjIbeBzwU2C9qlrUH7oSWK9/vCFw+cDLrujblr7WvCTzk8wfGxu7i2VLkiSNnhUOVknWBI4DXl1Vfxg8VlUF1F1546o6oqrmVtXcWbNm3ZWXSpIkjaQVClZJVqcLVV+oqq/1zVctGeLrv1/dty8ENh54+UZ9myRJ0rS2IqsCA3wW+EVVfWjg0InAvv3jfYETBtpf0K8OfDxw/cCQoSRJ0rQ1cwXO2RrYBzg3ydl921uB9wDHJtkfuAzYoz92ErAzsAC4EXhh04olSZJG1HKDVVX9GMgyDm8/zvkFHDDBuiRJkqYcd16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqZHlBqskRyW5Osl5A21fTnJ2/3VpkrP79tlJbho49qnJLF6SJGmUzFyBcz4HfBz4/JKGqnruksdJPghcP3D+xVW1easCJUmSporlBquqOjXJ7PGOJQmwB/CUtmVJkiRNPROdY/Uk4Kqqumig7SFJfp7kR0metKwXJpmXZH6S+WNjYxMsQ5IkafgmGqz2Ao4ZeL4IeFBVPQ54LfDFJPcd74VVdURVza2qubNmzZpgGZIkScO30sEqyUzgWcCXl7RV1c1VdW3/+EzgYuAREy1SkiRpKphIj9VTgV9W1RVLGpLMSjKjf/xQYA5wycRKlCRJmhpWZLuFY4CfAJskuSLJ/v2hPfnbYUCAbYBz+u0Xvgq8rKqua1mwJEnSqFqRVYF7LaN9v3HajgOOm3hZkiRJU487r0uSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUyHKDVZKjklyd5LyBtnckWZjk7P5r54Fjb0myIMmFSXacrMIlSZJGzYr0WH0O2Gmc9g9X1eb910kASTYD9gT+vn/NJ5PMaFWsJEnSKFtusKqqU4HrVvB6uwJfqqqbq+rXwAJgywnUJ0mSNGVMZI7VgUnO6YcK1+7bNgQuHzjnir7tDpLMSzI/yfyxsbEJlCFJkjQaVjZYHQ48DNgcWAR88K5eoKqOqKq5VTV31qxZK1mGJEnS6FipYFVVV1XVbVV1O3Akfx3uWwhsPHDqRn2bJEnStLdSwSrJBgNPdweWrBg8EdgzyRpJHgLMAX42sRIlSZKmhpnLOyHJMcB2wLpJrgAOAbZLsjlQwKXASwGq6vwkxwIXAIuBA6rqtskpXZIkabQsN1hV1V7jNH/2Ts5/F/CuiRQlSZI0FbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNLDdYJTkqydVJzhtoe3+SXyY5J8nxSdbq22cnuSnJ2f3XpyazeEmSpFGyIj1WnwN2WqrtZOBRVfUY4FfAWwaOXVxVm/dfL2tTpiRJ0uhbbrCqqlOB65Zq+25VLe6fng5sNAm1SZIkTSkt5li9CPjWwPOHJPl5kh8ledKyXpRkXpL5SeaPjY01KEOSJGm4JhSskvwrsBj4Qt+0CHhQVT0OeC3wxST3He+1VXVEVc2tqrmzZs2aSBmSJEkjYaWDVZL9gGcAz6+qAqiqm6vq2v7xmcDFwCMa1ClJkjTyVipYJdkJeCOwS1XdONA+K8mM/vFDgTnAJS0KlSRJGnUzl3dCkmOA7YB1k1wBHEK3CnAN4OQkAKf3KwC3AQ5LcitwO/Cyqrpu3AtLkiRNM8sNVlW11zjNn13GuccBx020KEmSpKnIndclSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJamSFglWSo5JcneS8gbZ1kpyc5KL++9p9e5J8NMmCJOck2WKyipckSRolK9pj9Tlgp6Xa3gycUlVzgFP65wBPB+b0X/OAwydepiRJ0uhboWBVVacC1y3VvCtwdP/4aGC3gfbPV+d0YK0kG7QoVpIkaZRNZI7VelW1qH98JbBe/3hD4PKB867o2/5GknlJ5ieZPzY2NoEyJEmSRkOTyetVVUDdxdccUVVzq2rurFmzWpQhSZI0VBMJVlctGeLrv1/dty8ENh44b6O+TZIkaVqbSLA6Edi3f7wvcMJA+wv61YGPB64fGDKUJEmatmauyElJjgG2A9ZNcgVwCPAe4Ngk+wOXAXv0p58E7AwsAG4EXti4ZkmSpJG0QsGqqvZaxqHtxzm3gAMmUpQkSdJU5M7rkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyc2VfmGQT4MsDTQ8F3g6sBbwEGOvb31pVJ610hZIkSVPESgerqroQ2BwgyQxgIXA88ELgw1X1gSYVSpIkTRGthgK3By6uqssaXU+SJGnKaRWs9gSOGXh+YJJzkhyVZO3xXpBkXpL5SeaPjY2Nd4okSdKUMuFgleQewC7AV/qmw4GH0Q0TLgI+ON7rquqIqppbVXNnzZo10TIkSZKGrkWP1dOBs6rqKoCquqqqbquq24EjgS0bvIckSdLIaxGs9mJgGDDJBgPHdgfOa/AekiRJI2+lVwUCJLk3sAPw0oHm9yXZHCjg0qWOSZIkTVsTClZV9Sfg/ku17TOhiiRJkqYod16XJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKmRmcMuQNJd95vDHj3sEjTNPOjt5w67BGlasMdKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWpk5kQvkORS4AbgNmBxVc1Nsg7wZWA2cCmwR1X9bqLvJUmSNMpa9Vg9uao2r6q5/fM3A6dU1RzglP65JEnStDZZQ4G7Akf3j48Gdpuk95EkSRoZLYJVAd9NcmaSeX3belW1qH98JbDe0i9KMi/J/CTzx8bGGpQhSZI0XBOeYwU8saoWJnkAcHKSXw4erKpKUku/qKqOAI4AmDt37h2OS5IkTTUT7rGqqoX996uB44EtgauSbADQf796ou8jSZI06iYUrJLcO8l9ljwGngacB5wI7Nufti9wwkTeR5IkaSqY6FDgesDxSZZc64tV9e0kZwDHJtkfuAzYY4LvI0mSNPImFKyq6hLgseO0XwtsP5FrS5IkTTXuvC5JktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIysdrJJsnOQHSS5Icn6SV/Xt70iyMMnZ/dfO7cqVJEkaXTMn8NrFwOuq6qwk9wHOTHJyf+zDVfWBiZcnSZI0dax0sKqqRcCi/vENSX4BbNiqMEmSpKmmyRyrJLOBxwE/7ZsOTHJOkqOSrL2M18xLMj/J/LGxsRZlSJIkDdWEg1WSNYHjgFdX1R+Aw4GHAZvT9Wh9cLzXVdURVTW3qubOmjVromVIkiQN3YSCVZLV6ULVF6rqawBVdVVV3VZVtwNHAltOvExJkqTRN5FVgQE+C/yiqj400L7BwGm7A+etfHmSJElTx0RWBW4N7AOcm+Tsvu2twF5JNgcKuBR46YQqlCRJmiImsirwx0DGOXTSypcjSZI0dbnzuiRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyke0WJEmaNFt/bOthl6Bp5rRXnjbp72GPlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDVisJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmNGKwkSZIaMVhJkiQ1YrCSJElqxGAlSZLUiMFKkiSpEYOVJElSIwYrSZKkRgxWkiRJjRisJEmSGjFYSZIkNWKwkiRJasRgJUmS1IjBSpIkqRGDlSRJUiMGK0mSpEYMVpIkSY0YrCRJkhoxWEmSJDUyacEqyU5JLkyyIMmbJ+t9JEmSRsWkBKskM4BPAE8HNgP2SrLZZLyXJEnSqJisHqstgQVVdUlV3QJ8Cdh1kt5LkiRpJKSq2l80+Rdgp6p6cf98H2Crqjpw4Jx5wLz+6SbAhc0L0Z1ZF7hm2EVIk8yfc60K/Dm/+z24qmaNd2Dm3V3JElV1BHDEsN5/VZdkflXNHXYd0mTy51yrAn/OR8tkDQUuBDYeeL5R3yZJkjRtTVawOgOYk+QhSe4B7AmcOEnvJUmSNBImZSiwqhYnORD4DjADOKqqzp+M99JKcxhWqwJ/zrUq8Od8hEzK5HVJkqRVkTuvS5IkNWKwkiRJasRgpSaSzE1yn2HXIUnSMBms1MpLgO8ariRp6kmSYdcwXRisNCFJtgCoqpcCZwLHG640VYz3x8Q/MFrVJElVVZKtk+yfZPt+qyStBFcFakKSnA7cWFVP6Z8fDswBdq+qG4ZanLQCkmxDt6HxH4Bv9n9gVquq24dcmnS3SfJk4LPAl4FnAEcDX6+qBUMtbAqyx0oTUlWPB2Yk+Ub//OXARdhzpRG2pFcqyVzgKGBrYG/g60tClT1XWlUk2QR4GfDqqnoLsC/dB+QdhlrYFGWw0l028EdpJkBVbQvMWipc/RL4fpI1h1aotAx9r9T2wFuAF1fVK4D9gKuBjyw5Z3gVSpMvPWAb4GHAjknuXVVnAccA85KsPdQipyCDle6SJWPx/dMNk8yBv/Rc3T/JN/vnBwKnAusMp1JpudYCdgf+sX9+C/BpwLklmtYGemPXBWZW1ZHAu4DQ3YIO4Erghr5Nd4FzrLRSkrwO2Bm4J/D9qjq4bz8VoKq2GWJ50h0MTNBdD7ihqm5M8s/A14Gdq+rkJDsA76MbArnWXitNV0l2Bg4DFgJ/AvYHnk03DLga3S3v3l9V3xxakVPUpNwrUNNbkhcBu1TVtkk+Brw2yd9V1euqapsk30mycVVdPuxapSX6UPVM4JVAJTmNrodqN+A7SY6l+4R+WFVdM8RSpUmV5JHAO4EDgbOBLwL/r6r2TPJnYEfg3CWhaqmRCi2HQ4FarnEm8S4A9knySmBD4DHA3kk+BVBVOxqqNGqSPIyuN+oNwAfoQtShwLfohgSfCfxPVR2/ZP6gNE3dDFwAnFVVN1bVbsAGSQ6g68H9KfDYJHsaqu46f3louZb8o+onot9cVacmuR+wLfC+qrq4/7S/VZJ1quq6YdYrDRr4w7A2cFlV/W/f/htgK+CpVXVCkn2BY5P8uqp+OLyKpbYGhsFn0HWoXAdsAMwFftyf9iW6X/eLkxwN3Ar8wFB119ljpWVK8rAkm/WPXwt8nm45+gOq6nrg18Czk7yZrufq2YYqjYqBntZ79d/PAxYnORCgqi4ELgc2659/FfgXYNHdXKo0qfpQtStwLN0+VY8EPgF8LMmBSV5MNyy4oD//1qo6uqquGlrRU5iT1zWuJPcCPgZcRddlPA94Od2ta3YHtqALU7sBTwYOqqrzhlOtNL4kO9H9zF4CnA4U3Z5Va9J9Qv80sF9V/Y9DHpqukmwKfAb4d7qVgO8A9qHrldoR2Aj4alV9d1g1TicGKy1Tv5XCa4H7AudX1bv79g8DOwFPqqprktyzqv48xFKlO0jyeOC9dB8QHkO3jcKtdJ/aX0230/r3q+obQytSmmRJHgV8ELiwqg7q23YEPkf3O9yd1RtzKFB/Y3CielVdBLwbuB54TJLH9O2vAf4b+EE/Zn/LMGqVliXJhnQT1H/aD/G9D/gh3bySRVW1P/CGqvqGO6xrmvsV3Z5Uj0wyJ8kaVfUd4Dhg1nBLm54MVvqLwaGQJM9NshuwKV2v1fXA7gPhah7dpN/bvKeaRtBNdJNy90yyVVX9saq+DTyIrveKqlrcf7fbXtNSkhlVdQvwYrq5g68HdkmyLfAsYPEw65uuDFb6i4FQdSDdXj8A36D7Q/ReYH26bRb+vj929d1epDSOgdssPSrJdnRzqN5D11N1WJKn90PbGwO/H1qh0t2k/6B8W5KZVXUrXbhaDfhXulC1X1WdYY9tewYr/UWS1ZJsQDcZfXvgocApwM+r6hK6YcGZdBPa/aSvkdGvetoZOAF4Id1ePM+kG/47jW4DxE8AL6qqs/xjoulm4MPFnCTrL2nvt0+Y2fdcvQKYD/wdcJYLNiaHwWoVt9QfmBl0+5tcS7cr7zbAc6rq1iQv7895vbtSa9QkuTfdH419qmpfuo0/twXWo/tZPhj4I93PtzStDOxTtSNwIt0HiwOSPBz+JlzdSvfv5AF0NyB3L8tJYLBahS01p2pvYF5V3Uy3JP0gun2pbkzyPLr7SFVV3Ta8iqW/SrJa//0f6XaSvgbYBKCqTqDbt+oN/enH0n1SPyTJPe/+aqXJ04equXTDfc8EXgf8PbDbUuFqyZyr5wAf7IOWGjOtrsIGQtUBwIvo9jWhql6aZC3g1CQ/p9uder+qumJoxUq9JPeqqpuq6vYkTwQOp7tx7M+AjZPMrar5dCtXtwBmVNXVSY4AbndrEE03Se5DNwS+Rb99woL+g8dewHOTfKWqftXPuVqtD1e/HWbN05n7WK3ikqwNHAG8qaou6Zfi3twf24muJ+DSqvr1MOuU4C978vwH8Ay6rRMOp9vY8DNJHgocQLfIYjHwD8DBVXX8sOqVJsvS86OSbAJ8lG739Ff2Hzy2A54PvNvf4Xcfg9UqZrzJikm+Rrf673MDvVhbAedU1U1DKFO6gySr0wWpn9L9vD6NbthjbeAFVfXbJOvS7SK9KbCgquY7QVfTzcCcqh3othBJ/+HiEcCb6Ta/fW0frtauqt8NteBVjHOsViFLzama03/CAfgO8GDgn/pjzwXeRrdkXRolC+kmpX+Fbs7UYcDZwEFJ1q+qa6rq7Kr6Uj8c6OpVTSv9UF4l+We6HdWvoNtS5ANV9Su61dvrAx/vX3L9kEpdZdljtYpYKlS9lm5O1U3AfwH/RnfvqMfRDaE8DHheVZ07nGqlv7XUJ/T/BH5YVXv2x7amGxq8F92Qh/uradpJ8hBgtaq6uO+Z/U/gNcCSXqoNge9V1f79h+Y1quqc4VW86nLy+ipiIFQ9HngC8ERgDeAMYHFVHdzvYfVwuiGURUMrVhowEKoeSncLjmcBr07yTrqVTaf1E3V3oxsWNFhpOnoCcFGSK/p7tM6j+3k/lG4+4Wy6Ses3VdWBQ6xzlWewWoUkeSRwCF2v1GpVdVW/VP0nSR5YVa+gu+2BNDL6ULUL3bDfAuAS4NN0S8oPSvLRqvrvJOdWlbuqa1qqqi8kWRM4I8neVXVOkgcCZ/ZzqdYHPkQ3tUND5ByraWzp3aWr6hfAkXTBarsk61bVVcDWwBOTrOeO1Bo1fS/rwcCOwPF0Gxw+je4my9sCr+s3PzRUadoZ2FF9R+BRdEOAR/YrZC8F7pfkk3Q3VT6hqk729/hwOcdqmhpn88916O5y/h3g2XTDJl8HTu33+Jnh5p8aRUk2ottaYW26XdSfB3yKbhf1zwFjVXXG0AqUJlmSLYGPAK+pqtP7ebLPo/s9Dt39XP9UVT8aVo36K4cCp6mBUPUaYFe6VVRvotvs893AbcB+wK1JvgHcPpxKpTvXb0x7RZJ3AV+oqgVJPk93d4Dzq+qy4VYoTZ4kGwNvBM6tqtMBqupDfafUyXS3HTtpiCVqKQaraaa/fcE6VfWzfk7VFnQ3VX4D3f/f69NtpXAY3ZDgmVVlqNJUcC7w0n4/q2cBrzJUaRWwGDgH2DXJTlX1bfhLuJoBrDXU6nQHBqtppL+twf7A6kluBf6Xbhnu04Gdge3ptlk4iO7WHocOq1ZpJZxEt5J1F+BdVXXakOuRmh/RBFIAAAVUSURBVBtYBftPdKtgf0N3t4HfA7snubWqTgGoqvcPsVQtg5PXp4l+07gb6HamXgzsCTyiqhYC9wN+1t8f6hbgW3SrqqQpo6r+UFVHA8+tqv9ygq6moz5UPQ04ClgPOJNugdGJdD1X+/XHNaLssZomBobzdgQeC2wC3DPJZ4CfAJ/t96naDtihqq4cSqHSxN0G7qiu6affj20t4KXA7nSLji4Azuq3x/kKXa+t2+KMMFcFTiNJngR8DNgSeDywE7A63XyqNek2kbugqi4ZWpGSpDuV5E10Iw1PAZ7f77a+H3AqcKnzYkebQ4HTy5rAtVV1S1WdSrevyVPo7hm1TlV901AlSaMnyeZJDumf3hvYB9i7D1WPpVvV/UBD1ehzKHB6+RmwMMmewFeq6swkp9EF6KuGW5okadDARPUnAc8BdkxydVW9PcmmwCFJFgObA2+qqh8PtWCtEIPV9HI98GO6vaqelmQ+3T0Bn11V1wy1MkkS8NdA1YeqbYAvAAcCC4EnJ1mjqvZI8kS6jXE/3n9QjnMLR59zrKaYfvXfMruCk9wL2JRu4uOawGer6vy7qz5J0rL19/d7JPDDqrqtvzPGhlX13v5egJsD7wWOraqPDLNWrRyD1RTV/2N8EHADcMx4PVL9/dMW3+3FSZLGlWRX4CLgCrqtcbYHPky3WvvX/crAo+nmWX21qr44tGK1Upy8PgUleRHdxp8X093376Akj+6PZcn+PoYqSRotVXUCcCXwSbp7/X2X7t6XH+nnVT2G7t6YFwEbDqtOrTznWE0B44yrbwe8vqq+neRU4GC6DUHPdfxdkkbP4O/xqrouyY+Ap9Ft2nw8EOA/6Xqx9qe7HdkO/S2cFvu7fepwKHAKGFg58jLgDLpb1NwT+FD/D/QhdLv07l5Vvx9mrZKk8SXZFng08P2quiDJXnS/z79eVV9Lcu/+1C3p7o6xu3Nkpx6HAkdYkk3gL7c4eBawB/BbunC1Ft3Kv7WAR9F9yrllWLVKku5oydSMJFvRDf9tC7wxyUuq6hjgm8DeSfYA/kz3ofkJwK6GqqnJocARlWRH4PAkW9CNt78YOK+qFgGLkmwMbNO33wN4ZVXdOLSCJUl30H8w3hI4FNirqs7p9xp8Qh+ujkwyA7iwqm4Drk3y/v7erpqCDFYjKMlMuq7gg4HN6Jbf/gDYNckz+h3UP5PkfnR7nPypqsaGV7Ek6U6sBTwV2IHuRspfBW6nn0NVVZ+Ev9nfylA1hRmsRlBVLU5yMfA2uhvOPpmui/gmYJcki6vq21V1Pd2moJKkEVVV3+2nc/x7kt9W1TFJvgrMAP534DwnPU8DBqvRdQ5wI/AH4H5VdU2Sr9F9ytk3ya1VdcpQK5QkrZCqOrG/Pc2/JblHVR0NHDPsutSeqwJHxOBS3CT3AG7rd+V9Pd2NlA+pqjOSbES3iuSb/XwrSdIUkWQX4D10Q4NXelPl6cdgNQKWClUH0s2r+gPwjqr6c5K30t3/7z1V9ZMkM/pJjpKkKSbJLOfFTl8GqxGS5BXAc4HnAWcB3wPeXlUXJ3kn8HBgv6r68xDLlCRJy2CwGhFJ7gt8iG4l4HOAnYGr6bZaeHlVLUhy/6q6dohlSpKkO2GwGiFJ1gA2Bf6jqp7cbyw3RrcD7zuq6tahFihJku6UqwJHSFXdnORGYGZ/U+UHA6cAnzZUSZI0+uyxGjF9r9Wr6VaMPBB4TlVdMNyqJEnSijBYjaD+bubrA7dX1cJh1yNJklaMwUqSJKmR1YZdgCRJ0nRhsJIkSWrEYCVJktSIwUqSJKkRg5UkSVIjBitJkqRGDFaSJEmN/B/Djeb5PsBsCgAAAABJRU5ErkJggg==\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "df = df[(df['artist_top_genre'] == 'afro dancehall') | (df['artist_top_genre'] == 'afropop') | (df['artist_top_genre'] == 'nigerian pop')]\n", "df = df[(df['popularity'] > 0)]\n", "top = df['artist_top_genre'].value_counts()\n", "plt.figure(figsize=(10,7))\n", "sns.barplot(x=top.index,y=top.values)\n", "plt.xticks(rotation=45)\n", "plt.title('Top genres',color = 'blue')" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " name album \\\n", "1 shuga rush EVERYTHING YOU HEARD IS TRUE \n", "3 Confident / Feeling Cool Enjoy Your Life \n", "4 wanted you rare. \n", "5 Kasala Pioneers \n", "6 Pull Up Everything Pretty \n", "\n", " artist artist_top_genre release_date length popularity \\\n", "1 Odunsi (The Engine) afropop 2020 89488 30 \n", "3 Lady Donli nigerian pop 2019 175135 14 \n", "4 Odunsi (The Engine) afropop 2018 152049 25 \n", "5 DRB Lasgidi nigerian pop 2020 184800 26 \n", "6 prettyboydo nigerian pop 2018 202648 29 \n", "\n", " danceability acousticness energy instrumentalness liveness loudness \\\n", "1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n", "3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n", "4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n", "5 0.803 0.1270 0.525 0.000007 0.1290 -10.034 \n", "6 0.818 0.4520 0.587 0.004490 0.5900 -9.840 \n", "\n", " speechiness tempo time_signature \n", "1 0.3600 129.993 3 \n", "3 0.1130 111.087 4 \n", "4 0.0447 105.115 4 \n", "5 0.1970 100.103 4 \n", "6 0.1990 95.842 4 " ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
namealbumartistartist_top_genrerelease_datelengthpopularitydanceabilityacousticnessenergyinstrumentalnesslivenessloudnessspeechinesstempotime_signature
1shuga rushEVERYTHING YOU HEARD IS TRUEOdunsi (The Engine)afropop202089488300.7100.08220.6830.0001690.1010-5.6400.3600129.9933
3Confident / Feeling CoolEnjoy Your LifeLady Donlinigerian pop2019175135140.8940.79800.6110.0001870.0964-4.9610.1130111.0874
4wanted yourare.Odunsi (The Engine)afropop2018152049250.7020.11600.8330.9100000.3480-6.0440.0447105.1154
5KasalaPioneersDRB Lasgidinigerian pop2020184800260.8030.12700.5250.0000070.1290-10.0340.1970100.1034
6Pull UpEverything Prettyprettyboydonigerian pop2018202648290.8180.45200.5870.0044900.5900-9.8400.199095.8424
\n
" }, "metadata": {}, "execution_count": 107 } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler\n", "\n", "scaler = StandardScaler()\n", "\n", "# X = df.loc[:, ('danceability','energy')]\n", "\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "output_type": "error", "ename": "ValueError", "evalue": "Unknown label type: 'continuous'", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# we create an instance of SVM and fit out data. We do not scale our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;31m# data since we want to plot the support vectors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mls30\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 30% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 23\u001b[0m \u001b[0mls50\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 50% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mls100\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mLabelSpreading\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Label Spreading 100% data'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/semi_supervised/_label_propagation.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mX_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0mcheck_classification_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;31m# actual graph construction (implementations should override this)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/sklearn/utils/multiclass.py\u001b[0m in \u001b[0;36mcheck_classification_targets\u001b[0;34m(y)\u001b[0m\n\u001b[1;32m 181\u001b[0m if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',\n\u001b[1;32m 182\u001b[0m 'multilabel-indicator', 'multilabel-sequences']:\n\u001b[0;32m--> 183\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown label type: %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: Unknown label type: 'continuous'" ] } ], "source": [ "from sklearn.svm import SVC\n", "from sklearn.semi_supervised import LabelSpreading\n", "from sklearn.semi_supervised import SelfTrainingClassifier\n", "from sklearn import datasets\n", "\n", "X = df[['danceability','acousticness']].values\n", "y = df['energy'].values\n", "\n", "# X = scaler.fit_transform(X)\n", "\n", "# step size in the mesh\n", "h = .02\n", "\n", "rng = np.random.RandomState(0)\n", "y_rand = rng.rand(y.shape[0])\n", "y_30 = np.copy(y)\n", "y_30[y_rand < 0.3] = -1 # set random samples to be unlabeled\n", "y_50 = np.copy(y)\n", "y_50[y_rand < 0.5] = -1\n", "# we create an instance of SVM and fit out data. We do not scale our\n", "# data since we want to plot the support vectors\n", "ls30 = (LabelSpreading().fit(X, y_30), y_30, 'Label Spreading 30% data')\n", "ls50 = (LabelSpreading().fit(X, y_50), y_50, 'Label Spreading 50% data')\n", "ls100 = (LabelSpreading().fit(X, y), y, 'Label Spreading 100% data')\n", "\n", "# the base classifier for self-training is identical to the SVC\n", "base_classifier = SVC(kernel='rbf', gamma=.5, probability=True)\n", "st30 = (SelfTrainingClassifier(base_classifier).fit(X, y_30),\n", " y_30, 'Self-training 30% data')\n", "st50 = (SelfTrainingClassifier(base_classifier).fit(X, y_50),\n", " y_50, 'Self-training 50% data')\n", "\n", "rbf_svc = (SVC(kernel='rbf', gamma=.5).fit(X, y), y, 'SVC with rbf kernel')\n", "\n", "# create a mesh to plot in\n", "x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", "y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", "xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", " np.arange(y_min, y_max, h))\n", "\n", "color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}\n", "\n", "classifiers = (ls30, st30, ls50, st50, ls100, rbf_svc)\n", "for i, (clf, y_train, title) in enumerate(classifiers):\n", " # Plot the decision boundary. For that, we will assign a color to each\n", " # point in the mesh [x_min, x_max]x[y_min, y_max].\n", " plt.subplot(3, 2, i + 1)\n", " Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n", "\n", " # Put the result into a color plot\n", " Z = Z.reshape(xx.shape)\n", " plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)\n", " plt.axis('off')\n", "\n", " # Plot also the training points\n", " colors = [color_map[y] for y in y_train]\n", " plt.scatter(X[:, 0], X[:, 1], c=colors, edgecolors='black')\n", "\n", " plt.title(title)\n", "\n", "plt.suptitle(\"Unlabeled points are colored white\", y=0.1)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }