{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python37364bit8d3b438fb5fc4430a93ac2cb74d693a7",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"metadata": {
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# Nigerian Music scraped from Spotify - an analysis"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (0.11.1)\n",
"Requirement already satisfied: scipy>=1.0 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.4.1)\n",
"Requirement already satisfied: numpy>=1.15 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.19.2)\n",
"Requirement already satisfied: pandas>=0.23 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (1.1.2)\n",
"Requirement already satisfied: matplotlib>=2.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from seaborn) (3.1.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2.8.0)\n",
"Requirement already satisfied: pytz>=2017.2 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2019.1)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.1.0)\n",
"Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)\n",
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.0)\n",
"Requirement already satisfied: six>=1.5 in /Users/jenlooper/Library/Python/3.7/lib/python/site-packages (from python-dateutil>=2.7.3->pandas>=0.23->seaborn) (1.12.0)\n",
"Requirement already satisfied: setuptools in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (from kiwisolver>=1.0.1->matplotlib>=2.2->seaborn) (45.1.0)\n",
"\u001b[33mWARNING: You are using pip version 20.2.3; however, version 21.1.2 is available.\n",
"You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install seaborn"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" name album \\\n",
"0 Sparky Mandy & The Jungle \n",
"1 shuga rush EVERYTHING YOU HEARD IS TRUE \n",
"2 LITT! LITT! \n",
"3 Confident / Feeling Cool Enjoy Your Life \n",
"4 wanted you rare. \n",
"\n",
" artist artist_top_genre release_date length popularity \\\n",
"0 Cruel Santino alternative r&b 2019 144000 48 \n",
"1 Odunsi (The Engine) afropop 2020 89488 30 \n",
"2 AYLØ indie r&b 2018 207758 40 \n",
"3 Lady Donli nigerian pop 2019 175135 14 \n",
"4 Odunsi (The Engine) afropop 2018 152049 25 \n",
"\n",
" danceability acousticness energy instrumentalness liveness loudness \\\n",
"0 0.666 0.8510 0.420 0.534000 0.1100 -6.699 \n",
"1 0.710 0.0822 0.683 0.000169 0.1010 -5.640 \n",
"2 0.836 0.2720 0.564 0.000537 0.1100 -7.127 \n",
"3 0.894 0.7980 0.611 0.000187 0.0964 -4.961 \n",
"4 0.702 0.1160 0.833 0.910000 0.3480 -6.044 \n",
"\n",
" speechiness tempo time_signature \n",
"0 0.0829 133.015 5 \n",
"1 0.3600 129.993 3 \n",
"2 0.0424 130.005 4 \n",
"3 0.1130 111.087 4 \n",
"4 0.0447 105.115 4 "
],
"text/html": "
\n\n
\n \n \n | \n name | \n album | \n artist | \n artist_top_genre | \n release_date | \n length | \n popularity | \n danceability | \n acousticness | \n energy | \n instrumentalness | \n liveness | \n loudness | \n speechiness | \n tempo | \n time_signature | \n
\n \n \n \n 0 | \n Sparky | \n Mandy & The Jungle | \n Cruel Santino | \n alternative r&b | \n 2019 | \n 144000 | \n 48 | \n 0.666 | \n 0.8510 | \n 0.420 | \n 0.534000 | \n 0.1100 | \n -6.699 | \n 0.0829 | \n 133.015 | \n 5 | \n
\n \n 1 | \n shuga rush | \n EVERYTHING YOU HEARD IS TRUE | \n Odunsi (The Engine) | \n afropop | \n 2020 | \n 89488 | \n 30 | \n 0.710 | \n 0.0822 | \n 0.683 | \n 0.000169 | \n 0.1010 | \n -5.640 | \n 0.3600 | \n 129.993 | \n 3 | \n
\n \n 2 | \n LITT! | \n LITT! | \n AYLØ | \n indie r&b | \n 2018 | \n 207758 | \n 40 | \n 0.836 | \n 0.2720 | \n 0.564 | \n 0.000537 | \n 0.1100 | \n -7.127 | \n 0.0424 | \n 130.005 | \n 4 | \n
\n \n 3 | \n Confident / Feeling Cool | \n Enjoy Your Life | \n Lady Donli | \n nigerian pop | \n 2019 | \n 175135 | \n 14 | \n 0.894 | \n 0.7980 | \n 0.611 | \n 0.000187 | \n 0.0964 | \n -4.961 | \n 0.1130 | \n 111.087 | \n 4 | \n
\n \n 4 | \n wanted you | \n rare. | \n Odunsi (The Engine) | \n afropop | \n 2018 | \n 152049 | \n 25 | \n 0.702 | \n 0.1160 | \n 0.833 | \n 0.910000 | \n 0.3480 | \n -6.044 | \n 0.0447 | \n 105.115 | \n 4 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 21
}
],
"source": [
"\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"../../data/nigerian-songs.csv\")\n",
"df.head()"
]
},
{
"source": [
"Get information about the dataframe: its columns, their data types, and non-null counts."
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [
"df.info()"
],
"cell_type": "code",
"metadata": {},
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\nRangeIndex: 530 entries, 0 to 529\nData columns (total 16 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 name 530 non-null object \n 1 album 530 non-null object \n 2 artist 530 non-null object \n 3 artist_top_genre 530 non-null object \n 4 release_date 530 non-null int64 \n 5 length 530 non-null int64 \n 6 popularity 530 non-null int64 \n 7 danceability 530 non-null float64\n 8 acousticness 530 non-null float64\n 9 energy 530 non-null float64\n 10 instrumentalness 530 non-null float64\n 11 liveness 530 non-null float64\n 12 loudness 530 non-null float64\n 13 speechiness 530 non-null float64\n 14 tempo 530 non-null float64\n 15 time_signature 530 non-null int64 \ndtypes: float64(8), int64(4), object(4)\nmemory usage: 66.4+ KB\n"
]
}
]
},
{
"source": [
"Double-check for null values."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"name 0\n",
"album 0\n",
"artist 0\n",
"artist_top_genre 0\n",
"release_date 0\n",
"length 0\n",
"popularity 0\n",
"danceability 0\n",
"acousticness 0\n",
"energy 0\n",
"instrumentalness 0\n",
"liveness 0\n",
"loudness 0\n",
"speechiness 0\n",
"tempo 0\n",
"time_signature 0\n",
"dtype: int64"
]
},
"metadata": {},
"execution_count": 19
}
],
"source": [
"df.isnull().sum()"
]
},
{
"source": [
"Look at the general values of the data. Note that popularity can be 0, and many rows have that value."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" release_date length popularity danceability acousticness \\\n",
"count 530.000000 530.000000 530.000000 530.000000 530.000000 \n",
"mean 2015.390566 222298.169811 17.507547 0.741619 0.265412 \n",
"std 3.131688 39696.822259 18.992212 0.117522 0.208342 \n",
"min 1998.000000 89488.000000 0.000000 0.255000 0.000665 \n",
"25% 2014.000000 199305.000000 0.000000 0.681000 0.089525 \n",
"50% 2016.000000 218509.000000 13.000000 0.761000 0.220500 \n",
"75% 2017.000000 242098.500000 31.000000 0.829500 0.403000 \n",
"max 2020.000000 511738.000000 73.000000 0.966000 0.954000 \n",
"\n",
" energy instrumentalness liveness loudness speechiness \\\n",
"count 530.000000 530.000000 530.000000 530.000000 530.000000 \n",
"mean 0.760623 0.016305 0.147308 -4.953011 0.130748 \n",
"std 0.148533 0.090321 0.123588 2.464186 0.092939 \n",
"min 0.111000 0.000000 0.028300 -19.362000 0.027800 \n",
"25% 0.669000 0.000000 0.075650 -6.298750 0.059100 \n",
"50% 0.784500 0.000004 0.103500 -4.558500 0.097950 \n",
"75% 0.875750 0.000234 0.164000 -3.331000 0.177000 \n",
"max 0.995000 0.910000 0.811000 0.582000 0.514000 \n",
"\n",
" tempo time_signature \n",
"count 530.000000 530.000000 \n",
"mean 116.487864 3.986792 \n",
"std 23.518601 0.333701 \n",
"min 61.695000 3.000000 \n",
"25% 102.961250 4.000000 \n",
"50% 112.714500 4.000000 \n",
"75% 125.039250 4.000000 \n",
"max 206.007000 5.000000 "
],
"text/html": "\n\n
\n \n \n | \n release_date | \n length | \n popularity | \n danceability | \n acousticness | \n energy | \n instrumentalness | \n liveness | \n loudness | \n speechiness | \n tempo | \n time_signature | \n
\n \n \n \n count | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n 530.000000 | \n
\n \n mean | \n 2015.390566 | \n 222298.169811 | \n 17.507547 | \n 0.741619 | \n 0.265412 | \n 0.760623 | \n 0.016305 | \n 0.147308 | \n -4.953011 | \n 0.130748 | \n 116.487864 | \n 3.986792 | \n
\n \n std | \n 3.131688 | \n 39696.822259 | \n 18.992212 | \n 0.117522 | \n 0.208342 | \n 0.148533 | \n 0.090321 | \n 0.123588 | \n 2.464186 | \n 0.092939 | \n 23.518601 | \n 0.333701 | \n
\n \n min | \n 1998.000000 | \n 89488.000000 | \n 0.000000 | \n 0.255000 | \n 0.000665 | \n 0.111000 | \n 0.000000 | \n 0.028300 | \n -19.362000 | \n 0.027800 | \n 61.695000 | \n 3.000000 | \n
\n \n 25% | \n 2014.000000 | \n 199305.000000 | \n 0.000000 | \n 0.681000 | \n 0.089525 | \n 0.669000 | \n 0.000000 | \n 0.075650 | \n -6.298750 | \n 0.059100 | \n 102.961250 | \n 4.000000 | \n
\n \n 50% | \n 2016.000000 | \n 218509.000000 | \n 13.000000 | \n 0.761000 | \n 0.220500 | \n 0.784500 | \n 0.000004 | \n 0.103500 | \n -4.558500 | \n 0.097950 | \n 112.714500 | \n 4.000000 | \n
\n \n 75% | \n 2017.000000 | \n 242098.500000 | \n 31.000000 | \n 0.829500 | \n 0.403000 | \n 0.875750 | \n 0.000234 | \n 0.164000 | \n -3.331000 | \n 0.177000 | \n 125.039250 | \n 4.000000 | \n
\n \n max | \n 2020.000000 | \n 511738.000000 | \n 73.000000 | \n 0.966000 | \n 0.954000 | \n 0.995000 | \n 0.910000 | \n 0.811000 | \n 0.582000 | \n 0.514000 | \n 206.007000 | \n 5.000000 | \n
\n \n
\n
"
},
"metadata": {},
"execution_count": 5
}
],
"source": [
"df.describe()"
]
},
{
"source": [
"Let's examine the genres. Quite a few are listed as 'Missing', which means they aren't categorized in the dataset with a genre."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Text(0.5, 1.0, 'Top genres')"
]
},
"metadata": {},
"execution_count": 6
},
{
"output_type": "display_data",
"data": {
"text/plain": "