From fc5a429036940119a66ca0543a74838409c0fd2e Mon Sep 17 00:00:00 2001 From: Jen Looper Date: Tue, 18 May 2021 20:19:13 -0400 Subject: [PATCH] cluster intro --- Clustering/1-Visualize/README.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Clustering/1-Visualize/README.md b/Clustering/1-Visualize/README.md index 5fc1600db..ae22571c3 100644 --- a/Clustering/1-Visualize/README.md +++ b/Clustering/1-Visualize/README.md @@ -59,6 +59,16 @@ import pandas as pd df = pd.read_csv("../data/nigerian-songs.csv") df.head() ``` + +| | name | album | artist | artist_top_genre | release_date | length | popularity | danceability | acousticness | energy | instrumentalness | liveness | loudness | speechiness | tempo | time_signature | +| --- | ------------------------ | ---------------------------- | ------------------- | ---------------- | ------------ | ------ | ---------- | ------------ | ------------ | ------ | ---------------- | -------- | -------- | ----------- | ------- | -------------- | +| 0 | Sparky | Mandy & The Jungle | Cruel Santino | alternative r&b | 2019 | 144000 | 48 | 0.666 | 0.851 | 0.42 | 0.534 | 0.11 | -6.699 | 0.0829 | 133.015 | 5 | +| 1 | shuga rush | EVERYTHING YOU HEARD IS TRUE | Odunsi (The Engine) | afropop | 2020 | 89488 | 30 | 0.71 | 0.0822 | 0.683 | 0.000169 | 0.101 | -5.64 | 0.36 | 129.993 | 3 | +| 2 | LITT! | LITT! | AYLØ | indie r&b | 2018 | 207758 | 40 | 0.836 | 0.272 | 0.564 | 0.000537 | 0.11 | -7.127 | 0.0424 | 130.005 | 4 | +| 3 | Confident / Feeling Cool | Enjoy Your Life | Lady Donli | nigerian pop | 2019 | 175135 | 14 | 0.894 | 0.798 | 0.611 | 0.000187 | 0.0964 | -4.961 | 0.113 | 111.087 | 4 | +| 4 | wanted you | rare. | Odunsi (The Engine) | afropop | 2018 | 152049 | 25 | 0.702 | 0.116 | 0.833 | 0.91 | 0.348 | -6.044 | 0.0447 | 105.115 | 4 | + + Check the first few lines of data: Get some information about the dataframe: @@ -124,9 +134,21 @@ Describe the data: ```python df.describe() ``` + +| | release_date | length | popularity | danceability | acousticness | energy | instrumentalness | liveness | loudness | speechiness | tempo | time_signature | +| ----- | ------------ | ----------- | ---------- | ------------ | ------------ | -------- | ---------------- | -------- | --------- | ----------- | ---------- | -------------- | +| count | 530 | 530 | 530 | 530 | 530 | 530 | 530 | 530 | 530 | 530 | 530 | 530 | +| mean | 2015.390566 | 222298.1698 | 17.507547 | 0.741619 | 0.265412 | 0.760623 | 0.016305 | 0.147308 | -4.953011 | 0.130748 | 116.487864 | 3.986792 | +| std | 3.131688 | 39696.82226 | 18.992212 | 0.117522 | 0.208342 | 0.148533 | 0.090321 | 0.123588 | 2.464186 | 0.092939 | 23.518601 | 0.333701 | +| min | 1998 | 89488 | 0 | 0.255 | 0.000665 | 0.111 | 0 | 0.0283 | -19.362 | 0.0278 | 61.695 | 3 | +| 25% | 2014 | 199305 | 0 | 0.681 | 0.089525 | 0.669 | 0 | 0.07565 | -6.29875 | 0.0591 | 102.96125 | 4 | +| 50% | 2016 | 218509 | 13 | 0.761 | 0.2205 | 0.7845 | 0.000004 | 0.1035 | -4.5585 | 0.09795 | 112.7145 | 4 | +| 75% | 2017 | 242098.5 | 31 | 0.8295 | 0.403 | 0.87575 | 0.000234 | 0.164 | -3.331 | 0.177 | 125.03925 | 4 | +| max | 2020 | 511738 | 73 | 0.966 | 0.954 | 0.995 | 0.91 | 0.811 | 0.582 | 0.514 | 206.007 | 5 | + ## Visualize the data -Now, find out the most popular genre using a barplot: +Now, find out the most popular music genre using a barplot: ```python top = df['artist_top_genre'].value_counts()