diff --git a/1-Introduction/01-defining-data-science/README.md b/1-Introduction/01-defining-data-science/README.md index 412ef753..544f698b 100644 --- a/1-Introduction/01-defining-data-science/README.md +++ b/1-Introduction/01-defining-data-science/README.md @@ -1,5 +1,6 @@ # Defining Data Science +[](https://youtu.be/pqqsm5reGvs) ## Pre-Lecture Quiz [Pre-lecture quiz]() @@ -74,10 +75,54 @@ As we have already mentioned - data is everywhere, we just need to capture it in ## Where to get Data There are many possible sources of data, and it will be impossible to list all of them! However, let's mention some of the typical places where you can get data: - + +* **Structured** + - **Internet of Things**, including data from different sensors, such as temperature or pressure sensors, provides a lot of useful data. For example, if an office building is equipped with IoT sensors, we can automatically control heating and lighting in order to minimize costs. + - **Surveys** that we ask users to complete after purchasing a product or visiting a web site. + - **Analysis of behavior** can, for example, help us understand how deeply a user goes into a site, and what the typical reason for leaving the site is. +* **Unstructured** + - **Texts** can be a rich source of insights, starting from an overall **sentiment score**, up to extracting keywords and even some semantic meaning. + - **Images** or **Video**. A video from a surveillance camera can be used to estimate traffic on the road, and inform people about potential traffic jams. + - Web server **Logs** can be used to understand which pages of our site are most visited, and for how long. +* **Semi-structured** + - A **Social Network** graph can be a great source of data about user personality and potential effectiveness in spreading information around. + - When we have a bunch of photographs from a party, we can try to extract **Group Dynamics** data by building a graph of people taking pictures with each other. 
+ +By knowing different possible sources of data, you can try to think about different scenarios where data science techniques can be applied to understand the situation better, and to improve business processes. + ## What you can do with Data +In Data Science, we focus on the following steps of the data journey: +
5 rows × 590 columns
\n", + "\n", + " | Province/State | \n", + "Country/Region | \n", + "Lat | \n", + "Long | \n", + "1/22/20 | \n", + "1/23/20 | \n", + "1/24/20 | \n", + "1/25/20 | \n", + "1/26/20 | \n", + "1/27/20 | \n", + "... | \n", + "8/20/21 | \n", + "8/21/21 | \n", + "8/22/21 | \n", + "8/23/21 | \n", + "8/24/21 | \n", + "8/25/21 | \n", + "8/26/21 | \n", + "8/27/21 | \n", + "8/28/21 | \n", + "8/29/21 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
58 | \n", + "Anhui | \n", + "China | \n", + "31.8257 | \n", + "117.2264 | \n", + "1 | \n", + "9 | \n", + "15 | \n", + "39 | \n", + "60 | \n", + "70 | \n", + "... | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "1008 | \n", + "
59 | \n", + "Beijing | \n", + "China | \n", + "40.1824 | \n", + "116.4142 | \n", + "14 | \n", + "22 | \n", + "36 | \n", + "41 | \n", + "68 | \n", + "80 | \n", + "... | \n", + "1112 | \n", + "1113 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "1115 | \n", + "
60 | \n", + "Chongqing | \n", + "China | \n", + "30.0572 | \n", + "107.8740 | \n", + "6 | \n", + "9 | \n", + "27 | \n", + "57 | \n", + "75 | \n", + "110 | \n", + "... | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "603 | \n", + "
61 | \n", + "Fujian | \n", + "China | \n", + "26.0789 | \n", + "117.9874 | \n", + "1 | \n", + "5 | \n", + "10 | \n", + "18 | \n", + "35 | \n", + "59 | \n", + "... | \n", + "780 | \n", + "780 | \n", + "780 | \n", + "782 | \n", + "782 | \n", + "783 | \n", + "783 | \n", + "784 | \n", + "785 | \n", + "786 | \n", + "
62 | \n", + "Gansu | \n", + "China | \n", + "35.7518 | \n", + "104.2861 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "4 | \n", + "7 | \n", + "14 | \n", + "... | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "199 | \n", + "
63 | \n", + "Guangdong | \n", + "China | \n", + "23.3417 | \n", + "113.4244 | \n", + "26 | \n", + "32 | \n", + "53 | \n", + "78 | \n", + "111 | \n", + "151 | \n", + "... | \n", + "3001 | \n", + "3007 | \n", + "3012 | \n", + "3020 | \n", + "3023 | \n", + "3032 | \n", + "3040 | \n", + "3043 | \n", + "3046 | \n", + "3055 | \n", + "
64 | \n", + "Guangxi | \n", + "China | \n", + "23.8298 | \n", + "108.7881 | \n", + "2 | \n", + "5 | \n", + "23 | \n", + "23 | \n", + "36 | \n", + "46 | \n", + "... | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "289 | \n", + "
65 | \n", + "Guizhou | \n", + "China | \n", + "26.8154 | \n", + "106.8748 | \n", + "1 | \n", + "3 | \n", + "3 | \n", + "4 | \n", + "5 | \n", + "7 | \n", + "... | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "147 | \n", + "
66 | \n", + "Hainan | \n", + "China | \n", + "19.1959 | \n", + "109.7453 | \n", + "4 | \n", + "5 | \n", + "8 | \n", + "19 | \n", + "22 | \n", + "33 | \n", + "... | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "190 | \n", + "
67 | \n", + "Hebei | \n", + "China | \n", + "39.5490 | \n", + "116.1306 | \n", + "1 | \n", + "1 | \n", + "2 | \n", + "8 | \n", + "13 | \n", + "18 | \n", + "... | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "1317 | \n", + "
68 | \n", + "Heilongjiang | \n", + "China | \n", + "47.8620 | \n", + "127.7615 | \n", + "0 | \n", + "2 | \n", + "4 | \n", + "9 | \n", + "15 | \n", + "21 | \n", + "... | \n", + "1614 | \n", + "1614 | \n", + "1614 | \n", + "1614 | \n", + "1614 | \n", + "1614 | \n", + "1615 | \n", + "1615 | \n", + "1615 | \n", + "1615 | \n", + "
69 | \n", + "Henan | \n", + "China | \n", + "37.8957 | \n", + "114.9042 | \n", + "5 | \n", + "5 | \n", + "9 | \n", + "32 | \n", + "83 | \n", + "128 | \n", + "... | \n", + "1521 | \n", + "1522 | \n", + "1523 | \n", + "1524 | \n", + "1525 | \n", + "1525 | \n", + "1527 | \n", + "1528 | \n", + "1528 | \n", + "1528 | \n", + "
70 | \n", + "Hong Kong | \n", + "China | \n", + "22.3000 | \n", + "114.2000 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "5 | \n", + "8 | \n", + "8 | \n", + "... | \n", + "12049 | \n", + "12052 | \n", + "12057 | \n", + "12062 | \n", + "12069 | \n", + "12074 | \n", + "12077 | \n", + "12094 | \n", + "12100 | \n", + "12107 | \n", + "
71 | \n", + "Hubei | \n", + "China | \n", + "30.9756 | \n", + "112.2707 | \n", + "444 | \n", + "444 | \n", + "549 | \n", + "761 | \n", + "1058 | \n", + "1423 | \n", + "... | \n", + "68287 | \n", + "68289 | \n", + "68289 | \n", + "68289 | \n", + "68289 | \n", + "68289 | \n", + "68290 | \n", + "68290 | \n", + "68290 | \n", + "68290 | \n", + "
72 | \n", + "Hunan | \n", + "China | \n", + "27.6104 | \n", + "111.7088 | \n", + "4 | \n", + "9 | \n", + "24 | \n", + "43 | \n", + "69 | \n", + "100 | \n", + "... | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "1181 | \n", + "
73 | \n", + "Inner Mongolia | \n", + "China | \n", + "44.0935 | \n", + "113.9448 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "7 | \n", + "7 | \n", + "11 | \n", + "... | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "412 | \n", + "
74 | \n", + "Jiangsu | \n", + "China | \n", + "32.9711 | \n", + "119.4550 | \n", + "1 | \n", + "5 | \n", + "9 | \n", + "18 | \n", + "33 | \n", + "47 | \n", + "... | \n", + "1583 | \n", + "1584 | \n", + "1584 | \n", + "1584 | \n", + "1586 | \n", + "1586 | \n", + "1587 | \n", + "1587 | \n", + "1589 | \n", + "1589 | \n", + "
75 | \n", + "Jiangxi | \n", + "China | \n", + "27.6140 | \n", + "115.7221 | \n", + "2 | \n", + "7 | \n", + "18 | \n", + "18 | \n", + "36 | \n", + "72 | \n", + "... | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "937 | \n", + "
76 | \n", + "Jilin | \n", + "China | \n", + "43.6661 | \n", + "126.1923 | \n", + "0 | \n", + "1 | \n", + "3 | \n", + "4 | \n", + "4 | \n", + "6 | \n", + "... | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "574 | \n", + "
77 | \n", + "Liaoning | \n", + "China | \n", + "41.2956 | \n", + "122.6085 | \n", + "2 | \n", + "3 | \n", + "4 | \n", + "17 | \n", + "21 | \n", + "27 | \n", + "... | \n", + "443 | \n", + "443 | \n", + "443 | \n", + "443 | \n", + "443 | \n", + "444 | \n", + "445 | \n", + "446 | \n", + "446 | \n", + "446 | \n", + "
78 | \n", + "Macau | \n", + "China | \n", + "22.1667 | \n", + "113.5500 | \n", + "1 | \n", + "2 | \n", + "2 | \n", + "2 | \n", + "5 | \n", + "6 | \n", + "... | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "63 | \n", + "
79 | \n", + "Ningxia | \n", + "China | \n", + "37.2692 | \n", + "106.1655 | \n", + "1 | \n", + "1 | \n", + "2 | \n", + "3 | \n", + "4 | \n", + "7 | \n", + "... | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "77 | \n", + "
80 | \n", + "Qinghai | \n", + "China | \n", + "35.7452 | \n", + "95.9956 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "6 | \n", + "... | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "18 | \n", + "
81 | \n", + "Shaanxi | \n", + "China | \n", + "35.1917 | \n", + "108.8701 | \n", + "0 | \n", + "3 | \n", + "5 | \n", + "15 | \n", + "22 | \n", + "35 | \n", + "... | \n", + "668 | \n", + "668 | \n", + "668 | \n", + "669 | \n", + "669 | \n", + "669 | \n", + "669 | \n", + "669 | \n", + "669 | \n", + "669 | \n", + "
82 | \n", + "Shandong | \n", + "China | \n", + "36.3427 | \n", + "118.1498 | \n", + "2 | \n", + "6 | \n", + "15 | \n", + "27 | \n", + "46 | \n", + "75 | \n", + "... | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "923 | \n", + "924 | \n", + "
83 | \n", + "Shanghai | \n", + "China | \n", + "31.2020 | \n", + "121.4491 | \n", + "9 | \n", + "16 | \n", + "20 | \n", + "33 | \n", + "40 | \n", + "53 | \n", + "... | \n", + "2420 | \n", + "2432 | \n", + "2436 | \n", + "2445 | \n", + "2451 | \n", + "2454 | \n", + "2462 | \n", + "2466 | \n", + "2471 | \n", + "2476 | \n", + "
84 | \n", + "Shanxi | \n", + "China | \n", + "37.5777 | \n", + "112.2922 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "6 | \n", + "9 | \n", + "13 | \n", + "... | \n", + "255 | \n", + "255 | \n", + "256 | \n", + "256 | \n", + "256 | \n", + "256 | \n", + "256 | \n", + "258 | \n", + "258 | \n", + "259 | \n", + "
85 | \n", + "Sichuan | \n", + "China | \n", + "30.6171 | \n", + "102.7103 | \n", + "5 | \n", + "8 | \n", + "15 | \n", + "28 | \n", + "44 | \n", + "69 | \n", + "... | \n", + "1181 | \n", + "1182 | \n", + "1183 | \n", + "1185 | \n", + "1185 | \n", + "1185 | \n", + "1186 | \n", + "1187 | \n", + "1188 | \n", + "1188 | \n", + "
86 | \n", + "Tianjin | \n", + "China | \n", + "39.3054 | \n", + "117.3230 | \n", + "4 | \n", + "4 | \n", + "8 | \n", + "10 | \n", + "14 | \n", + "23 | \n", + "... | \n", + "458 | \n", + "459 | \n", + "462 | \n", + "463 | \n", + "464 | \n", + "465 | \n", + "466 | \n", + "466 | \n", + "470 | \n", + "472 | \n", + "
87 | \n", + "Tibet | \n", + "China | \n", + "31.6927 | \n", + "88.0924 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "1 | \n", + "
88 | \n", + "Unknown | \n", + "China | \n", + "NaN | \n", + "NaN | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
89 | \n", + "Xinjiang | \n", + "China | \n", + "41.1129 | \n", + "85.2401 | \n", + "0 | \n", + "2 | \n", + "2 | \n", + "3 | \n", + "4 | \n", + "5 | \n", + "... | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "980 | \n", + "
90 | \n", + "Yunnan | \n", + "China | \n", + "24.9740 | \n", + "101.4870 | \n", + "1 | \n", + "2 | \n", + "5 | \n", + "11 | \n", + "16 | \n", + "26 | \n", + "... | \n", + "1000 | \n", + "1007 | \n", + "1010 | \n", + "1014 | \n", + "1021 | \n", + "1031 | \n", + "1039 | \n", + "1047 | \n", + "1064 | \n", + "1067 | \n", + "
91 | \n", + "Zhejiang | \n", + "China | \n", + "29.1832 | \n", + "120.0934 | \n", + "10 | \n", + "27 | \n", + "43 | \n", + "62 | \n", + "104 | \n", + "128 | \n", + "... | \n", + "1420 | \n", + "1420 | \n", + "1421 | \n", + "1428 | \n", + "1428 | \n", + "1429 | \n", + "1429 | \n", + "1429 | \n", + "1429 | \n", + "1430 | \n", "
5 rows × 584 columns
\n", + "34 rows × 590 columns
\n", "\n", + " | Lat | \n", + "Long | \n", + "1/22/20 | \n", + "1/23/20 | \n", + "1/24/20 | \n", + "1/25/20 | \n", + "1/26/20 | \n", + "1/27/20 | \n", + "1/28/20 | \n", + "1/29/20 | \n", + "... | \n", + "8/20/21 | \n", + "8/21/21 | \n", + "8/22/21 | \n", + "8/23/21 | \n", + "8/24/21 | \n", + "8/25/21 | \n", + "8/26/21 | \n", + "8/27/21 | \n", + "8/28/21 | \n", + "8/29/21 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country/Region | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
Afghanistan | \n", + "33.93911 | \n", + "67.709953 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "152448 | \n", + "152448 | \n", + "152448 | \n", + "152583 | \n", + "152660 | \n", + "152722 | \n", + "152822 | \n", + "152960 | \n", + "152960 | \n", + "152960 | \n", + "
Albania | \n", + "41.15330 | \n", + "20.168300 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "138132 | \n", + "138790 | \n", + "139324 | \n", + "139721 | \n", + "140521 | \n", + "141365 | \n", + "142253 | \n", + "143174 | \n", + "144079 | \n", + "144847 | \n", + "
Algeria | \n", + "28.03390 | \n", + "1.659600 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "190656 | \n", + "191171 | \n", + "191583 | \n", + "192089 | \n", + "192626 | \n", + "193171 | \n", + "193674 | \n", + "194186 | \n", + "194671 | \n", + "195162 | \n", + "
Andorra | \n", + "42.50630 | \n", + "1.521800 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "14988 | \n", + "14988 | \n", + "14988 | \n", + "15002 | \n", + "15003 | \n", + "15014 | \n", + "15016 | \n", + "15025 | \n", + "15025 | \n", + "15025 | \n", + "
Angola | \n", + "-11.20270 | \n", + "17.873900 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "... | \n", + "45583 | \n", + "45817 | \n", + "45945 | \n", + "46076 | \n", + "46340 | \n", + "46539 | \n", + "46726 | \n", + "46929 | \n", + "47079 | \n", + "47168 | \n", + "
5 rows × 588 columns
\n", + "\n", + " | infected | \n", + "recovered | \n", + "deaths | \n", + "
---|---|---|---|
2020-01-22 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
2020-01-23 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
2020-01-24 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "
2020-01-25 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "
2020-01-26 | \n", + "5 | \n", + "0 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
2021-08-25 | \n", + "38223029 | \n", + "0 | \n", + "632272 | \n", + "
2021-08-26 | \n", + "38384360 | \n", + "0 | \n", + "633564 | \n", + "
2021-08-27 | \n", + "38707294 | \n", + "0 | \n", + "636720 | \n", + "
2021-08-28 | \n", + "38760363 | \n", + "0 | \n", + "637254 | \n", + "
2021-08-29 | \n", + "38796746 | \n", + "0 | \n", + "637531 | \n", + "
586 rows × 3 columns
\n", + "\n", + " | UID | \n", + "iso2 | \n", + "iso3 | \n", + "code3 | \n", + "FIPS | \n", + "Admin2 | \n", + "Province_State | \n", + "Country_Region | \n", + "Lat | \n", + "Long_ | \n", + "Combined_Key | \n", + "Population | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "4 | \n", + "AF | \n", + "AFG | \n", + "4.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Afghanistan | \n", + "33.939110 | \n", + "67.709953 | \n", + "Afghanistan | \n", + "38928341.0 | \n", + "
1 | \n", + "8 | \n", + "AL | \n", + "ALB | \n", + "8.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Albania | \n", + "41.153300 | \n", + "20.168300 | \n", + "Albania | \n", + "2877800.0 | \n", + "
2 | \n", + "12 | \n", + "DZ | \n", + "DZA | \n", + "12.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Algeria | \n", + "28.033900 | \n", + "1.659600 | \n", + "Algeria | \n", + "43851043.0 | \n", + "
3 | \n", + "20 | \n", + "AD | \n", + "AND | \n", + "20.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Andorra | \n", + "42.506300 | \n", + "1.521800 | \n", + "Andorra | \n", + "77265.0 | \n", + "
4 | \n", + "24 | \n", + "AO | \n", + "AGO | \n", + "24.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Angola | \n", + "-11.202700 | \n", + "17.873900 | \n", + "Angola | \n", + "32866268.0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
4191 | \n", + "84056037 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "56037.0 | \n", + "Sweetwater | \n", + "Wyoming | \n", + "US | \n", + "41.659439 | \n", + "-108.882788 | \n", + "Sweetwater, Wyoming, US | \n", + "42343.0 | \n", + "
4192 | \n", + "84056039 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "56039.0 | \n", + "Teton | \n", + "Wyoming | \n", + "US | \n", + "43.935225 | \n", + "-110.589080 | \n", + "Teton, Wyoming, US | \n", + "23464.0 | \n", + "
4193 | \n", + "84056041 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "56041.0 | \n", + "Uinta | \n", + "Wyoming | \n", + "US | \n", + "41.287818 | \n", + "-110.547578 | \n", + "Uinta, Wyoming, US | \n", + "20226.0 | \n", + "
4194 | \n", + "84056043 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "56043.0 | \n", + "Washakie | \n", + "Wyoming | \n", + "US | \n", + "43.904516 | \n", + "-107.680187 | \n", + "Washakie, Wyoming, US | \n", + "7805.0 | \n", + "
4195 | \n", + "84056045 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "56045.0 | \n", + "Weston | \n", + "Wyoming | \n", + "US | \n", + "43.839612 | \n", + "-104.567488 | \n", + "Weston, Wyoming, US | \n", + "6927.0 | \n", + "
4196 rows × 12 columns
\n", + "\n", + " | UID | \n", + "iso2 | \n", + "iso3 | \n", + "code3 | \n", + "FIPS | \n", + "Admin2 | \n", + "Province_State | \n", + "Country_Region | \n", + "Lat | \n", + "Long_ | \n", + "Combined_Key | \n", + "Population | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
790 | \n", + "840 | \n", + "US | \n", + "USA | \n", + "840.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "US | \n", + "40.0 | \n", + "-100.0 | \n", + "US | \n", + "329466283.0 | \n", + "
\n", + " | cord_uid | \n", + "sha | \n", + "source_x | \n", + "title | \n", + "doi | \n", + "pmcid | \n", + "pubmed_id | \n", + "license | \n", + "abstract | \n", + "publish_time | \n", + "authors | \n", + "journal | \n", + "mag_id | \n", + "who_covidence_id | \n", + "arxiv_id | \n", + "pdf_json_files | \n", + "pmc_json_files | \n", + "url | \n", + "s2_id | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "ug7v899j | \n", + "d1aafb70c066a2068b02786f8929fd9c900897fb | \n", + "PMC | \n", + "Clinical features of culture-proven Mycoplasma... | \n", + "10.1186/1471-2334-1-6 | \n", + "PMC35282 | \n", + "11472636 | \n", + "no-cc | \n", + "OBJECTIVE: This retrospective chart review des... | \n", + "2001-07-04 | \n", + "Madani, Tariq A; Al-Ghamdi, Aisha A | \n", + "BMC Infect Dis | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "document_parses/pdf_json/d1aafb70c066a2068b027... | \n", + "document_parses/pmc_json/PMC35282.xml.json | \n", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3... | \n", + "NaN | \n", + "
1 | \n", + "02tnwd4m | \n", + "6b0567729c2143a66d737eb0a2f63f2dce2e5a7d | \n", + "PMC | \n", + "Nitric oxide: a pro-inflammatory mediator in l... | \n", + "10.1186/rr14 | \n", + "PMC59543 | \n", + "11667967 | \n", + "no-cc | \n", + "Inflammatory diseases of the respiratory tract... | \n", + "2000-08-15 | \n", + "Vliet, Albert van der; Eiserich, Jason P; Cros... | \n", + "Respir Res | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "document_parses/pdf_json/6b0567729c2143a66d737... | \n", + "document_parses/pmc_json/PMC59543.xml.json | \n", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5... | \n", + "NaN | \n", + "
2 | \n", + "ejv2xln0 | \n", + "06ced00a5fc04215949aa72528f2eeaae1d58927 | \n", + "PMC | \n", + "Surfactant protein-D and pulmonary host defense | \n", + "10.1186/rr19 | \n", + "PMC59549 | \n", + "11667972 | \n", + "no-cc | \n", + "Surfactant protein-D (SP-D) participates in th... | \n", + "2000-08-25 | \n", + "Crouch, Erika C | \n", + "Respir Res | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "document_parses/pdf_json/06ced00a5fc04215949aa... | \n", + "document_parses/pmc_json/PMC59549.xml.json | \n", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5... | \n", + "NaN | \n", + "
3 | \n", + "2b73a28n | \n", + "348055649b6b8cf2b9a376498df9bf41f7123605 | \n", + "PMC | \n", + "Role of endothelin-1 in lung disease | \n", + "10.1186/rr44 | \n", + "PMC59574 | \n", + "11686871 | \n", + "no-cc | \n", + "Endothelin-1 (ET-1) is a 21 amino acid peptide... | \n", + "2001-02-22 | \n", + "Fagan, Karen A; McMurtry, Ivan F; Rodman, David M | \n", + "Respir Res | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "document_parses/pdf_json/348055649b6b8cf2b9a37... | \n", + "document_parses/pmc_json/PMC59574.xml.json | \n", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5... | \n", + "NaN | \n", + "
4 | \n", + "9785vg6d | \n", + "5f48792a5fa08bed9f56016f4981ae2ca6031b32 | \n", + "PMC | \n", + "Gene expression in epithelial cells in respons... | \n", + "10.1186/rr61 | \n", + "PMC59580 | \n", + "11686888 | \n", + "no-cc | \n", + "Respiratory syncytial virus (RSV) and pneumoni... | \n", + "2001-05-11 | \n", + "Domachowske, Joseph B; Bonville, Cynthia A; Ro... | \n", + "Respir Res | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "document_parses/pdf_json/5f48792a5fa08bed9f560... | \n", + "document_parses/pmc_json/PMC59580.xml.json | \n", + "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5... | \n", + "NaN | \n", + "
\n", + " | Name | \n", + "Count | \n", + "
---|---|---|
0 | \n", + "hydroxychloroquine | \n", + "9806 | \n", + "
3 | \n", + "remdesivir | \n", + "7861 | \n", + "
2 | \n", + "tocilizumab | \n", + "6118 | \n", + "
1 | \n", + "chloroquine | \n", + "4578 | \n", + "
8 | \n", + "heparin | \n", + "4161 | \n", + "
5 | \n", + "lopinavir | \n", + "3811 | \n", + "
4 | \n", + "azithromycin | \n", + "3585 | \n", + "
7 | \n", + "dexamethasone | \n", + "3340 | \n", + "
9 | \n", + "favipiravir | \n", + "2439 | \n", + "
10 | \n", + "methylprednisolone | \n", + "1600 | \n", + "
6 | \n", + "ritonavir | \n", + "948 | \n", + "
\n", + " | \n", + " | hydroxychloroquine | \n", + "chloroquine | \n", + "tocilizumab | \n", + "remdesivir | \n", + "azithromycin | \n", + "lopinavir | \n", + "ritonavir | \n", + "dexamethasone | \n", + "heparin | \n", + "favipiravir | \n", + "methylprednisolone | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
publish_time | \n", + "publish_time | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
2020 | \n", + "1 | \n", + "3672 | \n", + "1773 | \n", + "1779 | \n", + "2134 | \n", + "1173 | \n", + "1430 | \n", + "370 | \n", + "561 | \n", + "984 | \n", + "666 | \n", + "331 | \n", + "
2 | \n", + "0 | \n", + "19 | \n", + "0 | \n", + "3 | \n", + "3 | \n", + "18 | \n", + "11 | \n", + "1 | \n", + "3 | \n", + "12 | \n", + "19 | \n", + "|
3 | \n", + "45 | \n", + "72 | \n", + "5 | \n", + "27 | \n", + "12 | \n", + "52 | \n", + "16 | \n", + "3 | \n", + "21 | \n", + "11 | \n", + "14 | \n", + "|
4 | \n", + "188 | \n", + "238 | \n", + "50 | \n", + "124 | \n", + "68 | \n", + "113 | \n", + "13 | \n", + "14 | \n", + "77 | \n", + "48 | \n", + "14 | \n", + "|
5 | \n", + "459 | \n", + "191 | \n", + "158 | \n", + "209 | \n", + "132 | \n", + "135 | \n", + "41 | \n", + "12 | \n", + "92 | \n", + "48 | \n", + "21 | \n", + "|
6 | \n", + "381 | \n", + "149 | \n", + "243 | \n", + "186 | \n", + "110 | \n", + "132 | \n", + "18 | \n", + "48 | \n", + "84 | \n", + "30 | \n", + "29 | \n", + "|
7 | \n", + "381 | \n", + "178 | \n", + "202 | \n", + "165 | \n", + "108 | \n", + "138 | \n", + "29 | \n", + "58 | \n", + "117 | \n", + "56 | \n", + "27 | \n", + "|
8 | \n", + "307 | \n", + "115 | \n", + "172 | \n", + "165 | \n", + "145 | \n", + "91 | \n", + "24 | \n", + "56 | \n", + "95 | \n", + "45 | \n", + "35 | \n", + "|
9 | \n", + "319 | \n", + "123 | \n", + "185 | \n", + "190 | \n", + "91 | \n", + "98 | \n", + "28 | \n", + "90 | \n", + "111 | \n", + "46 | \n", + "26 | \n", + "|
10 | \n", + "319 | \n", + "96 | \n", + "212 | \n", + "227 | \n", + "72 | \n", + "127 | \n", + "39 | \n", + "97 | \n", + "117 | \n", + "81 | \n", + "37 | \n", + "|
11 | \n", + "272 | \n", + "66 | \n", + "170 | \n", + "197 | \n", + "79 | \n", + "104 | \n", + "27 | \n", + "77 | \n", + "124 | \n", + "77 | \n", + "44 | \n", + "|
12 | \n", + "255 | \n", + "102 | \n", + "229 | \n", + "271 | \n", + "98 | \n", + "76 | \n", + "31 | \n", + "76 | \n", + "87 | \n", + "56 | \n", + "59 | \n", + "|
2021 | \n", + "1 | \n", + "2191 | \n", + "780 | \n", + "1787 | \n", + "2523 | \n", + "892 | \n", + "841 | \n", + "198 | \n", + "1208 | \n", + "1096 | \n", + "805 | \n", + "474 | \n", + "
2 | \n", + "163 | \n", + "66 | \n", + "184 | \n", + "173 | \n", + "85 | \n", + "76 | \n", + "9 | \n", + "86 | \n", + "61 | \n", + "52 | \n", + "63 | \n", + "|
3 | \n", + "172 | \n", + "85 | \n", + "190 | \n", + "295 | \n", + "87 | \n", + "100 | \n", + "17 | \n", + "150 | \n", + "82 | \n", + "85 | \n", + "36 | \n", + "|
4 | \n", + "198 | \n", + "70 | \n", + "125 | \n", + "161 | \n", + "83 | \n", + "60 | \n", + "13 | \n", + "130 | \n", + "144 | \n", + "60 | \n", + "37 | \n", + "|
5 | \n", + "141 | \n", + "55 | \n", + "138 | \n", + "179 | \n", + "69 | \n", + "55 | \n", + "21 | \n", + "108 | \n", + "141 | \n", + "106 | \n", + "44 | \n", + "|
6 | \n", + "144 | \n", + "29 | \n", + "138 | \n", + "182 | \n", + "75 | \n", + "41 | \n", + "12 | \n", + "128 | \n", + "116 | \n", + "66 | \n", + "42 | \n", + "|
7 | \n", + "112 | \n", + "49 | \n", + "96 | \n", + "270 | \n", + "64 | \n", + "59 | \n", + "5 | \n", + "169 | \n", + "106 | \n", + "44 | \n", + "50 | \n", + "
\n", + " | A | \n", + "B | \n", + "
---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "
4 | \n", + "5 | \n", + "Python | \n", + "
5 | \n", + "6 | \n", + "and | \n", + "
6 | \n", + "7 | \n", + "Pandas | \n", + "
7 | \n", + "8 | \n", + "very | \n", + "
8 | \n", + "9 | \n", + "much | \n", + "
\n", + " | A | \n", + "B | \n", + "
---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "
\n", + " | A | \n", + "B | \n", + "
---|---|---|
5 | \n", + "6 | \n", + "and | \n", + "
\n", + " | A | \n", + "B | \n", + "DivA | \n", + "
---|---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "-4.0 | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "-3.0 | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "-2.0 | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "-1.0 | \n", + "
4 | \n", + "5 | \n", + "Python | \n", + "0.0 | \n", + "
5 | \n", + "6 | \n", + "and | \n", + "1.0 | \n", + "
6 | \n", + "7 | \n", + "Pandas | \n", + "2.0 | \n", + "
7 | \n", + "8 | \n", + "very | \n", + "3.0 | \n", + "
8 | \n", + "9 | \n", + "much | \n", + "4.0 | \n", + "
\n", + " | A | \n", + "B | \n", + "DivA | \n", + "LenB | \n", + "
---|---|---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "-4.0 | \n", + "1 | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "-3.0 | \n", + "4 | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "-2.0 | \n", + "2 | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "-1.0 | \n", + "3 | \n", + "
4 | \n", + "5 | \n", + "Python | \n", + "0.0 | \n", + "6 | \n", + "
5 | \n", + "6 | \n", + "and | \n", + "1.0 | \n", + "3 | \n", + "
6 | \n", + "7 | \n", + "Pandas | \n", + "2.0 | \n", + "6 | \n", + "
7 | \n", + "8 | \n", + "very | \n", + "3.0 | \n", + "4 | \n", + "
8 | \n", + "9 | \n", + "much | \n", + "4.0 | \n", + "4 | \n", + "
\n", + " | A | \n", + "B | \n", + "DivA | \n", + "LenB | \n", + "
---|---|---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "-4.0 | \n", + "1 | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "-3.0 | \n", + "4 | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "-2.0 | \n", + "2 | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "-1.0 | \n", + "3 | \n", + "
4 | \n", + "5 | \n", + "Python | \n", + "0.0 | \n", + "6 | \n", + "
\n", + " | A | \n", + "DivA | \n", + "
---|---|---|
LenB | \n", + "\n", + " | \n", + " |
1 | \n", + "1.000000 | \n", + "-4.000000 | \n", + "
2 | \n", + "3.000000 | \n", + "-2.000000 | \n", + "
3 | \n", + "5.000000 | \n", + "0.000000 | \n", + "
4 | \n", + "6.333333 | \n", + "1.333333 | \n", + "
6 | \n", + "6.000000 | \n", + "1.000000 | \n", + "
\n", + " | Count | \n", + "Mean | \n", + "
---|---|---|
LenB | \n", + "\n", + " | \n", + " |
1 | \n", + "1 | \n", + "1.000000 | \n", + "
2 | \n", + "1 | \n", + "3.000000 | \n", + "
3 | \n", + "2 | \n", + "5.000000 | \n", + "
4 | \n", + "3 | \n", + "6.333333 | \n", + "
6 | \n", + "2 | \n", + "6.000000 | \n", + "
\n", + " | A | \n", + "B | \n", + "DivA | \n", + "LenB | \n", + "
---|---|---|---|---|
0 | \n", + "1 | \n", + "I | \n", + "-4.0 | \n", + "1 | \n", + "
1 | \n", + "2 | \n", + "like | \n", + "-3.0 | \n", + "4 | \n", + "
2 | \n", + "3 | \n", + "to | \n", + "-2.0 | \n", + "2 | \n", + "
3 | \n", + "4 | \n", + "use | \n", + "-1.0 | \n", + "3 | \n", + "
4 | \n", + "5 | \n", + "Python | \n", + "0.0 | \n", + "6 | \n", + "