You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Data-Science-For-Beginners/4-Data-Science-Lifecycle/14-Introduction/notebook.ipynb

103 lines
3.4 KiB

{
"cells": [
{
"cell_type": "markdown",
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\r\n",
"\r\n",
"Licensed under the MIT License."
],
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Exploring NYC Taxi data in Winter and Summer\r\n",
"\r\n",
"This notebook loads the January and July 2019 yellow taxi trip records and prints summary statistics for the combined data.\r\n",
"\r\n",
"Refer to the [data dictionary](https://www1.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_yellow.pdf) for a description of each column in the dataset.\r\n"
],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Use the %pip magic rather than !pip so that pandas is installed into the\r\n",
"# environment of the running kernel, not whichever pip is first on PATH.\r\n",
"%pip install pandas"
],
"outputs": [],
"metadata": {
"scrolled": true
}
},
{
"cell_type": "code",
"execution_count": 2,
"source": [
"import glob\r\n",
"\r\n",
"import pandas as pd\r\n",
"\r\n",
"# The monthly trip files share one naming pattern; only the two-digit month varies.\r\n",
"path = '../../data/Taxi/yellow_tripdata_2019-{}.csv'\r\n",
"\r\n",
"# low_memory=False stops pandas from inferring dtypes chunk by chunk, which is\r\n",
"# what triggered a mixed-types DtypeWarning for column 6 with the default setting.\r\n",
"july_taxi = pd.read_csv(path.format('07'), low_memory=False)\r\n",
"january_taxi = pd.read_csv(path.format('01'), low_memory=False)\r\n",
"\r\n",
"# ignore_index=True renumbers the combined rows 0..n-1; without it the index\r\n",
"# labels of the January and July frames are repeated in the result.\r\n",
"df = pd.concat([january_taxi, july_taxi], ignore_index=True)\r\n",
"\r\n",
"# Summary statistics for the numeric columns of both months together.\r\n",
"print(df.describe())"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" VendorID passenger_count ... total_amount congestion_surcharge\n",
"count 1.394425e+07 1.394425e+07 ... 1.397821e+07 9.122233e+06\n",
"mean 1.641553e+00 1.569314e+00 ... 1.745644e+01 1.563450e+00\n",
"std 5.172343e-01 1.219889e+00 ... 1.945658e+02 1.215658e+00\n",
"min 1.000000e+00 0.000000e+00 ... -4.508000e+02 -2.500000e+00\n",
"25% 1.000000e+00 1.000000e+00 ... 9.360000e+00 0.000000e+00\n",
"50% 2.000000e+00 1.000000e+00 ... 1.295000e+01 2.500000e+00\n",
"75% 2.000000e+00 2.000000e+00 ... 1.880000e+01 2.500000e+00\n",
"max 4.000000e+00 9.000000e+00 ... 6.232617e+05 2.750000e+00\n",
"\n",
"[8 rows x 15 columns]\n"
]
}
],
"metadata": {}
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3.9.7 64-bit ('venv': venv)"
},
"language_info": {
"mimetype": "text/x-python",
"name": "python",
"pygments_lexer": "ipython3",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"version": "3.9.7",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"name": "04-nyc-taxi-join-weather-in-pandas",
"notebookId": 1709144033725344,
"interpreter": {
"hash": "6b9b57232c4b57163d057191678da2030059e733b8becc68f245de5a75abe84e"
}
},
"nbformat": 4,
"nbformat_minor": 2
}