|
|
|
|
@ -108,8 +108,54 @@
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"source": [],
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Basic information about the dataset\r\n"
|
|
|
|
|
"print(\"Dataset shape:\", df.shape)\r\n"
|
|
|
|
|
"print(\"\\nColumn names and types:\")\r\n"
|
|
|
|
|
"print(df.dtypes)\r\n"
|
|
|
|
|
"\r\n"
|
|
|
|
|
"# Convert datetime columns to proper datetime format\r\n"
|
|
|
|
|
|
|
|
|
|
"df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])\r\n"
|
|
|
|
|
"df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])\r\n"
|
|
|
|
|
|
|
|
|
|
"# Calculate trip duration in minutes\r\n"
|
|
|
|
|
"df['trip_duration_minutes'] = (df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']).dt.total_seconds() / 60\r\n"
|
|
|
|
|
"# Extract time-based features\r\n"
|
|
|
|
|
"df['pickup_hour'] = df['tpep_pickup_datetime'].dt.hour\r\n"
|
|
|
|
|
"df['pickup_day_of_week'] = df['tpep_pickup_datetime'].dt.dayofweek\r\n"
|
|
|
|
|
"df['pickup_month'] = df['tpep_pickup_datetime'].dt.month\r\n"
|
|
|
|
|
"# Interesting statistics\r\n"
|
|
|
|
|
"print(\"\\nAverage trip distance:\", df['trip_distance'].mean())\r\n"
|
|
|
|
|
"print(\"Average fare amount:\", df['fare_amount'].mean())\r\n"
|
|
|
|
|
"print(\"Average trip duration (minutes):\", df['trip_duration_minutes'].mean())\r\n"
|
|
|
|
|
],
|
|
|
|
|
"outputs": [
|
|
|
|
|
"Dataset shape: (200, 18)\r\n
|
|
|
|
|
Column names and types:\r\n
|
|
|
|
|
VendorID float64\r\n
|
|
|
|
|
tpep_pickup_datetime object\r\n
|
|
|
|
|
tpep_dropoff_datetime object\r\n
|
|
|
|
|
passenger_count float64\r\n
|
|
|
|
|
trip_distance float64\r\n
|
|
|
|
|
RatecodeID float64\r\n
|
|
|
|
|
store_and_fwd_flag object\r\n
|
|
|
|
|
PULocationID int64\r\n
|
|
|
|
|
DOLocationID int64\r\n
|
|
|
|
|
payment_type float64\r\n
|
|
|
|
|
fare_amount float64\r\n
|
|
|
|
|
extra float64\r\n
|
|
|
|
|
mta_tax float64\r\n
|
|
|
|
|
tip_amount float64\r\n
|
|
|
|
|
tolls_amount float64\r\n
|
|
|
|
|
improvement_surcharge float64\r\n
|
|
|
|
|
total_amount float64\r\n
|
|
|
|
|
congestion_surcharge float64\r\n
|
|
|
|
|
dtype: object\r\n"
|
|
|
|
|
"Average trip distance: 2.9131500000000003\r\n
|
|
|
|
|
Average fare amount: 12.2675\r\n
|
|
|
|
|
Average trip duration (minutes): 13.548333333333334\r\n"
|
|
|
|
|
],
|
|
|
|
|
"metadata": {}
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
@ -138,4 +184,4 @@
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|