diff --git a/4-Data-Science-Lifecycle/15-analyzing/assignment.ipynb b/4-Data-Science-Lifecycle/15-analyzing/assignment.ipynb
index d3a080df..328c217b 100644
--- a/4-Data-Science-Lifecycle/15-analyzing/assignment.ipynb
+++ b/4-Data-Science-Lifecycle/15-analyzing/assignment.ipynb
@@ -108,8 +108,58 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "source": [],
-   "outputs": [],
+   "source": [
+    "# Basic information about the dataset\r\n",
+    "print(\"Dataset shape:\", df.shape)\r\n",
+    "print(\"\\nColumn names and types:\")\r\n",
+    "print(df.dtypes)\r\n",
+    "\r\n",
+    "# Convert datetime columns to proper datetime format\r\n",
+    "df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])\r\n",
+    "df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])\r\n",
+    "# Calculate trip duration in minutes\r\n",
+    "df['trip_duration_minutes'] = (df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']).dt.total_seconds() / 60\r\n",
+    "# Extract time-based features\r\n",
+    "df['pickup_hour'] = df['tpep_pickup_datetime'].dt.hour\r\n",
+    "df['pickup_day_of_week'] = df['tpep_pickup_datetime'].dt.dayofweek\r\n",
+    "df['pickup_month'] = df['tpep_pickup_datetime'].dt.month\r\n",
+    "#interesting statistics\r\n",
+    "print(\"\\nAverage trip distance:\", df['trip_distance'].mean())\r\n",
+    "print(\"Average fare amount:\", df['fare_amount'].mean())\r\n",
+    "print(\"Average trip duration (minutes):\", df['trip_duration_minutes'].mean())\r\n"
+   ],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset shape: (200, 18)\r\n",
+      "Column names and types:\r\n",
+      "VendorID float64\r\n",
+      "tpep_pickup_datetime object\r\n",
+      "tpep_dropoff_datetime object\r\n",
+      "passenger_count float64\r\n",
+      "trip_distance float64\r\n",
+      "RatecodeID float64\r\n",
+      "store_and_fwd_flag object\r\n",
+      "PULocationID int64\r\n",
+      "DOLocationID int64\r\n",
+      "payment_type float64\r\n",
+      "fare_amount float64\r\n",
+      "extra float64\r\n",
+      "mta_tax float64\r\n",
+      "tip_amount float64\r\n",
+      "tolls_amount float64\r\n",
+      "improvement_surcharge float64\r\n",
+      "total_amount float64\r\n",
+      "congestion_surcharge float64\r\n",
+      "dtype: object\r\n",
+      "Average trip distance: 2.9131500000000003\r\n",
+      "Average fare amount: 12.2675\r\n",
+      "Average trip duration (minutes): 13.548333333333334\r\n"
+     ]
+    }
+   ],
    "metadata": {}
   }
  ],
@@ -138,4 +188,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}