Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions 4-Data-Science-Lifecycle/15-analyzing/assignment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,54 @@
{
 "cell_type": "code",
 "execution_count": null,
 "source": [
  "# Basic information about the dataset\r\n",
  "print(\"Dataset shape:\", df.shape)\r\n",
  "print(\"\\nColumn names and types:\")\r\n",
  "print(df.dtypes)\r\n",
  "\r\n",
  "# Convert datetime columns to proper datetime format\r\n",
  "# (they are loaded as plain 'object' columns, so the .dt accessor below needs this)\r\n",
  "df['tpep_pickup_datetime'] = pd.to_datetime(df['tpep_pickup_datetime'])\r\n",
  "df['tpep_dropoff_datetime'] = pd.to_datetime(df['tpep_dropoff_datetime'])\r\n",
  "\r\n",
  "# Calculate trip duration in minutes\r\n",
  "df['trip_duration_minutes'] = (df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime']).dt.total_seconds() / 60\r\n",
  "\r\n",
  "# Extract time-based features\r\n",
  "df['pickup_hour'] = df['tpep_pickup_datetime'].dt.hour\r\n",
  "df['pickup_day_of_week'] = df['tpep_pickup_datetime'].dt.dayofweek\r\n",
  "df['pickup_month'] = df['tpep_pickup_datetime'].dt.month\r\n",
  "\r\n",
  "# Interesting statistics\r\n",
  "print(\"\\nAverage trip distance:\", df['trip_distance'].mean())\r\n",
  "print(\"Average fare amount:\", df['fare_amount'].mean())\r\n",
  "print(\"Average trip duration (minutes):\", df['trip_duration_minutes'].mean())"
 ],
 "outputs": [
  {
   "name": "stdout",
   "output_type": "stream",
   "text": [
    "Dataset shape: (200, 18)\n",
    "\n",
    "Column names and types:\n",
    "VendorID                 float64\n",
    "tpep_pickup_datetime      object\n",
    "tpep_dropoff_datetime     object\n",
    "passenger_count          float64\n",
    "trip_distance            float64\n",
    "RatecodeID               float64\n",
    "store_and_fwd_flag        object\n",
    "PULocationID               int64\n",
    "DOLocationID               int64\n",
    "payment_type             float64\n",
    "fare_amount              float64\n",
    "extra                    float64\n",
    "mta_tax                  float64\n",
    "tip_amount               float64\n",
    "tolls_amount             float64\n",
    "improvement_surcharge    float64\n",
    "total_amount             float64\n",
    "congestion_surcharge     float64\n",
    "dtype: object\n",
    "\n",
    "Average trip distance: 2.9131500000000003\n",
    "Average fare amount: 12.2675\n",
    "Average trip duration (minutes): 13.548333333333334\n"
   ]
  }
 ],
 "metadata": {}
}
],
Expand Down Expand Up @@ -138,4 +184,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}