|
|
|
@ -9,7 +9,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 36,
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -1741,7 +1741,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 32,
|
|
|
|
|
"execution_count": 25,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -1756,7 +1756,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"execution_count": 26,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -1860,7 +1860,7 @@
|
|
|
|
|
"4 NaN 1.960745 06-08 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"execution_count": 26,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -1872,7 +1872,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 33,
|
|
|
|
|
"execution_count": 27,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2006,7 +2006,7 @@
|
|
|
|
|
" 4 -0.230986 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 33,
|
|
|
|
|
"execution_count": 27,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -2025,7 +2025,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 34,
|
|
|
|
|
"execution_count": 28,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -2037,7 +2037,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 35,
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2141,7 +2141,7 @@
|
|
|
|
|
"4 NaN 1.960745 -0.230986 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 35,
|
|
|
|
|
"execution_count": 29,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -2152,7 +2152,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 38,
|
|
|
|
|
"execution_count": 30,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -2171,7 +2171,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 40,
|
|
|
|
|
"execution_count": 31,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2204,7 +2204,7 @@
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>width</th>\n",
|
|
|
|
|
" <th>link_num</th>\n",
|
|
|
|
|
" <th>links_num</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
@ -2291,30 +2291,30 @@
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width link_num area \n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 "
|
|
|
|
|
" travel_time2 date_trend minute_trend length width links_num area \n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 40,
|
|
|
|
|
"execution_count": 31,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"link_infos = pd.merge(link_infos, link_tops, on=['link_ID'], how='left')\n",
|
|
|
|
|
"link_infos['link_num'] = link_infos['in_links']+link_infos['out_links']\n",
|
|
|
|
|
"link_infos['links_num'] = link_infos['in_links']+link_infos['out_links']\n",
|
|
|
|
|
"link_infos['area'] = link_infos['length'] * link_infos['width']\n",
|
|
|
|
|
"df = pd.merge(df, link_infos[['link_ID','length','width', 'link_num', 'area']], on=['link_ID'], how='left')\n",
|
|
|
|
|
"df = pd.merge(df, link_infos[['link_ID','length','width', 'links_num', 'area']], on=['link_ID'], how='left')\n",
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 41,
|
|
|
|
|
"execution_count": 32,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2347,14 +2347,14 @@
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>width</th>\n",
|
|
|
|
|
" <th>link_num</th>\n",
|
|
|
|
|
" <th>links_num</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" <th>vacation</th>\n",
|
|
|
|
|
" <th>minute</th>\n",
|
|
|
|
|
" <th>hour</th>\n",
|
|
|
|
|
" <th>day</th>\n",
|
|
|
|
|
" <th>week_day</th>\n",
|
|
|
|
|
" <th>mouth</th>\n",
|
|
|
|
|
" <th>month</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
@ -2470,14 +2470,14 @@
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width link_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width links_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" vacation minute hour day week_day mouth \n",
|
|
|
|
|
" vacation minute hour day week_day month \n",
|
|
|
|
|
"0 0.0 0 6 1 3 3 \n",
|
|
|
|
|
"1 0.0 2 6 1 3 3 \n",
|
|
|
|
|
"2 0.0 4 6 1 3 3 \n",
|
|
|
|
@ -2485,7 +2485,7 @@
|
|
|
|
|
"4 0.0 8 6 1 3 3 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 41,
|
|
|
|
|
"execution_count": 32,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -2502,13 +2502,13 @@
|
|
|
|
|
"df['hour'] = df['time_interval_begin'].dt.hour\n",
|
|
|
|
|
"df['day'] = df['time_interval_begin'].dt.day\n",
|
|
|
|
|
"df['week_day'] = df['time_interval_begin'].map(lambda x: x.weekday() + 1)\n",
|
|
|
|
|
"df['mouth'] = df['time_interval_begin'].dt.month\n",
|
|
|
|
|
"df['month'] = df['time_interval_begin'].dt.month\n",
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 42,
|
|
|
|
|
"execution_count": 33,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
@ -2519,7 +2519,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 43,
|
|
|
|
|
"execution_count": 34,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2552,14 +2552,14 @@
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>width</th>\n",
|
|
|
|
|
" <th>link_num</th>\n",
|
|
|
|
|
" <th>links_num</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" <th>vacation</th>\n",
|
|
|
|
|
" <th>minute</th>\n",
|
|
|
|
|
" <th>hour</th>\n",
|
|
|
|
|
" <th>day</th>\n",
|
|
|
|
|
" <th>week_day</th>\n",
|
|
|
|
|
" <th>mouth</th>\n",
|
|
|
|
|
" <th>month</th>\n",
|
|
|
|
|
" <th>link_ID_en</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
@ -2681,14 +2681,14 @@
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width link_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width links_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" vacation minute hour day week_day mouth link_ID_en \n",
|
|
|
|
|
" vacation minute hour day week_day month link_ID_en \n",
|
|
|
|
|
"0 0.0 0 6 1 3 3 0.000138 \n",
|
|
|
|
|
"1 0.0 2 6 1 3 3 0.000138 \n",
|
|
|
|
|
"2 0.0 4 6 1 3 3 0.000138 \n",
|
|
|
|
@ -2696,7 +2696,7 @@
|
|
|
|
|
"4 0.0 8 6 1 3 3 0.000138 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 43,
|
|
|
|
|
"execution_count": 34,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -2708,7 +2708,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 44,
|
|
|
|
|
"execution_count": 35,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -2741,14 +2741,14 @@
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>width</th>\n",
|
|
|
|
|
" <th>link_num</th>\n",
|
|
|
|
|
" <th>links_num</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" <th>vacation</th>\n",
|
|
|
|
|
" <th>minute</th>\n",
|
|
|
|
|
" <th>hour</th>\n",
|
|
|
|
|
" <th>day</th>\n",
|
|
|
|
|
" <th>week_day</th>\n",
|
|
|
|
|
" <th>mouth</th>\n",
|
|
|
|
|
" <th>month</th>\n",
|
|
|
|
|
" <th>link_ID_en</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
@ -2870,14 +2870,14 @@
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width link_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width links_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" vacation minute hour day week_day mouth link_ID_en \n",
|
|
|
|
|
" vacation minute hour day week_day month link_ID_en \n",
|
|
|
|
|
"0 0.0 0 6 1 3 3 75 \n",
|
|
|
|
|
"1 0.0 2 6 1 3 3 75 \n",
|
|
|
|
|
"2 0.0 4 6 1 3 3 75 \n",
|
|
|
|
@ -2885,7 +2885,7 @@
|
|
|
|
|
"4 0.0 8 6 1 3 3 75 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 44,
|
|
|
|
|
"execution_count": 35,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -2897,12 +2897,551 @@
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"标准化"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 36,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def std(group):\n",
|
|
|
|
|
" group['travel_time_std'] = np.std(group['travel_time'])\n",
|
|
|
|
|
" return group"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 37,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>link_ID</th>\n",
|
|
|
|
|
" <th>time_interval_begin</th>\n",
|
|
|
|
|
" <th>date</th>\n",
|
|
|
|
|
" <th>travel_time</th>\n",
|
|
|
|
|
" <th>travel_time2</th>\n",
|
|
|
|
|
" <th>date_trend</th>\n",
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>width</th>\n",
|
|
|
|
|
" <th>links_num</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" <th>vacation</th>\n",
|
|
|
|
|
" <th>minute</th>\n",
|
|
|
|
|
" <th>hour</th>\n",
|
|
|
|
|
" <th>day</th>\n",
|
|
|
|
|
" <th>week_day</th>\n",
|
|
|
|
|
" <th>month</th>\n",
|
|
|
|
|
" <th>link_ID_en</th>\n",
|
|
|
|
|
" <th>travel_time_std</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:00:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.252121</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>75</td>\n",
|
|
|
|
|
" <td>0.223232</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:02:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.246743</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>75</td>\n",
|
|
|
|
|
" <td>0.223232</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:04:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.241428</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>75</td>\n",
|
|
|
|
|
" <td>0.223232</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:06:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.236176</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>75</td>\n",
|
|
|
|
|
" <td>0.223232</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:08:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.230986</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>75</td>\n",
|
|
|
|
|
" <td>0.223232</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" link_ID time_interval_begin date travel_time \\\n",
|
|
|
|
|
"0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
|
|
|
|
|
"1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
|
|
|
|
|
"2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length width links_num area \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 3 2 144 \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 3 2 144 \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 3 2 144 \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 3 2 144 \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 3 2 144 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" vacation minute hour day week_day month link_ID_en travel_time_std \n",
|
|
|
|
|
"0 0.0 0 6 1 3 3 75 0.223232 \n",
|
|
|
|
|
"1 0.0 2 6 1 3 3 75 0.223232 \n",
|
|
|
|
|
"2 0.0 4 6 1 3 3 75 0.223232 \n",
|
|
|
|
|
"3 0.0 6 6 1 3 3 75 0.223232 \n",
|
|
|
|
|
"4 0.0 8 6 1 3 3 75 0.223232 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 37,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df = df.groupby('link_ID').apply(std)\n",
|
|
|
|
|
"df['travel_time'] = df['travel_time'] / df['travel_time_std']\n",
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"缺失时间预测"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 38,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"params = {\n",
|
|
|
|
|
" 'learning_rate':0.2,\n",
|
|
|
|
|
" 'n_estimators':30,\n",
|
|
|
|
|
" 'subsample':0.8,\n",
|
|
|
|
|
" 'colsample_bytree':0.6,\n",
|
|
|
|
|
" 'max_depth':10,\n",
|
|
|
|
|
" 'min_child_weight':1,\n",
|
|
|
|
|
" 'reg_alpha':0,\n",
|
|
|
|
|
" 'gamma':0\n",
|
|
|
|
|
"}"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 39,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>link_ID</th>\n",
|
|
|
|
|
" <th>time_interval_begin</th>\n",
|
|
|
|
|
" <th>date</th>\n",
|
|
|
|
|
" <th>travel_time</th>\n",
|
|
|
|
|
" <th>travel_time2</th>\n",
|
|
|
|
|
" <th>date_trend</th>\n",
|
|
|
|
|
" <th>minute_trend</th>\n",
|
|
|
|
|
" <th>length</th>\n",
|
|
|
|
|
" <th>area</th>\n",
|
|
|
|
|
" <th>vacation</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>day_27</th>\n",
|
|
|
|
|
" <th>day_28</th>\n",
|
|
|
|
|
" <th>day_29</th>\n",
|
|
|
|
|
" <th>day_30</th>\n",
|
|
|
|
|
" <th>day_31</th>\n",
|
|
|
|
|
" <th>month_3</th>\n",
|
|
|
|
|
" <th>month_4</th>\n",
|
|
|
|
|
" <th>month_5</th>\n",
|
|
|
|
|
" <th>month_6</th>\n",
|
|
|
|
|
" <th>month_7</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:00:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.252121</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:02:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.246743</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:04:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.241428</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:06:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.236176</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>3377906280028510514</td>\n",
|
|
|
|
|
" <td>2017-03-01 06:08:00</td>\n",
|
|
|
|
|
" <td>2017-03-01</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.960745</td>\n",
|
|
|
|
|
" <td>-0.230986</td>\n",
|
|
|
|
|
" <td>48</td>\n",
|
|
|
|
|
" <td>144</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>5 rows × 103 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" link_ID time_interval_begin date travel_time \\\n",
|
|
|
|
|
"0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
|
|
|
|
|
"1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
|
|
|
|
|
"2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
|
|
|
|
|
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
|
|
|
|
|
"4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" travel_time2 date_trend minute_trend length area vacation ... \\\n",
|
|
|
|
|
"0 NaN 1.960745 -0.252121 48 144 0.0 ... \n",
|
|
|
|
|
"1 NaN 1.960745 -0.246743 48 144 0.0 ... \n",
|
|
|
|
|
"2 NaN 1.960745 -0.241428 48 144 0.0 ... \n",
|
|
|
|
|
"3 NaN 1.960745 -0.236176 48 144 0.0 ... \n",
|
|
|
|
|
"4 NaN 1.960745 -0.230986 48 144 0.0 ... \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" day_27 day_28 day_29 day_30 day_31 month_3 month_4 month_5 month_6 \\\n",
|
|
|
|
|
"0 0 0 0 0 0 1 0 0 0 \n",
|
|
|
|
|
"1 0 0 0 0 0 1 0 0 0 \n",
|
|
|
|
|
"2 0 0 0 0 0 1 0 0 0 \n",
|
|
|
|
|
"3 0 0 0 0 0 1 0 0 0 \n",
|
|
|
|
|
"4 0 0 0 0 0 1 0 0 0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" month_7 \n",
|
|
|
|
|
"0 0 \n",
|
|
|
|
|
"1 0 \n",
|
|
|
|
|
"2 0 \n",
|
|
|
|
|
"3 0 \n",
|
|
|
|
|
"4 0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[5 rows x 103 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 39,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"df = pd.get_dummies(df, columns=['links_num','width','minute','hour',\n",
|
|
|
|
|
" 'week_day','day','month'])\n",
|
|
|
|
|
"df.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"训练的数据train_df为travel_time非空的数据,而数据集test_df为travel_time空的数据"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 40,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"['length', 'area', 'vacation', 'link_ID_en', 'links_num_2', 'links_num_3', 'links_num_4', 'links_num_5', 'width_3', 'width_6', 'width_9', 'width_12', 'width_15', 'minute_0', 'minute_2', 'minute_4', 'minute_6', 'minute_8', 'minute_10', 'minute_12', 'minute_14', 'minute_16', 'minute_18', 'minute_20', 'minute_22', 'minute_24', 'minute_26', 'minute_28', 'minute_30', 'minute_32', 'minute_34', 'minute_36', 'minute_38', 'minute_40', 'minute_42', 'minute_44', 'minute_46', 'minute_48', 'minute_50', 'minute_52', 'minute_54', 'minute_56', 'minute_58', 'hour_6', 'hour_7', 'hour_8', 'hour_13', 'hour_14', 'hour_15', 'hour_16', 'hour_17', 'hour_18', 'week_day_1', 'week_day_2', 'week_day_3', 'week_day_4', 'week_day_5', 'week_day_6', 'week_day_7', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6', 'day_7', 'day_8', 'day_9', 'day_10', 'day_11', 'day_12', 'day_13', 'day_14', 'day_15', 'day_16', 'day_17', 'day_18', 'day_19', 'day_20', 'day_21', 'day_22', 'day_23', 'day_24', 'day_25', 'day_26', 'day_27', 'day_28', 'day_29', 'day_30', 'day_31', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7']\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"feature = df.columns.values.tolist()\n",
|
|
|
|
|
"train_feature = [x for x in feature if \n",
|
|
|
|
|
" x not in ['link_ID', 'time_interval_begin', 'travel_time', 'date',\n",
|
|
|
|
|
" 'travel_time2', 'minute_trend', 'travel_time_std', 'date_trend']]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"train_df = df.loc[~df['travel_time'].isnull()] # 获取非空的值,~是非空意思\n",
|
|
|
|
|
"test_df = df.loc[df['travel_time2'].isnull()].copy()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(train_feature)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 41,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"(3165426, 103)\n",
|
|
|
|
|
"(1883574, 103)\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"print(train_df.shape)\n",
|
|
|
|
|
"print(test_df.shape)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"训练数据切分"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
|
|
|
"X = train_df[train_feature].values\n",
|
|
|
|
|
"y = train_df['travel_time'].values\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"eval_set = [(X_test, y_test)]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"训练回归模型来预测缺失值"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
"source": [
|
|
|
|
|
"regressor = xgb.XGBRegressor()"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|