diff --git a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/道路通行时间预测-checkpoint.ipynb b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/道路通行时间预测-checkpoint.ipynb
index af3985e..213c66e 100644
--- a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/道路通行时间预测-checkpoint.ipynb
+++ b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/道路通行时间预测-checkpoint.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -18,7 +18,7 @@
"from scipy.interpolate import UnivariateSpline\n",
"from sklearn import linear_model\n",
"import xgboost as xgb\n",
- "# from ultis import *"
+ "from sklearn.utils import *"
]
},
{
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -116,7 +116,7 @@
"4 10.4 "
]
},
- "execution_count": 6,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -144,7 +144,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -223,7 +223,7 @@
"4 4377906284422600514 55 12 1"
]
},
- "execution_count": 8,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -249,7 +249,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -322,7 +322,7 @@
"4 4377906284422600514 2 1"
]
},
- "execution_count": 11,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -339,7 +339,8 @@
"### 任务:预测未来一个月平均通行结果,每两分钟一次\n",
"回归任务\n",
"\n",
- "构建时间序列,基于前几天或者前几十天的数据预测"
+ "构建时间序列,基于前几天或者前几十天的数据预测\n",
+ "https://tianchi.aliyun.com/competition/entrance/231598/information"
]
},
{
@@ -352,7 +353,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -438,7 +439,7 @@
"4 10.4 "
]
},
- "execution_count": 12,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -449,7 +450,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -541,7 +542,7 @@
"4 10.4 2017-05-06 10:52:00 "
]
},
- "execution_count": 13,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -563,7 +564,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -580,7 +581,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -594,7 +595,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -673,7 +674,7 @@
"4 3377906287934510514 2017-05-06 2.251292 2017-05-06 10:52:00"
]
},
- "execution_count": 18,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -686,7 +687,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -696,7 +697,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -713,7 +714,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -792,7 +793,7 @@
"4 4377906283759500514 2017-05-06 2.140066 2017-05-06 13:24:00"
]
},
- "execution_count": 25,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -811,7 +812,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -890,7 +891,7 @@
"4 4377906284422600514 55 12 1"
]
},
- "execution_count": 26,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -901,7 +902,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -913,7 +914,7 @@
" dtype='datetime64[ns]', freq='2T')"
]
},
- "execution_count": 27,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -925,7 +926,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -992,7 +993,7 @@
"4 4377906289869500514 2017-03-01 00:08:00"
]
},
- "execution_count": 29,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -1007,7 +1008,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -1086,7 +1087,7 @@
"4 4377906289869500514 2017-03-01 00:08:00 NaN NaN"
]
},
- "execution_count": 32,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1099,7 +1100,7 @@
},
{
"cell_type": "code",
- "execution_count": 41,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -1117,7 +1118,7 @@
},
{
"cell_type": "code",
- "execution_count": 42,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -1196,7 +1197,7 @@
"184 4377906289869500514 2017-03-01 06:08:00 2017-03-01 2.174752"
]
},
- "execution_count": 42,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1207,7 +1208,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -1224,7 +1225,7 @@
},
{
"cell_type": "code",
- "execution_count": 44,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -1316,7 +1317,7 @@
"184 2.174752 "
]
},
- "execution_count": 44,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1336,7 +1337,7 @@
},
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -1361,7 +1362,7 @@
},
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1459,7 +1460,7 @@
"184 2.174752 2017-03-01-06 "
]
},
- "execution_count": 46,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -1482,7 +1483,7 @@
},
{
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1601,7 +1602,7 @@
" 4 NaN NaN 2017-03-01-06 1.960745 "
]
},
- "execution_count": 47,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1620,7 +1621,7 @@
},
{
"cell_type": "code",
- "execution_count": 48,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1718,7 +1719,7 @@
"4 NaN 1.960745 "
]
},
- "execution_count": 48,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1731,6 +1732,1171 @@
"df.head()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "日变化量(分钟)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def minute_trend(group):\n",
+ " tmp = group.groupby('hour_minute').mean().reset_index()\n",
+ " #s的值越小,对数据的拟合越好,但是存在过拟合风险\n",
+ " spl = UnivariateSpline(tmp.index, tmp['travel_time'].values, s=0.5)\n",
+ " tmp['minute_trend'] = spl(tmp.index)\n",
+ " group = pd.merge(group, tmp[['minute_trend', 'hour_minute']], on='hour_minute', how='left')\n",
+ " return group"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " hour_minute | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-02 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-04 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-06 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-08 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend hour_minute \n",
+ "0 NaN 1.960745 06-00 \n",
+ "1 NaN 1.960745 06-02 \n",
+ "2 NaN 1.960745 06-04 \n",
+ "3 NaN 1.960745 06-06 \n",
+ "4 NaN 1.960745 06-08 "
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['hour_minute'] = df.time_interval_begin.map(lambda x: x.strftime('%H-%M'))\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " hour_minute | \n",
+ " minute_trend | \n",
+ "
\n",
+ " \n",
+ " link_ID | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3377906280028510514 | \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-00 | \n",
+ " -0.252121 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-02 | \n",
+ " -0.246743 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-04 | \n",
+ " -0.241428 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-06 | \n",
+ " -0.236176 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " 06-08 | \n",
+ " -0.230986 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date \\\n",
+ "link_ID \n",
+ "3377906280028510514 0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 \n",
+ " 1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 \n",
+ " 2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 \n",
+ " 3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 \n",
+ " 4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 \n",
+ "\n",
+ " travel_time travel_time2 date_trend hour_minute \\\n",
+ "link_ID \n",
+ "3377906280028510514 0 NaN NaN 1.960745 06-00 \n",
+ " 1 NaN NaN 1.960745 06-02 \n",
+ " 2 NaN NaN 1.960745 06-04 \n",
+ " 3 NaN NaN 1.960745 06-06 \n",
+ " 4 NaN NaN 1.960745 06-08 \n",
+ "\n",
+ " minute_trend \n",
+ "link_ID \n",
+ "3377906280028510514 0 -0.252121 \n",
+ " 1 -0.246743 \n",
+ " 2 -0.241428 \n",
+ " 3 -0.236176 \n",
+ " 4 -0.230986 "
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=df.groupby('link_ID').apply(minute_trend)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "与季节残差一样,回归得到的值保存在 df['minute_trend'] 里,因此现在的 travel_time 再次更新为 df['travel_time'] = df['travel_time'] - df['minute_trend']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = df.drop(['hour_minute', 'link_ID'], axis=1)\n",
+ "df = df.reset_index()\n",
+ "df = df.drop('level_1',axis=1)\n",
+ "df['travel_time'] = df['travel_time'] - df['minute_trend']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend \n",
+ "0 NaN 1.960745 -0.252121 \n",
+ "1 NaN 1.960745 -0.246743 \n",
+ "2 NaN 1.960745 -0.241428 \n",
+ "3 NaN 1.960745 -0.236176 \n",
+ "4 NaN 1.960745 -0.230986 "
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "link_infos = pd.read_csv('gy_contest_link_info.txt',delimiter=';',dtype={'link_ID':object})\n",
+ "link_tops = pd.read_csv('gy_contest_link_top_update.txt',delimiter=',',dtype={'link_ID':object})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "大致的走势已经由 date_trend 和 minute_trend 决定了,剩下的就是建模 travel_time 如何围绕这两个趋势上下波动\n",
+ "\n",
+ "选择训练特征:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ " length | \n",
+ " width | \n",
+ " link_num | \n",
+ " area | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend length width link_num area \n",
+ "0 NaN 1.960745 -0.252121 48 3 2 144 \n",
+ "1 NaN 1.960745 -0.246743 48 3 2 144 \n",
+ "2 NaN 1.960745 -0.241428 48 3 2 144 \n",
+ "3 NaN 1.960745 -0.236176 48 3 2 144 \n",
+ "4 NaN 1.960745 -0.230986 48 3 2 144 "
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "link_infos = pd.merge(link_infos, link_tops, on=['link_ID'], how='left')\n",
+ "link_infos['link_num'] = link_infos['in_links']+link_infos['out_links']\n",
+ "link_infos['area'] = link_infos['length'] * link_infos['width']\n",
+ "df = pd.merge(df, link_infos[['link_ID','length','width', 'link_num', 'area']], on=['link_ID'], how='left')\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ " length | \n",
+ " width | \n",
+ " link_num | \n",
+ " area | \n",
+ " vacation | \n",
+ " minute | \n",
+ " hour | \n",
+ " day | \n",
+ " week_day | \n",
+ " mouth | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend length width link_num area \\\n",
+ "0 NaN 1.960745 -0.252121 48 3 2 144 \n",
+ "1 NaN 1.960745 -0.246743 48 3 2 144 \n",
+ "2 NaN 1.960745 -0.241428 48 3 2 144 \n",
+ "3 NaN 1.960745 -0.236176 48 3 2 144 \n",
+ "4 NaN 1.960745 -0.230986 48 3 2 144 \n",
+ "\n",
+ " vacation minute hour day week_day mouth \n",
+ "0 0.0 0 6 1 3 3 \n",
+ "1 0.0 2 6 1 3 3 \n",
+ "2 0.0 4 6 1 3 3 \n",
+ "3 0.0 6 6 1 3 3 \n",
+ "4 0.0 8 6 1 3 3 "
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#时间相关特征\n",
+ "df.loc[df['date'].isin(['2017-04-02','2017-04-03','2017-04-04','2017-04-29','2017-04-30',\n",
+ " '2017-05-01','2017-05-28','2017-05-29','2017-05-30']),'vacation']=1\n",
+ "\n",
+ "df.loc[~df['date'].isin(['2017-04-02','2017-04-03','2017-04-04','2017-04-29','2017-04-30',\n",
+ " '2017-05-01','2017-05-28','2017-05-29','2017-05-30']),'vacation']=0\n",
+ "\n",
+ "df['minute'] = df['time_interval_begin'].dt.minute\n",
+ "df['hour'] = df['time_interval_begin'].dt.hour\n",
+ "df['day'] = df['time_interval_begin'].dt.day\n",
+ "df['week_day'] = df['time_interval_begin'].map(lambda x: x.weekday() + 1)\n",
+ "df['mouth'] = df['time_interval_begin'].dt.month\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def mean_time(group):\n",
+ " group['link_ID_en'] = group['travel_time'].mean()\n",
+ " return group"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ " length | \n",
+ " width | \n",
+ " link_num | \n",
+ " area | \n",
+ " vacation | \n",
+ " minute | \n",
+ " hour | \n",
+ " day | \n",
+ " week_day | \n",
+ " mouth | \n",
+ " link_ID_en | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0.000138 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0.000138 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0.000138 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0.000138 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 0.000138 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend length width link_num area \\\n",
+ "0 NaN 1.960745 -0.252121 48 3 2 144 \n",
+ "1 NaN 1.960745 -0.246743 48 3 2 144 \n",
+ "2 NaN 1.960745 -0.241428 48 3 2 144 \n",
+ "3 NaN 1.960745 -0.236176 48 3 2 144 \n",
+ "4 NaN 1.960745 -0.230986 48 3 2 144 \n",
+ "\n",
+ " vacation minute hour day week_day mouth link_ID_en \n",
+ "0 0.0 0 6 1 3 3 0.000138 \n",
+ "1 0.0 2 6 1 3 3 0.000138 \n",
+ "2 0.0 4 6 1 3 3 0.000138 \n",
+ "3 0.0 6 6 1 3 3 0.000138 \n",
+ "4 0.0 8 6 1 3 3 0.000138 "
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = df.groupby('link_ID').apply(mean_time)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ " length | \n",
+ " width | \n",
+ " link_num | \n",
+ " area | \n",
+ " vacation | \n",
+ " minute | \n",
+ " hour | \n",
+ " day | \n",
+ " week_day | \n",
+ " mouth | \n",
+ " link_ID_en | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend length width link_num area \\\n",
+ "0 NaN 1.960745 -0.252121 48 3 2 144 \n",
+ "1 NaN 1.960745 -0.246743 48 3 2 144 \n",
+ "2 NaN 1.960745 -0.241428 48 3 2 144 \n",
+ "3 NaN 1.960745 -0.236176 48 3 2 144 \n",
+ "4 NaN 1.960745 -0.230986 48 3 2 144 \n",
+ "\n",
+ " vacation minute hour day week_day mouth link_ID_en \n",
+ "0 0.0 0 6 1 3 3 75 \n",
+ "1 0.0 2 6 1 3 3 75 \n",
+ "2 0.0 4 6 1 3 3 75 \n",
+ "3 0.0 6 6 1 3 3 75 \n",
+ "4 0.0 8 6 1 3 3 75 "
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 通行时间长的编号大\n",
+ "sorted_link = np.sort(df['link_ID_en'].unique())\n",
+ "df['link_ID_en'] = df['link_ID_en'].map(lambda x: np.argmin(x >= sorted_link))\n",
+ "df.head()"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb
index 9cafb82..213c66e 100644
--- a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb
+++ b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb
@@ -2708,9 +2708,188 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 44,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " link_ID | \n",
+ " time_interval_begin | \n",
+ " date | \n",
+ " travel_time | \n",
+ " travel_time2 | \n",
+ " date_trend | \n",
+ " minute_trend | \n",
+ " length | \n",
+ " width | \n",
+ " link_num | \n",
+ " area | \n",
+ " vacation | \n",
+ " minute | \n",
+ " hour | \n",
+ " day | \n",
+ " week_day | \n",
+ " mouth | \n",
+ " link_ID_en | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:00:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.252121 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:02:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.246743 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:04:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.241428 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:06:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.236176 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 3377906280028510514 | \n",
+ " 2017-03-01 06:08:00 | \n",
+ " 2017-03-01 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 1.960745 | \n",
+ " -0.230986 | \n",
+ " 48 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 144 | \n",
+ " 0.0 | \n",
+ " 8 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " link_ID time_interval_begin date travel_time \\\n",
+ "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n",
+ "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n",
+ "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n",
+ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n",
+ "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n",
+ "\n",
+ " travel_time2 date_trend minute_trend length width link_num area \\\n",
+ "0 NaN 1.960745 -0.252121 48 3 2 144 \n",
+ "1 NaN 1.960745 -0.246743 48 3 2 144 \n",
+ "2 NaN 1.960745 -0.241428 48 3 2 144 \n",
+ "3 NaN 1.960745 -0.236176 48 3 2 144 \n",
+ "4 NaN 1.960745 -0.230986 48 3 2 144 \n",
+ "\n",
+ " vacation minute hour day week_day mouth link_ID_en \n",
+ "0 0.0 0 6 1 3 3 75 \n",
+ "1 0.0 2 6 1 3 3 75 \n",
+ "2 0.0 4 6 1 3 3 75 \n",
+ "3 0.0 6 6 1 3 3 75 \n",
+ "4 0.0 8 6 1 3 3 75 "
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# 通行时间长的编号大\n",
"sorted_link = np.sort(df['link_ID_en'].unique())\n",