From ec67cad8a6d592f071cee81d8ceb4647fd92da20 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Sat, 5 Dec 2020 12:24:13 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E9=81=93=E8=B7=AF=E9=80=9A=E8=A1=8C?= =?UTF-8?q?=E6=97=B6=E9=97=B4=E9=A2=84=E6=B5=8B.ipynb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../道路通行时间预测.ipynb | 653 ++++++++++++++++-- 1 file changed, 596 insertions(+), 57 deletions(-) diff --git a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb index 213c66e..cbc96a1 100644 --- a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb +++ b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/道路通行时间预测.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -1741,7 +1741,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -1756,7 +1756,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1860,7 +1860,7 @@ "4 NaN 1.960745 06-08 " ] }, - "execution_count": 29, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1872,7 +1872,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -2006,7 +2006,7 @@ " 4 -0.230986 " ] }, - "execution_count": 33, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2025,7 +2025,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -2037,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -2141,7 +2141,7 @@ "4 NaN 1.960745 -0.230986 " ] }, - "execution_count": 35, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2152,7 +2152,7 @@ }, { "cell_type": "code", - 
"execution_count": 38, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -2171,7 +2171,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -2204,7 +2204,7 @@ " minute_trend\n", " length\n", " width\n", - " link_num\n", + " links_num\n", " area\n", " \n", " \n", @@ -2291,30 +2291,30 @@ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", "\n", - " travel_time2 date_trend minute_trend length width link_num area \n", - "0 NaN 1.960745 -0.252121 48 3 2 144 \n", - "1 NaN 1.960745 -0.246743 48 3 2 144 \n", - "2 NaN 1.960745 -0.241428 48 3 2 144 \n", - "3 NaN 1.960745 -0.236176 48 3 2 144 \n", - "4 NaN 1.960745 -0.230986 48 3 2 144 " + " travel_time2 date_trend minute_trend length width links_num area \n", + "0 NaN 1.960745 -0.252121 48 3 2 144 \n", + "1 NaN 1.960745 -0.246743 48 3 2 144 \n", + "2 NaN 1.960745 -0.241428 48 3 2 144 \n", + "3 NaN 1.960745 -0.236176 48 3 2 144 \n", + "4 NaN 1.960745 -0.230986 48 3 2 144 " ] }, - "execution_count": 40, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "link_infos = pd.merge(link_infos, link_tops, on=['link_ID'], how='left')\n", - "link_infos['link_num'] = link_infos['in_links']+link_infos['out_links']\n", + "link_infos['links_num'] = link_infos['in_links']+link_infos['out_links']\n", "link_infos['area'] = link_infos['length'] * link_infos['width']\n", - "df = pd.merge(df, link_infos[['link_ID','length','width', 'link_num', 'area']], on=['link_ID'], how='left')\n", + "df = pd.merge(df, link_infos[['link_ID','length','width', 'links_num', 'area']], on=['link_ID'], how='left')\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -2347,14 +2347,14 @@ " minute_trend\n", " length\n", " width\n", - " link_num\n", + " links_num\n", " area\n", " vacation\n", " 
minute\n", " hour\n", " day\n", " week_day\n", - " mouth\n", + " month\n", " \n", " \n", " \n", @@ -2470,14 +2470,14 @@ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", "\n", - " travel_time2 date_trend minute_trend length width link_num area \\\n", - "0 NaN 1.960745 -0.252121 48 3 2 144 \n", - "1 NaN 1.960745 -0.246743 48 3 2 144 \n", - "2 NaN 1.960745 -0.241428 48 3 2 144 \n", - "3 NaN 1.960745 -0.236176 48 3 2 144 \n", - "4 NaN 1.960745 -0.230986 48 3 2 144 \n", + " travel_time2 date_trend minute_trend length width links_num area \\\n", + "0 NaN 1.960745 -0.252121 48 3 2 144 \n", + "1 NaN 1.960745 -0.246743 48 3 2 144 \n", + "2 NaN 1.960745 -0.241428 48 3 2 144 \n", + "3 NaN 1.960745 -0.236176 48 3 2 144 \n", + "4 NaN 1.960745 -0.230986 48 3 2 144 \n", "\n", - " vacation minute hour day week_day mouth \n", + " vacation minute hour day week_day month \n", "0 0.0 0 6 1 3 3 \n", "1 0.0 2 6 1 3 3 \n", "2 0.0 4 6 1 3 3 \n", @@ -2485,7 +2485,7 @@ "4 0.0 8 6 1 3 3 " ] }, - "execution_count": 41, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -2502,13 +2502,13 @@ "df['hour'] = df['time_interval_begin'].dt.hour\n", "df['day'] = df['time_interval_begin'].dt.day\n", "df['week_day'] = df['time_interval_begin'].map(lambda x: x.weekday() + 1)\n", - "df['mouth'] = df['time_interval_begin'].dt.month\n", + "df['month'] = df['time_interval_begin'].dt.month\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2519,7 +2519,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -2552,14 +2552,14 @@ " minute_trend\n", " length\n", " width\n", - " link_num\n", + " links_num\n", " area\n", " vacation\n", " minute\n", " hour\n", " day\n", " week_day\n", - " mouth\n", + " month\n", " link_ID_en\n", " \n", " \n", @@ -2681,14 +2681,14 @@ 
"3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", "\n", - " travel_time2 date_trend minute_trend length width link_num area \\\n", - "0 NaN 1.960745 -0.252121 48 3 2 144 \n", - "1 NaN 1.960745 -0.246743 48 3 2 144 \n", - "2 NaN 1.960745 -0.241428 48 3 2 144 \n", - "3 NaN 1.960745 -0.236176 48 3 2 144 \n", - "4 NaN 1.960745 -0.230986 48 3 2 144 \n", + " travel_time2 date_trend minute_trend length width links_num area \\\n", + "0 NaN 1.960745 -0.252121 48 3 2 144 \n", + "1 NaN 1.960745 -0.246743 48 3 2 144 \n", + "2 NaN 1.960745 -0.241428 48 3 2 144 \n", + "3 NaN 1.960745 -0.236176 48 3 2 144 \n", + "4 NaN 1.960745 -0.230986 48 3 2 144 \n", "\n", - " vacation minute hour day week_day mouth link_ID_en \n", + " vacation minute hour day week_day month link_ID_en \n", "0 0.0 0 6 1 3 3 0.000138 \n", "1 0.0 2 6 1 3 3 0.000138 \n", "2 0.0 4 6 1 3 3 0.000138 \n", @@ -2696,7 +2696,7 @@ "4 0.0 8 6 1 3 3 0.000138 " ] }, - "execution_count": 43, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2708,7 +2708,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -2741,14 +2741,14 @@ " minute_trend\n", " length\n", " width\n", - " link_num\n", + " links_num\n", " area\n", " vacation\n", " minute\n", " hour\n", " day\n", " week_day\n", - " mouth\n", + " month\n", " link_ID_en\n", " \n", " \n", @@ -2870,14 +2870,14 @@ "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", "\n", - " travel_time2 date_trend minute_trend length width link_num area \\\n", - "0 NaN 1.960745 -0.252121 48 3 2 144 \n", - "1 NaN 1.960745 -0.246743 48 3 2 144 \n", - "2 NaN 1.960745 -0.241428 48 3 2 144 \n", - "3 NaN 1.960745 -0.236176 48 3 2 144 \n", - "4 NaN 1.960745 -0.230986 48 3 2 144 \n", + " travel_time2 date_trend minute_trend length width links_num area \\\n", + "0 NaN 
1.960745 -0.252121 48 3 2 144 \n", + "1 NaN 1.960745 -0.246743 48 3 2 144 \n", + "2 NaN 1.960745 -0.241428 48 3 2 144 \n", + "3 NaN 1.960745 -0.236176 48 3 2 144 \n", + "4 NaN 1.960745 -0.230986 48 3 2 144 \n", "\n", - " vacation minute hour day week_day mouth link_ID_en \n", + " vacation minute hour day week_day month link_ID_en \n", "0 0.0 0 6 1 3 3 75 \n", "1 0.0 2 6 1 3 3 75 \n", "2 0.0 4 6 1 3 3 75 \n", @@ -2885,7 +2885,7 @@ "4 0.0 8 6 1 3 3 75 " ] }, - "execution_count": 44, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2897,12 +2897,551 @@ "df.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "标准化" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "def std(group):\n", + " group['travel_time_std'] = np.std(group['travel_time'])\n", + " return group" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
link_IDtime_interval_begindatetravel_timetravel_time2date_trendminute_trendlengthwidthlinks_numareavacationminutehourdayweek_daymonthlink_ID_entravel_time_std
033779062800285105142017-03-01 06:00:002017-03-01NaNNaN1.960745-0.25212148321440.006133750.223232
133779062800285105142017-03-01 06:02:002017-03-01NaNNaN1.960745-0.24674348321440.026133750.223232
233779062800285105142017-03-01 06:04:002017-03-01NaNNaN1.960745-0.24142848321440.046133750.223232
333779062800285105142017-03-01 06:06:002017-03-01NaNNaN1.960745-0.23617648321440.066133750.223232
433779062800285105142017-03-01 06:08:002017-03-01NaNNaN1.960745-0.23098648321440.086133750.223232
\n", + "
" + ], + "text/plain": [ + " link_ID time_interval_begin date travel_time \\\n", + "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n", + "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n", + "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n", + "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", + "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", + "\n", + " travel_time2 date_trend minute_trend length width links_num area \\\n", + "0 NaN 1.960745 -0.252121 48 3 2 144 \n", + "1 NaN 1.960745 -0.246743 48 3 2 144 \n", + "2 NaN 1.960745 -0.241428 48 3 2 144 \n", + "3 NaN 1.960745 -0.236176 48 3 2 144 \n", + "4 NaN 1.960745 -0.230986 48 3 2 144 \n", + "\n", + " vacation minute hour day week_day month link_ID_en travel_time_std \n", + "0 0.0 0 6 1 3 3 75 0.223232 \n", + "1 0.0 2 6 1 3 3 75 0.223232 \n", + "2 0.0 4 6 1 3 3 75 0.223232 \n", + "3 0.0 6 6 1 3 3 75 0.223232 \n", + "4 0.0 8 6 1 3 3 75 0.223232 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.groupby('link_ID').apply(std)\n", + "df['travel_time'] = df['travel_time'] / df['travel_time_std']\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "缺失时间预测" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "params = {\n", + " 'learning_rate':0.2,\n", + " 'n_estimators':30,\n", + " 'subsample':0.8,\n", + " 'colsample_bytree':0.6,\n", + " 'max_depth':10,\n", + " 'min_child_weight':1,\n", + " 'reg_alpha':0,\n", + " 'gamma':0\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
link_IDtime_interval_begindatetravel_timetravel_time2date_trendminute_trendlengthareavacation...day_27day_28day_29day_30day_31month_3month_4month_5month_6month_7
033779062800285105142017-03-01 06:00:002017-03-01NaNNaN1.960745-0.252121481440.0...0000010000
133779062800285105142017-03-01 06:02:002017-03-01NaNNaN1.960745-0.246743481440.0...0000010000
233779062800285105142017-03-01 06:04:002017-03-01NaNNaN1.960745-0.241428481440.0...0000010000
333779062800285105142017-03-01 06:06:002017-03-01NaNNaN1.960745-0.236176481440.0...0000010000
433779062800285105142017-03-01 06:08:002017-03-01NaNNaN1.960745-0.230986481440.0...0000010000
\n", + "

5 rows × 103 columns

\n", + "
" + ], + "text/plain": [ + " link_ID time_interval_begin date travel_time \\\n", + "0 3377906280028510514 2017-03-01 06:00:00 2017-03-01 NaN \n", + "1 3377906280028510514 2017-03-01 06:02:00 2017-03-01 NaN \n", + "2 3377906280028510514 2017-03-01 06:04:00 2017-03-01 NaN \n", + "3 3377906280028510514 2017-03-01 06:06:00 2017-03-01 NaN \n", + "4 3377906280028510514 2017-03-01 06:08:00 2017-03-01 NaN \n", + "\n", + " travel_time2 date_trend minute_trend length area vacation ... \\\n", + "0 NaN 1.960745 -0.252121 48 144 0.0 ... \n", + "1 NaN 1.960745 -0.246743 48 144 0.0 ... \n", + "2 NaN 1.960745 -0.241428 48 144 0.0 ... \n", + "3 NaN 1.960745 -0.236176 48 144 0.0 ... \n", + "4 NaN 1.960745 -0.230986 48 144 0.0 ... \n", + "\n", + " day_27 day_28 day_29 day_30 day_31 month_3 month_4 month_5 month_6 \\\n", + "0 0 0 0 0 0 1 0 0 0 \n", + "1 0 0 0 0 0 1 0 0 0 \n", + "2 0 0 0 0 0 1 0 0 0 \n", + "3 0 0 0 0 0 1 0 0 0 \n", + "4 0 0 0 0 0 1 0 0 0 \n", + "\n", + " month_7 \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "\n", + "[5 rows x 103 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.get_dummies(df, columns=['links_num','width','minute','hour',\n", + " 'week_day','day','month'])\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "训练的数据train_df为travel_time非空的数据,而数据集test_df为travel_time空的数据" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['length', 'area', 'vacation', 'link_ID_en', 'links_num_2', 'links_num_3', 'links_num_4', 'links_num_5', 'width_3', 'width_6', 'width_9', 'width_12', 'width_15', 'minute_0', 'minute_2', 'minute_4', 'minute_6', 'minute_8', 'minute_10', 'minute_12', 'minute_14', 'minute_16', 'minute_18', 'minute_20', 'minute_22', 'minute_24', 'minute_26', 'minute_28', 'minute_30', 'minute_32', 'minute_34', 
'minute_36', 'minute_38', 'minute_40', 'minute_42', 'minute_44', 'minute_46', 'minute_48', 'minute_50', 'minute_52', 'minute_54', 'minute_56', 'minute_58', 'hour_6', 'hour_7', 'hour_8', 'hour_13', 'hour_14', 'hour_15', 'hour_16', 'hour_17', 'hour_18', 'week_day_1', 'week_day_2', 'week_day_3', 'week_day_4', 'week_day_5', 'week_day_6', 'week_day_7', 'day_1', 'day_2', 'day_3', 'day_4', 'day_5', 'day_6', 'day_7', 'day_8', 'day_9', 'day_10', 'day_11', 'day_12', 'day_13', 'day_14', 'day_15', 'day_16', 'day_17', 'day_18', 'day_19', 'day_20', 'day_21', 'day_22', 'day_23', 'day_24', 'day_25', 'day_26', 'day_27', 'day_28', 'day_29', 'day_30', 'day_31', 'month_3', 'month_4', 'month_5', 'month_6', 'month_7']\n" + ] + } + ], + "source": [ + "feature = df.columns.values.tolist()\n", + "train_feature = [x for x in feature if \n", + " x not in ['link_ID', 'time_interval_begin', 'travel_time', 'date',\n", + " 'travel_time2', 'minute_trend', 'travel_time_std', 'date_trend']]\n", + "\n", + "train_df = df.loc[~df['travel_time'].isnull()] # 获取非空的值,~是非空意思\n", + "test_df = df.loc[df['travel_time2'].isnull()].copy()\n", + "\n", + "print(train_feature)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3165426, 103)\n", + "(1883574, 103)\n" + ] + } + ], + "source": [ + "print(train_df.shape)\n", + "print(test_df.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "训练数据切分" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X = train_df[train_feature].values\n", + "y = train_df['travel_time'].values\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)\n", + "\n", + "eval_set = [(X_test, y_test)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"训练回归模型来预测缺失值" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "regressor = xgb.XGBRegressor(**params)  # build the model from the hyper-parameters in the `params` dict instead of library defaults" + ] } ], "metadata": {