From 4b77716e1ccd517834bcf6186cc920ade9f0b238 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Wed, 16 Dec 2020 21:00:23 +0800 Subject: [PATCH] =?UTF-8?q?Update=20=E5=BB=BA=E6=A8=A1=E9=A2=84=E6=B5=8B.i?= =?UTF-8?q?pynb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../建模预测.ipynb | 591 ++++++++++++++++-- 1 file changed, 548 insertions(+), 43 deletions(-) diff --git a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/建模预测.ipynb b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/建模预测.ipynb index 9295881..dc38cc5 100644 --- a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/建模预测.ipynb +++ b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/建模预测.ipynb @@ -2,15 +2,25 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\Anaconda3\\lib\\site-packages\\sklearn\\externals\\joblib\\__init__.py:15: DeprecationWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.\n", + " warnings.warn(msg, category=DeprecationWarning)\n" + ] + } + ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from scipy.interpolate import UnivariateSpline\n", "from sklearn import linear_model\n", "import xgboost as xgb\n", + "from sklearn.externals import joblib\n", "from sklearn.utils import *" ] }, @@ -27,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -55,7 +65,7 @@ " date\n", " time_interval_begin\n", " travel_time\n", - " imputationl\n", + " imputation1\n", " lagging1\n", " lagging2\n", " lagging3\n", @@ -290,7 +300,7 @@ "" ], "text/plain": [ - " link_ID date time_interval_begin travel_time imputationl lagging1 lagging2 lagging3 lagging4 lagging5 length area vacation minute_series day_of_week day_of_week_en hour_en week_hour_1.0,1.0 week_hour_1.0,2.0 week_hour_1.0,3.0 week_hour_2.0,1.0 week_hour_2.0,2.0 week_hour_2.0,3.0 week_hour_3.0,1.0 week_hour_3.0,2.0 week_hour_3.0,3.0 links_num_2 links_num_3 links_num_4 links_num_5 width_3 width_6 width_9 width_12 width_15 link_ID_en\n", + " link_ID date time_interval_begin travel_time imputation1 lagging1 lagging2 lagging3 lagging4 lagging5 length area vacation minute_series day_of_week day_of_week_en hour_en week_hour_1.0,1.0 week_hour_1.0,2.0 week_hour_1.0,3.0 week_hour_2.0,1.0 week_hour_2.0,2.0 week_hour_2.0,3.0 week_hour_3.0,1.0 week_hour_3.0,2.0 week_hour_3.0,3.0 links_num_2 links_num_3 links_num_4 links_num_5 width_3 width_6 width_9 width_12 width_15 link_ID_en\n", "0 3377906280028510514 2017-03-01 2017-03-01 06:00:00 1.659311 True NaN NaN NaN NaN NaN 48 144 0.0 0.0 3 1.0 1.0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 47\n", "1 3377906280028510514 2017-03-01 2017-03-01 06:02:00 1.664941 True 1.659311 NaN NaN NaN NaN 48 144 0.0 2.0 3 1.0 1.0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 47\n", "2 3377906280028510514 2017-03-01 2017-03-01 06:04:00 1.671675 True 1.664941 1.659311 NaN NaN NaN 48 144 0.0 4.0 3 1.0 1.0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 47\n", @@ -298,20 +308,20 @@ "4 3377906280028510514 2017-03-01 2017-03-01 06:08:00 1.682314 True 1.676886 1.671675 1.664941 1.659311 NaN 48 144 0.0 8.0 3 1.0 1.0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 47" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 读取处理好的特征数据\n", - "df = pd.read_csv('com_trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n", + "df = pd.read_csv('trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -320,7 +330,7 @@ "['lagging5', 'lagging4', 'lagging3', 'lagging2', 'lagging1']" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -334,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -348,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -357,14 +367,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['length', 'vacation', 'day_of_week_en', 'week_hour_1.0,1.0', 'week_hour_1.0,2.0', 'week_hour_1.0,3.0', 'week_hour_2.0,1.0', 'week_hour_2.0,2.0', 'week_hour_2.0,3.0', 'week_hour_3.0,1.0', 'week_hour_3.0,2.0', 'week_hour_3.0,3.0', 'links_num_2', 'links_num_3', 'links_num_4', 'links_num_5', 'width_3', 'width_6', 'width_9', 'width_12', 'width_15', 'link_ID_en', 'lagging5', 'lagging4', 'lagging3', 'lagging2', 'lagging1']\n" + "['imputation1', 'length', 'vacation', 'day_of_week_en', 'week_hour_1.0,1.0', 'week_hour_1.0,2.0', 'week_hour_1.0,3.0', 'week_hour_2.0,1.0', 'week_hour_2.0,2.0', 'week_hour_2.0,3.0', 'week_hour_3.0,1.0', 'week_hour_3.0,2.0', 'week_hour_3.0,3.0', 'links_num_2', 'links_num_3', 'links_num_4', 'links_num_5', 'width_3', 'width_6', 'width_9', 'width_12', 'width_15', 'link_ID_en', 'lagging5', 'lagging4', 'lagging3', 'lagging2', 'lagging1']\n" ] } ], @@ -385,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -403,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -413,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -440,6 +450,9 @@ " batch = np.array(bucket[time_series], dtype=float)\n", " y = batch[:,-1]\n", " batch = np.delete(batch, -1, axis=1)\n", + " print(\"====================================\")\n", + " print(batch.shape, last.shape, type(time_series))\n", + " print(\"====================================\")\n", " batch = np.concatenate((batch, last), axis=1)\n", " y_pre = regressor.predict(batch)\n", " last = np.delete(last, 0, axis=1)\n", @@ -487,22 +500,22 @@ " lambda x: '[' + str(x)+','+str(x+pd.DateOffset(minutes=2))+')')\n", " test_df.time_interval = test_df.time_interval.astype(object)\n", " if i < 7:\n", - " test_df[['link_ID','date','time_interval','predicted']].to_csv(file1,mode='a',\n", + " test_df[['link_ID','date','time_interval','prediction']].to_csv(file1,mode='a',\n", " header=False,\n", " index=False,\n", " sep=';')\n", " elif (7 <= i) and (i < 14):\n", - " test_df[['link_ID','date','time_interval','predicted']].to_csv(file2,mode='a',\n", + " test_df[['link_ID','date','time_interval','prediction']].to_csv(file2,mode='a',\n", " header=False,\n", " index=False,\n", " sep=';')\n", " elif (14 <= i) and (i < 22):\n", - " test_df[['link_ID','date','time_interval','predicted']].to_csv(file1,mode='a',\n", + " test_df[['link_ID','date','time_interval','prediction']].to_csv(file1,mode='a',\n", " header=False,\n", " index=False,\n", " sep=';')\n", " else:\n", - " test_df[['link_ID','date','time_interval','predicted']].to_csv(file4,mode='a',\n", + " test_df[['link_ID','date','time_interval','prediction']].to_csv(file4,mode='a',\n", " header=False,\n", " index=False,\n", " sep=';')" @@ -517,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -548,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -620,32 +633,406 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[22:33:32] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "99 0.234149 0.08976782525031814\n", - "[22:38:24] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "99 0.216157 0.22570086184516117\n", - "[22:44:51] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" + "[16:02:58] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "99 0.231729 0.09787323564628972\n", + "[16:08:54] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "====================================\n", + "(9900, 23) (9900, 5) \n", + "====================================\n", + "99 0.211948 0.22588986922596394\n", + "[16:14:56] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n" ] }, { - "ename": "ValueError", - "evalue": "all the input array dimensions except for the concatenation axis must match exactly", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mbest\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mparams\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mgrid\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mbest\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(df, params, best, vis)\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0mtrain4\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 33\u001b[0m train5]),train3,\n\u001b[1;32m---> 34\u001b[1;33m params) \n\u001b[0m\u001b[0;32m 35\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbest_iteration3\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mbest_score3\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mloss3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m\u001b[0m in \u001b[0;36mfit_evaluate\u001b[1;34m(df, df_test, params)\u001b[0m\n\u001b[0;32m 21\u001b[0m reg_alpha=params['reg_alpha'])\n\u001b[0;32m 22\u001b[0m \u001b[0mregressor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mearly_stopping_rounds\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0meval_set\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0meval_set\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mregressor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcross_valid\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mregressor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalid_data\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlagging\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlagging\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mregressor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbest_iteration\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mregressor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbest_score\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36mcross_valid\u001b[1;34m(regressor, bucket, lagging)\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbatch\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[0mbatch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 24\u001b[1;33m \u001b[0mbatch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlast\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 25\u001b[0m \u001b[0my_pre\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mregressor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mlast\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlast\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mValueError\u001b[0m: all the input array dimensions except for the concatenation axis must match exactly" + "name": "stdout", + "output_type": "stream", + "text": [ + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "99 0.207832 0.269828138777363\n", + "[16:21:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "====================================\n", + "(9504, 23) (9504, 5) \n", + "====================================\n", + "99 0.205743 0.27878690843594917\n", + "[16:27:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "====================================\n", + "(9108, 23) (9108, 5) \n", + "====================================\n", + "99 0.206546 0.2825731100341743\n", + "{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n", + "best with:{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n" ] } ], @@ -657,7 +1044,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -675,7 +1062,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -709,9 +1096,127 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[17:47:53] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "[0]\tvalidation_0-rmse:2.02747\tvalidation_0-mape:0.867894\n", + "Multiple eval metrics have been passed: 'validation_0-mape' will be used for early stopping.\n", + "\n", + "Will train until validation_0-mape hasn't improved in 10 rounds.\n", + "[1]\tvalidation_0-rmse:1.92734\tvalidation_0-mape:0.850712\n", + "[2]\tvalidation_0-rmse:1.83231\tvalidation_0-mape:0.83309\n", + "[3]\tvalidation_0-rmse:1.74204\tvalidation_0-mape:0.815116\n", + "[4]\tvalidation_0-rmse:1.65635\tvalidation_0-mape:0.796798\n", + "[5]\tvalidation_0-rmse:1.57575\tvalidation_0-mape:0.777818\n", + "[6]\tvalidation_0-rmse:1.49911\tvalidation_0-mape:0.758668\n", + "[7]\tvalidation_0-rmse:1.4258\tvalidation_0-mape:0.739614\n", + "[8]\tvalidation_0-rmse:1.35624\tvalidation_0-mape:0.720407\n", + "[9]\tvalidation_0-rmse:1.29025\tvalidation_0-mape:0.701088\n", + "[10]\tvalidation_0-rmse:1.22764\tvalidation_0-mape:0.681732\n", + "[11]\tvalidation_0-rmse:1.16884\tvalidation_0-mape:0.662053\n", + "[12]\tvalidation_0-rmse:1.11249\tvalidation_0-mape:0.642755\n", + "[13]\tvalidation_0-rmse:1.05955\tvalidation_0-mape:0.62329\n", + "[14]\tvalidation_0-rmse:1.00937\tvalidation_0-mape:0.603984\n", + "[15]\tvalidation_0-rmse:0.961824\tvalidation_0-mape:0.584883\n", + "[16]\tvalidation_0-rmse:0.916274\tvalidation_0-mape:0.56632\n", + "[17]\tvalidation_0-rmse:0.873956\tvalidation_0-mape:0.547599\n", + "[18]\tvalidation_0-rmse:0.833681\tvalidation_0-mape:0.529301\n", + "[19]\tvalidation_0-rmse:0.7949\tvalidation_0-mape:0.511751\n", + "[20]\tvalidation_0-rmse:0.758183\tvalidation_0-mape:0.494576\n", + "[21]\tvalidation_0-rmse:0.724009\tvalidation_0-mape:0.477586\n", + "[22]\tvalidation_0-rmse:0.69164\tvalidation_0-mape:0.46109\n", + "[23]\tvalidation_0-rmse:0.660537\tvalidation_0-mape:0.445265\n", + "[24]\tvalidation_0-rmse:0.631783\tvalidation_0-mape:0.429777\n", + "[25]\tvalidation_0-rmse:0.604043\tvalidation_0-mape:0.414922\n", + "[26]\tvalidation_0-rmse:0.5778\tvalidation_0-mape:0.400545\n", + "[27]\tvalidation_0-rmse:0.553042\tvalidation_0-mape:0.386658\n", + "[28]\tvalidation_0-rmse:0.530196\tvalidation_0-mape:0.373128\n", + "[29]\tvalidation_0-rmse:0.508169\tvalidation_0-mape:0.36026\n", + "[30]\tvalidation_0-rmse:0.487437\tvalidation_0-mape:0.3479\n", + "[31]\tvalidation_0-rmse:0.468318\tvalidation_0-mape:0.336023\n", + "[32]\tvalidation_0-rmse:0.450428\tvalidation_0-mape:0.324693\n", + "[33]\tvalidation_0-rmse:0.433156\tvalidation_0-mape:0.31389\n", + "[34]\tvalidation_0-rmse:0.416931\tvalidation_0-mape:0.303565\n", + "[35]\tvalidation_0-rmse:0.401729\tvalidation_0-mape:0.293732\n", + "[36]\tvalidation_0-rmse:0.387835\tvalidation_0-mape:0.284394\n", + "[37]\tvalidation_0-rmse:0.37451\tvalidation_0-mape:0.275516\n", + "[38]\tvalidation_0-rmse:0.362507\tvalidation_0-mape:0.267262\n", + "[39]\tvalidation_0-rmse:0.35083\tvalidation_0-mape:0.259271\n", + "[40]\tvalidation_0-rmse:0.339946\tvalidation_0-mape:0.251728\n", + "[41]\tvalidation_0-rmse:0.330117\tvalidation_0-mape:0.244688\n", + "[42]\tvalidation_0-rmse:0.321113\tvalidation_0-mape:0.238073\n", + "[43]\tvalidation_0-rmse:0.312602\tvalidation_0-mape:0.231857\n", + "[44]\tvalidation_0-rmse:0.304387\tvalidation_0-mape:0.225904\n", + "[45]\tvalidation_0-rmse:0.296781\tvalidation_0-mape:0.220307\n", + "[46]\tvalidation_0-rmse:0.290079\tvalidation_0-mape:0.21522\n", + "[47]\tvalidation_0-rmse:0.283964\tvalidation_0-mape:0.210527\n", + "[48]\tvalidation_0-rmse:0.278207\tvalidation_0-mape:0.206073\n", + "[49]\tvalidation_0-rmse:0.272557\tvalidation_0-mape:0.201743\n", + "[50]\tvalidation_0-rmse:0.267379\tvalidation_0-mape:0.197712\n", + "[51]\tvalidation_0-rmse:0.262584\tvalidation_0-mape:0.193932\n", + "[52]\tvalidation_0-rmse:0.258517\tvalidation_0-mape:0.190628\n", + "[53]\tvalidation_0-rmse:0.254437\tvalidation_0-mape:0.187326\n", + "[54]\tvalidation_0-rmse:0.250997\tvalidation_0-mape:0.184496\n", + "[55]\tvalidation_0-rmse:0.247837\tvalidation_0-mape:0.181864\n", + "[56]\tvalidation_0-rmse:0.244739\tvalidation_0-mape:0.179262\n", + "[57]\tvalidation_0-rmse:0.242088\tvalidation_0-mape:0.176975\n", + "[58]\tvalidation_0-rmse:0.239432\tvalidation_0-mape:0.174694\n", + "[59]\tvalidation_0-rmse:0.236956\tvalidation_0-mape:0.172544\n", + "[60]\tvalidation_0-rmse:0.23472\tvalidation_0-mape:0.170567\n", + "[61]\tvalidation_0-rmse:0.232673\tvalidation_0-mape:0.168749\n", + "[62]\tvalidation_0-rmse:0.230954\tvalidation_0-mape:0.167218\n", + "[63]\tvalidation_0-rmse:0.229382\tvalidation_0-mape:0.165806\n", + "[64]\tvalidation_0-rmse:0.227969\tvalidation_0-mape:0.164547\n", + "[65]\tvalidation_0-rmse:0.226601\tvalidation_0-mape:0.163268\n", + "[66]\tvalidation_0-rmse:0.22546\tvalidation_0-mape:0.162225\n", + "[67]\tvalidation_0-rmse:0.224374\tvalidation_0-mape:0.161249\n", + "[68]\tvalidation_0-rmse:0.223225\tvalidation_0-mape:0.160179\n", + "[69]\tvalidation_0-rmse:0.222167\tvalidation_0-mape:0.159196\n", + "[70]\tvalidation_0-rmse:0.221212\tvalidation_0-mape:0.158295\n", + "[71]\tvalidation_0-rmse:0.220377\tvalidation_0-mape:0.157483\n", + "[72]\tvalidation_0-rmse:0.219618\tvalidation_0-mape:0.156731\n", + "[73]\tvalidation_0-rmse:0.219029\tvalidation_0-mape:0.156166\n", + "[74]\tvalidation_0-rmse:0.218453\tvalidation_0-mape:0.155643\n", + "[75]\tvalidation_0-rmse:0.217805\tvalidation_0-mape:0.154995\n", + "[76]\tvalidation_0-rmse:0.217225\tvalidation_0-mape:0.154441\n", + "[77]\tvalidation_0-rmse:0.216778\tvalidation_0-mape:0.154038\n", + "[78]\tvalidation_0-rmse:0.21637\tvalidation_0-mape:0.153691\n", + "[79]\tvalidation_0-rmse:0.21592\tvalidation_0-mape:0.153251\n", + "[80]\tvalidation_0-rmse:0.215582\tvalidation_0-mape:0.152955\n", + "[81]\tvalidation_0-rmse:0.2153\tvalidation_0-mape:0.152717\n", + "[82]\tvalidation_0-rmse:0.215012\tvalidation_0-mape:0.152489\n", + "[83]\tvalidation_0-rmse:0.214672\tvalidation_0-mape:0.15214\n", + "[84]\tvalidation_0-rmse:0.214429\tvalidation_0-mape:0.151959\n", + "[85]\tvalidation_0-rmse:0.214134\tvalidation_0-mape:0.151676\n", + "[86]\tvalidation_0-rmse:0.213853\tvalidation_0-mape:0.151404\n", + "[87]\tvalidation_0-rmse:0.213573\tvalidation_0-mape:0.151144\n", + "[88]\tvalidation_0-rmse:0.213402\tvalidation_0-mape:0.151023\n", + "[89]\tvalidation_0-rmse:0.213262\tvalidation_0-mape:0.15091\n", + "[90]\tvalidation_0-rmse:0.21304\tvalidation_0-mape:0.150693\n", + "[91]\tvalidation_0-rmse:0.212849\tvalidation_0-mape:0.150517\n", + "[92]\tvalidation_0-rmse:0.212735\tvalidation_0-mape:0.150436\n", + "[93]\tvalidation_0-rmse:0.21258\tvalidation_0-mape:0.150284\n", + "[94]\tvalidation_0-rmse:0.212416\tvalidation_0-mape:0.150137\n", + "[95]\tvalidation_0-rmse:0.212256\tvalidation_0-mape:0.150004\n", + "[96]\tvalidation_0-rmse:0.212172\tvalidation_0-mape:0.149958\n", + "[97]\tvalidation_0-rmse:0.212046\tvalidation_0-mape:0.149827\n", + "[98]\tvalidation_0-rmse:0.211958\tvalidation_0-mape:0.149779\n", + "[99]\tvalidation_0-rmse:0.211834\tvalidation_0-mape:0.149654\n", + "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", + " colsample_bynode=1, colsample_bytree=0.6, gamma=0,\n", + " importance_type='gain', learning_rate=0.05, max_delta_step=0,\n", + " max_depth=7, min_child_weight=1, missing=None, n_estimators=100,\n", + " n_jobs=-1, nthread=None, objective='reg:linear', random_state=0,\n", + " reg_alpha=2, reg_lambda=1, scale_pos_weight=1, seed=None,\n", + " silent=None, subsample=0.6, verbosity=1)\n" + ] + } + ], "source": [ "xgboost_submit(df, submit_params)" ]