diff --git a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/建模预测-checkpoint.ipynb b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/建模预测-checkpoint.ipynb index dc38cc5..79189bf 100644 --- a/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/建模预测-checkpoint.ipynb +++ b/机器学习竞赛实战_优胜解决方案/智慧城市-道路通行时间预测/.ipynb_checkpoints/建模预测-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -17,6 +17,7 @@ "source": [ "import pandas as pd\n", "import numpy as np\n", + "import os\n", "from scipy.interpolate import UnivariateSpline\n", "from sklearn import linear_model\n", "import xgboost as xgb\n", @@ -315,7 +316,7 @@ ], "source": [ "# 读取处理好的特征数据\n", - "df = pd.read_csv('trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n", + "df = pd.read_csv('data/trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n", "df.head()" ] }, @@ -423,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -450,9 +451,6 @@ " batch = np.array(bucket[time_series], dtype=float)\n", " y = batch[:,-1]\n", " batch = np.delete(batch, -1, axis=1)\n", - " print(\"====================================\")\n", - " print(batch.shape, last.shape, type(time_series))\n", - " print(\"====================================\")\n", " batch = np.concatenate((batch, last), axis=1)\n", " y_pre = regressor.predict(batch)\n", " last = np.delete(last, 0, axis=1)\n", @@ -640,396 +638,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "[16:02:58] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", + "[22:07:01] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "99 0.231729 0.09787323564628972\n", - "[16:08:54] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", - "====================================\n", - "(9900, 23) (9900, 5) \n", - "====================================\n", + "[22:12:48] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "99 0.211948 0.22588986922596394\n", - "[16:14:56] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", + "[22:18:32] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "99 0.207832 0.269828138777363\n", - "[16:21:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", - "====================================\n", - "(9504, 23) (9504, 5) \n", - "====================================\n", + "[22:24:17] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "99 0.205743 0.27878690843594917\n", - "[16:27:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", - "====================================\n", - "(9108, 23) (9108, 5) \n", - "====================================\n", + "[22:29:46] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "99 0.206546 0.2825731100341743\n", "{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n", "best with:{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n" @@ -1087,8 +704,16 @@ " reg_alpha=params['reg_alpha'])\n", " regressor.fit(X_train,y_train,verbose=True,early_stopping_rounds=10,\n", " eval_metric=mape_ln,eval_set=eval_set)\n", + " try:\n", + " os.mkdir(\"model/\") # 尝试创建相对目录,有则跳过\n", + " except:\n", + " pass\n", " joblib.dump(regressor, 'model/xgbr.pkl')\n", " print(regressor)\n", + " try:\n", + " os.mkdir(\"submission/\") # 尝试创建相对目录,有则跳过\n", + " except:\n", + " pass\n", " submission(train_feature, regressor,df, \n", " 'submission/xgbrl.txt','submission/xgbr2.txt',\n", " 'submission/xgbr3.txt','submission/xgbr4.txt')" @@ -1096,14 +721,14 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[17:47:53] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", + "[22:35:34] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "[0]\tvalidation_0-rmse:2.02747\tvalidation_0-mape:0.867894\n", "Multiple eval metrics have been passed: 'validation_0-mape' will be used for early stopping.\n", "\n",