Update 建模预测.ipynb

pull/2/head
benjas 5 years ago
parent 31d60c852e
commit aae280bff8

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@ -17,6 +17,7 @@
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from scipy.interpolate import UnivariateSpline\n",
"from sklearn import linear_model\n",
"import xgboost as xgb\n",
@ -315,7 +316,7 @@
],
"source": [
"# 读取处理好的特征数据\n",
"df = pd.read_csv('trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n",
"df = pd.read_csv('data/trainning.txt', delimiter=';',parse_dates=['time_interval_begin'],dtype={'link_ID':object})\n",
"df.head()"
]
},
@ -423,7 +424,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@ -450,9 +451,6 @@
" batch = np.array(bucket[time_series], dtype=float)\n",
" y = batch[:,-1]\n",
" batch = np.delete(batch, -1, axis=1)\n",
" print(\"====================================\")\n",
" print(batch.shape, last.shape, type(time_series))\n",
" print(\"====================================\")\n",
" batch = np.concatenate((batch, last), axis=1)\n",
" y_pre = regressor.predict(batch)\n",
" last = np.delete(last, 0, axis=1)\n",
@ -640,396 +638,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[16:02:58] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"[22:07:01] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"99 0.231729 0.09787323564628972\n",
"[16:08:54] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9900, 23) (9900, 5) <class 'float'>\n",
"====================================\n",
"[22:12:48] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"99 0.211948 0.22588986922596394\n",
"[16:14:56] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"[22:18:32] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"99 0.207832 0.269828138777363\n",
"[16:21:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9504, 23) (9504, 5) <class 'float'>\n",
"====================================\n",
"[22:24:17] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"99 0.205743 0.27878690843594917\n",
"[16:27:05] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"====================================\n",
"(9108, 23) (9108, 5) <class 'float'>\n",
"====================================\n",
"[22:29:46] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"99 0.206546 0.2825731100341743\n",
"{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n",
"best with:{'colsample_bytree': 0.6, 'gamma': 0, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_weight': 1, 'n_estimators': '[99, 99, 99, 99, 99]', 'reg_alpha': 2, 'subsample': 0.6, 'loss_std': 0.06956988861011186, 'loss': '[0.09787323564628972, 0.22588986922596394, 0.269828138777363, 0.27878690843594917, 0.2825731100341743]', 'mean_loss': 0.23099025242394805, 'best_score': '[0.231729, 0.211948, 0.207832, 0.205743, 0.206546]'}\n"
@ -1087,8 +704,16 @@
" reg_alpha=params['reg_alpha'])\n",
" regressor.fit(X_train,y_train,verbose=True,early_stopping_rounds=10,\n",
" eval_metric=mape_ln,eval_set=eval_set)\n",
" try:\n",
" os.mkdir(\"model/\") # 尝试创建相对目录,有则跳过\n",
" except:\n",
" pass\n",
" joblib.dump(regressor, 'model/xgbr.pkl')\n",
" print(regressor)\n",
" try:\n",
" os.mkdir(\"submission/\") # 尝试创建相对目录,有则跳过\n",
" except:\n",
" pass\n",
" submission(train_feature, regressor,df, \n",
" 'submission/xgbrl.txt','submission/xgbr2.txt',\n",
" 'submission/xgbr3.txt','submission/xgbr4.txt')"
@ -1096,14 +721,14 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[17:47:53] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"[22:35:34] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
"[0]\tvalidation_0-rmse:2.02747\tvalidation_0-mape:0.867894\n",
"Multiple eval metrics have been passed: 'validation_0-mape' will be used for early stopping.\n",
"\n",

Loading…
Cancel
Save