@ -6,12 +6,12 @@
"metadata": {},
"source": [
"# 自动机器学习工具\n",
"该notebook将比较市面上的多个AutoML工具, 分别采用两组数据集进行比较, 分别是波士顿房价 (回归)和森林植被类型(多分类)"
"该notebook将比较市面上的多个AutoAI工具, 分别采用两组数据集进行比较, 分别是加利福尼亚住房数据集 (回归)和森林植被类型(多分类)"
]
},
{
"cell_type": "markdown",
"id": "ba41b787 ",
"id": "b3d4d608 ",
"metadata": {},
"source": [
"# optuna一种超参数优化框架\n",
@ -20,7 +20,7 @@
},
{
"cell_type": "markdown",
"id": "c4d7f73 a",
"id": "a2d41e62 ",
"metadata": {},
"source": [
"### 加利福尼亚房价预测任务(回归)"
@ -28,97 +28,631 @@
},
{
"cell_type": "code",
"execution_count": null ,
"id": "1d010375 ",
"execution_count": 7 ,
"id": "f5e0e977 ",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import time\n",
"import gc"
"import gc\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_squared_error\n",
"import lightgbm as lgb # 使用lgb模型"
]
},
{
"cell_type": "code",
"execution_count": 1 ,
"id": "6d01294d ",
"execution_count": 2 ,
"id": "5eda2637 ",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_boston\n",
"# 预处理\n",
"X, y = data['data'], data['target']\n",
"# 由于模型标签需要从0开始, 所以数字需要全部减1\n",
"print('七分类任务,处理前:',np.unique(y))\n",
"print(y)\n",
"ord = OrdinalEncoder()\n",
"y = ord.fit_transform(y.reshape(-1, 1))\n",
"y = y.reshape(-1, )\n",
"print('七分类任务,处理后:',np.unique(y))\n",
"print(y)"
"from sklearn.datasets import fetch_california_housing\n",
"data = fetch_california_housing()\n",
"X, y = data['data'], data['target']"
]
},
{
"cell_type": "code",
"execution_count": 2 ,
"id": "80a90475 ",
"execution_count": 3,
"id": "722beb8e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MedInc</th>\n",
" <th>HouseAge</th>\n",
" <th>AveRooms</th>\n",
" <th>AveBedrms</th>\n",
" <th>Population</th>\n",
" <th>AveOccup</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8.3252</td>\n",
" <td>41.0</td>\n",
" <td>6.984127</td>\n",
" <td>1.02381</td>\n",
" <td>322.0</td>\n",
" <td>2.555556</td>\n",
" <td>37.88</td>\n",
" <td>-122.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8.3014</td>\n",
" <td>21.0</td>\n",
" <td>6.238137</td>\n",
" <td>0.97188</td>\n",
" <td>2401.0</td>\n",
" <td>2.109842</td>\n",
" <td>37.86</td>\n",
" <td>-122.22</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n",
" 4.9800e+00],\n",
" [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n",
" 9.1400e+00],\n",
" [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n",
" 4.0300e+00],\n",
" ...,\n",
" [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
" 5.6400e+00],\n",
" [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n",
" 6.4800e+00],\n",
" [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
" 7.8800e+00]])"
" MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
"0 8.3252 41.0 6.984127 1.02381 322.0 2.555556 37.88 \n",
"1 8.3014 21.0 6.238137 0.97188 2401.0 2.109842 37.86 \n",
"\n",
" Longitude \n",
"0 -122.23 \n",
"1 -122.22 "
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.data"
"X = pd.DataFrame(X,columns=data.feature_names)\n",
"X.head(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9977a37c",
"execution_count": 4 ,
"id": "08ebab89 ",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 20640 entries, 0 to 20639\n",
"Data columns (total 8 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 MedInc 20640 non-null float64\n",
" 1 HouseAge 20640 non-null float64\n",
" 2 AveRooms 20640 non-null float64\n",
" 3 AveBedrms 20640 non-null float64\n",
" 4 Population 20640 non-null float64\n",
" 5 AveOccup 20640 non-null float64\n",
" 6 Latitude 20640 non-null float64\n",
" 7 Longitude 20640 non-null float64\n",
"dtypes: float64(8)\n",
"memory usage: 1.3 MB\n"
]
}
],
"source": [
"X.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9193c33",
"execution_count": 5,
"id": "8932c66c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"训练集: (13932, 8)\n",
"验证集: (4644, 8)\n",
"测试集: (2064, 8)\n"
]
}
],
"source": [
"# 切分训练和测试集\n",
"train_valid_x, test_x, train_valid_y, test_y = train_test_split(X, y,random_state=42,test_size=0.1)\n",
"\n",
"# 切分训练和验证集\n",
"train_x, valid_x, train_y, valid_y = train_test_split(train_valid_x, train_valid_y,random_state=42)\n",
"print('训练集:',train_x.shape)\n",
"print('验证集:',valid_x.shape)\n",
"print('测试集:', test_x.shape)"
]
},
{
"cell_type": "markdown",
"id": "e87a2121",
"metadata": {},
"source": [
"### 使用LGB作为模型, 不使用optuna调参"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5cad8967",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001822 seconds.\n",
"You can set `force_col_wise=true` to remove the overhead.\n",
"[LightGBM] [Info] Total Bins 1837\n",
"[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
"[LightGBM] [Info] Start training from score 2.072422\n",
"Training until validation scores don't improve for 20 rounds\n",
"[50]\tvalid_0's rmse: 0.492877\n",
"[100]\tvalid_0's rmse: 0.471803\n",
"[150]\tvalid_0's rmse: 0.46445\n",
"[200]\tvalid_0's rmse: 0.459716\n",
"[250]\tvalid_0's rmse: 0.456658\n",
"[300]\tvalid_0's rmse: 0.454223\n",
"[350]\tvalid_0's rmse: 0.452644\n",
"Early stopping, best iteration is:\n",
"[342]\tvalid_0's rmse: 0.452522\n"
]
}
],
"source": [
"params = {'boosting_type': 'gbdt',\n",
" 'objective': 'regression',\n",
" \"metric\": 'rmse'}\n",
"dtrain = lgb.Dataset(train_x, label=train_y)\n",
"dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
"model = lgb.train(params=params, train_set=dtrain,valid_sets=[dvalid],\n",
" verbose_eval=50,\n",
" early_stopping_rounds=20,\n",
" num_boost_round=5000)\n",
"predict = model.predict(test_x)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "886bfdad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.4531666044672748"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 评估指标rmse, 越小越好\n",
"np.sqrt(mean_squared_error(test_y,predict))"
]
},
{
"cell_type": "markdown",
"id": "c271442c",
"metadata": {},
"source": [
"### 使用LGB作为模型, 使用optuna调参"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "9a87f9db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
"Collecting optuna\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2a/b4/061c8721b5da1614794d1b66fcb212eee156efd5284f66854d02f295b0be/optuna-2.9.1-py3-none-any.whl (302 kB)\n",
"Requirement already satisfied: tqdm in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (4.59.0)\n",
"Collecting cmaes>=0.8.2\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/01/1f/43b01223a0366171f474320c6e966c39a11587287f098a5f09809b45e05f/cmaes-0.8.2-py3-none-any.whl (15 kB)\n",
"Requirement already satisfied: scipy!=1.4.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.1)\n",
"Collecting cliff\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/f7/2a98b032a43b2925ea32bc13a8feb6cf9416e7d2b2c0f6d2ce14636a03b1/cliff-3.9.0-py3-none-any.whl (80 kB)\n",
"Collecting alembic\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7a/5a/be479a2c379e6b3c57dc56ea3b139ad4d46c2d244a0035ac4d7475116076/alembic-1.7.1-py3-none-any.whl (208 kB)\n",
"Requirement already satisfied: packaging>=20.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (20.9)\n",
"Requirement already satisfied: numpy in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.18.5)\n",
"Collecting colorlog\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2d/93/4b0bb101e54206e92feb3c986c274902212b2ed8c55423e6e7f6d8b693ca/colorlog-6.4.1-py2.py3-none-any.whl (11 kB)\n",
"Requirement already satisfied: PyYAML in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (5.4.1)\n",
"Requirement already satisfied: sqlalchemy>=1.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.7)\n",
"Requirement already satisfied: pyparsing>=2.0.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from packaging>=20.0->optuna) (2.4.7)\n",
"Requirement already satisfied: greenlet!=0.4.17 in d:\\programdata\\anaconda3\\lib\\site-packages (from sqlalchemy>=1.1.0->optuna) (1.0.0)\n",
"Collecting importlib-resources\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f2/6c/2f3b930513bb971172ffceb63cf4e910944e57451724e69b1dec97cfefa6/importlib_resources-5.2.2-py3-none-any.whl (27 kB)\n",
"Collecting Mako\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/75/69/c3ab0db9234fa5681a85a1c55203763a62902d56ad76b6d9b9bfa2c83694/Mako-1.1.5-py2.py3-none-any.whl (75 kB)\n",
"Collecting PrettyTable>=0.7.2\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/bd/b5/c09f8d237e060a9e7b5d2d1577c2a6bc49fa298a7b4aefd52146f2b9a62e/prettytable-2.2.0-py3-none-any.whl (23 kB)\n",
"Collecting pbr!=2.1.0,>=2.0.0\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/e0/1d4702dd81121d04a477c272d47ee5b6bc970d1a0990b11befa275c55cf2/pbr-5.6.0-py2.py3-none-any.whl (111 kB)\n",
"Collecting cmd2>=1.0.0\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/44/ca/d407811641ec1d8bd8a38ee3165d73aa44776d7700436bd4d4a6606f2736/cmd2-2.1.2-py3-none-any.whl (141 kB)\n",
"Collecting stevedore>=2.0.1\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b2/c5/036a9a6e220ea7406a36130e80cca33a3e6b98b5328cfdba4b46b2ed0786/stevedore-3.4.0-py3-none-any.whl (49 kB)\n",
"Collecting autopage>=0.4.0\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/a7/901e943318925f8ca3f7963616660065b3cf4e143b0327f88076ba5c4e22/autopage-0.4.0-py3-none-any.whl (20 kB)\n",
"Requirement already satisfied: colorama>=0.3.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.4.4)\n",
"Collecting pyreadline3\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/89/29/10fbb29d957dbcee77a0832eabb9953da80d6bb9514f7ca1b3d82f50219f/pyreadline3-3.3-py3-none-any.whl (95 kB)\n",
"Requirement already satisfied: attrs>=16.3.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (20.3.0)\n",
"Collecting pyperclip>=1.6\n",
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a7/2c/4c64579f847bd5d539803c8b909e54ba087a79d01bb3aba433a95879a6c5/pyperclip-1.8.2.tar.gz (20 kB)\n",
"Requirement already satisfied: wcwidth>=0.1.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.2.5)\n",
"Requirement already satisfied: zipp>=3.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from importlib-resources->alembic->optuna) (3.4.1)\n",
"Requirement already satisfied: MarkupSafe>=0.9.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from Mako->alembic->optuna) (1.1.1)\n",
"Building wheels for collected packages: pyperclip\n",
" Building wheel for pyperclip (setup.py): started\n",
" Building wheel for pyperclip (setup.py): finished with status 'done'\n",
" Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11107 sha256=96b5a96e64d8d6ae264ae796623c0148d1ca71677462878fc1f07c74e1e794ff\n",
" Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\30\\c0\\21\\bc13df81c8b032076577671a8ef05db4e168a335e07e64d9a7\n",
"Successfully built pyperclip\n",
"Installing collected packages: pyreadline3, pyperclip, pbr, stevedore, PrettyTable, Mako, importlib-resources, cmd2, autopage, colorlog, cmaes, cliff, alembic, optuna\n",
"Successfully installed Mako-1.1.5 PrettyTable-2.2.0 alembic-1.7.1 autopage-0.4.0 cliff-3.9.0 cmaes-0.8.2 cmd2-2.1.2 colorlog-6.4.1 importlib-resources-5.2.2 optuna-2.9.1 pbr-5.6.0 pyperclip-1.8.2 pyreadline3-3.3 stevedore-3.4.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install optuna==2.9.1"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d8a0279a",
"metadata": {},
"outputs": [],
"source": [
"### 分类"
"import optuna "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82687da3",
"execution_count": 18 ,
"id": "7433fdcb ",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import fetch_covtype\n",
"data = fetch_covtype()"
"def objective(trial,train_x, valid_x, train_y, valid_y):\n",
"    \"\"\"Optuna objective: train LightGBM with sampled hyperparameters and score it.\n",
"\n",
"    Args:\n",
"        trial: Optuna trial used to sample hyperparameters and to report\n",
"            intermediate scores for pruning.\n",
"        train_x, train_y: training features and regression targets.\n",
"        valid_x, valid_y: validation features and targets; used both by the\n",
"            pruning callback and for the returned score.\n",
"\n",
"    Returns:\n",
"        RMSE of the raw (continuous) predictions on the validation set.\n",
"        Lower is better.\n",
"    \"\"\"\n",
"    dtrain = lgb.Dataset(train_x, label=train_y)\n",
"    dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
"\n",
"    param = {\n",
"        \"objective\": \"regression\",\n",
"        \"metric\": \"rmse\",\n",
"        \"verbosity\": -1,\n",
"        \"boosting_type\": \"gbdt\",\n",
"        'random_state':42,\n",
"        \"lambda_l1\": trial.suggest_float(\"lambda_l1\", 1e-8, 10.0, log=True),\n",
"        \"lambda_l2\": trial.suggest_float(\"lambda_l2\", 1e-8, 10.0, log=True),\n",
"        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 256),\n",
"        \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.4, 1.0),\n",
"        \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.4, 1.0),\n",
"        \"bagging_freq\": trial.suggest_int(\"bagging_freq\", 1, 7),\n",
"        \"min_child_samples\": trial.suggest_int(\"min_child_samples\", 5, 100),\n",
"    }\n",
"\n",
"    # Add a callback for pruning; it watches the same metric name as param[\"metric\"].\n",
"    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, \"rmse\")\n",
"    gbm = lgb.train(\n",
"        param, dtrain, valid_sets=[dvalid], verbose_eval=False, callbacks=[pruning_callback]\n",
"    )\n",
"\n",
"    # The target is continuous, so score the raw predictions. Rounding them to\n",
"    # integers (a leftover from a classification template) inflates the RMSE\n",
"    # and makes the study optimise the wrong quantity.\n",
"    preds = gbm.predict(valid_x)\n",
"    rmse = np.sqrt(mean_squared_error(valid_y, preds))\n",
"    return rmse"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b9018adb",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m[I 2021-09-07 09:26:30,952]\u001b[0m A new study created in memory with name: no-name-f2147511-069d-495f-90ec-5990dc3c3716\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:31,284]\u001b[0m Trial 0 finished with value: 0.5579165649228477 and parameters: {'lambda_l1': 1.6814939560853405e-06, 'lambda_l2': 8.772634980486007, 'num_leaves': 79, 'feature_fraction': 0.4415746779386105, 'bagging_fraction': 0.5306839081914155, 'bagging_freq': 5, 'min_child_samples': 13}. Best is trial 0 with value: 0.5579165649228477.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:31,523]\u001b[0m Trial 1 finished with value: 0.5577111827415846 and parameters: {'lambda_l1': 0.00013684423280171766, 'lambda_l2': 0.010116712675880523, 'num_leaves': 42, 'feature_fraction': 0.4758659908396936, 'bagging_fraction': 0.8916447942940564, 'bagging_freq': 7, 'min_child_samples': 32}. Best is trial 1 with value: 0.5577111827415846.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:31,986]\u001b[0m Trial 2 finished with value: 0.5382832732103667 and parameters: {'lambda_l1': 1.849141022226266, 'lambda_l2': 9.297088403542385e-06, 'num_leaves': 94, 'feature_fraction': 0.6894755218120849, 'bagging_fraction': 0.9503182129837274, 'bagging_freq': 2, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:32,156]\u001b[0m Trial 3 finished with value: 0.5786765226592324 and parameters: {'lambda_l1': 4.448151837364203e-05, 'lambda_l2': 1.8955982808109254e-08, 'num_leaves': 19, 'feature_fraction': 0.493099451522876, 'bagging_fraction': 0.6742062281671596, 'bagging_freq': 5, 'min_child_samples': 35}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:32,266]\u001b[0m Trial 4 finished with value: 0.6139367553603606 and parameters: {'lambda_l1': 0.0002791229907486381, 'lambda_l2': 5.269713608638115, 'num_leaves': 5, 'feature_fraction': 0.8303631732661438, 'bagging_fraction': 0.942024874372832, 'bagging_freq': 4, 'min_child_samples': 61}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:32,826]\u001b[0m Trial 5 finished with value: 0.5453617455111696 and parameters: {'lambda_l1': 0.06860061560766911, 'lambda_l2': 0.2723830502667369, 'num_leaves': 98, 'feature_fraction': 0.7105587363977491, 'bagging_fraction': 0.5899933873219961, 'bagging_freq': 4, 'min_child_samples': 71}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:32,880]\u001b[0m Trial 6 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:33,189]\u001b[0m Trial 7 finished with value: 0.5588682864793495 and parameters: {'lambda_l1': 0.4689515611654304, 'lambda_l2': 1.5494838308859912e-07, 'num_leaves': 65, 'feature_fraction': 0.5583496504073044, 'bagging_fraction': 0.6569918432147945, 'bagging_freq': 5, 'min_child_samples': 58}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:33,462]\u001b[0m Trial 8 finished with value: 0.5447098867148504 and parameters: {'lambda_l1': 0.0010101561223389774, 'lambda_l2': 0.01515390989225731, 'num_leaves': 47, 'feature_fraction': 0.6245633996226619, 'bagging_fraction': 0.419624870667929, 'bagging_freq': 3, 'min_child_samples': 48}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:33,693]\u001b[0m Trial 9 pruned. Trial was pruned at iteration 80.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:34,181]\u001b[0m Trial 10 finished with value: 0.541110883425217 and parameters: {'lambda_l1': 1.4756346287368392e-08, 'lambda_l2': 1.3544296321997672e-05, 'num_leaves': 189, 'feature_fraction': 0.9751158697609862, 'bagging_fraction': 0.8348333900598492, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:34,761]\u001b[0m Trial 11 finished with value: 0.5430244062824532 and parameters: {'lambda_l1': 1.2282790653635484e-08, 'lambda_l2': 8.373619608433318e-06, 'num_leaves': 196, 'feature_fraction': 0.9236444858767681, 'bagging_fraction': 0.8122951589315216, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:35,237]\u001b[0m Trial 12 finished with value: 0.5479941202014348 and parameters: {'lambda_l1': 8.66531632367352, 'lambda_l2': 2.7356786615324263e-05, 'num_leaves': 182, 'feature_fraction': 0.7257944800491923, 'bagging_fraction': 0.8151059105693412, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:36,132]\u001b[0m Trial 13 finished with value: 0.5426396090105863 and parameters: {'lambda_l1': 1.5311804262482413e-08, 'lambda_l2': 8.31070641463058e-06, 'num_leaves': 241, 'feature_fraction': 0.9996713520233095, 'bagging_fraction': 0.9659578077631608, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:36,961]\u001b[0m Trial 14 finished with value: 0.5367665957770634 and parameters: {'lambda_l1': 0.014095420115571123, 'lambda_l2': 0.0002563644914171101, 'num_leaves': 149, 'feature_fraction': 0.7883099949736571, 'bagging_fraction': 0.8086023984951658, 'bagging_freq': 2, 'min_child_samples': 85}. Best is trial 14 with value: 0.5367665957770634.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:37,824]\u001b[0m Trial 15 finished with value: 0.5348886658006932 and parameters: {'lambda_l1': 0.020071941122612545, 'lambda_l2': 0.0003014852059198554, 'num_leaves': 146, 'feature_fraction': 0.7936923799556383, 'bagging_fraction': 0.756430674635471, 'bagging_freq': 3, 'min_child_samples': 83}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:38,622]\u001b[0m Trial 16 finished with value: 0.5372333761766732 and parameters: {'lambda_l1': 0.015173696268463625, 'lambda_l2': 0.0016757129971514612, 'num_leaves': 138, 'feature_fraction': 0.8107104336197675, 'bagging_fraction': 0.7376130779961089, 'bagging_freq': 3, 'min_child_samples': 78}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:39,451]\u001b[0m Trial 17 finished with value: 0.5376252190781058 and parameters: {'lambda_l1': 0.007357005023837469, 'lambda_l2': 0.000250874148036676, 'num_leaves': 147, 'feature_fraction': 0.7829205508785075, 'bagging_fraction': 0.7478270069878126, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:39,567]\u001b[0m Trial 18 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:40,910]\u001b[0m Trial 19 finished with value: 0.5315793527934071 and parameters: {'lambda_l1': 0.013209811084277438, 'lambda_l2': 0.06726287443306718, 'num_leaves': 231, 'feature_fraction': 0.637876115312563, 'bagging_fraction': 0.8716817438233221, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 19 with value: 0.5315793527934071.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:42,324]\u001b[0m Trial 20 finished with value: 0.5262550702976 and parameters: {'lambda_l1': 0.003113216663103784, 'lambda_l2': 0.24783337352027449, 'num_leaves': 242, 'feature_fraction': 0.6137607392321572, 'bagging_fraction': 0.8936473883645825, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:43,182]\u001b[0m Trial 21 finished with value: 0.5297204439253479 and parameters: {'lambda_l1': 0.003978575421253158, 'lambda_l2': 0.11658574784670588, 'num_leaves': 254, 'feature_fraction': 0.6168955771891016, 'bagging_fraction': 0.8787433941466242, 'bagging_freq': 3, 'min_child_samples': 43}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:43,310]\u001b[0m Trial 22 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:44,194]\u001b[0m Trial 23 finished with value: 0.5288803542960898 and parameters: {'lambda_l1': 8.341315535148706e-06, 'lambda_l2': 0.24030532904199514, 'num_leaves': 221, 'feature_fraction': 0.5901567080654435, 'bagging_fraction': 0.8875920940685119, 'bagging_freq': 3, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:44,331]\u001b[0m Trial 24 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:45,248]\u001b[0m Trial 25 finished with value: 0.5310533243420329 and parameters: {'lambda_l1': 1.9500743317527624e-07, 'lambda_l2': 0.0502751774074574, 'num_leaves': 214, 'feature_fraction': 0.5734739546088348, 'bagging_fraction': 0.9115976551406307, 'bagging_freq': 2, 'min_child_samples': 24}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m[I 2021-09-07 09:26:46,126]\u001b[0m Trial 26 finished with value: 0.5310399189972692 and parameters: {'lambda_l1': 5.65218762871428e-06, 'lambda_l2': 1.6944446205708956, 'num_leaves': 256, 'feature_fraction': 0.6693624417062835, 'bagging_fraction': 0.8606541857435407, 'bagging_freq': 4, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:47,541]\u001b[0m Trial 27 finished with value: 0.5289141422774571 and parameters: {'lambda_l1': 3.372780137131952e-07, 'lambda_l2': 0.004231573716537988, 'num_leaves': 222, 'feature_fraction': 0.5834642111209242, 'bagging_fraction': 0.9283224808721449, 'bagging_freq': 3, 'min_child_samples': 33}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:47,734]\u001b[0m Trial 28 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:47,883]\u001b[0m Trial 29 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:48,032]\u001b[0m Trial 30 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:48,233]\u001b[0m Trial 31 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:48,470]\u001b[0m Trial 32 pruned. Trial was pruned at iteration 12.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:50,256]\u001b[0m Trial 33 finished with value: 0.5307511312800164 and parameters: {'lambda_l1': 8.331346294111473e-08, 'lambda_l2': 0.018173663934055492, 'num_leaves': 238, 'feature_fraction': 0.5874989674601676, 'bagging_fraction': 0.8876263960076438, 'bagging_freq': 2, 'min_child_samples': 30}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,081]\u001b[0m Trial 34 finished with value: 0.5364692667074208 and parameters: {'lambda_l1': 8.939808862490256e-07, 'lambda_l2': 1.3094120587455667, 'num_leaves': 249, 'feature_fraction': 0.739630943171398, 'bagging_fraction': 0.9999691110926188, 'bagging_freq': 3, 'min_child_samples': 17}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,228]\u001b[0m Trial 35 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,401]\u001b[0m Trial 36 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,527]\u001b[0m Trial 37 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,646]\u001b[0m Trial 38 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:52,746]\u001b[0m Trial 39 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:53,973]\u001b[0m Trial 40 finished with value: 0.5256364275490798 and parameters: {'lambda_l1': 0.0710216045724967, 'lambda_l2': 0.10020172057765622, 'num_leaves': 206, 'feature_fraction': 0.6441352660567405, 'bagging_fraction': 0.8538258125595672, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 40 with value: 0.5256364275490798.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:54,896]\u001b[0m Trial 41 finished with value: 0.5233364787469251 and parameters: {'lambda_l1': 0.23671365201549266, 'lambda_l2': 0.0749439220683321, 'num_leaves': 210, 'feature_fraction': 0.6436961501856725, 'bagging_fraction': 0.8521968194803821, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:55,831]\u001b[0m Trial 42 finished with value: 0.5293150200028972 and parameters: {'lambda_l1': 0.6574943326302751, 'lambda_l2': 0.0361443234291568, 'num_leaves': 209, 'feature_fraction': 0.6596272574104582, 'bagging_fraction': 0.850673456207462, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:56,858]\u001b[0m Trial 43 finished with value: 0.5308606295175099 and parameters: {'lambda_l1': 0.07618036054920922, 'lambda_l2': 0.22544030283002933, 'num_leaves': 226, 'feature_fraction': 0.6984185669610476, 'bagging_fraction': 0.7845150865412134, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:57,019]\u001b[0m Trial 44 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:57,787]\u001b[0m Trial 45 finished with value: 0.5331475267867624 and parameters: {'lambda_l1': 1.4967349278475324, 'lambda_l2': 0.0008951722845905283, 'num_leaves': 207, 'feature_fraction': 0.7339992851750987, 'bagging_fraction': 0.919959739142468, 'bagging_freq': 5, 'min_child_samples': 26}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:57,865]\u001b[0m Trial 46 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:57,971]\u001b[0m Trial 47 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:58,069]\u001b[0m Trial 48 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:58,168]\u001b[0m Trial 49 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:58,317]\u001b[0m Trial 50 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:58,445]\u001b[0m Trial 51 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:26:59,558]\u001b[0m Trial 52 finished with value: 0.5287423259473492 and parameters: {'lambda_l1': 0.44209025556975773, 'lambda_l2': 0.02237337201530471, 'num_leaves': 180, 'feature_fraction': 0.6816257587535688, 'bagging_fraction': 0.8645245816570197, 'bagging_freq': 7, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:00,776]\u001b[0m Trial 53 finished with value: 0.5248984867652216 and parameters: {'lambda_l1': 0.02970872423104034, 'lambda_l2': 0.0197319338622143, 'num_leaves': 177, 'feature_fraction': 0.7150825328146555, 'bagging_fraction': 0.8972252525350597, 'bagging_freq': 7, 'min_child_samples': 12}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:01,886]\u001b[0m Trial 54 finished with value: 0.5310139507545261 and parameters: {'lambda_l1': 0.03922536031082296, 'lambda_l2': 1.7618527190204045e-06, 'num_leaves': 180, 'feature_fraction': 0.7089331672809696, 'bagging_fraction': 0.8021837464466384, 'bagging_freq': 7, 'min_child_samples': 11}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:02,043]\u001b[0m Trial 55 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:02,158]\u001b[0m Trial 56 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:02,937]\u001b[0m Trial 57 pruned. Trial was pruned at iteration 78.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:03,912]\u001b[0m Trial 58 finished with value: 0.5310990808119093 and parameters: {'lambda_l1': 0.008908141404368873, 'lambda_l2': 1.0024604099274968e-08, 'num_leaves': 156, 'feature_fraction': 0.7135958388327297, 'bagging_fraction': 0.8918708775644101, 'bagging_freq': 7, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:05,509]\u001b[0m Trial 59 finished with value: 0.5303075036750562 and parameters: {'lambda_l1': 0.0036415543019748704, 'lambda_l2': 0.060450481568631226, 'num_leaves': 245, 'feature_fraction': 0.6322922933331399, 'bagging_fraction': 0.8119277832780933, 'bagging_freq': 6, 'min_child_samples': 18}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:05,675]\u001b[0m Trial 60 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:06,717]\u001b[0m Trial 61 finished with value: 0.5266068329070613 and parameters: {'lambda_l1': 0.08187221145217663, 'lambda_l2': 0.12038514531389062, 'num_leaves': 230, 'feature_fraction': 0.5893130926352276, 'bagging_fraction': 0.9218811557192532, 'bagging_freq': 3, 'min_child_samples': 30}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:06,839]\u001b[0m Trial 62 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:06,957]\u001b[0m Trial 63 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:07,060]\u001b[0m Trial 64 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:07,235]\u001b[0m Trial 65 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:07,371]\u001b[0m Trial 66 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:08,252]\u001b[0m Trial 67 finished with value: 0.524615381244906 and parameters: {'lambda_l1': 0.0007372437461333507, 'lambda_l2': 0.07197819258619385, 'num_leaves': 217, 'feature_fraction': 0.7085126717217893, 'bagging_fraction': 0.9783185592011208, 'bagging_freq': 4, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m[I 2021-09-07 09:27:09,210]\u001b[0m Trial 68 finished with value: 0.524942396458539 and parameters: {'lambda_l1': 0.0011590093830295628, 'lambda_l2': 0.06353809127053359, 'num_leaves': 198, 'feature_fraction': 0.7106295012909083, 'bagging_fraction': 0.973655849802224, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:09,572]\u001b[0m Trial 69 pruned. Trial was pruned at iteration 29.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:10,511]\u001b[0m Trial 70 finished with value: 0.5227354154865925 and parameters: {'lambda_l1': 0.0019204609823774911, 'lambda_l2': 0.006957053438847566, 'num_leaves': 200, 'feature_fraction': 0.7174135338967843, 'bagging_fraction': 0.9747628414441452, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:11,445]\u001b[0m Trial 71 pruned. Trial was pruned at iteration 72.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:12,707]\u001b[0m Trial 72 finished with value: 0.5273794666616337 and parameters: {'lambda_l1': 0.00253464769953144, 'lambda_l2': 0.04250761519699366, 'num_leaves': 203, 'feature_fraction': 0.723989872880134, 'bagging_fraction': 0.9954485306248265, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:14,118]\u001b[0m Trial 73 finished with value: 0.5290081420408789 and parameters: {'lambda_l1': 0.0014187815549416435, 'lambda_l2': 0.12791352246804705, 'num_leaves': 218, 'feature_fraction': 0.7758653557130019, 'bagging_fraction': 0.9488928693793105, 'bagging_freq': 5, 'min_child_samples': 18}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:14,196]\u001b[0m Trial 74 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:15,517]\u001b[0m Trial 75 finished with value: 0.5325397214424649 and parameters: {'lambda_l1': 0.0005872739303922063, 'lambda_l2': 0.0096136159649407, 'num_leaves': 211, 'feature_fraction': 0.8012899648531023, 'bagging_fraction': 0.931501707649951, 'bagging_freq': 4, 'min_child_samples': 13}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:16,903]\u001b[0m Trial 76 finished with value: 0.5296781377317934 and parameters: {'lambda_l1': 0.006787205212552698, 'lambda_l2': 0.0790698959620889, 'num_leaves': 239, 'feature_fraction': 0.7454753912863651, 'bagging_fraction': 0.9799306593714261, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:17,956]\u001b[0m Trial 77 finished with value: 0.5309336374271006 and parameters: {'lambda_l1': 0.0037427478770181, 'lambda_l2': 0.8130901726120456, 'num_leaves': 232, 'feature_fraction': 0.7224358819185711, 'bagging_fraction': 0.9520121758601578, 'bagging_freq': 4, 'min_child_samples': 12}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:18,071]\u001b[0m Trial 78 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:19,373]\u001b[0m Trial 79 finished with value: 0.5347521802448649 and parameters: {'lambda_l1': 0.017570019491239652, 'lambda_l2': 0.05059260326524094, 'num_leaves': 226, 'feature_fraction': 0.6981808494111595, 'bagging_fraction': 0.9993645575870086, 'bagging_freq': 4, 'min_child_samples': 20}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:19,471]\u001b[0m Trial 80 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:21,230]\u001b[0m Trial 81 finished with value: 0.5227732830528738 and parameters: {'lambda_l1': 0.0003058538457597125, 'lambda_l2': 0.04324937485563167, 'num_leaves': 202, 'feature_fraction': 0.73206644303025, 'bagging_fraction': 0.9911170826848787, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:22,266]\u001b[0m Trial 82 finished with value: 0.5279916258627607 and parameters: {'lambda_l1': 0.0002580487853233371, 'lambda_l2': 0.015002378165105253, 'num_leaves': 213, 'feature_fraction': 0.665112654945876, 'bagging_fraction': 0.9341515771267435, 'bagging_freq': 5, 'min_child_samples': 11}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:26,040]\u001b[0m Trial 83 pruned. Trial was pruned at iteration 51.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:29,359]\u001b[0m Trial 84 finished with value: 0.5257703648298483 and parameters: {'lambda_l1': 0.09313058634431053, 'lambda_l2': 0.0794176561682967, 'num_leaves': 174, 'feature_fraction': 0.7330255912724464, 'bagging_fraction': 0.9556166881594967, 'bagging_freq': 6, 'min_child_samples': 14}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:29,749]\u001b[0m Trial 85 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:29,919]\u001b[0m Trial 86 pruned. Trial was pruned at iteration 17.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:30,026]\u001b[0m Trial 87 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:30,861]\u001b[0m Trial 88 finished with value: 0.5270306752854106 and parameters: {'lambda_l1': 0.0003495273937948516, 'lambda_l2': 0.057992141622577896, 'num_leaves': 167, 'feature_fraction': 0.7406493057210397, 'bagging_fraction': 0.8812843020741449, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:30,933]\u001b[0m Trial 89 pruned. Trial was pruned at iteration 11.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:31,029]\u001b[0m Trial 90 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:31,358]\u001b[0m Trial 91 pruned. Trial was pruned at iteration 48.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:31,714]\u001b[0m Trial 92 pruned. Trial was pruned at iteration 41.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:32,543]\u001b[0m Trial 93 finished with value: 0.5264391883178012 and parameters: {'lambda_l1': 0.055429809241931895, 'lambda_l2': 0.013315511259061849, 'num_leaves': 235, 'feature_fraction': 0.6209901742544794, 'bagging_fraction': 0.9603625243176316, 'bagging_freq': 5, 'min_child_samples': 9}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:32,825]\u001b[0m Trial 94 pruned. Trial was pruned at iteration 40.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:32,940]\u001b[0m Trial 95 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:33,525]\u001b[0m Trial 96 finished with value: 0.521566218012535 and parameters: {'lambda_l1': 0.001072724146880552, 'lambda_l2': 0.043446953857601264, 'num_leaves': 142, 'feature_fraction': 0.6922532844601723, 'bagging_fraction': 0.9898097804978216, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 96 with value: 0.521566218012535.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:34,149]\u001b[0m Trial 97 finished with value: 0.5196727190561812 and parameters: {'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:34,855]\u001b[0m Trial 98 finished with value: 0.5265000531725292 and parameters: {'lambda_l1': 0.0006868362572582551, 'lambda_l2': 0.040525833784093586, 'num_leaves': 143, 'feature_fraction': 0.6930600995921747, 'bagging_fraction': 0.9897374239207342, 'bagging_freq': 6, 'min_child_samples': 6}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
"\u001b[32m[I 2021-09-07 09:27:34,935]\u001b[0m Trial 99 pruned. Trial was pruned at iteration 10.\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of finished trials: 100\n",
"Best trial:\n",
" Value: 0.5196727190561812\n",
" Params: \n",
" lambda_l1: 0.0025915189209523223\n",
" lambda_l2: 0.37941398551753863\n",
" num_leaves: 159\n",
" feature_fraction: 0.6942886423659413\n",
" bagging_fraction: 0.9888819332176355\n",
" bagging_freq: 6\n",
" min_child_samples: 5\n"
]
}
],
"source": [
"# Fix the sampler seed so the search proposes the same hyper-parameters on\n",
"# every Restart & Run All; TPE sampling is random when no seed is given.\n",
"SEARCH_SEED = 42\n",
"N_TRIALS = 100\n",
"\n",
"study = optuna.create_study(\n",
"    direction=\"minimize\",  # a lower objective value is better\n",
"    sampler=optuna.samplers.TPESampler(seed=SEARCH_SEED),\n",
"    # No pruning during the first 10 reported steps of a trial.\n",
"    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),\n",
")\n",
"study.optimize(\n",
"    lambda trial: objective(trial, train_x, valid_x, train_y, valid_y),\n",
"    n_trials=N_TRIALS,\n",
")\n",
"\n",
"print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
"\n",
"# `trial` stays a notebook-level name: the next cell reads trial.params.\n",
"print(\"Best trial:\")\n",
"trial = study.best_trial\n",
"\n",
"print(\" Value: {}\".format(trial.value))\n",
"\n",
"print(\" Params: \")\n",
"for key, value in trial.params.items():\n",
"    print(\" {}: {}\".format(key, value))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "681d9cc2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'boosting_type': 'gbdt', 'objective': 'regression', 'metric': 'rmse', 'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}\n"
]
}
],
"source": [
"# Fixed LightGBM settings first, then the best trial's tuned values on top\n",
"# (a tuned key with the same name would override the fixed one).\n",
"params = {\n",
"    \"boosting_type\": \"gbdt\",\n",
"    \"objective\": \"regression\",\n",
"    \"metric\": \"rmse\",\n",
"    **trial.params,\n",
"}\n",
"print(params)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "f5534bab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000585 seconds.\n",
"You can set `force_row_wise=true` to remove the overhead.\n",
"And if memory is not enough, you can set `force_col_wise=true`.\n",
"[LightGBM] [Info] Total Bins 1837\n",
"[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
"[LightGBM] [Info] Start training from score 2.072422\n",
"Training until validation scores don't improve for 20 rounds\n",
"[50]\tvalid_0's rmse: 0.459286\n",
"[100]\tvalid_0's rmse: 0.449051\n",
"[150]\tvalid_0's rmse: 0.448356\n",
"Early stopping, best iteration is:\n",
"[166]\tvalid_0's rmse: 0.447872\n"
]
}
],
"source": [
"dtrain = lgb.Dataset(train_x, label=train_y)\n",
"dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
"\n",
"# Early stopping and periodic logging are passed as callbacks because the\n",
"# `early_stopping_rounds` / `verbose_eval` keyword arguments of lgb.train were\n",
"# deprecated in LightGBM 3.3 and removed in 4.0. Releases before 3.3 expose\n",
"# the logging callback as `print_evaluation` instead of `log_evaluation`.\n",
"log_evaluation = getattr(lgb, \"log_evaluation\", None) or lgb.print_evaluation\n",
"\n",
"model = lgb.train(\n",
"    params=params,\n",
"    train_set=dtrain,\n",
"    valid_sets=[dvalid],\n",
"    num_boost_round=5000,  # upper bound on boosting rounds\n",
"    callbacks=[\n",
"        lgb.early_stopping(stopping_rounds=20),  # stop after 20 rounds without improvement\n",
"        log_evaluation(period=50),  # print the validation metric every 50 rounds\n",
"    ],\n",
")\n",
"predict = model.predict(test_x)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "f28d82da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.44403838770137805"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Evaluation metric: RMSE between test_y and the predictions (lower is better).\n",
"test_mse = mean_squared_error(test_y, predict)\n",
"np.sqrt(test_mse)"
]
},
{
"cell_type": "markdown",
"id": "5e292bdb",
"metadata": {},
"source": [
"### 回归任务的结论\n",
"不使用optuna调参时的RMSE是0.4531666044672748, 使用optuna调参后的RMSE是0.44403838770137805, RMSE降低了0.00912821676589675 (RMSE越小越好)。"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36384535",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {