From 162e9a7de21512037296954022d9cb8c5a6e2c4f Mon Sep 17 00:00:00 2001
From: benjas <909336740@qq.com>
Date: Tue, 7 Sep 2021 09:33:15 +0800
Subject: [PATCH] Add. Regression task for optuna
---
.../AutoAI Tools-checkpoint.ipynb | 641 +++++++++++++++++-
竞赛优胜技巧/AutoAI Tools.ipynb | 628 +++++++++++++++--
2 files changed, 1220 insertions(+), 49 deletions(-)
diff --git a/竞赛优胜技巧/.ipynb_checkpoints/AutoAI Tools-checkpoint.ipynb b/竞赛优胜技巧/.ipynb_checkpoints/AutoAI Tools-checkpoint.ipynb
index c551443..3744e19 100644
--- a/竞赛优胜技巧/.ipynb_checkpoints/AutoAI Tools-checkpoint.ipynb
+++ b/竞赛优胜技巧/.ipynb_checkpoints/AutoAI Tools-checkpoint.ipynb
@@ -6,13 +6,650 @@
"metadata": {},
"source": [
"# 自动机器学习工具\n",
- "该notebook将比较市面上的多个AutoML工具,分别采用两组数据集进行比较,分别是波士顿房价(回归)和森林植被类型(多分类)"
+ "该notebook将比较市面上的多个AutoAI工具,分别采用两组数据集进行比较,分别是加利福尼亚住房数据集(回归)和森林植被类型(多分类)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b3d4d608",
+ "metadata": {},
+ "source": [
+ "# optuna:一种超参数优化框架\n",
+ "https://github.com/optuna/optuna"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a2d41e62",
+ "metadata": {},
+ "source": [
+ "### 加利福尼亚房价预测任务(回归)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f5e0e977",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import time\n",
+ "import gc\n",
+ "\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "import lightgbm as lgb # 使用lgb模型"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "5eda2637",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.datasets import fetch_california_housing\n",
+ "data = fetch_california_housing()\n",
+ "X, y = data.data, data.target  # feature matrix and regression target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "722beb8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " AveOccup | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8.3252 | \n",
+ " 41.0 | \n",
+ " 6.984127 | \n",
+ " 1.02381 | \n",
+ " 322.0 | \n",
+ " 2.555556 | \n",
+ " 37.88 | \n",
+ " -122.23 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8.3014 | \n",
+ " 21.0 | \n",
+ " 6.238137 | \n",
+ " 0.97188 | \n",
+ " 2401.0 | \n",
+ " 2.109842 | \n",
+ " 37.86 | \n",
+ " -122.22 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
+ "0 8.3252 41.0 6.984127 1.02381 322.0 2.555556 37.88 \n",
+ "1 8.3014 21.0 6.238137 0.97188 2401.0 2.109842 37.86 \n",
+ "\n",
+ " Longitude \n",
+ "0 -122.23 \n",
+ "1 -122.22 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X = pd.DataFrame(data=X, columns=data.feature_names)  # attach the feature names as column labels\n",
+ "X.head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "08ebab89",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 20640 entries, 0 to 20639\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 MedInc 20640 non-null float64\n",
+ " 1 HouseAge 20640 non-null float64\n",
+ " 2 AveRooms 20640 non-null float64\n",
+ " 3 AveBedrms 20640 non-null float64\n",
+ " 4 Population 20640 non-null float64\n",
+ " 5 AveOccup 20640 non-null float64\n",
+ " 6 Latitude 20640 non-null float64\n",
+ " 7 Longitude 20640 non-null float64\n",
+ "dtypes: float64(8)\n",
+ "memory usage: 1.3 MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "X.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8932c66c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "训练集: (13932, 8)\n",
+ "验证集: (4644, 8)\n",
+ "测试集: (2064, 8)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Hold out 10% of the samples as the test set.\n",
+ "train_valid_x, test_x, train_valid_y, test_y = train_test_split(X, y, test_size=0.1, random_state=42)\n",
+ "\n",
+ "# Split the remaining samples into training and validation sets (default split ratio).\n",
+ "train_x, valid_x, train_y, valid_y = train_test_split(train_valid_x, train_valid_y, random_state=42)\n",
+ "# Report the shape of every split.\n",
+ "for split_name, split in (('训练集:', train_x), ('验证集:', valid_x), ('测试集:', test_x)):\n",
+ "    print(split_name, split.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e87a2121",
+ "metadata": {},
+ "source": [
+ "### 使用LGB作为模型,不使用optuna调参"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "5cad8967",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001822 seconds.\n",
+ "You can set `force_col_wise=true` to remove the overhead.\n",
+ "[LightGBM] [Info] Total Bins 1837\n",
+ "[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
+ "[LightGBM] [Info] Start training from score 2.072422\n",
+ "Training until validation scores don't improve for 20 rounds\n",
+ "[50]\tvalid_0's rmse: 0.492877\n",
+ "[100]\tvalid_0's rmse: 0.471803\n",
+ "[150]\tvalid_0's rmse: 0.46445\n",
+ "[200]\tvalid_0's rmse: 0.459716\n",
+ "[250]\tvalid_0's rmse: 0.456658\n",
+ "[300]\tvalid_0's rmse: 0.454223\n",
+ "[350]\tvalid_0's rmse: 0.452644\n",
+ "Early stopping, best iteration is:\n",
+ "[342]\tvalid_0's rmse: 0.452522\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Baseline: LightGBM with only the objective and metric set (no Optuna tuning).\n",
+ "params = dict(boosting_type='gbdt', objective='regression', metric='rmse')\n",
+ "dtrain = lgb.Dataset(data=train_x, label=train_y)\n",
+ "dvalid = lgb.Dataset(data=valid_x, label=valid_y)\n",
+ "model = lgb.train(\n",
+ "    params=params, train_set=dtrain, valid_sets=[dvalid],\n",
+ "    num_boost_round=5000, early_stopping_rounds=20, verbose_eval=50,\n",
+ ")\n",
+ "# Predictions on test_x are scored in the next cell.\n",
+ "predict = model.predict(test_x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "886bfdad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.4531666044672748"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Evaluation metric: RMSE on the test set (lower is better).\n",
+ "np.sqrt(mean_squared_error(y_true=test_y, y_pred=predict))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c271442c",
+ "metadata": {},
+ "source": [
+ "### 使用LGB作为模型,使用optuna调参"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "9a87f9db",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
+ "Collecting optuna\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2a/b4/061c8721b5da1614794d1b66fcb212eee156efd5284f66854d02f295b0be/optuna-2.9.1-py3-none-any.whl (302 kB)\n",
+ "Requirement already satisfied: tqdm in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (4.59.0)\n",
+ "Collecting cmaes>=0.8.2\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/01/1f/43b01223a0366171f474320c6e966c39a11587287f098a5f09809b45e05f/cmaes-0.8.2-py3-none-any.whl (15 kB)\n",
+ "Requirement already satisfied: scipy!=1.4.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.1)\n",
+ "Collecting cliff\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/f7/2a98b032a43b2925ea32bc13a8feb6cf9416e7d2b2c0f6d2ce14636a03b1/cliff-3.9.0-py3-none-any.whl (80 kB)\n",
+ "Collecting alembic\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7a/5a/be479a2c379e6b3c57dc56ea3b139ad4d46c2d244a0035ac4d7475116076/alembic-1.7.1-py3-none-any.whl (208 kB)\n",
+ "Requirement already satisfied: packaging>=20.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (20.9)\n",
+ "Requirement already satisfied: numpy in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.18.5)\n",
+ "Collecting colorlog\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2d/93/4b0bb101e54206e92feb3c986c274902212b2ed8c55423e6e7f6d8b693ca/colorlog-6.4.1-py2.py3-none-any.whl (11 kB)\n",
+ "Requirement already satisfied: PyYAML in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (5.4.1)\n",
+ "Requirement already satisfied: sqlalchemy>=1.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.7)\n",
+ "Requirement already satisfied: pyparsing>=2.0.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from packaging>=20.0->optuna) (2.4.7)\n",
+ "Requirement already satisfied: greenlet!=0.4.17 in d:\\programdata\\anaconda3\\lib\\site-packages (from sqlalchemy>=1.1.0->optuna) (1.0.0)\n",
+ "Collecting importlib-resources\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f2/6c/2f3b930513bb971172ffceb63cf4e910944e57451724e69b1dec97cfefa6/importlib_resources-5.2.2-py3-none-any.whl (27 kB)\n",
+ "Collecting Mako\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/75/69/c3ab0db9234fa5681a85a1c55203763a62902d56ad76b6d9b9bfa2c83694/Mako-1.1.5-py2.py3-none-any.whl (75 kB)\n",
+ "Collecting PrettyTable>=0.7.2\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/bd/b5/c09f8d237e060a9e7b5d2d1577c2a6bc49fa298a7b4aefd52146f2b9a62e/prettytable-2.2.0-py3-none-any.whl (23 kB)\n",
+ "Collecting pbr!=2.1.0,>=2.0.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/e0/1d4702dd81121d04a477c272d47ee5b6bc970d1a0990b11befa275c55cf2/pbr-5.6.0-py2.py3-none-any.whl (111 kB)\n",
+ "Collecting cmd2>=1.0.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/44/ca/d407811641ec1d8bd8a38ee3165d73aa44776d7700436bd4d4a6606f2736/cmd2-2.1.2-py3-none-any.whl (141 kB)\n",
+ "Collecting stevedore>=2.0.1\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b2/c5/036a9a6e220ea7406a36130e80cca33a3e6b98b5328cfdba4b46b2ed0786/stevedore-3.4.0-py3-none-any.whl (49 kB)\n",
+ "Collecting autopage>=0.4.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/a7/901e943318925f8ca3f7963616660065b3cf4e143b0327f88076ba5c4e22/autopage-0.4.0-py3-none-any.whl (20 kB)\n",
+ "Requirement already satisfied: colorama>=0.3.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.4.4)\n",
+ "Collecting pyreadline3\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/89/29/10fbb29d957dbcee77a0832eabb9953da80d6bb9514f7ca1b3d82f50219f/pyreadline3-3.3-py3-none-any.whl (95 kB)\n",
+ "Requirement already satisfied: attrs>=16.3.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (20.3.0)\n",
+ "Collecting pyperclip>=1.6\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a7/2c/4c64579f847bd5d539803c8b909e54ba087a79d01bb3aba433a95879a6c5/pyperclip-1.8.2.tar.gz (20 kB)\n",
+ "Requirement already satisfied: wcwidth>=0.1.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.2.5)\n",
+ "Requirement already satisfied: zipp>=3.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from importlib-resources->alembic->optuna) (3.4.1)\n",
+ "Requirement already satisfied: MarkupSafe>=0.9.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from Mako->alembic->optuna) (1.1.1)\n",
+ "Building wheels for collected packages: pyperclip\n",
+ " Building wheel for pyperclip (setup.py): started\n",
+ " Building wheel for pyperclip (setup.py): finished with status 'done'\n",
+ " Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11107 sha256=96b5a96e64d8d6ae264ae796623c0148d1ca71677462878fc1f07c74e1e794ff\n",
+ " Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\30\\c0\\21\\bc13df81c8b032076577671a8ef05db4e168a335e07e64d9a7\n",
+ "Successfully built pyperclip\n",
+ "Installing collected packages: pyreadline3, pyperclip, pbr, stevedore, PrettyTable, Mako, importlib-resources, cmd2, autopage, colorlog, cmaes, cliff, alembic, optuna\n",
+ "Successfully installed Mako-1.1.5 PrettyTable-2.2.0 alembic-1.7.1 autopage-0.4.0 cliff-3.9.0 cmaes-0.8.2 cmd2-2.1.2 colorlog-6.4.1 importlib-resources-5.2.2 optuna-2.9.1 pbr-5.6.0 pyperclip-1.8.2 pyreadline3-3.3 stevedore-3.4.0\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install optuna==2.9.1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "d8a0279a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import optuna"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "7433fdcb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def objective(trial,train_x, valid_x, train_y, valid_y):\n",
+ "    \"\"\"Optuna objective: train LightGBM on (train_x, train_y) with trial-sampled params and return the RMSE on (valid_x, valid_y).\"\"\"\n",
+ "    dtrain = lgb.Dataset(train_x, label=train_y)\n",
+ "    dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
+ "\n",
+ "    param = {\n",
+ "        \"objective\": \"regression\",\n",
+ "        \"metric\": \"rmse\",\n",
+ "        \"verbosity\": -1,\n",
+ "        \"boosting_type\": \"gbdt\",\n",
+ "        'random_state':42,\n",
+ "        \"lambda_l1\": trial.suggest_float(\"lambda_l1\", 1e-8, 10.0, log=True),\n",
+ "        \"lambda_l2\": trial.suggest_float(\"lambda_l2\", 1e-8, 10.0, log=True),\n",
+ "        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 256),\n",
+ "        \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.4, 1.0),\n",
+ "        \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.4, 1.0),\n",
+ "        \"bagging_freq\": trial.suggest_int(\"bagging_freq\", 1, 7),\n",
+ "        \"min_child_samples\": trial.suggest_int(\"min_child_samples\", 5, 100),\n",
+ "    }\n",
+ "\n",
+ "    # Callback that lets Optuna prune this trial based on the validation rmse.\n",
+ "    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, \"rmse\")\n",
+ "    gbm = lgb.train(\n",
+ "        param, dtrain, valid_sets=[dvalid], verbose_eval=False, callbacks=[pruning_callback]\n",
+ "    )\n",
+ "\n",
+ "    # Regression target is continuous: score the raw predictions; rounding them would distort the RMSE.\n",
+ "    preds = gbm.predict(valid_x)\n",
+ "    return np.sqrt(mean_squared_error(valid_y, preds))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "b9018adb",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:26:30,952]\u001b[0m A new study created in memory with name: no-name-f2147511-069d-495f-90ec-5990dc3c3716\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,284]\u001b[0m Trial 0 finished with value: 0.5579165649228477 and parameters: {'lambda_l1': 1.6814939560853405e-06, 'lambda_l2': 8.772634980486007, 'num_leaves': 79, 'feature_fraction': 0.4415746779386105, 'bagging_fraction': 0.5306839081914155, 'bagging_freq': 5, 'min_child_samples': 13}. Best is trial 0 with value: 0.5579165649228477.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,523]\u001b[0m Trial 1 finished with value: 0.5577111827415846 and parameters: {'lambda_l1': 0.00013684423280171766, 'lambda_l2': 0.010116712675880523, 'num_leaves': 42, 'feature_fraction': 0.4758659908396936, 'bagging_fraction': 0.8916447942940564, 'bagging_freq': 7, 'min_child_samples': 32}. Best is trial 1 with value: 0.5577111827415846.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,986]\u001b[0m Trial 2 finished with value: 0.5382832732103667 and parameters: {'lambda_l1': 1.849141022226266, 'lambda_l2': 9.297088403542385e-06, 'num_leaves': 94, 'feature_fraction': 0.6894755218120849, 'bagging_fraction': 0.9503182129837274, 'bagging_freq': 2, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,156]\u001b[0m Trial 3 finished with value: 0.5786765226592324 and parameters: {'lambda_l1': 4.448151837364203e-05, 'lambda_l2': 1.8955982808109254e-08, 'num_leaves': 19, 'feature_fraction': 0.493099451522876, 'bagging_fraction': 0.6742062281671596, 'bagging_freq': 5, 'min_child_samples': 35}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,266]\u001b[0m Trial 4 finished with value: 0.6139367553603606 and parameters: {'lambda_l1': 0.0002791229907486381, 'lambda_l2': 5.269713608638115, 'num_leaves': 5, 'feature_fraction': 0.8303631732661438, 'bagging_fraction': 0.942024874372832, 'bagging_freq': 4, 'min_child_samples': 61}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,826]\u001b[0m Trial 5 finished with value: 0.5453617455111696 and parameters: {'lambda_l1': 0.06860061560766911, 'lambda_l2': 0.2723830502667369, 'num_leaves': 98, 'feature_fraction': 0.7105587363977491, 'bagging_fraction': 0.5899933873219961, 'bagging_freq': 4, 'min_child_samples': 71}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,880]\u001b[0m Trial 6 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,189]\u001b[0m Trial 7 finished with value: 0.5588682864793495 and parameters: {'lambda_l1': 0.4689515611654304, 'lambda_l2': 1.5494838308859912e-07, 'num_leaves': 65, 'feature_fraction': 0.5583496504073044, 'bagging_fraction': 0.6569918432147945, 'bagging_freq': 5, 'min_child_samples': 58}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,462]\u001b[0m Trial 8 finished with value: 0.5447098867148504 and parameters: {'lambda_l1': 0.0010101561223389774, 'lambda_l2': 0.01515390989225731, 'num_leaves': 47, 'feature_fraction': 0.6245633996226619, 'bagging_fraction': 0.419624870667929, 'bagging_freq': 3, 'min_child_samples': 48}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,693]\u001b[0m Trial 9 pruned. Trial was pruned at iteration 80.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:34,181]\u001b[0m Trial 10 finished with value: 0.541110883425217 and parameters: {'lambda_l1': 1.4756346287368392e-08, 'lambda_l2': 1.3544296321997672e-05, 'num_leaves': 189, 'feature_fraction': 0.9751158697609862, 'bagging_fraction': 0.8348333900598492, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:34,761]\u001b[0m Trial 11 finished with value: 0.5430244062824532 and parameters: {'lambda_l1': 1.2282790653635484e-08, 'lambda_l2': 8.373619608433318e-06, 'num_leaves': 196, 'feature_fraction': 0.9236444858767681, 'bagging_fraction': 0.8122951589315216, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:35,237]\u001b[0m Trial 12 finished with value: 0.5479941202014348 and parameters: {'lambda_l1': 8.66531632367352, 'lambda_l2': 2.7356786615324263e-05, 'num_leaves': 182, 'feature_fraction': 0.7257944800491923, 'bagging_fraction': 0.8151059105693412, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:36,132]\u001b[0m Trial 13 finished with value: 0.5426396090105863 and parameters: {'lambda_l1': 1.5311804262482413e-08, 'lambda_l2': 8.31070641463058e-06, 'num_leaves': 241, 'feature_fraction': 0.9996713520233095, 'bagging_fraction': 0.9659578077631608, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:36,961]\u001b[0m Trial 14 finished with value: 0.5367665957770634 and parameters: {'lambda_l1': 0.014095420115571123, 'lambda_l2': 0.0002563644914171101, 'num_leaves': 149, 'feature_fraction': 0.7883099949736571, 'bagging_fraction': 0.8086023984951658, 'bagging_freq': 2, 'min_child_samples': 85}. Best is trial 14 with value: 0.5367665957770634.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:37,824]\u001b[0m Trial 15 finished with value: 0.5348886658006932 and parameters: {'lambda_l1': 0.020071941122612545, 'lambda_l2': 0.0003014852059198554, 'num_leaves': 146, 'feature_fraction': 0.7936923799556383, 'bagging_fraction': 0.756430674635471, 'bagging_freq': 3, 'min_child_samples': 83}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:38,622]\u001b[0m Trial 16 finished with value: 0.5372333761766732 and parameters: {'lambda_l1': 0.015173696268463625, 'lambda_l2': 0.0016757129971514612, 'num_leaves': 138, 'feature_fraction': 0.8107104336197675, 'bagging_fraction': 0.7376130779961089, 'bagging_freq': 3, 'min_child_samples': 78}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:39,451]\u001b[0m Trial 17 finished with value: 0.5376252190781058 and parameters: {'lambda_l1': 0.007357005023837469, 'lambda_l2': 0.000250874148036676, 'num_leaves': 147, 'feature_fraction': 0.7829205508785075, 'bagging_fraction': 0.7478270069878126, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:39,567]\u001b[0m Trial 18 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:40,910]\u001b[0m Trial 19 finished with value: 0.5315793527934071 and parameters: {'lambda_l1': 0.013209811084277438, 'lambda_l2': 0.06726287443306718, 'num_leaves': 231, 'feature_fraction': 0.637876115312563, 'bagging_fraction': 0.8716817438233221, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 19 with value: 0.5315793527934071.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:42,324]\u001b[0m Trial 20 finished with value: 0.5262550702976 and parameters: {'lambda_l1': 0.003113216663103784, 'lambda_l2': 0.24783337352027449, 'num_leaves': 242, 'feature_fraction': 0.6137607392321572, 'bagging_fraction': 0.8936473883645825, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:43,182]\u001b[0m Trial 21 finished with value: 0.5297204439253479 and parameters: {'lambda_l1': 0.003978575421253158, 'lambda_l2': 0.11658574784670588, 'num_leaves': 254, 'feature_fraction': 0.6168955771891016, 'bagging_fraction': 0.8787433941466242, 'bagging_freq': 3, 'min_child_samples': 43}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:43,310]\u001b[0m Trial 22 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:44,194]\u001b[0m Trial 23 finished with value: 0.5288803542960898 and parameters: {'lambda_l1': 8.341315535148706e-06, 'lambda_l2': 0.24030532904199514, 'num_leaves': 221, 'feature_fraction': 0.5901567080654435, 'bagging_fraction': 0.8875920940685119, 'bagging_freq': 3, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:44,331]\u001b[0m Trial 24 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:45,248]\u001b[0m Trial 25 finished with value: 0.5310533243420329 and parameters: {'lambda_l1': 1.9500743317527624e-07, 'lambda_l2': 0.0502751774074574, 'num_leaves': 214, 'feature_fraction': 0.5734739546088348, 'bagging_fraction': 0.9115976551406307, 'bagging_freq': 2, 'min_child_samples': 24}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:26:46,126]\u001b[0m Trial 26 finished with value: 0.5310399189972692 and parameters: {'lambda_l1': 5.65218762871428e-06, 'lambda_l2': 1.6944446205708956, 'num_leaves': 256, 'feature_fraction': 0.6693624417062835, 'bagging_fraction': 0.8606541857435407, 'bagging_freq': 4, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,541]\u001b[0m Trial 27 finished with value: 0.5289141422774571 and parameters: {'lambda_l1': 3.372780137131952e-07, 'lambda_l2': 0.004231573716537988, 'num_leaves': 222, 'feature_fraction': 0.5834642111209242, 'bagging_fraction': 0.9283224808721449, 'bagging_freq': 3, 'min_child_samples': 33}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,734]\u001b[0m Trial 28 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,883]\u001b[0m Trial 29 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,032]\u001b[0m Trial 30 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,233]\u001b[0m Trial 31 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,470]\u001b[0m Trial 32 pruned. Trial was pruned at iteration 12.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:50,256]\u001b[0m Trial 33 finished with value: 0.5307511312800164 and parameters: {'lambda_l1': 8.331346294111473e-08, 'lambda_l2': 0.018173663934055492, 'num_leaves': 238, 'feature_fraction': 0.5874989674601676, 'bagging_fraction': 0.8876263960076438, 'bagging_freq': 2, 'min_child_samples': 30}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,081]\u001b[0m Trial 34 finished with value: 0.5364692667074208 and parameters: {'lambda_l1': 8.939808862490256e-07, 'lambda_l2': 1.3094120587455667, 'num_leaves': 249, 'feature_fraction': 0.739630943171398, 'bagging_fraction': 0.9999691110926188, 'bagging_freq': 3, 'min_child_samples': 17}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,228]\u001b[0m Trial 35 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,401]\u001b[0m Trial 36 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,527]\u001b[0m Trial 37 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,646]\u001b[0m Trial 38 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,746]\u001b[0m Trial 39 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:53,973]\u001b[0m Trial 40 finished with value: 0.5256364275490798 and parameters: {'lambda_l1': 0.0710216045724967, 'lambda_l2': 0.10020172057765622, 'num_leaves': 206, 'feature_fraction': 0.6441352660567405, 'bagging_fraction': 0.8538258125595672, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 40 with value: 0.5256364275490798.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:54,896]\u001b[0m Trial 41 finished with value: 0.5233364787469251 and parameters: {'lambda_l1': 0.23671365201549266, 'lambda_l2': 0.0749439220683321, 'num_leaves': 210, 'feature_fraction': 0.6436961501856725, 'bagging_fraction': 0.8521968194803821, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:55,831]\u001b[0m Trial 42 finished with value: 0.5293150200028972 and parameters: {'lambda_l1': 0.6574943326302751, 'lambda_l2': 0.0361443234291568, 'num_leaves': 209, 'feature_fraction': 0.6596272574104582, 'bagging_fraction': 0.850673456207462, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:56,858]\u001b[0m Trial 43 finished with value: 0.5308606295175099 and parameters: {'lambda_l1': 0.07618036054920922, 'lambda_l2': 0.22544030283002933, 'num_leaves': 226, 'feature_fraction': 0.6984185669610476, 'bagging_fraction': 0.7845150865412134, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,019]\u001b[0m Trial 44 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,787]\u001b[0m Trial 45 finished with value: 0.5331475267867624 and parameters: {'lambda_l1': 1.4967349278475324, 'lambda_l2': 0.0008951722845905283, 'num_leaves': 207, 'feature_fraction': 0.7339992851750987, 'bagging_fraction': 0.919959739142468, 'bagging_freq': 5, 'min_child_samples': 26}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,865]\u001b[0m Trial 46 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,971]\u001b[0m Trial 47 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,069]\u001b[0m Trial 48 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,168]\u001b[0m Trial 49 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,317]\u001b[0m Trial 50 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,445]\u001b[0m Trial 51 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:59,558]\u001b[0m Trial 52 finished with value: 0.5287423259473492 and parameters: {'lambda_l1': 0.44209025556975773, 'lambda_l2': 0.02237337201530471, 'num_leaves': 180, 'feature_fraction': 0.6816257587535688, 'bagging_fraction': 0.8645245816570197, 'bagging_freq': 7, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:00,776]\u001b[0m Trial 53 finished with value: 0.5248984867652216 and parameters: {'lambda_l1': 0.02970872423104034, 'lambda_l2': 0.0197319338622143, 'num_leaves': 177, 'feature_fraction': 0.7150825328146555, 'bagging_fraction': 0.8972252525350597, 'bagging_freq': 7, 'min_child_samples': 12}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:01,886]\u001b[0m Trial 54 finished with value: 0.5310139507545261 and parameters: {'lambda_l1': 0.03922536031082296, 'lambda_l2': 1.7618527190204045e-06, 'num_leaves': 180, 'feature_fraction': 0.7089331672809696, 'bagging_fraction': 0.8021837464466384, 'bagging_freq': 7, 'min_child_samples': 11}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,043]\u001b[0m Trial 55 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,158]\u001b[0m Trial 56 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,937]\u001b[0m Trial 57 pruned. Trial was pruned at iteration 78.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:03,912]\u001b[0m Trial 58 finished with value: 0.5310990808119093 and parameters: {'lambda_l1': 0.008908141404368873, 'lambda_l2': 1.0024604099274968e-08, 'num_leaves': 156, 'feature_fraction': 0.7135958388327297, 'bagging_fraction': 0.8918708775644101, 'bagging_freq': 7, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:05,509]\u001b[0m Trial 59 finished with value: 0.5303075036750562 and parameters: {'lambda_l1': 0.0036415543019748704, 'lambda_l2': 0.060450481568631226, 'num_leaves': 245, 'feature_fraction': 0.6322922933331399, 'bagging_fraction': 0.8119277832780933, 'bagging_freq': 6, 'min_child_samples': 18}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:05,675]\u001b[0m Trial 60 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,717]\u001b[0m Trial 61 finished with value: 0.5266068329070613 and parameters: {'lambda_l1': 0.08187221145217663, 'lambda_l2': 0.12038514531389062, 'num_leaves': 230, 'feature_fraction': 0.5893130926352276, 'bagging_fraction': 0.9218811557192532, 'bagging_freq': 3, 'min_child_samples': 30}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,839]\u001b[0m Trial 62 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,957]\u001b[0m Trial 63 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,060]\u001b[0m Trial 64 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,235]\u001b[0m Trial 65 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,371]\u001b[0m Trial 66 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:08,252]\u001b[0m Trial 67 finished with value: 0.524615381244906 and parameters: {'lambda_l1': 0.0007372437461333507, 'lambda_l2': 0.07197819258619385, 'num_leaves': 217, 'feature_fraction': 0.7085126717217893, 'bagging_fraction': 0.9783185592011208, 'bagging_freq': 4, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:27:09,210]\u001b[0m Trial 68 finished with value: 0.524942396458539 and parameters: {'lambda_l1': 0.0011590093830295628, 'lambda_l2': 0.06353809127053359, 'num_leaves': 198, 'feature_fraction': 0.7106295012909083, 'bagging_fraction': 0.973655849802224, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:09,572]\u001b[0m Trial 69 pruned. Trial was pruned at iteration 29.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:10,511]\u001b[0m Trial 70 finished with value: 0.5227354154865925 and parameters: {'lambda_l1': 0.0019204609823774911, 'lambda_l2': 0.006957053438847566, 'num_leaves': 200, 'feature_fraction': 0.7174135338967843, 'bagging_fraction': 0.9747628414441452, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:11,445]\u001b[0m Trial 71 pruned. Trial was pruned at iteration 72.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:12,707]\u001b[0m Trial 72 finished with value: 0.5273794666616337 and parameters: {'lambda_l1': 0.00253464769953144, 'lambda_l2': 0.04250761519699366, 'num_leaves': 203, 'feature_fraction': 0.723989872880134, 'bagging_fraction': 0.9954485306248265, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:14,118]\u001b[0m Trial 73 finished with value: 0.5290081420408789 and parameters: {'lambda_l1': 0.0014187815549416435, 'lambda_l2': 0.12791352246804705, 'num_leaves': 218, 'feature_fraction': 0.7758653557130019, 'bagging_fraction': 0.9488928693793105, 'bagging_freq': 5, 'min_child_samples': 18}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:14,196]\u001b[0m Trial 74 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:15,517]\u001b[0m Trial 75 finished with value: 0.5325397214424649 and parameters: {'lambda_l1': 0.0005872739303922063, 'lambda_l2': 0.0096136159649407, 'num_leaves': 211, 'feature_fraction': 0.8012899648531023, 'bagging_fraction': 0.931501707649951, 'bagging_freq': 4, 'min_child_samples': 13}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:16,903]\u001b[0m Trial 76 finished with value: 0.5296781377317934 and parameters: {'lambda_l1': 0.006787205212552698, 'lambda_l2': 0.0790698959620889, 'num_leaves': 239, 'feature_fraction': 0.7454753912863651, 'bagging_fraction': 0.9799306593714261, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:17,956]\u001b[0m Trial 77 finished with value: 0.5309336374271006 and parameters: {'lambda_l1': 0.0037427478770181, 'lambda_l2': 0.8130901726120456, 'num_leaves': 232, 'feature_fraction': 0.7224358819185711, 'bagging_fraction': 0.9520121758601578, 'bagging_freq': 4, 'min_child_samples': 12}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:18,071]\u001b[0m Trial 78 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:19,373]\u001b[0m Trial 79 finished with value: 0.5347521802448649 and parameters: {'lambda_l1': 0.017570019491239652, 'lambda_l2': 0.05059260326524094, 'num_leaves': 226, 'feature_fraction': 0.6981808494111595, 'bagging_fraction': 0.9993645575870086, 'bagging_freq': 4, 'min_child_samples': 20}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:19,471]\u001b[0m Trial 80 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:21,230]\u001b[0m Trial 81 finished with value: 0.5227732830528738 and parameters: {'lambda_l1': 0.0003058538457597125, 'lambda_l2': 0.04324937485563167, 'num_leaves': 202, 'feature_fraction': 0.73206644303025, 'bagging_fraction': 0.9911170826848787, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:22,266]\u001b[0m Trial 82 finished with value: 0.5279916258627607 and parameters: {'lambda_l1': 0.0002580487853233371, 'lambda_l2': 0.015002378165105253, 'num_leaves': 213, 'feature_fraction': 0.665112654945876, 'bagging_fraction': 0.9341515771267435, 'bagging_freq': 5, 'min_child_samples': 11}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:26,040]\u001b[0m Trial 83 pruned. Trial was pruned at iteration 51.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,359]\u001b[0m Trial 84 finished with value: 0.5257703648298483 and parameters: {'lambda_l1': 0.09313058634431053, 'lambda_l2': 0.0794176561682967, 'num_leaves': 174, 'feature_fraction': 0.7330255912724464, 'bagging_fraction': 0.9556166881594967, 'bagging_freq': 6, 'min_child_samples': 14}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,749]\u001b[0m Trial 85 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,919]\u001b[0m Trial 86 pruned. Trial was pruned at iteration 17.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,026]\u001b[0m Trial 87 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,861]\u001b[0m Trial 88 finished with value: 0.5270306752854106 and parameters: {'lambda_l1': 0.0003495273937948516, 'lambda_l2': 0.057992141622577896, 'num_leaves': 167, 'feature_fraction': 0.7406493057210397, 'bagging_fraction': 0.8812843020741449, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,933]\u001b[0m Trial 89 pruned. Trial was pruned at iteration 11.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,029]\u001b[0m Trial 90 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,358]\u001b[0m Trial 91 pruned. Trial was pruned at iteration 48.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,714]\u001b[0m Trial 92 pruned. Trial was pruned at iteration 41.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,543]\u001b[0m Trial 93 finished with value: 0.5264391883178012 and parameters: {'lambda_l1': 0.055429809241931895, 'lambda_l2': 0.013315511259061849, 'num_leaves': 235, 'feature_fraction': 0.6209901742544794, 'bagging_fraction': 0.9603625243176316, 'bagging_freq': 5, 'min_child_samples': 9}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,825]\u001b[0m Trial 94 pruned. Trial was pruned at iteration 40.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,940]\u001b[0m Trial 95 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:33,525]\u001b[0m Trial 96 finished with value: 0.521566218012535 and parameters: {'lambda_l1': 0.001072724146880552, 'lambda_l2': 0.043446953857601264, 'num_leaves': 142, 'feature_fraction': 0.6922532844601723, 'bagging_fraction': 0.9898097804978216, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 96 with value: 0.521566218012535.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,149]\u001b[0m Trial 97 finished with value: 0.5196727190561812 and parameters: {'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,855]\u001b[0m Trial 98 finished with value: 0.5265000531725292 and parameters: {'lambda_l1': 0.0006868362572582551, 'lambda_l2': 0.040525833784093586, 'num_leaves': 143, 'feature_fraction': 0.6930600995921747, 'bagging_fraction': 0.9897374239207342, 'bagging_freq': 6, 'min_child_samples': 6}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,935]\u001b[0m Trial 99 pruned. Trial was pruned at iteration 10.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of finished trials: 100\n",
+ "Best trial:\n",
+ " Value: 0.5196727190561812\n",
+ " Params: \n",
+ " lambda_l1: 0.0025915189209523223\n",
+ " lambda_l2: 0.37941398551753863\n",
+ " num_leaves: 159\n",
+ " feature_fraction: 0.6942886423659413\n",
+ " bagging_fraction: 0.9888819332176355\n",
+ " bagging_freq: 6\n",
+ " min_child_samples: 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "if __name__ == \"__main__\":\n",
+ " study = optuna.create_study(\n",
+ " pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction=\"minimize\"\n",
+ " )\n",
+ " study.optimize(lambda trial: objective(trial, train_x, valid_x, train_y, valid_y), n_trials=100)\n",
+ "\n",
+ " print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
+ "\n",
+ " print(\"Best trial:\")\n",
+ " trial = study.best_trial\n",
+ "\n",
+ " print(\" Value: {}\".format(trial.value))\n",
+ "\n",
+ " print(\" Params: \")\n",
+ " for key, value in trial.params.items():\n",
+ " print(\" {}: {}\".format(key, value))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "681d9cc2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'boosting_type': 'gbdt', 'objective': 'regression', 'metric': 'rmse', 'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Baseline LightGBM settings, overlaid with the best trial's tuned hyperparameters.\n",
+ "params = {\n",
+ "    'boosting_type': 'gbdt', 'objective': 'regression', \"metric\": 'rmse',\n",
+ "    **trial.params,\n",
+ "}\n",
+ "print(params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "f5534bab",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000585 seconds.\n",
+ "You can set `force_row_wise=true` to remove the overhead.\n",
+ "And if memory is not enough, you can set `force_col_wise=true`.\n",
+ "[LightGBM] [Info] Total Bins 1837\n",
+ "[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
+ "[LightGBM] [Info] Start training from score 2.072422\n",
+ "Training until validation scores don't improve for 20 rounds\n",
+ "[50]\tvalid_0's rmse: 0.459286\n",
+ "[100]\tvalid_0's rmse: 0.449051\n",
+ "[150]\tvalid_0's rmse: 0.448356\n",
+ "Early stopping, best iteration is:\n",
+ "[166]\tvalid_0's rmse: 0.447872\n"
+ ]
+ }
+ ],
+ "source": [
+ "dtrain = lgb.Dataset(train_x, label=train_y)\n",
+ "dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
+ "model = lgb.train(params=params, train_set=dtrain,valid_sets=[dvalid],\n",
+ " verbose_eval=50,\n",
+ " early_stopping_rounds=20,\n",
+ " num_boost_round=5000)\n",
+ "predict = model.predict(test_x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "f28d82da",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.44403838770137805"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 评估指标rmse,越小越好\n",
+ "np.sqrt(mean_squared_error(test_y,predict))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e292bdb",
+ "metadata": {},
+ "source": [
+ "### 回归任务的结论\n",
+ "不使用optuna时,测试集RMSE为0.4531666044672748;使用optuna调参后,测试集RMSE为0.44403838770137805,降低了0.00912821676589675(RMSE越小越好)。"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "b2143f17",
+ "id": "36384535",
"metadata": {},
"outputs": [],
"source": []
diff --git a/竞赛优胜技巧/AutoAI Tools.ipynb b/竞赛优胜技巧/AutoAI Tools.ipynb
index ec7337f..3744e19 100644
--- a/竞赛优胜技巧/AutoAI Tools.ipynb
+++ b/竞赛优胜技巧/AutoAI Tools.ipynb
@@ -6,12 +6,12 @@
"metadata": {},
"source": [
"# 自动机器学习工具\n",
- "该notebook将比较市面上的多个AutoML工具,分别采用两组数据集进行比较,分别是波士顿房价(回归)和森林植被类型(多分类)"
+ "该notebook将比较市面上的多个AutoAI工具,分别采用两组数据集进行比较,分别是加利福尼亚住房数据集(回归)和森林植被类型(多分类)"
]
},
{
"cell_type": "markdown",
- "id": "ba41b787",
+ "id": "b3d4d608",
"metadata": {},
"source": [
"# optuna一种超参数优化框架\n",
@@ -20,7 +20,7 @@
},
{
"cell_type": "markdown",
- "id": "c4d7f73a",
+ "id": "a2d41e62",
"metadata": {},
"source": [
"### 波士顿房价预测任务(回归)"
@@ -28,97 +28,631 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "1d010375",
+ "execution_count": 7,
+ "id": "f5e0e977",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
+ "import pandas as pd\n",
"import time\n",
- "import gc"
+ "import gc\n",
+ "\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "import lightgbm as lgb # 使用lgb模型"
]
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "6d01294d",
+ "execution_count": 2,
+ "id": "5eda2637",
"metadata": {},
"outputs": [],
"source": [
- "from sklearn.datasets import load_boston\n",
- "# 预处理\n",
- "X, y = data['data'], data['target']\n",
- "# 由于模型标签需要从0开始,所以数字需要全部减1\n",
- "print('七分类任务,处理前:',np.unique(y))\n",
- "print(y)\n",
- "ord = OrdinalEncoder()\n",
- "y = ord.fit_transform(y.reshape(-1, 1))\n",
- "y = y.reshape(-1, )\n",
- "print('七分类任务,处理后:',np.unique(y))\n",
- "print(y)"
+ "from sklearn.datasets import fetch_california_housing\n",
+ "data = fetch_california_housing()\n",
+ "X, y = data['data'], data['target']"
]
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "80a90475",
+ "execution_count": 3,
+ "id": "722beb8e",
"metadata": {},
"outputs": [
{
"data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MedInc | \n",
+ " HouseAge | \n",
+ " AveRooms | \n",
+ " AveBedrms | \n",
+ " Population | \n",
+ " AveOccup | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 8.3252 | \n",
+ " 41.0 | \n",
+ " 6.984127 | \n",
+ " 1.02381 | \n",
+ " 322.0 | \n",
+ " 2.555556 | \n",
+ " 37.88 | \n",
+ " -122.23 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 8.3014 | \n",
+ " 21.0 | \n",
+ " 6.238137 | \n",
+ " 0.97188 | \n",
+ " 2401.0 | \n",
+ " 2.109842 | \n",
+ " 37.86 | \n",
+ " -122.22 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
"text/plain": [
- "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n",
- " 4.9800e+00],\n",
- " [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n",
- " 9.1400e+00],\n",
- " [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n",
- " 4.0300e+00],\n",
- " ...,\n",
- " [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
- " 5.6400e+00],\n",
- " [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n",
- " 6.4800e+00],\n",
- " [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n",
- " 7.8800e+00]])"
+ " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
+ "0 8.3252 41.0 6.984127 1.02381 322.0 2.555556 37.88 \n",
+ "1 8.3014 21.0 6.238137 0.97188 2401.0 2.109842 37.86 \n",
+ "\n",
+ " Longitude \n",
+ "0 -122.23 \n",
+ "1 -122.22 "
]
},
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data.data"
+ "X = pd.DataFrame(X,columns=data.feature_names)\n",
+ "X.head(2)"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "9977a37c",
+ "execution_count": 4,
+ "id": "08ebab89",
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 20640 entries, 0 to 20639\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 MedInc 20640 non-null float64\n",
+ " 1 HouseAge 20640 non-null float64\n",
+ " 2 AveRooms 20640 non-null float64\n",
+ " 3 AveBedrms 20640 non-null float64\n",
+ " 4 Population 20640 non-null float64\n",
+ " 5 AveOccup 20640 non-null float64\n",
+ " 6 Latitude 20640 non-null float64\n",
+ " 7 Longitude 20640 non-null float64\n",
+ "dtypes: float64(8)\n",
+ "memory usage: 1.3 MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "X.info()"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "e9193c33",
+ "execution_count": 5,
+ "id": "8932c66c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "训练集: (13932, 8)\n",
+ "验证集: (4644, 8)\n",
+ "测试集: (2064, 8)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 切分训练和测试集\n",
+ "train_valid_x, test_x, train_valid_y, test_y = train_test_split(X, y,random_state=42,test_size=0.1)\n",
+ "\n",
+ "# 切分训练和验证集\n",
+ "train_x, valid_x, train_y, valid_y = train_test_split(train_valid_x, train_valid_y,random_state=42)\n",
+ "print('训练集:',train_x.shape)\n",
+ "print('验证集:',valid_x.shape)\n",
+ "print('测试集:', test_x.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e87a2121",
+ "metadata": {},
+ "source": [
+ "### 使用LGB作为模型,不使用optuna调参"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "5cad8967",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001822 seconds.\n",
+ "You can set `force_col_wise=true` to remove the overhead.\n",
+ "[LightGBM] [Info] Total Bins 1837\n",
+ "[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
+ "[LightGBM] [Info] Start training from score 2.072422\n",
+ "Training until validation scores don't improve for 20 rounds\n",
+ "[50]\tvalid_0's rmse: 0.492877\n",
+ "[100]\tvalid_0's rmse: 0.471803\n",
+ "[150]\tvalid_0's rmse: 0.46445\n",
+ "[200]\tvalid_0's rmse: 0.459716\n",
+ "[250]\tvalid_0's rmse: 0.456658\n",
+ "[300]\tvalid_0's rmse: 0.454223\n",
+ "[350]\tvalid_0's rmse: 0.452644\n",
+ "Early stopping, best iteration is:\n",
+ "[342]\tvalid_0's rmse: 0.452522\n"
+ ]
+ }
+ ],
+ "source": [
+ "params = {'boosting_type': 'gbdt',\n",
+ " 'objective': 'regression',\n",
+ " \"metric\": 'rmse'}\n",
+ "dtrain = lgb.Dataset(train_x, label=train_y)\n",
+ "dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
+ "model = lgb.train(params=params, train_set=dtrain,valid_sets=[dvalid],\n",
+ " verbose_eval=50,\n",
+ " early_stopping_rounds=20,\n",
+ " num_boost_round=5000)\n",
+ "predict = model.predict(test_x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "886bfdad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.4531666044672748"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 评估指标rmse,越小越好\n",
+ "np.sqrt(mean_squared_error(test_y,predict))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c271442c",
+ "metadata": {},
+ "source": [
+ "### 使用LGB作为模型,使用optuna调参"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "9a87f9db",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
+ "Collecting optuna\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2a/b4/061c8721b5da1614794d1b66fcb212eee156efd5284f66854d02f295b0be/optuna-2.9.1-py3-none-any.whl (302 kB)\n",
+ "Requirement already satisfied: tqdm in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (4.59.0)\n",
+ "Collecting cmaes>=0.8.2\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/01/1f/43b01223a0366171f474320c6e966c39a11587287f098a5f09809b45e05f/cmaes-0.8.2-py3-none-any.whl (15 kB)\n",
+ "Requirement already satisfied: scipy!=1.4.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.1)\n",
+ "Collecting cliff\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/f7/2a98b032a43b2925ea32bc13a8feb6cf9416e7d2b2c0f6d2ce14636a03b1/cliff-3.9.0-py3-none-any.whl (80 kB)\n",
+ "Collecting alembic\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7a/5a/be479a2c379e6b3c57dc56ea3b139ad4d46c2d244a0035ac4d7475116076/alembic-1.7.1-py3-none-any.whl (208 kB)\n",
+ "Requirement already satisfied: packaging>=20.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (20.9)\n",
+ "Requirement already satisfied: numpy in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.18.5)\n",
+ "Collecting colorlog\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2d/93/4b0bb101e54206e92feb3c986c274902212b2ed8c55423e6e7f6d8b693ca/colorlog-6.4.1-py2.py3-none-any.whl (11 kB)\n",
+ "Requirement already satisfied: PyYAML in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (5.4.1)\n",
+ "Requirement already satisfied: sqlalchemy>=1.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from optuna) (1.4.7)\n",
+ "Requirement already satisfied: pyparsing>=2.0.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from packaging>=20.0->optuna) (2.4.7)\n",
+ "Requirement already satisfied: greenlet!=0.4.17 in d:\\programdata\\anaconda3\\lib\\site-packages (from sqlalchemy>=1.1.0->optuna) (1.0.0)\n",
+ "Collecting importlib-resources\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f2/6c/2f3b930513bb971172ffceb63cf4e910944e57451724e69b1dec97cfefa6/importlib_resources-5.2.2-py3-none-any.whl (27 kB)\n",
+ "Collecting Mako\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/75/69/c3ab0db9234fa5681a85a1c55203763a62902d56ad76b6d9b9bfa2c83694/Mako-1.1.5-py2.py3-none-any.whl (75 kB)\n",
+ "Collecting PrettyTable>=0.7.2\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/bd/b5/c09f8d237e060a9e7b5d2d1577c2a6bc49fa298a7b4aefd52146f2b9a62e/prettytable-2.2.0-py3-none-any.whl (23 kB)\n",
+ "Collecting pbr!=2.1.0,>=2.0.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/e0/1d4702dd81121d04a477c272d47ee5b6bc970d1a0990b11befa275c55cf2/pbr-5.6.0-py2.py3-none-any.whl (111 kB)\n",
+ "Collecting cmd2>=1.0.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/44/ca/d407811641ec1d8bd8a38ee3165d73aa44776d7700436bd4d4a6606f2736/cmd2-2.1.2-py3-none-any.whl (141 kB)\n",
+ "Collecting stevedore>=2.0.1\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b2/c5/036a9a6e220ea7406a36130e80cca33a3e6b98b5328cfdba4b46b2ed0786/stevedore-3.4.0-py3-none-any.whl (49 kB)\n",
+ "Collecting autopage>=0.4.0\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/18/a7/901e943318925f8ca3f7963616660065b3cf4e143b0327f88076ba5c4e22/autopage-0.4.0-py3-none-any.whl (20 kB)\n",
+ "Requirement already satisfied: colorama>=0.3.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.4.4)\n",
+ "Collecting pyreadline3\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/89/29/10fbb29d957dbcee77a0832eabb9953da80d6bb9514f7ca1b3d82f50219f/pyreadline3-3.3-py3-none-any.whl (95 kB)\n",
+ "Requirement already satisfied: attrs>=16.3.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (20.3.0)\n",
+ "Collecting pyperclip>=1.6\n",
+ " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a7/2c/4c64579f847bd5d539803c8b909e54ba087a79d01bb3aba433a95879a6c5/pyperclip-1.8.2.tar.gz (20 kB)\n",
+ "Requirement already satisfied: wcwidth>=0.1.7 in d:\\programdata\\anaconda3\\lib\\site-packages (from cmd2>=1.0.0->cliff->optuna) (0.2.5)\n",
+ "Requirement already satisfied: zipp>=3.1.0 in d:\\programdata\\anaconda3\\lib\\site-packages (from importlib-resources->alembic->optuna) (3.4.1)\n",
+ "Requirement already satisfied: MarkupSafe>=0.9.2 in d:\\programdata\\anaconda3\\lib\\site-packages (from Mako->alembic->optuna) (1.1.1)\n",
+ "Building wheels for collected packages: pyperclip\n",
+ " Building wheel for pyperclip (setup.py): started\n",
+ " Building wheel for pyperclip (setup.py): finished with status 'done'\n",
+ " Created wheel for pyperclip: filename=pyperclip-1.8.2-py3-none-any.whl size=11107 sha256=96b5a96e64d8d6ae264ae796623c0148d1ca71677462878fc1f07c74e1e794ff\n",
+ " Stored in directory: c:\\users\\administrator\\appdata\\local\\pip\\cache\\wheels\\30\\c0\\21\\bc13df81c8b032076577671a8ef05db4e168a335e07e64d9a7\n",
+ "Successfully built pyperclip\n",
+ "Installing collected packages: pyreadline3, pyperclip, pbr, stevedore, PrettyTable, Mako, importlib-resources, cmd2, autopage, colorlog, cmaes, cliff, alembic, optuna\n",
+ "Successfully installed Mako-1.1.5 PrettyTable-2.2.0 alembic-1.7.1 autopage-0.4.0 cliff-3.9.0 cmaes-0.8.2 cmd2-2.1.2 colorlog-6.4.1 importlib-resources-5.2.2 optuna-2.9.1 pbr-5.6.0 pyperclip-1.8.2 pyreadline3-3.3 stevedore-3.4.0\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install optuna==2.9.1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "d8a0279a",
"metadata": {},
"outputs": [],
"source": [
- "### 分类"
+ "import optuna"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "82687da3",
+ "execution_count": 18,
+ "id": "7433fdcb",
"metadata": {},
"outputs": [],
"source": [
- "from sklearn.datasets import fetch_covtype\n",
- "data = fetch_covtype()"
+ "def objective(trial, train_x, valid_x, train_y, valid_y):\n",
+ "    \"\"\"Train LightGBM with trial-sampled hyperparameters and return the validation RMSE.\n",
+ "\n",
+ "    train_x/train_y fit the model; valid_x/valid_y drive pruning and the returned score.\n",
+ "    Predictions are scored unrounded: the target is continuous, so rounding distorts RMSE.\n",
+ "    \"\"\"\n",
+ "    dtrain = lgb.Dataset(train_x, label=train_y)\n",
+ "    dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
+ "    param = {\n",
+ "        \"objective\": \"regression\",\n",
+ "        \"metric\": \"rmse\",\n",
+ "        \"verbosity\": -1,\n",
+ "        \"boosting_type\": \"gbdt\",\n",
+ "        'random_state': 42,\n",
+ "        \"lambda_l1\": trial.suggest_float(\"lambda_l1\", 1e-8, 10.0, log=True),\n",
+ "        \"lambda_l2\": trial.suggest_float(\"lambda_l2\", 1e-8, 10.0, log=True),\n",
+ "        \"num_leaves\": trial.suggest_int(\"num_leaves\", 2, 256),\n",
+ "        \"feature_fraction\": trial.suggest_float(\"feature_fraction\", 0.4, 1.0),\n",
+ "        \"bagging_fraction\": trial.suggest_float(\"bagging_fraction\", 0.4, 1.0),\n",
+ "        \"bagging_freq\": trial.suggest_int(\"bagging_freq\", 1, 7),\n",
+ "        \"min_child_samples\": trial.suggest_int(\"min_child_samples\", 5, 100),\n",
+ "    }\n",
+ "    # Let Optuna prune unpromising trials based on the validation RMSE seen during training.\n",
+ "    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, \"rmse\")\n",
+ "    gbm = lgb.train(\n",
+ "        param, dtrain, valid_sets=[dvalid], verbose_eval=False, callbacks=[pruning_callback]\n",
+ "    )\n",
+ "    preds = gbm.predict(valid_x)\n",
+ "    return np.sqrt(mean_squared_error(valid_y, preds))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "b9018adb",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:26:30,952]\u001b[0m A new study created in memory with name: no-name-f2147511-069d-495f-90ec-5990dc3c3716\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,284]\u001b[0m Trial 0 finished with value: 0.5579165649228477 and parameters: {'lambda_l1': 1.6814939560853405e-06, 'lambda_l2': 8.772634980486007, 'num_leaves': 79, 'feature_fraction': 0.4415746779386105, 'bagging_fraction': 0.5306839081914155, 'bagging_freq': 5, 'min_child_samples': 13}. Best is trial 0 with value: 0.5579165649228477.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,523]\u001b[0m Trial 1 finished with value: 0.5577111827415846 and parameters: {'lambda_l1': 0.00013684423280171766, 'lambda_l2': 0.010116712675880523, 'num_leaves': 42, 'feature_fraction': 0.4758659908396936, 'bagging_fraction': 0.8916447942940564, 'bagging_freq': 7, 'min_child_samples': 32}. Best is trial 1 with value: 0.5577111827415846.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:31,986]\u001b[0m Trial 2 finished with value: 0.5382832732103667 and parameters: {'lambda_l1': 1.849141022226266, 'lambda_l2': 9.297088403542385e-06, 'num_leaves': 94, 'feature_fraction': 0.6894755218120849, 'bagging_fraction': 0.9503182129837274, 'bagging_freq': 2, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,156]\u001b[0m Trial 3 finished with value: 0.5786765226592324 and parameters: {'lambda_l1': 4.448151837364203e-05, 'lambda_l2': 1.8955982808109254e-08, 'num_leaves': 19, 'feature_fraction': 0.493099451522876, 'bagging_fraction': 0.6742062281671596, 'bagging_freq': 5, 'min_child_samples': 35}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,266]\u001b[0m Trial 4 finished with value: 0.6139367553603606 and parameters: {'lambda_l1': 0.0002791229907486381, 'lambda_l2': 5.269713608638115, 'num_leaves': 5, 'feature_fraction': 0.8303631732661438, 'bagging_fraction': 0.942024874372832, 'bagging_freq': 4, 'min_child_samples': 61}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,826]\u001b[0m Trial 5 finished with value: 0.5453617455111696 and parameters: {'lambda_l1': 0.06860061560766911, 'lambda_l2': 0.2723830502667369, 'num_leaves': 98, 'feature_fraction': 0.7105587363977491, 'bagging_fraction': 0.5899933873219961, 'bagging_freq': 4, 'min_child_samples': 71}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:32,880]\u001b[0m Trial 6 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,189]\u001b[0m Trial 7 finished with value: 0.5588682864793495 and parameters: {'lambda_l1': 0.4689515611654304, 'lambda_l2': 1.5494838308859912e-07, 'num_leaves': 65, 'feature_fraction': 0.5583496504073044, 'bagging_fraction': 0.6569918432147945, 'bagging_freq': 5, 'min_child_samples': 58}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,462]\u001b[0m Trial 8 finished with value: 0.5447098867148504 and parameters: {'lambda_l1': 0.0010101561223389774, 'lambda_l2': 0.01515390989225731, 'num_leaves': 47, 'feature_fraction': 0.6245633996226619, 'bagging_fraction': 0.419624870667929, 'bagging_freq': 3, 'min_child_samples': 48}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:33,693]\u001b[0m Trial 9 pruned. Trial was pruned at iteration 80.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:34,181]\u001b[0m Trial 10 finished with value: 0.541110883425217 and parameters: {'lambda_l1': 1.4756346287368392e-08, 'lambda_l2': 1.3544296321997672e-05, 'num_leaves': 189, 'feature_fraction': 0.9751158697609862, 'bagging_fraction': 0.8348333900598492, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:34,761]\u001b[0m Trial 11 finished with value: 0.5430244062824532 and parameters: {'lambda_l1': 1.2282790653635484e-08, 'lambda_l2': 8.373619608433318e-06, 'num_leaves': 196, 'feature_fraction': 0.9236444858767681, 'bagging_fraction': 0.8122951589315216, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:35,237]\u001b[0m Trial 12 finished with value: 0.5479941202014348 and parameters: {'lambda_l1': 8.66531632367352, 'lambda_l2': 2.7356786615324263e-05, 'num_leaves': 182, 'feature_fraction': 0.7257944800491923, 'bagging_fraction': 0.8151059105693412, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:36,132]\u001b[0m Trial 13 finished with value: 0.5426396090105863 and parameters: {'lambda_l1': 1.5311804262482413e-08, 'lambda_l2': 8.31070641463058e-06, 'num_leaves': 241, 'feature_fraction': 0.9996713520233095, 'bagging_fraction': 0.9659578077631608, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 2 with value: 0.5382832732103667.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:36,961]\u001b[0m Trial 14 finished with value: 0.5367665957770634 and parameters: {'lambda_l1': 0.014095420115571123, 'lambda_l2': 0.0002563644914171101, 'num_leaves': 149, 'feature_fraction': 0.7883099949736571, 'bagging_fraction': 0.8086023984951658, 'bagging_freq': 2, 'min_child_samples': 85}. Best is trial 14 with value: 0.5367665957770634.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:37,824]\u001b[0m Trial 15 finished with value: 0.5348886658006932 and parameters: {'lambda_l1': 0.020071941122612545, 'lambda_l2': 0.0003014852059198554, 'num_leaves': 146, 'feature_fraction': 0.7936923799556383, 'bagging_fraction': 0.756430674635471, 'bagging_freq': 3, 'min_child_samples': 83}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:38,622]\u001b[0m Trial 16 finished with value: 0.5372333761766732 and parameters: {'lambda_l1': 0.015173696268463625, 'lambda_l2': 0.0016757129971514612, 'num_leaves': 138, 'feature_fraction': 0.8107104336197675, 'bagging_fraction': 0.7376130779961089, 'bagging_freq': 3, 'min_child_samples': 78}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:39,451]\u001b[0m Trial 17 finished with value: 0.5376252190781058 and parameters: {'lambda_l1': 0.007357005023837469, 'lambda_l2': 0.000250874148036676, 'num_leaves': 147, 'feature_fraction': 0.7829205508785075, 'bagging_fraction': 0.7478270069878126, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 15 with value: 0.5348886658006932.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:39,567]\u001b[0m Trial 18 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:40,910]\u001b[0m Trial 19 finished with value: 0.5315793527934071 and parameters: {'lambda_l1': 0.013209811084277438, 'lambda_l2': 0.06726287443306718, 'num_leaves': 231, 'feature_fraction': 0.637876115312563, 'bagging_fraction': 0.8716817438233221, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 19 with value: 0.5315793527934071.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:42,324]\u001b[0m Trial 20 finished with value: 0.5262550702976 and parameters: {'lambda_l1': 0.003113216663103784, 'lambda_l2': 0.24783337352027449, 'num_leaves': 242, 'feature_fraction': 0.6137607392321572, 'bagging_fraction': 0.8936473883645825, 'bagging_freq': 3, 'min_child_samples': 44}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:43,182]\u001b[0m Trial 21 finished with value: 0.5297204439253479 and parameters: {'lambda_l1': 0.003978575421253158, 'lambda_l2': 0.11658574784670588, 'num_leaves': 254, 'feature_fraction': 0.6168955771891016, 'bagging_fraction': 0.8787433941466242, 'bagging_freq': 3, 'min_child_samples': 43}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:43,310]\u001b[0m Trial 22 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:44,194]\u001b[0m Trial 23 finished with value: 0.5288803542960898 and parameters: {'lambda_l1': 8.341315535148706e-06, 'lambda_l2': 0.24030532904199514, 'num_leaves': 221, 'feature_fraction': 0.5901567080654435, 'bagging_fraction': 0.8875920940685119, 'bagging_freq': 3, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:44,331]\u001b[0m Trial 24 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:45,248]\u001b[0m Trial 25 finished with value: 0.5310533243420329 and parameters: {'lambda_l1': 1.9500743317527624e-07, 'lambda_l2': 0.0502751774074574, 'num_leaves': 214, 'feature_fraction': 0.5734739546088348, 'bagging_fraction': 0.9115976551406307, 'bagging_freq': 2, 'min_child_samples': 24}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:26:46,126]\u001b[0m Trial 26 finished with value: 0.5310399189972692 and parameters: {'lambda_l1': 5.65218762871428e-06, 'lambda_l2': 1.6944446205708956, 'num_leaves': 256, 'feature_fraction': 0.6693624417062835, 'bagging_fraction': 0.8606541857435407, 'bagging_freq': 4, 'min_child_samples': 25}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,541]\u001b[0m Trial 27 finished with value: 0.5289141422774571 and parameters: {'lambda_l1': 3.372780137131952e-07, 'lambda_l2': 0.004231573716537988, 'num_leaves': 222, 'feature_fraction': 0.5834642111209242, 'bagging_fraction': 0.9283224808721449, 'bagging_freq': 3, 'min_child_samples': 33}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,734]\u001b[0m Trial 28 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:47,883]\u001b[0m Trial 29 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,032]\u001b[0m Trial 30 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,233]\u001b[0m Trial 31 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:48,470]\u001b[0m Trial 32 pruned. Trial was pruned at iteration 12.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:50,256]\u001b[0m Trial 33 finished with value: 0.5307511312800164 and parameters: {'lambda_l1': 8.331346294111473e-08, 'lambda_l2': 0.018173663934055492, 'num_leaves': 238, 'feature_fraction': 0.5874989674601676, 'bagging_fraction': 0.8876263960076438, 'bagging_freq': 2, 'min_child_samples': 30}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,081]\u001b[0m Trial 34 finished with value: 0.5364692667074208 and parameters: {'lambda_l1': 8.939808862490256e-07, 'lambda_l2': 1.3094120587455667, 'num_leaves': 249, 'feature_fraction': 0.739630943171398, 'bagging_fraction': 0.9999691110926188, 'bagging_freq': 3, 'min_child_samples': 17}. Best is trial 20 with value: 0.5262550702976.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,228]\u001b[0m Trial 35 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,401]\u001b[0m Trial 36 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,527]\u001b[0m Trial 37 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,646]\u001b[0m Trial 38 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:52,746]\u001b[0m Trial 39 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:53,973]\u001b[0m Trial 40 finished with value: 0.5256364275490798 and parameters: {'lambda_l1': 0.0710216045724967, 'lambda_l2': 0.10020172057765622, 'num_leaves': 206, 'feature_fraction': 0.6441352660567405, 'bagging_fraction': 0.8538258125595672, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 40 with value: 0.5256364275490798.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:54,896]\u001b[0m Trial 41 finished with value: 0.5233364787469251 and parameters: {'lambda_l1': 0.23671365201549266, 'lambda_l2': 0.0749439220683321, 'num_leaves': 210, 'feature_fraction': 0.6436961501856725, 'bagging_fraction': 0.8521968194803821, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:55,831]\u001b[0m Trial 42 finished with value: 0.5293150200028972 and parameters: {'lambda_l1': 0.6574943326302751, 'lambda_l2': 0.0361443234291568, 'num_leaves': 209, 'feature_fraction': 0.6596272574104582, 'bagging_fraction': 0.850673456207462, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:56,858]\u001b[0m Trial 43 finished with value: 0.5308606295175099 and parameters: {'lambda_l1': 0.07618036054920922, 'lambda_l2': 0.22544030283002933, 'num_leaves': 226, 'feature_fraction': 0.6984185669610476, 'bagging_fraction': 0.7845150865412134, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,019]\u001b[0m Trial 44 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,787]\u001b[0m Trial 45 finished with value: 0.5331475267867624 and parameters: {'lambda_l1': 1.4967349278475324, 'lambda_l2': 0.0008951722845905283, 'num_leaves': 207, 'feature_fraction': 0.7339992851750987, 'bagging_fraction': 0.919959739142468, 'bagging_freq': 5, 'min_child_samples': 26}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,865]\u001b[0m Trial 46 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:57,971]\u001b[0m Trial 47 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,069]\u001b[0m Trial 48 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,168]\u001b[0m Trial 49 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,317]\u001b[0m Trial 50 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:58,445]\u001b[0m Trial 51 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:26:59,558]\u001b[0m Trial 52 finished with value: 0.5287423259473492 and parameters: {'lambda_l1': 0.44209025556975773, 'lambda_l2': 0.02237337201530471, 'num_leaves': 180, 'feature_fraction': 0.6816257587535688, 'bagging_fraction': 0.8645245816570197, 'bagging_freq': 7, 'min_child_samples': 5}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:00,776]\u001b[0m Trial 53 finished with value: 0.5248984867652216 and parameters: {'lambda_l1': 0.02970872423104034, 'lambda_l2': 0.0197319338622143, 'num_leaves': 177, 'feature_fraction': 0.7150825328146555, 'bagging_fraction': 0.8972252525350597, 'bagging_freq': 7, 'min_child_samples': 12}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:01,886]\u001b[0m Trial 54 finished with value: 0.5310139507545261 and parameters: {'lambda_l1': 0.03922536031082296, 'lambda_l2': 1.7618527190204045e-06, 'num_leaves': 180, 'feature_fraction': 0.7089331672809696, 'bagging_fraction': 0.8021837464466384, 'bagging_freq': 7, 'min_child_samples': 11}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,043]\u001b[0m Trial 55 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,158]\u001b[0m Trial 56 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:02,937]\u001b[0m Trial 57 pruned. Trial was pruned at iteration 78.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:03,912]\u001b[0m Trial 58 finished with value: 0.5310990808119093 and parameters: {'lambda_l1': 0.008908141404368873, 'lambda_l2': 1.0024604099274968e-08, 'num_leaves': 156, 'feature_fraction': 0.7135958388327297, 'bagging_fraction': 0.8918708775644101, 'bagging_freq': 7, 'min_child_samples': 10}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:05,509]\u001b[0m Trial 59 finished with value: 0.5303075036750562 and parameters: {'lambda_l1': 0.0036415543019748704, 'lambda_l2': 0.060450481568631226, 'num_leaves': 245, 'feature_fraction': 0.6322922933331399, 'bagging_fraction': 0.8119277832780933, 'bagging_freq': 6, 'min_child_samples': 18}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:05,675]\u001b[0m Trial 60 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,717]\u001b[0m Trial 61 finished with value: 0.5266068329070613 and parameters: {'lambda_l1': 0.08187221145217663, 'lambda_l2': 0.12038514531389062, 'num_leaves': 230, 'feature_fraction': 0.5893130926352276, 'bagging_fraction': 0.9218811557192532, 'bagging_freq': 3, 'min_child_samples': 30}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,839]\u001b[0m Trial 62 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:06,957]\u001b[0m Trial 63 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,060]\u001b[0m Trial 64 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,235]\u001b[0m Trial 65 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:07,371]\u001b[0m Trial 66 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:08,252]\u001b[0m Trial 67 finished with value: 0.524615381244906 and parameters: {'lambda_l1': 0.0007372437461333507, 'lambda_l2': 0.07197819258619385, 'num_leaves': 217, 'feature_fraction': 0.7085126717217893, 'bagging_fraction': 0.9783185592011208, 'bagging_freq': 4, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m[I 2021-09-07 09:27:09,210]\u001b[0m Trial 68 finished with value: 0.524942396458539 and parameters: {'lambda_l1': 0.0011590093830295628, 'lambda_l2': 0.06353809127053359, 'num_leaves': 198, 'feature_fraction': 0.7106295012909083, 'bagging_fraction': 0.973655849802224, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 41 with value: 0.5233364787469251.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:09,572]\u001b[0m Trial 69 pruned. Trial was pruned at iteration 29.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:10,511]\u001b[0m Trial 70 finished with value: 0.5227354154865925 and parameters: {'lambda_l1': 0.0019204609823774911, 'lambda_l2': 0.006957053438847566, 'num_leaves': 200, 'feature_fraction': 0.7174135338967843, 'bagging_fraction': 0.9747628414441452, 'bagging_freq': 5, 'min_child_samples': 8}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:11,445]\u001b[0m Trial 71 pruned. Trial was pruned at iteration 72.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:12,707]\u001b[0m Trial 72 finished with value: 0.5273794666616337 and parameters: {'lambda_l1': 0.00253464769953144, 'lambda_l2': 0.04250761519699366, 'num_leaves': 203, 'feature_fraction': 0.723989872880134, 'bagging_fraction': 0.9954485306248265, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:14,118]\u001b[0m Trial 73 finished with value: 0.5290081420408789 and parameters: {'lambda_l1': 0.0014187815549416435, 'lambda_l2': 0.12791352246804705, 'num_leaves': 218, 'feature_fraction': 0.7758653557130019, 'bagging_fraction': 0.9488928693793105, 'bagging_freq': 5, 'min_child_samples': 18}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:14,196]\u001b[0m Trial 74 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:15,517]\u001b[0m Trial 75 finished with value: 0.5325397214424649 and parameters: {'lambda_l1': 0.0005872739303922063, 'lambda_l2': 0.0096136159649407, 'num_leaves': 211, 'feature_fraction': 0.8012899648531023, 'bagging_fraction': 0.931501707649951, 'bagging_freq': 4, 'min_child_samples': 13}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:16,903]\u001b[0m Trial 76 finished with value: 0.5296781377317934 and parameters: {'lambda_l1': 0.006787205212552698, 'lambda_l2': 0.0790698959620889, 'num_leaves': 239, 'feature_fraction': 0.7454753912863651, 'bagging_fraction': 0.9799306593714261, 'bagging_freq': 5, 'min_child_samples': 7}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:17,956]\u001b[0m Trial 77 finished with value: 0.5309336374271006 and parameters: {'lambda_l1': 0.0037427478770181, 'lambda_l2': 0.8130901726120456, 'num_leaves': 232, 'feature_fraction': 0.7224358819185711, 'bagging_fraction': 0.9520121758601578, 'bagging_freq': 4, 'min_child_samples': 12}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:18,071]\u001b[0m Trial 78 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:19,373]\u001b[0m Trial 79 finished with value: 0.5347521802448649 and parameters: {'lambda_l1': 0.017570019491239652, 'lambda_l2': 0.05059260326524094, 'num_leaves': 226, 'feature_fraction': 0.6981808494111595, 'bagging_fraction': 0.9993645575870086, 'bagging_freq': 4, 'min_child_samples': 20}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:19,471]\u001b[0m Trial 80 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:21,230]\u001b[0m Trial 81 finished with value: 0.5227732830528738 and parameters: {'lambda_l1': 0.0003058538457597125, 'lambda_l2': 0.04324937485563167, 'num_leaves': 202, 'feature_fraction': 0.73206644303025, 'bagging_fraction': 0.9911170826848787, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:22,266]\u001b[0m Trial 82 finished with value: 0.5279916258627607 and parameters: {'lambda_l1': 0.0002580487853233371, 'lambda_l2': 0.015002378165105253, 'num_leaves': 213, 'feature_fraction': 0.665112654945876, 'bagging_fraction': 0.9341515771267435, 'bagging_freq': 5, 'min_child_samples': 11}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:26,040]\u001b[0m Trial 83 pruned. Trial was pruned at iteration 51.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,359]\u001b[0m Trial 84 finished with value: 0.5257703648298483 and parameters: {'lambda_l1': 0.09313058634431053, 'lambda_l2': 0.0794176561682967, 'num_leaves': 174, 'feature_fraction': 0.7330255912724464, 'bagging_fraction': 0.9556166881594967, 'bagging_freq': 6, 'min_child_samples': 14}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,749]\u001b[0m Trial 85 pruned. Trial was pruned at iteration 30.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:29,919]\u001b[0m Trial 86 pruned. Trial was pruned at iteration 17.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,026]\u001b[0m Trial 87 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,861]\u001b[0m Trial 88 finished with value: 0.5270306752854106 and parameters: {'lambda_l1': 0.0003495273937948516, 'lambda_l2': 0.057992141622577896, 'num_leaves': 167, 'feature_fraction': 0.7406493057210397, 'bagging_fraction': 0.8812843020741449, 'bagging_freq': 5, 'min_child_samples': 10}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:30,933]\u001b[0m Trial 89 pruned. Trial was pruned at iteration 11.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,029]\u001b[0m Trial 90 pruned. Trial was pruned at iteration 10.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,358]\u001b[0m Trial 91 pruned. Trial was pruned at iteration 48.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:31,714]\u001b[0m Trial 92 pruned. Trial was pruned at iteration 41.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,543]\u001b[0m Trial 93 finished with value: 0.5264391883178012 and parameters: {'lambda_l1': 0.055429809241931895, 'lambda_l2': 0.013315511259061849, 'num_leaves': 235, 'feature_fraction': 0.6209901742544794, 'bagging_fraction': 0.9603625243176316, 'bagging_freq': 5, 'min_child_samples': 9}. Best is trial 70 with value: 0.5227354154865925.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,825]\u001b[0m Trial 94 pruned. Trial was pruned at iteration 40.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:32,940]\u001b[0m Trial 95 pruned. Trial was pruned at iteration 13.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:33,525]\u001b[0m Trial 96 finished with value: 0.521566218012535 and parameters: {'lambda_l1': 0.001072724146880552, 'lambda_l2': 0.043446953857601264, 'num_leaves': 142, 'feature_fraction': 0.6922532844601723, 'bagging_fraction': 0.9898097804978216, 'bagging_freq': 5, 'min_child_samples': 5}. Best is trial 96 with value: 0.521566218012535.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,149]\u001b[0m Trial 97 finished with value: 0.5196727190561812 and parameters: {'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,855]\u001b[0m Trial 98 finished with value: 0.5265000531725292 and parameters: {'lambda_l1': 0.0006868362572582551, 'lambda_l2': 0.040525833784093586, 'num_leaves': 143, 'feature_fraction': 0.6930600995921747, 'bagging_fraction': 0.9897374239207342, 'bagging_freq': 6, 'min_child_samples': 6}. Best is trial 97 with value: 0.5196727190561812.\u001b[0m\n",
+ "\u001b[32m[I 2021-09-07 09:27:34,935]\u001b[0m Trial 99 pruned. Trial was pruned at iteration 10.\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of finished trials: 100\n",
+ "Best trial:\n",
+ " Value: 0.5196727190561812\n",
+ " Params: \n",
+ " lambda_l1: 0.0025915189209523223\n",
+ " lambda_l2: 0.37941398551753863\n",
+ " num_leaves: 159\n",
+ " feature_fraction: 0.6942886423659413\n",
+ " bagging_fraction: 0.9888819332176355\n",
+ " bagging_freq: 6\n",
+ " min_child_samples: 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "if __name__ == \"__main__\":\n",
+ " study = optuna.create_study(\n",
+ " pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction=\"minimize\"\n",
+ " )\n",
+ " study.optimize(lambda trial: objective(trial, train_x, valid_x, train_y, valid_y), n_trials=100)\n",
+ "\n",
+ " print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
+ "\n",
+ " print(\"Best trial:\")\n",
+ " trial = study.best_trial\n",
+ "\n",
+ " print(\" Value: {}\".format(trial.value))\n",
+ "\n",
+ " print(\" Params: \")\n",
+ " for key, value in trial.params.items():\n",
+ " print(\" {}: {}\".format(key, value))"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "681d9cc2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'boosting_type': 'gbdt', 'objective': 'regression', 'metric': 'rmse', 'lambda_l1': 0.0025915189209523223, 'lambda_l2': 0.37941398551753863, 'num_leaves': 159, 'feature_fraction': 0.6942886423659413, 'bagging_fraction': 0.9888819332176355, 'bagging_freq': 6, 'min_child_samples': 5}\n"
+ ]
+ }
+ ],
+ "source": [
+ "params = {'boosting_type': 'gbdt',\n",
+ " 'objective': 'regression',\n",
+ " \"metric\": 'rmse'}\n",
+ "for key, value in trial.params.items():\n",
+ " params[key]=value\n",
+ "print(params)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "f5534bab",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000585 seconds.\n",
+ "You can set `force_row_wise=true` to remove the overhead.\n",
+ "And if memory is not enough, you can set `force_col_wise=true`.\n",
+ "[LightGBM] [Info] Total Bins 1837\n",
+ "[LightGBM] [Info] Number of data points in the train set: 13932, number of used features: 8\n",
+ "[LightGBM] [Info] Start training from score 2.072422\n",
+ "Training until validation scores don't improve for 20 rounds\n",
+ "[50]\tvalid_0's rmse: 0.459286\n",
+ "[100]\tvalid_0's rmse: 0.449051\n",
+ "[150]\tvalid_0's rmse: 0.448356\n",
+ "Early stopping, best iteration is:\n",
+ "[166]\tvalid_0's rmse: 0.447872\n"
+ ]
+ }
+ ],
+ "source": [
+ "dtrain = lgb.Dataset(train_x, label=train_y)\n",
+ "dvalid = lgb.Dataset(valid_x, label=valid_y)\n",
+ "model = lgb.train(params=params, train_set=dtrain,valid_sets=[dvalid],\n",
+ " verbose_eval=50,\n",
+ " early_stopping_rounds=20,\n",
+ " num_boost_round=5000)\n",
+ "predict = model.predict(test_x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "f28d82da",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.44403838770137805"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 评估指标rmse,越小越好\n",
+ "np.sqrt(mean_squared_error(test_y,predict))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e292bdb",
+ "metadata": {},
+ "source": [
+ "### 回归任务的结论\n",
+ "不使用optuna的RMSE是0.4531666044672748,使用optuna的RMSE是0.44403838770137805,降低了0.00912821676589675(RMSE越小越好)。"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36384535",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {