diff --git a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/4-模型预测及评估.ipynb b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/4-模型预测及评估.ipynb deleted file mode 100644 index b6ee0ca..0000000 --- a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/4-模型预测及评估.ipynb +++ /dev/null @@ -1,1540 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import lightgbm as lgb # 模型\n", - "import pandas as pd # 数据处理包\n", - "import numpy as np # 数据处理包\n", - "from sklearn import metrics # 混淆矩阵\n", - "from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split # 分层五折验证包、寻找最优参函数、切分数据\n", - "from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix # 准确率、roc计算、auc计算、混淆矩阵\n", - "import matplotlib.pyplot as plt # 图形处理包\n", - "import itertools # 处理混淆矩阵\n", - "import gc # 处理缓存,有兴趣的可以搜搜怎么使用\n", - "import warnings # 忽略普通警告,不打印太多东西\n", - "warnings.filterwarnings('ignore')\n", - "plt.rcParams['font.sans-serif']=['SimHei'] # 让图形可以显示中文\n", - "plt.rcParams['axes.unicode_minus']=False" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " | user_id | \n", - "sku_id | \n", - "cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "... | \n", - "cate_action_4_mean | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "200001.0 | \n", - "20308.0 | \n", - "8.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "214.800000 | \n", - "665.166667 | \n", - "185387.233333 | \n", - "1.0 | \n", - "0.0132 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
1 | \n", - "200001.0 | \n", - "38604.0 | \n", - "9.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "59.033333 | \n", - "106.600000 | \n", - "37389.133333 | \n", - "1.0 | \n", - "0.0250 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
2 | \n", - "200001.0 | \n", - "164215.0 | \n", - "8.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "214.800000 | \n", - "665.166667 | \n", - "185387.233333 | \n", - "1.0 | \n", - "0.0386 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
3 | \n", - "200002.0 | \n", - "2866.0 | \n", - "9.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "10.0 | \n", - "28.0 | \n", - "... | \n", - "59.033333 | \n", - "106.600000 | \n", - "37389.133333 | \n", - "0.0 | \n", - "0.0000 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "
4 | \n", - "200002.0 | \n", - "3673.0 | \n", - "9.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "4.0 | \n", - "30.0 | \n", - "... | \n", - "59.033333 | \n", - "106.600000 | \n", - "37389.133333 | \n", - "1.0 | \n", - "0.0436 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
5 rows × 236 columns
\n", - "\n", - " | user_id | \n", - "sku_id | \n", - "cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "... | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "label | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "202633.0 | \n", - "12564.0 | \n", - "8.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "1.0 | \n", - "... | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0260 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "1.0 | \n", - "
1 | \n", - "218498.0 | \n", - "149854.0 | \n", - "8.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "4.0 | \n", - "2.0 | \n", - "... | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0403 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "1.0 | \n", - "
2 | \n", - "221842.0 | \n", - "75877.0 | \n", - "8.0 | \n", - "3.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "5.0 | \n", - "79.0 | \n", - "... | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0245 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "1.0 | \n", - "
3 | \n", - "222886.0 | \n", - "154636.0 | \n", - "8.0 | \n", - "20.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "26.0 | \n", - "10.0 | \n", - "... | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0208 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "1.0 | \n", - "
4 | \n", - "235240.0 | \n", - "38222.0 | \n", - "8.0 | \n", - "30.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "28.0 | \n", - "55.0 | \n", - "... | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0166 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "1.0 | \n", - "
5 rows × 237 columns
\n", - "\n", - " | user_id | \n", - "sku_id | \n", - "cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "... | \n", - "cate_action_4_mean | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "202633.0 | \n", - "12564.0 | \n", - "8.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "1.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0260 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
1 | \n", - "218498.0 | \n", - "149854.0 | \n", - "8.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "4.0 | \n", - "2.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0403 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
2 | \n", - "221842.0 | \n", - "75877.0 | \n", - "8.0 | \n", - "3.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "5.0 | \n", - "79.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0245 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
3 | \n", - "222886.0 | \n", - "154636.0 | \n", - "8.0 | \n", - "20.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "26.0 | \n", - "10.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0208 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
4 | \n", - "235240.0 | \n", - "38222.0 | \n", - "8.0 | \n", - "30.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "28.0 | \n", - "55.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0166 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
5 rows × 236 columns
\n", - "\n", - " | label | \n", - "
---|---|
0 | \n", - "1.0 | \n", - "
1 | \n", - "1.0 | \n", - "
2 | \n", - "1.0 | \n", - "
3 | \n", - "1.0 | \n", - "
4 | \n", - "1.0 | \n", - "
\n", - " | cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "action_before_3_2.0_y | \n", - "action_before_3_3.0_y | \n", - "... | \n", - "cate_action_4_mean | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "8.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0260 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
1 | \n", - "8.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "4.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0403 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
2 | \n", - "8.0 | \n", - "3.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "5.0 | \n", - "79.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0245 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
3 | \n", - "8.0 | \n", - "20.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "26.0 | \n", - "10.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0208 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
4 | \n", - "8.0 | \n", - "30.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "28.0 | \n", - "55.0 | \n", - "0.0 | \n", - "0.0 | \n", - "... | \n", - "8.4 | \n", - "20.866667 | \n", - "5167.6 | \n", - "1.0 | \n", - "0.0166 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
5 rows × 234 columns
\n", - "\n", - " | user_id | \n", - "sku_id | \n", - "cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "... | \n", - "cate_action_4_mean | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "200005.0 | \n", - "67444.0 | \n", - "4.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "26.0 | \n", - "... | \n", - "73.400000 | \n", - "169.366667 | \n", - "48251.0 | \n", - "1.0 | \n", - "0.0821 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
1 | \n", - "200005.0 | \n", - "72967.0 | \n", - "4.0 | \n", - "26.0 | \n", - "1.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "30.0 | \n", - "2.0 | \n", - "... | \n", - "73.400000 | \n", - "169.366667 | \n", - "48251.0 | \n", - "1.0 | \n", - "0.0196 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
2 | \n", - "200007.0 | \n", - "26229.0 | \n", - "9.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "12.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "1.0 | \n", - "0.0198 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
3 | \n", - "200007.0 | \n", - "63315.0 | \n", - "9.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "10.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "1.0 | \n", - "0.0476 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
4 | \n", - "200007.0 | \n", - "126404.0 | \n", - "9.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "10.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "0.0 | \n", - "0.0000 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
5 rows × 236 columns
\n", - "\n", - " | user_id | \n", - "sku_id | \n", - "cate | \n", - "action_before_3_1.0_x | \n", - "action_before_3_2.0_x | \n", - "action_before_3_3.0_x | \n", - "action_before_3_4.0_x | \n", - "action_before_3_5.0_x | \n", - "action_before_3_6.0_x | \n", - "action_before_3_1.0_y | \n", - "... | \n", - "cate_action_4_mean | \n", - "cate_action_5_mean | \n", - "cate_action_6_mean | \n", - "has_bad_comment | \n", - "bad_comment_rate | \n", - "comment_num_0 | \n", - "comment_num_1 | \n", - "comment_num_2 | \n", - "comment_num_3 | \n", - "comment_num_4 | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "200005.0 | \n", - "67444.0 | \n", - "4.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "26.0 | \n", - "... | \n", - "73.400000 | \n", - "169.366667 | \n", - "48251.0 | \n", - "1.0 | \n", - "0.0821 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
1 | \n", - "200005.0 | \n", - "72967.0 | \n", - "4.0 | \n", - "26.0 | \n", - "1.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "30.0 | \n", - "2.0 | \n", - "... | \n", - "73.400000 | \n", - "169.366667 | \n", - "48251.0 | \n", - "1.0 | \n", - "0.0196 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
2 | \n", - "200007.0 | \n", - "26229.0 | \n", - "9.0 | \n", - "2.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "12.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "1.0 | \n", - "0.0198 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
3 | \n", - "200007.0 | \n", - "63315.0 | \n", - "9.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "10.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "1.0 | \n", - "0.0476 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
4 | \n", - "200007.0 | \n", - "126404.0 | \n", - "9.0 | \n", - "4.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "10.0 | \n", - "... | \n", - "20.766667 | \n", - "56.700000 | \n", - "12937.7 | \n", - "0.0 | \n", - "0.0000 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "
5 rows × 236 columns
\n", - "