From 44417ebbb53a15248626655dcd8b7ee47728ec61 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Sun, 14 Feb 2021 09:57:35 +0800 Subject: [PATCH] =?UTF-8?q?Delete=204-=E6=A8=A1=E5=9E=8B=E9=A2=84=E6=B5=8B?= =?UTF-8?q?=E5=8F=8A=E8=AF=84=E4=BC=B0-checkpoint.ipynb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../4-模型预测及评估-checkpoint.ipynb | 1055 ----------------- 1 file changed, 1055 deletions(-) delete mode 100644 机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/4-模型预测及评估-checkpoint.ipynb diff --git a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/4-模型预测及评估-checkpoint.ipynb b/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/4-模型预测及评估-checkpoint.ipynb deleted file mode 100644 index 4686da3..0000000 --- a/机器学习竞赛实战_优胜解决方案/京东用户购买意向预测/.ipynb_checkpoints/4-模型预测及评估-checkpoint.ipynb +++ /dev/null @@ -1,1055 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import lightgbm as lgb # 模型\n", - "import pandas as pd # 数据处理包\n", - "import numpy as np # 数据处理包\n", - "from sklearn import metrics # 混淆矩阵\n", - "from sklearn.model_selection import StratifiedKFold, GridSearchCV, train_test_split # 分层五折验证包、寻找最优参函数、切分数据\n", - "from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix # 准确率、roc计算、auc计算、混淆矩阵\n", - "import matplotlib.pyplot as plt # 图形处理包\n", - "import itertools # 处理混淆矩阵\n", - "import gc # 处理缓存,有兴趣的可以搜搜怎么使用\n", - "import warnings # 忽略普通警告,不打印太多东西\n", - "warnings.filterwarnings('ignore')\n", - "plt.rcParams['font.sans-serif']=['SimHei'] # 让图形可以显示中文\n", - "plt.rcParams['axes.unicode_minus']=False" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_idsku_idcateaction_before_3_1.0_xaction_before_3_2.0_xaction_before_3_3.0_xaction_before_3_4.0_xaction_before_3_5.0_xaction_before_3_6.0_xaction_before_3_1.0_y...cate_action_5_meancate_action_6_meanhas_bad_commentbad_comment_ratecomment_num_0comment_num_1comment_num_2comment_num_3comment_num_4label
0202633.012564.08.01.00.00.00.00.02.01.0...20.8666675167.61.00.02600.00.00.00.01.01.0
1218498.0149854.08.04.00.00.00.00.04.02.0...20.8666675167.61.00.04030.00.00.00.01.01.0
2221842.075877.08.03.00.00.00.00.05.079.0...20.8666675167.61.00.02450.00.00.00.01.01.0
3222886.0154636.08.020.01.00.00.00.026.010.0...20.8666675167.61.00.02080.00.00.00.01.01.0
4235240.038222.08.030.01.00.00.00.028.055.0...20.8666675167.61.00.01660.00.00.00.01.01.0
\n", - "

5 rows × 237 columns

\n", - "
" - ], - "text/plain": [ - " user_id sku_id cate action_before_3_1.0_x action_before_3_2.0_x \\\n", - "0 202633.0 12564.0 8.0 1.0 0.0 \n", - "1 218498.0 149854.0 8.0 4.0 0.0 \n", - "2 221842.0 75877.0 8.0 3.0 0.0 \n", - "3 222886.0 154636.0 8.0 20.0 1.0 \n", - "4 235240.0 38222.0 8.0 30.0 1.0 \n", - "\n", - " action_before_3_3.0_x action_before_3_4.0_x action_before_3_5.0_x \\\n", - "0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 \n", - "\n", - " action_before_3_6.0_x action_before_3_1.0_y ... cate_action_5_mean \\\n", - "0 2.0 1.0 ... 20.866667 \n", - "1 4.0 2.0 ... 20.866667 \n", - "2 5.0 79.0 ... 20.866667 \n", - "3 26.0 10.0 ... 20.866667 \n", - "4 28.0 55.0 ... 20.866667 \n", - "\n", - " cate_action_6_mean has_bad_comment bad_comment_rate comment_num_0 \\\n", - "0 5167.6 1.0 0.0260 0.0 \n", - "1 5167.6 1.0 0.0403 0.0 \n", - "2 5167.6 1.0 0.0245 0.0 \n", - "3 5167.6 1.0 0.0208 0.0 \n", - "4 5167.6 1.0 0.0166 0.0 \n", - "\n", - " comment_num_1 comment_num_2 comment_num_3 comment_num_4 label \n", - "0 0.0 0.0 0.0 1.0 1.0 \n", - "1 0.0 0.0 0.0 1.0 1.0 \n", - "2 0.0 0.0 0.0 1.0 1.0 \n", - "3 0.0 0.0 0.0 1.0 1.0 \n", - "4 0.0 0.0 0.0 1.0 1.0 \n", - "\n", - "[5 rows x 237 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('data/train_set.csv') # 读取训练数据\n", - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_idsku_idcateaction_before_3_1.0_xaction_before_3_2.0_xaction_before_3_3.0_xaction_before_3_4.0_xaction_before_3_5.0_xaction_before_3_6.0_xaction_before_3_1.0_y...cate_action_4_meancate_action_5_meancate_action_6_meanhas_bad_commentbad_comment_ratecomment_num_0comment_num_1comment_num_2comment_num_3comment_num_4
0202633.012564.08.01.00.00.00.00.02.01.0...8.420.8666675167.61.00.02600.00.00.00.01.0
1218498.0149854.08.04.00.00.00.00.04.02.0...8.420.8666675167.61.00.04030.00.00.00.01.0
2221842.075877.08.03.00.00.00.00.05.079.0...8.420.8666675167.61.00.02450.00.00.00.01.0
3222886.0154636.08.020.01.00.00.00.026.010.0...8.420.8666675167.61.00.02080.00.00.00.01.0
4235240.038222.08.030.01.00.00.00.028.055.0...8.420.8666675167.61.00.01660.00.00.00.01.0
\n", - "

5 rows × 236 columns

\n", - "
" - ], - "text/plain": [ - " user_id sku_id cate action_before_3_1.0_x action_before_3_2.0_x \\\n", - "0 202633.0 12564.0 8.0 1.0 0.0 \n", - "1 218498.0 149854.0 8.0 4.0 0.0 \n", - "2 221842.0 75877.0 8.0 3.0 0.0 \n", - "3 222886.0 154636.0 8.0 20.0 1.0 \n", - "4 235240.0 38222.0 8.0 30.0 1.0 \n", - "\n", - " action_before_3_3.0_x action_before_3_4.0_x action_before_3_5.0_x \\\n", - "0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 \n", - "\n", - " action_before_3_6.0_x action_before_3_1.0_y ... cate_action_4_mean \\\n", - "0 2.0 1.0 ... 8.4 \n", - "1 4.0 2.0 ... 8.4 \n", - "2 5.0 79.0 ... 8.4 \n", - "3 26.0 10.0 ... 8.4 \n", - "4 28.0 55.0 ... 8.4 \n", - "\n", - " cate_action_5_mean cate_action_6_mean has_bad_comment bad_comment_rate \\\n", - "0 20.866667 5167.6 1.0 0.0260 \n", - "1 20.866667 5167.6 1.0 0.0403 \n", - "2 20.866667 5167.6 1.0 0.0245 \n", - "3 20.866667 5167.6 1.0 0.0208 \n", - "4 20.866667 5167.6 1.0 0.0166 \n", - "\n", - " comment_num_0 comment_num_1 comment_num_2 comment_num_3 comment_num_4 \n", - "0 0.0 0.0 0.0 0.0 1.0 \n", - "1 0.0 0.0 0.0 0.0 1.0 \n", - "2 0.0 0.0 0.0 0.0 1.0 \n", - "3 0.0 0.0 0.0 0.0 1.0 \n", - "4 0.0 0.0 0.0 0.0 1.0 \n", - "\n", - "[5 rows x 236 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train_x = data.loc[:,data.columns != 'label'] # 将训练数据集分成特征和标签\n", - "train_y = data.loc[:,data.columns == 'label']\n", - "train_x.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
label
01.0
11.0
21.0
31.0
41.0
\n", - "
" - ], - "text/plain": [ - " label\n", - "0 1.0\n", - "1 1.0\n", - "2 1.0\n", - "3 1.0\n", - "4 1.0" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train_y.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cateaction_before_3_1.0_xaction_before_3_2.0_xaction_before_3_3.0_xaction_before_3_4.0_xaction_before_3_5.0_xaction_before_3_6.0_xaction_before_3_1.0_yaction_before_3_2.0_yaction_before_3_3.0_y...cate_action_4_meancate_action_5_meancate_action_6_meanhas_bad_commentbad_comment_ratecomment_num_0comment_num_1comment_num_2comment_num_3comment_num_4
08.01.00.00.00.00.02.01.00.00.0...8.420.8666675167.61.00.02600.00.00.00.01.0
18.04.00.00.00.00.04.02.00.00.0...8.420.8666675167.61.00.04030.00.00.00.01.0
28.03.00.00.00.00.05.079.00.00.0...8.420.8666675167.61.00.02450.00.00.00.01.0
38.020.01.00.00.00.026.010.00.00.0...8.420.8666675167.61.00.02080.00.00.00.01.0
48.030.01.00.00.00.028.055.00.00.0...8.420.8666675167.61.00.01660.00.00.00.01.0
\n", - "

5 rows × 234 columns

\n", - "
" - ], - "text/plain": [ - " cate action_before_3_1.0_x action_before_3_2.0_x action_before_3_3.0_x \\\n", - "0 8.0 1.0 0.0 0.0 \n", - "1 8.0 4.0 0.0 0.0 \n", - "2 8.0 3.0 0.0 0.0 \n", - "3 8.0 20.0 1.0 0.0 \n", - "4 8.0 30.0 1.0 0.0 \n", - "\n", - " action_before_3_4.0_x action_before_3_5.0_x action_before_3_6.0_x \\\n", - "0 0.0 0.0 2.0 \n", - "1 0.0 0.0 4.0 \n", - "2 0.0 0.0 5.0 \n", - "3 0.0 0.0 26.0 \n", - "4 0.0 0.0 28.0 \n", - "\n", - " action_before_3_1.0_y action_before_3_2.0_y action_before_3_3.0_y ... \\\n", - "0 1.0 0.0 0.0 ... \n", - "1 2.0 0.0 0.0 ... \n", - "2 79.0 0.0 0.0 ... \n", - "3 10.0 0.0 0.0 ... \n", - "4 55.0 0.0 0.0 ... \n", - "\n", - " cate_action_4_mean cate_action_5_mean cate_action_6_mean \\\n", - "0 8.4 20.866667 5167.6 \n", - "1 8.4 20.866667 5167.6 \n", - "2 8.4 20.866667 5167.6 \n", - "3 8.4 20.866667 5167.6 \n", - "4 8.4 20.866667 5167.6 \n", - "\n", - " has_bad_comment bad_comment_rate comment_num_0 comment_num_1 \\\n", - "0 1.0 0.0260 0.0 0.0 \n", - "1 1.0 0.0403 0.0 0.0 \n", - "2 1.0 0.0245 0.0 0.0 \n", - "3 1.0 0.0208 0.0 0.0 \n", - "4 1.0 0.0166 0.0 0.0 \n", - "\n", - " comment_num_2 comment_num_3 comment_num_4 \n", - "0 0.0 0.0 1.0 \n", - "1 0.0 0.0 1.0 \n", - "2 0.0 0.0 1.0 \n", - "3 0.0 0.0 1.0 \n", - "4 0.0 0.0 1.0 \n", - "\n", - "[5 rows x 234 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "del train_x['user_id']\n", - "del train_x['sku_id']\n", - "\n", - "train_x.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(14619, 234)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train_x.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_idsku_idcateaction_before_3_1.0_xaction_before_3_2.0_xaction_before_3_3.0_xaction_before_3_4.0_xaction_before_3_5.0_xaction_before_3_6.0_xaction_before_3_1.0_y...cate_action_4_meancate_action_5_meancate_action_6_meanhas_bad_commentbad_comment_ratecomment_num_0comment_num_1comment_num_2comment_num_3comment_num_4
0200005.067444.04.02.00.00.00.00.03.026.0...73.400000169.36666748251.01.00.08210.00.00.00.01.0
1200005.072967.04.026.01.00.01.00.030.02.0...73.400000169.36666748251.01.00.01960.00.00.00.01.0
2200007.026229.09.02.00.00.00.00.02.012.0...20.76666756.70000012937.71.00.01980.00.00.00.01.0
3200007.063315.09.04.00.00.00.00.03.010.0...20.76666756.70000012937.71.00.04760.00.00.00.01.0
4200007.0126404.09.04.00.00.00.00.03.010.0...20.76666756.70000012937.70.00.00000.00.00.00.01.0
\n", - "

5 rows × 236 columns

\n", - "
" - ], - "text/plain": [ - " user_id sku_id cate action_before_3_1.0_x action_before_3_2.0_x \\\n", - "0 200005.0 67444.0 4.0 2.0 0.0 \n", - "1 200005.0 72967.0 4.0 26.0 1.0 \n", - "2 200007.0 26229.0 9.0 2.0 0.0 \n", - "3 200007.0 63315.0 9.0 4.0 0.0 \n", - "4 200007.0 126404.0 9.0 4.0 0.0 \n", - "\n", - " action_before_3_3.0_x action_before_3_4.0_x action_before_3_5.0_x \\\n", - "0 0.0 0.0 0.0 \n", - "1 0.0 1.0 0.0 \n", - "2 0.0 0.0 0.0 \n", - "3 0.0 0.0 0.0 \n", - "4 0.0 0.0 0.0 \n", - "\n", - " action_before_3_6.0_x action_before_3_1.0_y ... cate_action_4_mean \\\n", - "0 3.0 26.0 ... 73.400000 \n", - "1 30.0 2.0 ... 73.400000 \n", - "2 2.0 12.0 ... 20.766667 \n", - "3 3.0 10.0 ... 20.766667 \n", - "4 3.0 10.0 ... 20.766667 \n", - "\n", - " cate_action_5_mean cate_action_6_mean has_bad_comment bad_comment_rate \\\n", - "0 169.366667 48251.0 1.0 0.0821 \n", - "1 169.366667 48251.0 1.0 0.0196 \n", - "2 56.700000 12937.7 1.0 0.0198 \n", - "3 56.700000 12937.7 1.0 0.0476 \n", - "4 56.700000 12937.7 0.0 0.0000 \n", - "\n", - " comment_num_0 comment_num_1 comment_num_2 comment_num_3 comment_num_4 \n", - "0 0.0 0.0 0.0 0.0 1.0 \n", - "1 0.0 0.0 0.0 0.0 1.0 \n", - "2 0.0 0.0 0.0 0.0 1.0 \n", - "3 0.0 0.0 0.0 0.0 1.0 \n", - "4 0.0 0.0 0.0 0.0 1.0 \n", - "\n", - "[5 rows x 236 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv('data/val_set.csv') # 读取验证数据\n", - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "val_x = data.loc[:,data.columns != 'label'] # 将验证数据集分成特征和标签\n", - "val_y = data.loc[:,data.columns == 'label']\n", - "val_x.head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}