|
|
|
@ -21,6 +21,250 @@
|
|
|
|
|
"plt.rcParams['axes.unicode_minus']=False"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>user_id</th>\n",
|
|
|
|
|
" <th>sku_id</th>\n",
|
|
|
|
|
" <th>cate</th>\n",
|
|
|
|
|
" <th>action_before_3_1.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_2.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_3.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_4.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_5.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_6.0_x</th>\n",
|
|
|
|
|
" <th>action_before_3_1.0_y</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>cate_action_4_mean</th>\n",
|
|
|
|
|
" <th>cate_action_5_mean</th>\n",
|
|
|
|
|
" <th>cate_action_6_mean</th>\n",
|
|
|
|
|
" <th>has_bad_comment</th>\n",
|
|
|
|
|
" <th>bad_comment_rate</th>\n",
|
|
|
|
|
" <th>comment_num_0</th>\n",
|
|
|
|
|
" <th>comment_num_1</th>\n",
|
|
|
|
|
" <th>comment_num_2</th>\n",
|
|
|
|
|
" <th>comment_num_3</th>\n",
|
|
|
|
|
" <th>comment_num_4</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>200001.0</td>\n",
|
|
|
|
|
" <td>20308.0</td>\n",
|
|
|
|
|
" <td>8.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>214.800000</td>\n",
|
|
|
|
|
" <td>665.166667</td>\n",
|
|
|
|
|
" <td>185387.233333</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0132</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>200001.0</td>\n",
|
|
|
|
|
" <td>38604.0</td>\n",
|
|
|
|
|
" <td>9.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>59.033333</td>\n",
|
|
|
|
|
" <td>106.600000</td>\n",
|
|
|
|
|
" <td>37389.133333</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0250</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>200001.0</td>\n",
|
|
|
|
|
" <td>164215.0</td>\n",
|
|
|
|
|
" <td>8.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>214.800000</td>\n",
|
|
|
|
|
" <td>665.166667</td>\n",
|
|
|
|
|
" <td>185387.233333</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0386</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>200002.0</td>\n",
|
|
|
|
|
" <td>2866.0</td>\n",
|
|
|
|
|
" <td>9.0</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>10.0</td>\n",
|
|
|
|
|
" <td>28.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>59.033333</td>\n",
|
|
|
|
|
" <td>106.600000</td>\n",
|
|
|
|
|
" <td>37389.133333</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0000</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>200002.0</td>\n",
|
|
|
|
|
" <td>3673.0</td>\n",
|
|
|
|
|
" <td>9.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>4.0</td>\n",
|
|
|
|
|
" <td>30.0</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>59.033333</td>\n",
|
|
|
|
|
" <td>106.600000</td>\n",
|
|
|
|
|
" <td>37389.133333</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0436</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>5 rows × 236 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" user_id sku_id cate action_before_3_1.0_x action_before_3_2.0_x \\\n",
|
|
|
|
|
"0 200001.0 20308.0 8.0 0.0 0.0 \n",
|
|
|
|
|
"1 200001.0 38604.0 9.0 0.0 0.0 \n",
|
|
|
|
|
"2 200001.0 164215.0 8.0 0.0 0.0 \n",
|
|
|
|
|
"3 200002.0 2866.0 9.0 2.0 0.0 \n",
|
|
|
|
|
"4 200002.0 3673.0 9.0 0.0 0.0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" action_before_3_3.0_x action_before_3_4.0_x action_before_3_5.0_x \\\n",
|
|
|
|
|
"0 1.0 0.0 0.0 \n",
|
|
|
|
|
"1 1.0 0.0 0.0 \n",
|
|
|
|
|
"2 1.0 0.0 0.0 \n",
|
|
|
|
|
"3 0.0 0.0 0.0 \n",
|
|
|
|
|
"4 0.0 0.0 0.0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" action_before_3_6.0_x action_before_3_1.0_y ... cate_action_4_mean \\\n",
|
|
|
|
|
"0 0.0 0.0 ... 214.800000 \n",
|
|
|
|
|
"1 0.0 0.0 ... 59.033333 \n",
|
|
|
|
|
"2 0.0 0.0 ... 214.800000 \n",
|
|
|
|
|
"3 10.0 28.0 ... 59.033333 \n",
|
|
|
|
|
"4 4.0 30.0 ... 59.033333 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" cate_action_5_mean cate_action_6_mean has_bad_comment bad_comment_rate \\\n",
|
|
|
|
|
"0 665.166667 185387.233333 1.0 0.0132 \n",
|
|
|
|
|
"1 106.600000 37389.133333 1.0 0.0250 \n",
|
|
|
|
|
"2 665.166667 185387.233333 1.0 0.0386 \n",
|
|
|
|
|
"3 106.600000 37389.133333 0.0 0.0000 \n",
|
|
|
|
|
"4 106.600000 37389.133333 1.0 0.0436 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" comment_num_0 comment_num_1 comment_num_2 comment_num_3 comment_num_4 \n",
|
|
|
|
|
"0 0.0 0.0 0.0 0.0 1.0 \n",
|
|
|
|
|
"1 0.0 0.0 0.0 0.0 1.0 \n",
|
|
|
|
|
"2 0.0 0.0 0.0 0.0 1.0 \n",
|
|
|
|
|
"3 0.0 0.0 0.0 0.0 0.0 \n",
|
|
|
|
|
"4 0.0 0.0 0.0 0.0 1.0 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[5 rows x 236 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data = pd.read_csv('data/test_set.csv') # 读取训练数据\n",
|
|
|
|
|
"data.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"(270139, 236)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data.shape"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|