From 28bfcb32a355a944c2d99a2b3d132f835f645c3f Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Wed, 20 Jan 2021 16:39:33 +0800 Subject: [PATCH] Add. Problems with test data --- .../逻辑回归-信用卡欺诈检测.ipynb | 275 +++++++++++++++++- 1 file changed, 268 insertions(+), 7 deletions(-) diff --git a/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb b/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb index 6c4f048..2a506a2 100644 --- a/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb +++ b/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,10 @@ "import numpy as np\n", "\n", "# 把图轻松的镶嵌到这个notebook中\n", - "%matplotlib inline" + "%matplotlib inline\n", + "\n", + "import warnings # 忽略普通警告,不打印太多东西\n", + "warnings.filterwarnings('ignore')" ] }, { @@ -705,16 +708,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# 编写Kflod函数——printing_Kfold_scores,实际中我们可以直接调用\n", "def printing_Kfold_scores(x_train_data,y_train_data):\n", - " fold = KFold(len(y_train_data),5,shuffle=False) #shuffle=False是指数据集不用洗牌\n", + " fold = KFold(5,shuffle=False) #shuffle=False是指数据集不用洗牌\n", " \n", - " # 定义不同力度的正则化惩罚力度,越大惩罚力度越大\n", - " c_param_range = [0.01,0.1,1,10,100] \n", + " # 定义不同力度的正则化惩罚力度,值越大惩罚力度越小\n", + " c_param_range = [0.01,0.1,1,10,100]\n", " # 展示结果用的表格\n", " results_table = pd.DataFrame(index = range(len(c_param_range),2), columns = ['C_parameter','Mean recall score'])\n", " results_table['C_parameter'] = c_param_range\n", @@ -726,7 +729,265 @@ " print('-------------------------------------------')\n", " print('正则化惩罚力度: ', c_param)\n", " print('-------------------------------------------')\n", - " print('\\n')" + " \n", + " # 计算每一次迭代后的召回率,一次5次\n", + " recall_accs = []\n", + " \n", + " # 一步步分解来执行交叉验证\n", + " for iteration, indices in enumerate(fold.split(x_train_data)):\n", + " \n", + " # 
选择算法模型+给定参数\n", + " lr = LogisticRegression(C = c_param, penalty = 'l1') #L1正则化防止过拟合,通过k折交叉验证寻找最佳的参数C。 \n", + "\n", + " # 训练模型。注意索引不要给错了,训练的时候一定传入的是训练集,所以X和Y的索引都是0\n", + " lr.fit(x_train_data.iloc[indices[0],:],y_train_data.iloc[indices[0],:].values.ravel())\n", + "\n", + " # 使用验证集预测模型结果,这里用的就是验证集,索引为1\n", + " y_pred_undersample = lr.predict(x_train_data.iloc[indices[1],:].values)\n", + "\n", + " # 评估模型。有了预测结果之后就可以来进行评估了,这里recall_score需要传入预测值和真实值。\n", + " recall_acc = recall_score(y_train_data.iloc[indices[1],:].values,y_pred_undersample)\n", + " # 保存每一步的结果,以便后续计算平均值。\n", + " recall_accs.append(recall_acc)\n", + " print('Iteration ', iteration,': 召回率 = ', recall_acc)\n", + "\n", + " # 当执行完所有的交叉验证后,计算平均结果\n", + " results_table.loc[j,'Mean recall score'] = np.mean(recall_accs)\n", + " j += 1 # 在这儿的意思是 num = num + 1\n", + " print('')\n", + " print('平均召回率 ', np.mean(recall_accs))\n", + " print('')\n", + "\n", + " # 找到最好的参数,哪一个Recall高,自然就是最好的了。\n", + " best_c = results_table.loc[results_table['Mean recall score'].astype('float32').idxmax()]['C_parameter']\n", + "\n", + " # 打印最好的结果\n", + " print('***********************************')\n", + " print('效果最好的模型所选参数 = ', best_c)\n", + " print('***********************************')\n", + "\n", + " return best_c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "交叉验证与不同参数的结果" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------\n", + "正则化惩罚力度: 0.01\n", + "-------------------------------------------\n", + "Iteration 0 : 召回率 = 0.9315068493150684\n", + "Iteration 1 : 召回率 = 0.9178082191780822\n", + "Iteration 2 : 召回率 = 1.0\n", + "Iteration 3 : 召回率 = 0.972972972972973\n", + "Iteration 4 : 召回率 = 0.9545454545454546\n", + "\n", + "平均召回率 0.9553666992023157\n", + "\n", + "-------------------------------------------\n", + "正则化惩罚力度: 0.1\n", + 
"-------------------------------------------\n", + "Iteration 0 : 召回率 = 0.8493150684931506\n", + "Iteration 1 : 召回率 = 0.863013698630137\n", + "Iteration 2 : 召回率 = 0.9491525423728814\n", + "Iteration 3 : 召回率 = 0.9324324324324325\n", + "Iteration 4 : 召回率 = 0.9090909090909091\n", + "\n", + "平均召回率 0.900600930203902\n", + "\n", + "-------------------------------------------\n", + "正则化惩罚力度: 1\n", + "-------------------------------------------\n", + "Iteration 0 : 召回率 = 0.8493150684931506\n", + "Iteration 1 : 召回率 = 0.8904109589041096\n", + "Iteration 2 : 召回率 = 0.9830508474576272\n", + "Iteration 3 : 召回率 = 0.9459459459459459\n", + "Iteration 4 : 召回率 = 0.9090909090909091\n", + "\n", + "平均召回率 0.9155627459783485\n", + "\n", + "-------------------------------------------\n", + "正则化惩罚力度: 10\n", + "-------------------------------------------\n", + "Iteration 0 : 召回率 = 0.863013698630137\n", + "Iteration 1 : 召回率 = 0.8904109589041096\n", + "Iteration 2 : 召回率 = 0.9830508474576272\n", + "Iteration 3 : 召回率 = 0.9324324324324325\n", + "Iteration 4 : 召回率 = 0.9090909090909091\n", + "\n", + "平均召回率 0.9155997693030431\n", + "\n", + "-------------------------------------------\n", + "正则化惩罚力度: 100\n", + "-------------------------------------------\n", + "Iteration 0 : 召回率 = 0.8767123287671232\n", + "Iteration 1 : 召回率 = 0.8904109589041096\n", + "Iteration 2 : 召回率 = 0.9830508474576272\n", + "Iteration 3 : 召回率 = 0.9459459459459459\n", + "Iteration 4 : 召回率 = 0.9090909090909091\n", + "\n", + "平均召回率 0.921042198033143\n", + "\n", + "***********************************\n", + "效果最好的模型所选参数 = 0.01\n", + "***********************************\n" + ] + } + ], + "source": [ + "best_c = printing_Kfold_scores(X_train_undersample,y_train_undersample)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# 混淆矩阵\n", + "def plot_confusion_matrix(cm, classes,\n", + " title='Confusion matrix',\n", + " cmap=plt.cm.Blues):\n", + " \"\"\"\n", + " 绘制混淆矩阵\n", + " 
\"\"\"\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + " tick_marks = np.arange(len(classes))\n", + " plt.xticks(tick_marks, classes, rotation=0)\n", + " plt.yticks(tick_marks, classes)\n", + "\n", + " thresh = cm.max() / 2.\n", + " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", + " plt.text(j, i, cm[i, j],\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm[i, j] > thresh else \"black\")\n", + "\n", + " plt.tight_layout()\n", + " plt.ylabel('True label')\n", + " plt.xlabel('Predicted label')" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "召回率: 0.9319727891156463\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAU0AAAEmCAYAAADmw8JdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAdsElEQVR4nO3debxVdb3/8df7gIoECAQoAgopTpEjEjknXYUkoR6ZOOJQpKnd9DZomdrglV+TZVmGOZCWiJUzTnEzh59YqDiQCmip6FFAUhEURT73j7UOd3M8nLPWZu+z9ua8nz3Wg72Gs76fAz7efdd3fdfaigjMzCybhqILMDOrJw5NM7McHJpmZjk4NM3McnBompnl4NA0M8vBodnBSNpU0s2SXpd03Xqc5yhJd1aytqJI2lfS00XXYfVBnqdZmyQdCZwB7AAsA+YA50fEfet53mOA04C9ImLVehda4yQFMDQiFhRdi20Y3NOsQZLOAH4K/DewObAV8EtgXAVOvzUwryMEZhaSOhddg9WZiPBSQwuwGfAmcFgrx2xCEqovpctPgU3SfQcAC4H/AhYBjcDx6b7vAO8A76ZtnAicB1xdcu7BQACd0/XjgGdJerv/BI4q2X5fyc/tBfwdeD39c6+SfXcD3wPuT89zJ9BnHb9bU/1fL6l/PPBJYB6wFPhmyfEjgAeA19JjfwFsnO67J/1dlqe/7+El5/8G8DJwVdO29Ge2SdvYPV3fElgCHFD0fxteamNxT7P2fAzoAlzfyjHfAkYCuwK7kATH2SX7tyAJ3wEkwXixpF4RcS5J7/XaiOgWEZe1VoikDwAXAWMiojtJMM5p4bjewK3psR8EfgLcKumDJYcdCRwP9AM2Br7aStNbkPwdDADOAS4Fjgb2APYFzpH0ofTY94DTgT4kf3ejgC8BRMR+6TG7pL/vtSXn703S655U2nBEPEMSqL+T1BW4ArgyIu5upV7rQByateeDwJJo/fL5KOC7EbEoIhaT9CCPKdn/brr/3YiYQdLL2r7MelYDwyRtGhGNETG3hWMOAeZHxFURsSoirgGeAj5VcswVETEvIt4CppME/rq8SzJ++y4wjSQQfxYRy9L25wI7A0TEQxExK233X8Cvgf0z/E7nRsTKtJ61RM
SlwHzgQaA/yf9JmQEOzVr0KtCnjbG2LYHnStafS7etOUez0F0BdMtbSEQsJ7mkPQlolHSrpB0y1NNU04CS9Zdz1PNqRLyXfm4KtVdK9r/V9POStpN0i6SXJb1B0pPu08q5ARZHxNttHHMpMAz4eUSsbONY60AcmrXnAeBtknG8dXmJ5NKyyVbptnIsB7qWrG9RujMi7oiI/yDpcT1FEiZt1dNU04tl1pTHr0jqGhoRPYBvAmrjZ1qdMiKpG8k48WXAeenwgxng0Kw5EfE6yTjexZLGS+oqaSNJYyT9ID3sGuBsSX0l9UmPv7rMJucA+0naStJmwFlNOyRtLunQdGxzJcll/nstnGMGsJ2kIyV1lnQ4sBNwS5k15dEdeAN4M+0Fn9xs/yvAh973U637GfBQRHyeZKz2kvWu0jYYDs0aFBE/IZmjeTawGHgBOBW4IT3k+8Bs4DHgceDhdFs5bd0FXJue6yHWDroGkrvwL5HcUd6f9CZLs3O8CoxNj32V5M732IhYUk5NOX2V5CbTMpJe8LXN9p8HTJX0mqTPtXUySeOA0SRDEpD8O+wu6aiKVWx1zZPbzcxycE/TzCwHh6aZWQ4OTTOzHByaZmY51NTLCrRR11CXnkWXYRX0kW37F12CVdDC55/j1VeXtDUPNrNOPbaOWPW+h7LWKd5afEdEjK5U++WordDs0pNNdv9i0WVYBd158zeLLsEq6KD9R1b0fLHqLTbZvs2ZYGu8Pefitp72qrqaCk0z62gEqq9RQoemmRVHgCp2td8uHJpmViz3NM3MshI0dCq6iFwcmmZWLF+em5llJHx5bmaWndzTNDPLxT1NM7Mc3NM0M8vKk9vNzLLz5HYzs5zc0zQzy8qX52Zm2Qno5CeCzMyy85immVlWvjw3M8vHPU0zsxzc0zQzy0h+9tzMLB/3NM3McnBP08wsK989NzPLxz1NM7OM/OZ2M7M8/MVqZmb51FlPs76qNbMNT9NczSxLm6fS5ZIWSXqiZNsPJT0l6TFJ10vqWbLvLEkLJD0t6eAs5To0zaw4Su+eZ13adiUwutm2u4BhEbEzMA84K2laOwETgA+nP/NLSW2OFTg0zaxYFexpRsQ9wNJm2+6MiFXp6ixgYPp5HDAtIlZGxD+BBcCIttpwaJpZoSRlXirgBOC29PMA4IWSfQvTba3yjSAzK0zyFUG5wrCPpNkl61MiYkqmtqRvAauA35U031y0dR6HppkVR7QcXeu2JCKG525GmgiMBUZFRFMwLgQGlRw2EHiprXP58tzMCpT90rzcy3NJo4FvAIdGxIqSXTcBEyRtImkIMBT4W1vnc0/TzApVobHKpnNdAxxAchm/EDiX5G75JsBdaVuzIuKkiJgraTrwD5LL9lMi4r222nBomlmhGhoqd8EbEUe0sPmyVo4/Hzg/TxsOTTMrTv4xzcI5NM2sMKJiU4najUPTzArl0DQzy8GhaWaWg0PTzCwr3wgyM8vHPU0zs4x899zMLCeHpplZVgI1ODTNzDJzT9PMLAeHpplZRr4RZGaWV31lpkPTzAokX54bcMk3DmXMx7Zj8b+XM/z4XwFwzgkfZ+w+27N6dbD4teVMuuAGGl99E4Aff3k0B390KCtWvsukC25gzvyXiyzfWvGVU77AXbfPoE/fvvx11hwA5j7+KF8//VSWL3+TQVttzS8v/S3de/QouNL6UW+h6a+7qIKrbpvDuK9dvda2C6fdz4gTLmHk53/NbQ/M46yJ+wNw8Ee3ZZuBvRl21M859Uc3c9EZhxRRsmV0+JHHcs0fb1lr2xmnncS3zjufux94hDFjx/PLi35cUHX1qZ2/jXK9OTSr4P7HnmfpsrfW2rZsxTtrPnftsvGar7wbu88O/P6OxwD42z9eZLNuXdiid7f2KtVy+tje+9KzV6+1tj2zYB4f23tfAPb/+Chuuen6IkqrX8qx1ACHZjs67/MHMv+6rzDhEx/he5f9BYAt+3Rn4aLX1xzz4uI32LJv96
JKtDLssOOHuWPGzQDcfMMfeenFhQVXVF/c0ywhabSkpyUtkHRmNduqB+f95n8YethPmfbnxznpMyMAaOm/g2jzm5etllx48RSuuPQSDtrvo7z55jI23mjjokuqG3kCc4MPTUmdgIuBMcBOwBGSdqpWe/Vk+p8fZ/x+OwLw4uJlDOy32Zp9A/r2oHHJsqJKszIM3W4Hrr1hBnfe8yCf/uzhbD3kQ0WXVFcaGhoyL7WgmlWMABZExLMR8Q4wDRhXxfZq2jYDeq/5fMje2zPv+SUA3Hr/0xx58M4AjNhpAG8sX8nLS98spEYrz+LFiwBYvXo1F/7wAo49YVLBFdWZOhvTrOaUowHACyXrC4GPNj9I0iQg+a9sk82a765LU8/5DPvuOpg+m3VlwXWn870r7mb0yG0ZOqgPqyN4/pXX+PKPbwXg9lnzOXjkUOb+/jRWrHyXL06+seDqrTUnnXA0//++e1j66hJ223EIXzvrHJYvf5MrLk2mln3yU+M54uiJBVdZX2rlsjuraoZmS38T7xuti4gpwBSAhu5bbhCjeRO/+6f3bZs645F1Hn/6T2dUsxyroEsuv7rF7V84+bR2rmQD4cnta1kIDCpZHwi8VMX2zKzOiJZvhtayao5p/h0YKmmIpI2BCcBNVWzPzOpO/d09r1pPMyJWSToVuAPoBFweEXOr1Z6Z1acaycLMqvrseUTMADxgZ2brVCs9yKxqY+KTmXVMSnqaWZc2TyddLmmRpCdKtvWWdJek+emfvdLtknRR+vDNY5J2z1KyQ9PMCiOgoUGZlwyuBEY323YmMDMihgIz03VIHrwZmi6TgF9lacChaWaFqmRoRsQ9wNJmm8cBU9PPU4HxJdt/G4lZQE9J/dusN/NvZmZWafkvz/tIml2yZHn8avOIaARI/+yXbm/pAZwBbZ3MLyE2s8Ik8zRz3QhaEhHDK9h8c20+YOPQNLMCtcv8y1ck9Y+IxvTye1G6vawHcHx5bmaFquTd83W4CWh6IcBE4MaS7cemd9FHAq83Xca3xj1NMytUJXuakq4BDiAZ+1wInAtMBqZLOhF4HjgsPXwG8ElgAbACOD5LGw5NMyvO+vUg3ycijljHrlEtHBvAKXnbcGiaWWHKuBFUOIemmRWqzjLToWlmxXJP08wsK5H18cia4dA0s8LU40uIHZpmVqDaeblwVg5NMytUnWWmQ9PMiuWepplZVhWe3N4eHJpmVhhPbjczy8mhaWaWQ51lpkPTzIrlnqaZWVa+EWRmlp3I/C2TNcOhaWaFaqizrqZD08wKVWeZ6dA0s+Ik3/1TX6np0DSzQtXZkKZD08yKtcH0NCX1aO0HI+KNypdjZh1NnWVmqz3NuUCQPB7apGk9gK2qWJeZdQAimXZUT9YZmhExqD0LMbOOqd7GNBuyHCRpgqRvpp8HStqjumWZWYeg5M3tWZda0GZoSvoF8HHgmHTTCuCSahZlZh2DgE4NyrzUgix3z/eKiN0lPQIQEUslbVzlusysg6iRDmRmWULzXUkNJDd/kPRBYHVVqzKzDqNWLruzyjKmeTHwR6CvpO8A9wH/r6pVmVmHIOVbakGbPc2I+K2kh4BPpJsOi4gnqluWmXUUlX5hh6TTgc+TXB0/DhwP9AemAb2Bh4FjIuKdcs6f6e450Al4F3gnx8+YmbVJOZY2zyUNAL4MDI+IYSTZNYHk6vjCiBgK/Bs4sdx6s9w9/xZwDbAlMBD4vaSzym3QzKxUFaYcdQY2ldQZ6Ao0AgcCf0j3TwXGl1tvlhtBRwN7RMQKAEnnAw8BF5TbqJkZJL3HnDOJ+kiaXbI+JSKmNK1ExIuSfgQ8D7wF3EmSV69FxKr0sIXAgHJrzhKazzU7rjPwbLkNmpmtkX/S+pKIGL7u06kXMA4YArwGXAeMaeHQyNNoqdZe2HFheuIVwFxJd6TrB5HcQTczW28Vvg/0CeCfEbE4Obf+BOwF9JTUOe1tDgReKreB1nqaTXfI5wK3lmyfVW5jZm
almp4IqqDngZGSupJcno8CZgN/AT5Lcgd9InBjuQ209sKOy8o9qZlZVpWc3B4RD0r6A8m0olXAI8AUko7fNEnfT7eVnW9tjmlK2gY4H9gJ6FJS3HblNmpm1qTSc9Yj4lzg3GabnwVGVOL8WeZcXglcQfK7jQGmk3RxzczWi5RMbs+61IIsodk1Iu4AiIhnIuJskrcemZmttw3uMUpgpZJBh2cknQS8CPSrbllm1lHU2ws7soTm6UA3kkeTzgc2A06oZlFm1nHUWWZmemHHg+nHZfzfi4jNzNabqJ2xyqxam9x+Pa3Mmo+Iz1SlIjPrOGporDKr1nqav2i3KlK7bbcl989sPlPA6lmvPU8tugSroJVPv1Dxc24wY5oRMbM9CzGzjqne3jWZ5UaQmVlVVOExyqpzaJpZoeosM7OHpqRNImJlNYsxs44lmbReX6mZ5c3tIyQ9DsxP13eR9POqV2ZmHUKDsi+1IMsY7EXAWOBVgIh4FD9GaWYVsiE+RtkQEc8160K/V6V6zKwDSb7uokbSMKMsofmCpBFASOoEnAbMq25ZZtZRbIhTjk4muUTfCngF+HO6zcxsvdVZRzPTs+eLSL432MysolRD78nMKsub2y+lhWfQI2JSVSoysw6lzjIz0+X5n0s+dwE+DVT+AVQz63AEdK6VuUQZZbk8v7Z0XdJVwF1Vq8jMOpQNsafZ3BBg60oXYmYdUA1NWs8qy5jmv/m/Mc0GYClwZjWLMrOOQxX/PsrqajU00+8G2oXke4EAVkfEOl9MbGaWRzK5vegq8ml1XmkakNdHxHvp4sA0s4raEJ89/5uk3ateiZl1SJIyL7Wgte8I6hwRq4B9gC9IegZYTtKjjohwkJrZeqnHy/PWxjT/BuwOjG+nWsyso6mhtxdl1VpoCiAinmmnWsysA9qQHqPsK+mMde2MiJ9UoR4z60CS7wiq8DmlnsBvgGEk0yVPAJ4GrgUGA/8CPhcR/y7n/K2V2wnoBnRfx2Jmtp5EQ44lo58Bt0fEDiRTJp8kmVs+MyKGAjNZj7nmrfU0GyPiu+We2MysLaKyY5qSegD7AccBRMQ7wDuSxgEHpIdNBe4GvlFOG631NOtroMHM6k+OOZrpXfY+kmaXLM3ftvYhYDFwhaRHJP1G0geAzSOiESD9s1+5JbfW0xxV7knNzLLKeSNoSUQMb2V/Z5JZP6dFxIOSfkaFH/teZ08zIpZWsiEzs+aaLs8r+MVqC4GFEfFguv4HkhB9RVJ/gPTPReXWXG9fz2FmG5iG9O3tWZa2RMTLJN9rtn26aRTwD+AmYGK6bSJwY7n1lvNqODOziqnCNM3TgN9J2hh4FjiepIM4XdKJwPPAYeWe3KFpZoURlb/cjYg5QEvjnhW5T+PQNLPiiJp5EUdWDk0zK1R9RaZD08wKJKCTe5pmZtnVWWY6NM2sSLXzcuGsHJpmVphq3D2vNoemmRXKPU0zsxzqKzIdmmZWJM/TNDPLzmOaZmY5uadpZpZDfUWmQ9PMCuQngszMcqqzzHRomlmRhOrsAt2haWaFck/TzCyjZMpRfaWmQ9PMipP9C9NqhkPTzArl0DQzy8E3gmwtX/z8Cdw24xb69uvHQ3OeAGDp0qUcc+ThPPfcv9h668Fcfc10evXqVXClti6XnHsUY/YbxuKlyxh+2H8DcM6XDmHs/juzOoLFS5cx6dyraVz8OqcfO4rDP7knAJ07NbDDkC0YdOCZ/PuNFUX+CjVLQEN9ZWbdPfZZd46ZeBw33nL7Wtt+9IPJHHDgKJ54cj4HHDiKH/1gckHVWRZX3TyLcadcvNa2C6fOZMThFzBywmRuu/cJzpo0Jtn+25mMnDCZkRMmc87Pb+Leh+Y7MNugHP+rBQ7NKttn3/3o3bv3WttuuflGjj4m+d76o4+ZyM033VBEaZbR/Q8/w9LX1w6+ZcvfXvO566abEBHv+7nPjR7O9Nsfqnp99a5ByrzUAl+eF2DRK6/Qv39/APr378/iRYsKrs
jKcd4pn+KosSN4/c23GD3porX2bdplI/5jrx05ffL0gqqrD748LyHpckmLJD1RrTbMinTexTczdMy3mXbbbE46fL+19h2y30d4YM6zvjRvU56L89pI12penl8JjK7i+etWv803p7GxEYDGxkb69utXcEW2Pqbf9nfGj9p1rW2HHbwH1/nSvG3pPM2sSy2oWmhGxD3A0mqdv54dMvZQrr5qKgBXXzWVsZ8aV3BFltc2W/Vd8/mQ/Xdm3r9eWbPeo1sX9tljW26++7EiSqs7yrHUgsLHNCVNAiYBDNpqq4Krqbxjjz6Ce/96N0uWLGGbwQP59jnf4atfP5Ojj/gcU6+4jEGDtuJ3064rukxrxdQLjmPfPYbSp2c3Ftz+Pb53yQxG7/Nhhm7dj9Wrg+cbl/Ll86etOf7Qj+/CzFlPseLtdwqsuj4kY5qVj0NJnYDZwIsRMVbSEGAa0Bt4GDgmIsr6B1JLd/0qRdJg4JaIGJbl+D32GB73Pzi7avVY++u156lFl2AVtPLp6axesahiKbfjR3aLK67/S+bjPza010MRMbyt4ySdAQwHeqShOR34U0RMk3QJ8GhE/Kqcmj3lyMyKVeHrc0kDgUOA36TrAg4E/pAeMhUYX265hV+em1nHVoW74j8Fvg50T9c/CLwWEavS9YXAgHJPXs0pR9cADwDbS1oo6cRqtWVm9Svn3fM+kmaXLJPWPpfGAosionTqQkupXPa4ZNV6mhFxRLXObWYbjpz9zCVtjGnuDRwq6ZNAF6AHSc+zp6TOaW9zIPBSedV6TNPMCiSSr/DNurQlIs6KiIERMRiYAPxPRBwF/AX4bHrYRODGcmt2aJpZcdpvcvs3gDMkLSAZ47ys3BP5RpCZFapak9Yj4m7g7vTzs8CISpzXoWlmxaqVR30ycmiaWYFq50UcWTk0zaxQtfIijqwcmmZWmFp6EUdWDk0zK1adpaZD08wK5TFNM7McPKZpZpZVDb2RPSuHppkVypfnZmYZJc+eF11FPg5NMytUnWWmQ9PMClZnqenQNLNCeUzTzCwHj2mameVQZ5np0DSzgtVZajo0zawwyQs76is1HZpmVhxBQ31lpkPTzArm0DQzy8pvbjczy8VTjszMMvKb283M8qqz1HRomlmhPKZpZpaDxzTNzHKos8x0aJpZgfx1F2ZmedVXajYUXYCZdVwieYwy69Lm+aRBkv4i6UlJcyX9Z7q9t6S7JM1P/+xVbs0OTTMrlJR9yWAV8F8RsSMwEjhF0k7AmcDMiBgKzEzXy+LQNLNCKcf/2hIRjRHxcPp5GfAkMAAYB0xND5sKjC+3Xo9pmlmx8g1p9pE0u2R9SkRMafG00mBgN+BBYPOIaIQkWCX1K6tWHJpmVrCct4GWRMTwNs8pdQP+CHwlIt5QBW/R+/LczAqTZzwza+5J2ogkMH8XEX9KN78iqX+6vz+wqNyaHZpmVqhKjmkq6VJeBjwZET8p2XUTMDH9PBG4sdx6fXluZsWq7DTNvYFjgMclzUm3fROYDEyXdCLwPHBYuQ04NM2sUJXMzIi4r5VTjqpEGw5NMyuUH6M0M8tIiIY6S03fCDIzy8E9TTMrVJ11NB2aZlYsv7ndzCwrv0/TzCw7fxulmVledZaaDk0zK5THNM3McvCYpplZDnWWmQ5NMytWJd912R4cmmZWGFF/l+eKiKJrWEPSYuC5outoB32AJUUXYRXVUf5Nt46IvpU6maTbSf7usloSEaMr1X45aio0OwpJs7O8st/qh/9NOw6/sMPMLAeHpplZDg7NYrT4laNW1/xv2kF4TNPMLAf3NM3McnBompnl4NBsR5JGS3pa0gJJZxZdj60/SZdLWiTpiaJrsfbh0GwnkjoBFwNjgJ2AIyTtVGxVVgFXAoVOtrb25dBsPyOABRHxbES8A0wDxhVck62niLgHWFp0HdZ+HJrtZwDwQsn6wnSbmdURh2b7aem1BJ7vZVZnHJrtZyEwqGR9IPBSQbWYWZkcmu3n78
BQSUMkbQxMAG4quCYzy8mh2U4iYhVwKnAH8CQwPSLmFluVrS9J1wAPANtLWijpxKJrsuryY5RmZjm4p2lmloND08wsB4emmVkODk0zsxwcmmZmOTg0NyCS3pM0R9ITkq6T1HU9znWApFvSz4e29lYmST0lfamMNs6T9NWs25sdc6Wkz+Zoa7DfRGSV4NDcsLwVEbtGxDDgHeCk0p1K5P43j4ibImJyK4f0BHKHplk9cmhuuO4Ftk17WE9K+iXwMDBI0kGSHpD0cNoj7QZr3vf5lKT7gM80nUjScZJ+kX7eXNL1kh5Nl72AycA2aS/3h+lxX5P0d0mPSfpOybm+lb5T9M/A9m39EpK+kJ7nUUl/bNZ7/oSkeyXNkzQ2Pb6TpB+WtP3F9f2LNCvl0NwASepM8t7Ox9NN2wO/jYjdgOXA2cAnImJ3YDZwhqQuwKXAp4B9gS3WcfqLgL9GxC7A7sBc4EzgmbSX+zVJBwFDSV6Htyuwh6T9JO1B8vjobiShvGeGX+dPEbFn2t6TQOkTN4OB/YFDgEvS3+FE4PWI2DM9/xckDcnQjlkmnYsuwCpqU0lz0s/3ApcBWwLPRcSsdPtIkpcg3y8JYGOSxwB3AP4ZEfMBJF0NTGqhjQOBYwEi4j3gdUm9mh1zULo8kq53IwnR7sD1EbEibSPLs/fDJH2fZAigG8ljqE2mR8RqYL6kZ9Pf4SBg55Lxzs3StudlaMusTQ7NDctbEbFr6YY0GJeXbgLuiogjmh23K5V7VZ2ACyLi183a+EoZbVwJjI+IRyUdBxxQsq/5uSJt+7SIKA1XJA3O2a5Zi3x53vHMAvaWtC2ApK6StgOeAoZI2iY97oh1/PxM4OT0ZztJ6gEsI+lFNrkDOKFkrHSApH7APcCnJW0qqTvJUEBbugONkjYCjmq27zBJDWnNHwKeTts+OT0eSdtJ+kCGdswycU+zg4mIxWmP7RpJm6Sbz46IeZImAbdKWgLcBwxr4RT/CUxJ3+bzHnByRDwg6f50Ss9t6bjmjsADaU/3TeDoiHhY0rXAHOA5kiGEtnwbeDA9/nHWDuengb8CmwMnRcTbkn5DMtb5sJLGFwPjs/3tmLXNbzkyM8vBl+dmZjk4NM3McnBompnl4NA0M8vBoWlmloND08wsB4emmVkO/wsSzEycO3TyTgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import itertools\n", + "# 选择最优正则化参数\n", + "lr = LogisticRegression(C = best_c, penalty = 'l1')\n", + "# 训练模型\n", + "lr.fit(X_train_undersample,y_train_undersample.values.ravel())\n", + "# 测试模型\n", + "y_pred_undersample = lr.predict(X_test_undersample.values)\n", + "# 计算所需值\n", + "cnf_matrix = confusion_matrix(y_test_undersample,y_pred_undersample)\n", + "np.set_printoptions(precision=2)\n", + " \n", + "print(\"召回率: \", cnf_matrix[1,1]/(cnf_matrix[1,0]+cnf_matrix[1,1]))\n", + "# 绘制\n", + "class_names = [0,1]\n", + "plt.figure()\n", + "plot_confusion_matrix(cnf_matrix\n", + " , classes=class_names\n", + " , title='Confusion matrix')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "右上角19表示原本正常的,被判定为异常。右下角表示原本异常的,被判定为异常的。看似结果不错。\n", + "\n", + "但这里还不是我们的原始需求,我们的原始需求是在28万多个中,找到492个异常的。而目前是1:1的比例。" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "召回率: 0.9183673469387755\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAVkAAAEmCAYAAADIhuPPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deZwV1bnu8d/TtCAER5wBhyiixqMIikbjkKgIasSTGxVHHBKiUZOcjJoYcY6e5GYwGo1GFDRHJANH4oRINNFcVEBxwAnEgRYUEDQoTsB7/6jVuMXu3buhi+re/Xzzqc/e9daqVWt3x5fVa69apYjAzMzyUVN0A8zMqpmTrJlZjpxkzcxy5CRrZpYjJ1kzsxw5yZqZ5chJtp2R1FnS3yS9LelPq1HP8ZLubcm2FUXSvpKeL7odVp3kebKtk6TjgO8COwCLgWnApRHx0GrWeyJwNrB3RCxd7Ya2cpIC6BURM4tui7VP7sm2QpK+C/wauAzYFNgS+B0wuAWq3wp4oT0k2EpIqi26DVblIsJbK9qA9YB3gKPKlOlEloTnpO3XQKd07ACgDvgeMA+YC5ySjl0IfAh8lK5xGnABcEtJ3VsDAdSm/ZOBWWS96ZeA40viD5WctzcwGXg7ve5dcuwB4GLgX6mee4GNGvls9e3/YUn7jwQOBV4AFgI/LinfH5gEvJXKXgV0TMf+mT7Lu+nzHlNS/4+A14Gb62PpnG3TNfqm/S2ABcABRf9/w1vb3NyTbX0+D6wNjC1T5ifAXkAfYFeyRHNeyfHNyJJ1d7JEerWkDSJiOFnv+LaI6BoRN5RriKTPAFcCgyJiHbJEOq2BchsCd6ay3YBfAndK6lZS7DjgFGAToCPw/TKX3ozsZ9AdOB+4HjgB6AfsC5wv6bOp7DLgv4CNyH52BwLfBIiI/VKZXdPnva2k/g3JevXDSi8cES+SJeA/SuoC3AjcFBEPlGmvWaOcZFufbsCCKP/n/PHARRExLyLmk/VQTyw5/lE6/lFE3EXWi+u9iu1ZDuwsqXNEzI2I6Q2UOQyYERE3R8TSiLgVeA74ckmZGyPihYh4DxhD9g9EYz4iG3/+CBhNlkB/ExGL0/WnA7sARMTUiHg4Xfdl4PfA/hV8puER8UFqzydExPXADOARYHOyf9TMVomTbOvzJrBRE2OFWwCvlOy/kmIr6lgpSS8Buja3IRHxLtmf2KcDcyXdKWmHCtpT36buJfuvN6M9b0bEsvS+Pgm+UXL8vfrzJW0v6Q5Jr0v6N1lPfaMydQPMj4j3myhzPbAz8NuI+KCJsmaNcpJtfSYB75ONQzZmDtmfuvW2TLFV8S7QpWR/s9KDETE+Ig4m69E9R5Z8mmpPfZteW8U2Ncc1ZO3qFRHrAj8G1MQ5ZafUSOpKNs59A3BBGg4xWyVOsq1MRLxNNg55taQjJXWRtJakQZL+OxW7FThP0saSNkrlb1nFS04D9pO0paT1gHPrD0jaVNIRaWz2A7Jhh2UN1HEXsL2k4yTVSjoG2Am4YxXb1BzrAP8G3km97DNWOv4G8NlPnVXeb4CpEfE1srHma1e7ldZuOcm2QhHxS7I5sucB84HZwFnA/6YilwBTgCeBp4DHUmxVrjUBuC3VNZVPJsYaslkKc8i+cd+f9KXSSnW8CRyeyr5JNjPg8IhYsCptaqbvk32ptpisl33bSscvAEZKekvS0U1VJmkwMJBsiASy30NfSce3WIutXfHNCGZmOXJP1swsR06yZmY5cpI1M8uRk6yZWY5a1eIYqu0c6rhO0c2wFvS57XsW3QRrQa/NfoWFby5oah5yxTqsu1XE0k/ddNeoeG/++IgY2FLXXxNaV5LtuA6dejc5y8bakHETflF0E6wFHXHQPi1aXyx9r1n/zb8/7eqm7uZrdVpVkjWz9kag6h61dJI1s+IIUIuNPrRKTrJmViz3ZM3M8iKo6VB0I3LlJGtmxfJwgZlZToSHC8zM8iP3ZM3
McuWerJlZjtyTNTPLi29GMDPLj29GMDPLmXuyZmZ5qf7hgur+dGbWugno0KHyranqpN6SppVs/5b0HUkbSpogaUZ63SCVl6QrJc2U9KSkviV1DU3lZ0gaWhLvJ+mpdM6VUvnxDidZMyuWVPnWhIh4PiL6REQfoB+wBBgLnANMjIhewMS0DzAI6JW2YcA1WZO0ITAc2BPoDwyvT8ypzLCS88qub+ska2YFSsMFlW7NcyDwYkS8AgwGRqb4SODI9H4wMCoyDwPrS9ocOASYEBELI2IRMAEYmI6tGxGTInvU96iSuhrkMVkzK1bzZhdsJGlKyf51EXFdI2WHALem95tGxFyAiJgraZMU7w7MLjmnLsXKxesaiDfKSdbMitW8HuqCiNi9ySqljsARwLlNFW0gFqsQb5SHC8ysOM0Zj21ej3cQ8FhEvJH230h/6pNe56V4HVD6ILoewJwm4j0aiDfKSdbMipXPmOyxfDxUADAOqJ8hMBS4vSR+UpplsBfwdhpWGA8MkLRB+sJrADA+HVssaa80q+Ckkroa5OECMytWC9/xJakLcDDwjZLw5cAYSacBrwJHpfhdwKHATLKZCKcARMRCSRcDk1O5iyJiYXp/BnAT0Bm4O22NcpI1swK1/M0IEbEE6LZS7E2y2QYrlw3gzEbqGQGMaCA+Bdi50vY4yZpZsbx2gZlZTvxkBDOzPPlBimZm+XJP1swsRx6TNTPLiap/qUMnWTMrlnuyZmb5aWI51jbPSdbMCpM94stJ1swsH6Lhda2qiJOsmRVI7smameXJSdbMLEc1NZ7CZWaWD4/JmpnlRx6TNTPLl5OsmVmOnGTNzHLkJGtmlhd/8WVmli/3ZM3McuLZBWZmOav2JFvdt1qYWesmUI0q3iqqUlpf0p8lPSfpWUmfl7ShpAmSZqTXDVJZSbpS0kxJT0rqW1LP0FR+hqShJfF+kp5K51ypJv6VcJI1s0JJqnir0G+AeyJiB2BX4FngHGBiRPQCJqZ9gEFAr7QNA65JbdoQGA7sCfQHhtcn5lRmWMl5A8s1xknWzArVkklW0rrAfsANABHxYUS8BQwGRqZiI4Ej0/vBwKjIPAysL2lz4BBgQkQsjIhFwARgYDq2bkRMiogARpXU1SAnWTMrTP0XX81IshtJmlKyDVupys8C84EbJT0u6Q+SPgNsGhFzAdLrJql8d2B2yfl1KVYuXtdAvFH+4svMitW8770WRMTuZY7XAn2BsyPiEUm/4eOhgUqvHqsQb5R7smZWHLX4mGwdUBcRj6T9P5Ml3TfSn/qk13kl5XuWnN8DmNNEvEcD8Ua5J7uKem21CTdfceqK/W26d+Pia+5kvXW6cOpX9mb+oncAGH7VOMY/9Ay1tTVcc/7x9NmhJ7UdavjjnY/yixH3ArBe185cM/w4dtp2cyLg9Av/yCNPvsT53zyMw/ffheURzF+4mGHDb2Hu/LcL+bzt0Y2/v4rbbrmRiOCYE07h1NPPXnHs+qt/xc8u+DFTnpvNht02YsLdf+OXl19EjWroUFvLTy/5b/bYax9em/0KZ5x8LMuWLWPp0o846WtncPzJXy/wU7U+LTmFKyJelzRbUu+IeB44EHgmbUOBy9Pr7emUccBZkkaTfcn1dkTMlTQeuKzky64BwLkRsVDSYkl7AY8AJwG/LdcmJ9lVNOOVeew15HIAamrEi+MvZdz9T3DiEZ/nt7fcz69vnviJ8v/noL506ljLHkdfRue11+Lxv5zHmLun8Orchfzih1/l3v/3DMf94AbWqu1Al7U7AvCrkRO56Hd3AvDNY/fn3GGD+Nalo9fsB22nnn92OrfdciNjxz/IWh07cvIxR/DFgwexzbbbMee12Tz0wN/ZosfHHZ299/0iBw08HEk8O/0pzv7aCdw36Qk23nRz/nTX/XTq1Il333mHgfv146CBh7HpZlsU+OlalxzmyZ4N/FFSR2AWcArZX+1jJJ0GvAoclcreBRwKzASWpLKkZHoxMDmVuyg
iFqb3ZwA3AZ2Bu9PWKCfZFvDF/r15qW4+r85d1GiZIOiydkc6dKihc6eOfPjRMha/+z7rfGZtvtB3W75+/s0AfLR0GW+/8x4Ai999f8X5XTp3Ivsy09aEF194jj79+tO5SxcA9tx7X+6963a+cfb3uOS8H3LO8EsZdtJRK8p/pmvXFe/fW/LuisTRsWPHFfEPP/yA5cuXr6FP0Ia0cI6NiGlAQ+O2BzZQNoAzG6lnBDCigfgUYOdK2+Mx2RZw1CH9GHPP1BX7pw/Zj0dvO5drhx/P+ut0BuCv9z3Okvc/5KUJl/LC3Rfx61ETWfTvJWzTvRsLFr3DdReewKRbf8Tvzj9uRU8W4IIzv8yMuy9myKDdufiaO9f4Z2uvtt/xczw66SEWLXyT95Ys4YH77mHua3Xcd88dbLb5Fuy48y6fOmf8nbdz0Od35bTjvsIVv7l2RXzOa7MZtP8e7NOnF984+3vuxa4kh3myrUquSVbSQEnPpzsjyn3D12atVduBw/b/D/464XEArv/Tg+z05QvYc8jlvL7g31z+3a8AsMfntmbZsuV8dsBP2PGw4Xz7xC+xdfdu1NZ2oM8OPbn+Tw/y+WOvYMl7H/D9Uw9eUf8FV/+NXoN+yui7p3D6MfsV8hnbo+2234FvnP09Tvrq4Zx8zBHs8Lld6FBby9W/uoLvnHN+g+cccthg7pv0BL8fOYZfXn7RivgW3Xty9z8mc/+jT/PX225h/rw31tTHaPWak2CdZFciqQNwNdkdFTsBx0raKa/rFeWQL+zEtOdmM2/hYgDmLVzM8uVBRDDir/9i9523AuDoQbtz7/97hqVLlzN/0TtMmjaLfjttyWtvLOK1eW8x+elXABh73zT67NDzU9cZc/dkjjywz5r7YMYxJ5zM3/4+idv+dh/rr78BPXpuRd2rr3DYAf3Zt29vXp/zGl8+8PPMf+P1T5zXf+8v8OrLs1j45oJPxDfdbAt69d6JyQ//a01+jFavpqam4q0tyrPV/YGZETErIj4ERpPdXVFVjh64+yeGCjbbaN0V7wd/aVeeeXEuAHWvL+SAPXoD0GXtjvTfZWuef/kN3nhzMXWvL6LXVtnc6AP69+a5Wdl/tNtuufGKug7bfxdeeNk9oDVpwfxsls9rda8y/s7b+coxxzP52Vd58LHnefCx59lsi+78beIkNt50M16e9eKKMfOnn3icjz78kA027MbcOXW8/142xv72W4uY+ugkPrvd9oV9plZJzdjaoDy/+Grojok9Vy6U7tjI7tpYq+vKh1u1zmuvxZf23IGzLrl1RezSbx/JLr17EBG8MnchZ6dj1972T6678ASm/vknSHDz7Q/z9Ixset13r/gTN152Mh1rO/DyawsYNvwWAC751mB6bbUJy5cHr85d6JkFa9g3TzmWtxYtpHattbjwil+z3vobNFr2njvGMnbM/1BbuxZrd16bK6+/GUnMfOF5Lht+DpKICL5+5nfYYaeKvzNpF9rqMECllNc31pKOAg6JiK+l/ROB/hFxdmPn1HTZJDr1PjqX9lgxnpnwi6KbYC3oiIP24alpU1ssK3barFf0OP7KisvP+uWhU5u446vVybMn29gdE2ZmQBoFqO6ObK5jspOBXpK2SZOCh5DdXWFmllT/7ILcerIRsVTSWcB4oAMwIiKm53U9M2ub2mjurFiud3xFxF1kt62ZmTWorfZQK+Xbas2sOHJP1swsNyJbYKmaOcmaWaGcZM3M8uLhAjOz/GTzZKs7yzrJmlmB2u7810o5yZpZoao8xzrJmlmx3JM1M8uLv/gyM8uPv/gyM8tZledYJ1kzK1a192Tb5kNzzKw6KLvjq9KtoiqllyU9JWmapCkptqGkCZJmpNcNUlySrkwPe31SUt+Seoam8jMkDS2J90v1z0znlm2Yk6yZFaZ+0e5Kt2b4YkT0KXmKwjnAxIjoBUxM+5A96LVX2oYB10CWlIHhZI/M6g8Mr0/MqcywkvMGlmuIk6yZFWiNLdo
9GBiZ3o8EjiyJj4rMw8D6kjYHDgEmRMTCiFgETAAGpmPrRsSkyJ7dNaqkrgY5yZpZoZrZk91I0pSSbVgDVQZwr6SpJcc3jYi5AOl1kxRv6IGv3ZuI1zUQb5S/+DKzQjWzh7qgggcp7hMRcyRtAkyQ9Fy5yzcQi1WIN8o9WTMrTjN6sZXm4oiYk17nAWPJxlTfSH/qk17npeKNPfC1XLxHA/FGOcmaWWHqb0ZoqTFZSZ+RtE79e2AA8DTZQ1zrZwgMBW5P78cBJ6VZBnsBb6fhhPHAAEkbpC+8BgDj07HFkvZKswpOKqmrQR4uMLNCtfA82U2BsanOWuB/IuIeSZOBMZJOA14Fjkrl7wIOBWYCS4BTACJioaSLyZ66DXBRRCxM788AbgI6A3enrVFOsmZWqJbMsRExC9i1gfibwIENxAM4s5G6RgAjGohPAXautE1OsmZWqGq/48tJ1syK41W4zMzyIyq/XbatcpI1s0LVVHlX1knWzApV5TnWSdbMipPdZFDdWdZJ1swKVeVDsk6yZlasdtuTlbRuuRMj4t8t3xwza2+qPMeW7clO59OrztTvB7Blju0ys3ZAZNO4qlmjSTYiejZ2zMyspVT7mGxFq3BJGiLpx+l9D0n98m2WmbULzViBq62O3TaZZCVdBXwRODGFlgDX5tkoM2sfBHSoUcVbW1TJ7IK9I6KvpMdhxRJgHXNul5m1E220g1qxSpLsR5JqSI9YkNQNWJ5rq8ys3WirwwCVqmRM9mrgL8DGki4EHgKuyLVVZtYuNOfRM201FzfZk42IUZKmAgel0FER8XS+zTKz9sILxGQ6AB+RDRn4uWBm1mKqO8VWNrvgJ8CtwBZkT2b8H0nn5t0wM2sfqn0KVyU92ROAfhGxBEDSpcBU4Gd5NszMqp+o/psRKkmyr6xUrhaYlU9zzKxdacM91EqVWyDmV2RjsEuA6ZLGp/0BZDMMzMxWW5Xn2LI92foZBNOBO0viD+fXHDNrT+rv+GrxeqUOwBTgtYg4XNI2wGhgQ+Ax4MSI+FBSJ2AU0A94EzgmIl5OdZwLnAYsA74VEeNTfCDwG7IJAX+IiMvLtaXcAjE3rNanNDOrQE7DBd8GngXql2y9AvhVRIyWdC1Z8rwmvS6KiO0kDUnljpG0EzAE+BzZl/73Sdo+1XU1cDBQB0yWNC4inmmsIZXMLthW0mhJT0p6oX5blU9tZrYyNWOrqD6pB3AY8Ie0L+BLwJ9TkZHAken94LRPOn5gKj8YGB0RH0TES8BMoH/aZkbErIj4kKx3PLhceyqZ83oTcCPZZxwEjEkVm5mtFim7GaHSDdhI0pSSbVgD1f4a+CEf3/7fDXgrIpam/Tqge3rfHZgNkI6/ncqviK90TmPxRlUyu6BLRIyX9IuIeBE4T9KDFZxnZtakZo4WLIiI3RuvS4cD8yJiqqQD6sMNFI0mjjUWb6hjGg3EVqgkyX6Qus8vSjodeA3YpILzzMya1MJjsvsAR0g6FFibbEz218D6kmpTb7UHMCeVrwN6AnWSaoH1gIUl8Xql5zQWb1AlwwX/BXQFvpU+wNeBUys4z8ysSS25QExEnBsRPSJia7Ivrv4eEccD9wNfTcWGAren9+PSPun43yMiUnyIpE5pZkIv4FFgMtBL0jZpydchqWyjKlkg5pH0djEfL9xtZrbahNbUAjE/AkZLugR4HKifPXUDcLOkmWQ92CEAETFd0hjgGWApcGZELAOQdBYwnmwK14iImF7uwuVuRhhLmbGGiPhKZZ/NzKwROS5hGBEPAA+k97PIZgasXOZ94KhGzr8UuLSB+F3AXZW2o1xP9qpKK2kpu+24Jf96ZI1f1swq1LFDLjcOtHidrUm5mxEmrsmGmFn7VO1rp1a6nqyZWYvL67ba1sRJ1swKVeU5tvIkK6lTRHyQZ2PMrH3JpmZVd5atZO2C/pKeAmak/V0l/Tb3lplZu1Cjyre2qJIx5yuBw8mWASMingC+mGejzKz9aPdPqwV
qIuKVlbr0y3Jqj5m1I9njZ9po9qxQJUl2tqT+QKSFcM8GvNShmbUIT+GCM8iGDLYE3gDuSzEzs9VW5R3ZitYumEe6n9fMrCVJa2ztgsI0mWQlXU8DaxhEREOL5ZqZNUuV59iKhgvuK3m/NvCffHJlcDOzVSKgtq3OzapQJcMFt5XuS7oZmJBbi8ysXXFP9tO2AbZq6YaYWTvUhm8yqFQlY7KL+HhMtoZsYdtz8myUmbUfqvg5tG1T2SSbnu21K9lzvQCWp0czmJmttuxmhKJbka+y84BTQh0bEcvS5gRrZi3KaxfAo5L65t4SM2uXJFW8tUXlnvFV//jcLwBfl/Qi8C5ZDz8iwonXzFZLexguKDcm+yjQFzhyDbXFzNqbNry6VqXKDRcIICJebGhbQ+0zsypXk26trWRriqS1JT0q6QlJ0yVdmOLbSHpE0gxJt0nqmOKd0v7MdHzrkrrOTfHnJR1SEh+YYjMlNTnTqlxPdmNJ323sYET8sslPbGZWRvaMrxat8gPgSxHxjqS1gIck3Q18F/hVRIyWdC1wGnBNel0UEdtJGgJcARwjaSeyNVs+B2wB3Cdp+3SNq4GDgTpgsqRxEfFMYw0q9/E6AF2BdRrZzMxWk6hpxtaUyLyTdtdKWwBfAv6c4iP5eBh0cNonHT8wTV0dDIyOiA8i4iVgJtA/bTMjYlZEfAiMTmUbVa4nOzciLmryU5mZrSLR8mOyad3rqcB2ZL3OF4G30hf5kPVAu6f33UlrsUTEUklvA91S/OGSakvPmb1SfM9y7SmXZKt8ONrMCtf8+a8bSZpSsn9dRFxXWiAilgF9JK0PjAV2bKCe+jn/DV09ysQb+uu/7P0D5ZLsgeVONDNrCc1cT3ZBROxeScGIeEvSA8BewPol01J7AHNSsTqgJ1AnqRZYj2zpgPp4vdJzGos3qNEx2YhYWMkHMTNbVfXDBS31IEVJG6ceLJI6AwcBzwL3A19NxYYCt6f349I+6fjf052t44AhafbBNkAvsmmtk4FeabZCR7Ivx8aVa9OqrMJlZtZiWvjJCJsDI9O4bA0wJiLukPQMMFrSJcDjwA2p/A3AzZJmkvVghwBExHRJY4BngKXAmWkYAklnAePJJgeMiIjp5RrkJGtmhWrJHBsRTwK7NRCfRTYzYOX4+8BRjdR1KXBpA/G7gLsqbZOTrJkVRvhptWZm+RFtduGXSjnJmlmhqjvFOsmaWYEEdHBP1swsP1WeY51kzaxIbXcx7ko5yZpZYTy7wMwsZ+7JmpnlqLpTrJOsmRXJ82TNzPLjMVkzs5y5J2tmlqPqTrFOsmZWIN/xZWaWsyrPsU6yZlYkoSofMHCSNbNCuSdrZpaTbApXdWdZJ1kzK06FD0hsy5xkzaxQTrJmZjmq9i++qv2OtsJ942unsuUWm9Cvz84rYuf+6AfsuvMO7LHbLhz91f/krbfeKrCF1pSGfocXDv8pe+y2C3v268PhgwYwZ84cAP75jwfYtNt67NmvD3v268Nll1xUVLPbBAE1qnxri5xkc3bi0JO5/Y57PhE78KCDmTrtaSY//iS9em3Pz6/4WUGts0o09Dv8r+/9gMmPP8kjU6cx6NDD+VlJMt3nC/vyyNRpPDJ1Gj8+7/w13dw2R834X5N1ST0l3S/pWUnTJX07xTeUNEHSjPS6QYpL0pWSZkp6UlLfkrqGpvIzJA0tifeT9FQ650o1cV+wk2zOvrDvfmy44YafiB108ABqa7ORmv577sVrdXVFNM0q1NDvcN11113xfsmSd6v+/vs81UgVbxVYCnwvInYE9gLOlLQTcA4wMSJ6ARPTPsAgoFfahgHXQJaUgeHAnkB/YHh9Yk5lhpWcN7Ds56vw52A5GXXTCA4ZOKjoZtgqGP7Tn7DdNj0Zfesf+ekFH/dkH3l4Ev377srgwwfxzPTpBbaw9Wvp4YKImBsRj6X3i4Fnge7AYGBkKjYSODK9HwyMiszDwPqSNgcOASZExMKIWARMAAamY+t
GxKSICGBUSV0Nyi3JShohaZ6kp/O6Rlt3xc8upUNtLUOOO77optgquPDiS5n50myGHHs81/7uKgD67NaX5198hUcfe4Izzjybo79a9r8/a9ZggQA2kjSlZBvWaM3S1sBuwCPAphExF7JEDGySinUHZpecVpdi5eJ1DcQblWdP9iaa6Ea3Z7eMGsldd97BTaP+6D8127ijhxzH/479C5ANI3Tt2hWAgYMO5aOPPmLBggVFNq91S/NkK92ABRGxe8l2XYPVSl2BvwDfiYh/l2/Bp8QqxBuVW5KNiH8CC/Oqvy27d/w9/N9fXMGfx46jS5cuRTfHVsHMGTNWvL/zb+PYvvcOALz++utkf0XC5EcfZfny5XTr1q2QNrYVasZWUX3SWmQJ9o8R8dcUfiP9qU96nZfidUDPktN7AHOaiPdoIN6owufJpu7+MICeW25ZcGta3kknHMuD/3iABQsWsO3WPfjp+Rfy8//+GR988AGHDzwYyL78+u3vri24pdaYhn6H99xzFzNeeJ4a1bDlVltx5dXZ72/sX/7M9dddQ22HWtbu3JlRt4z2XyplZGOyLffzSd/03wA8GxG/LDk0DhgKXJ5eby+JnyVpNNmXXG9HxFxJ44HLSr7sGgCcGxELJS2WtBfZMMRJwG/Ltqn+X908pDGROyJi5yaKAtCv3+7xr0em5NYeM1s9++y5O1OnTmmxrLjjf+wWN469v+Lyn++1wdSI2L2x45K+ADwIPAUsT+EfkyXEMcCWwKvAUSlhCriKbGhzCXBKRExJdZ2azgW4NCJuTPHdyYZDOwN3A2dHmURaeE/WzNq5FuzoR8RDZWo8sIHyAZzZSF0jgBENxKcAFXUcwUnWzArm22pXkaRbgUlAb0l1kk7L61pm1nY1c3ZBm5NbTzYijs2rbjOrHm00d1bMwwVmVhjhR4KbmeWnDQ8DVMpJ1swKVeU51knWzApW5VnWSdbMCuRHgpuZ5cpjsmZmOWnOwi9tlZOsmRWryrOsk6yZFcpjsmZmOfKYrJlZXnwzgplZvjxcYGaWk2ztgqJbkS8nWTMrVJXnWCdZMytYlWdZJ1kzK5THZM3McuQxWTOzHFV5jnWSNbOCVXmWze1BimZmTV0z6DAAAAbHSURBVMkWiKn8f03WJ42QNE/S0yWxDSVNkDQjvW6Q4pJ0paSZkp6U1LfknKGp/AxJQ0vi/SQ9lc65UhU8O8dJ1syKI6hpxlaBm4CBK8XOASZGRC9gYtoHGAT0Stsw4BrIkjIwHNgT6A8Mr0/MqcywkvNWvtanOMmaWbHUjK0JEfFPYOFK4cHAyPR+JHBkSXxUZB4G1pe0OXAIMCEiFkbEImACMDAdWzciJkVEAKNK6mqUx2TNrEDNfjLCRpKmlOxfFxHXNXHOphExFyAi5kraJMW7A7NLytWlWLl4XQPxspxkzaxQzZzCtSAidm+pSzcQi1WIl+XhAjMrTHNGClZjEsIb6U990uu8FK8DepaU6wHMaSLeo4F4WU6yZlas/LPsOKB+hsBQ4PaS+ElplsFewNtpWGE8MEDSBukLrwHA+HRssaS90qyCk0rqapSHC8ysUC15W62kW4EDyMZu68hmCVwOjJF0GvAqcFQqfhdwKDATWAKcAhARCyVdDExO5S6KiPov084gm8HQGbg7bWU5yZpZoVryttqIOLaRQwc2UDaAMxupZwQwooH4FGDn5rTJSdbMClXlN3w5yZpZgfz4GTOzvFV3lnWSNbPCiIpvl22znGTNrFAeLjAzy5GfjGBmlqfqzrFOsmZWrCrPsU6yZlYceQqXmVm+PCZrZpan6s6xTrJmVqwqz7FOsmZWLI/JmpnlRIiaKs+yXrTbzCxH7smaWaGqvCPrJGtmxfIULjOzvPhmBDOz/KzmU2jbBCdZMytWlWdZJ1kzK5THZM3McuQxWTOzHFV5jnWSNbNiqcq7sk6yZlYYUf3DBYqIotuwgqT5wCtFt2MN2AhYUHQjrEW1l9/pVhGxcUtVJuk
esp9dpRZExMCWuv6a0KqSbHshaUpE7F50O6zl+HdqjfECMWZmOXKSNTPLkZNsMa4rugHW4vw7tQZ5TNbMLEfuyZqZ5chJ1swsR06ya5CkgZKelzRT0jlFt8dWn6QRkuZJerrotljr5CS7hkjqAFwNDAJ2Ao6VtFOxrbIWcBPQpibH25rlJLvm9AdmRsSsiPgQGA0MLrhNtpoi4p/AwqLbYa2Xk+ya0x2YXbJfl2JmVsWcZNechpbB8Pw5syrnJLvm1AE9S/Z7AHMKaouZrSFOsmvOZKCXpG0kdQSGAOMKbpOZ5cxJdg2JiKXAWcB44FlgTERML7ZVtrok3QpMAnpLqpN0WtFtstbFt9WameXIPVkzsxw5yZqZ5chJ1swsR06yZmY5cpI1M8uRk2wVkbRM0jRJT0v6k6Quq1HXAZLuSO+PKLdqmKT1JX1zFa5xgaTvVxpfqcxNkr7ajGtt7ZWyrAhOstXlvYjoExE7Ax8Cp5ceVKbZv/OIGBcRl5cpsj7Q7CRr1h44yVavB4HtUg/uWUm/Ax4DekoaIGmSpMdSj7crrFjv9jlJDwFfqa9I0smSrkrvN5U0VtITadsbuBzYNvWif57K/UDSZElPSrqwpK6fpDV17wN6N/UhJH091fOEpL+s1Ds/SNKDkl6QdHgq30HSz0uu/Y3V/UGarQ4n2SokqZZs3dqnUqg3MCoidgPeBc4DDoqIvsAU4LuS1gauB74M7Ats1kj1VwL/iIhdgb7AdOAc4MXUi/6BpAFAL7LlHfsA/STtJ6kf2e3Eu5El8T0q+Dh/jYg90vWeBUrvqNoa2B84DLg2fYbTgLcjYo9U/9clbVPBdcxyUVt0A6xFdZY0Lb1/ELgB2AJ4JSIeTvG9yBYN/5ckgI5kt4XuALwUETMAJN0CDGvgGl8CTgKIiGXA25I2WKnMgLQ9nva7kiXddYCxEbEkXaOStRt2lnQJ2ZBEV7LbkuuNiYjlwAxJs9JnGADsUjJeu1669gsVXMusxTnJVpf3IqJPaSAl0ndLQ8CEiDh2pXJ9aLmlFwX8LCJ+v9I1vrMK17gJODIinpB0MnBAybGV64p07bMjojQZI2nrZl7XrEV4uKD9eRjYR9J2AJK6SNoeeA7YRtK2qdyxjZw/ETgjndtB0rrAYrJear3xwKklY73dJW0C/BP4T0mdJa1DNjTRlHWAuZLWAo5f6dhRkmpSmz8LPJ+ufUYqj6TtJX2mguuY5cI92XYmIuanHuGtkjql8HkR8YKkYcCdkhYADwE7N1DFt4Hr0mpTy4AzImKSpH+lKVJ3p3HZHYFJqSf9DnBCRDwm6TZgGvAK2ZBGU34KPJLKP8Unk/nzwD+ATYHTI+J9SX8gG6t9TNnF5wNHVvbTMWt5XoXLzCxHHi4wM8uRk6yZWY6cZM3McuQka2aWIydZM7McOcmameXISdbMLEf/H+x1hyCHgngWAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "lr = LogisticRegression(C = best_c, penalty = 'l1')\n", + "lr.fit(X_train_undersample,y_train_undersample.values.ravel())\n", + "# 代码和上面大致相同,唯一不同的,是这里我们使用的是真实比例\n", + "y_pred = lr.predict(X_test.values)\n", + " \n", + "cnf_matrix = confusion_matrix(y_test,y_pred)\n", + "np.set_printoptions(precision=2)\n", + " \n", + "print(\"召回率: \", cnf_matrix[1,1]/(cnf_matrix[1,0]+cnf_matrix[1,1]))\n", + " \n", + "class_names = [0,1]\n", + "plt.figure()\n", + "plot_confusion_matrix(cnf_matrix\n", + " , classes=class_names\n", + " , title='Confusion matrix')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "第一眼看到召回率91.8%貌似不错,但是右上角9433,表示有这么多正常的人被预测为异常,误判了这么多人。" ] }, {