From 70973c2a7e83992d4720801a91a91eac2d3d14e2 Mon Sep 17 00:00:00 2001 From: benjas <909336740@qq.com> Date: Wed, 20 Jan 2021 10:57:02 +0800 Subject: [PATCH] Add. Data StandardScaler --- .../逻辑回归-信用卡欺诈检测.ipynb | 253 ++++++++++++++++-- 1 file changed, 238 insertions(+), 15 deletions(-) diff --git a/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb b/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb index b2653f6..95aff30 100644 --- a/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb +++ b/机器学习竞赛实战_优胜解决方案/信用卡欺诈检测/逻辑回归-信用卡欺诈检测.ipynb @@ -47,30 +47,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "D:\\Anaconda3\\lib\\importlib\\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject\n", - " return f(*args, **kwds)\n" - ] - } - ], + "outputs": [], "source": [ "# 导入工具包\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", - "%matplotlib inline # 把图轻松的镶嵌到这个notebook中" + "# 把图轻松的镶嵌到这个notebook中\n", + "%matplotlib inline" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -268,7 +260,7 @@ "[5 rows x 31 columns]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -289,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -341,6 +333,237 @@ "* 第二种方式则会减少真实数据,使得模型可学的数据变少,能力也会减弱。" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 数据标准化处理\n", + "\n", + "上面Amount列的值还是原值,相比其它列的值过大,会导致模型结果出现偏差,认为Amount列是非常重要的,具体可参考前面讲过的回归分析章节,需要对其标准化,大的值在区间内依然是大的,小的值在区间内依然是小的,可以理解为一种缩放。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | V1 | \n", + "V2 | \n", + "V3 | \n", + "V4 | \n", + "V5 | \n", + "V6 | \n", + "V7 | \n", + "V8 | \n", + "V9 | \n", + "V10 | \n", + "... | \n", + "V21 | \n", + "V22 | \n", + "V23 | \n", + "V24 | \n", + "V25 | \n", + "V26 | \n", + "V27 | \n", + "V28 | \n", + "Class | \n", + "normAmount | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "-1.359807 | \n", + "-0.072781 | \n", + "2.536347 | \n", + "1.378155 | \n", + "-0.338321 | \n", + "0.462388 | \n", + "0.239599 | \n", + "0.098698 | \n", + "0.363787 | \n", + "0.090794 | \n", + "... | \n", + "-0.018307 | \n", + "0.277838 | \n", + "-0.110474 | \n", + "0.066928 | \n", + "0.128539 | \n", + "-0.189115 | \n", + "0.133558 | \n", + "-0.021053 | \n", + "0 | \n", + "0.244964 | \n", + "
1 | \n", + "1.191857 | \n", + "0.266151 | \n", + "0.166480 | \n", + "0.448154 | \n", + "0.060018 | \n", + "-0.082361 | \n", + "-0.078803 | \n", + "0.085102 | \n", + "-0.255425 | \n", + "-0.166974 | \n", + "... | \n", + "-0.225775 | \n", + "-0.638672 | \n", + "0.101288 | \n", + "-0.339846 | \n", + "0.167170 | \n", + "0.125895 | \n", + "-0.008983 | \n", + "0.014724 | \n", + "0 | \n", + "-0.342475 | \n", + "
2 | \n", + "-1.358354 | \n", + "-1.340163 | \n", + "1.773209 | \n", + "0.379780 | \n", + "-0.503198 | \n", + "1.800499 | \n", + "0.791461 | \n", + "0.247676 | \n", + "-1.514654 | \n", + "0.207643 | \n", + "... | \n", + "0.247998 | \n", + "0.771679 | \n", + "0.909412 | \n", + "-0.689281 | \n", + "-0.327642 | \n", + "-0.139097 | \n", + "-0.055353 | \n", + "-0.059752 | \n", + "0 | \n", + "1.160686 | \n", + "
3 | \n", + "-0.966272 | \n", + "-0.185226 | \n", + "1.792993 | \n", + "-0.863291 | \n", + "-0.010309 | \n", + "1.247203 | \n", + "0.237609 | \n", + "0.377436 | \n", + "-1.387024 | \n", + "-0.054952 | \n", + "... | \n", + "-0.108300 | \n", + "0.005274 | \n", + "-0.190321 | \n", + "-1.175575 | \n", + "0.647376 | \n", + "-0.221929 | \n", + "0.062723 | \n", + "0.061458 | \n", + "0 | \n", + "0.140534 | \n", + "
4 | \n", + "-1.158233 | \n", + "0.877737 | \n", + "1.548718 | \n", + "0.403034 | \n", + "-0.407193 | \n", + "0.095921 | \n", + "0.592941 | \n", + "-0.270533 | \n", + "0.817739 | \n", + "0.753074 | \n", + "... | \n", + "-0.009431 | \n", + "0.798278 | \n", + "-0.137458 | \n", + "0.141267 | \n", + "-0.206010 | \n", + "0.502292 | \n", + "0.219422 | \n", + "0.215153 | \n", + "0 | \n", + "-0.073403 | \n", + "
5 rows × 30 columns
\n", + "