|
|
@ -334,9 +334,374 @@
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"### 如何确定字段需要处理\n",
|
|
|
|
"我们需要解决一些异常值,如某值相对其它值过大的离群点"
|
|
|
|
"我们需要解决一些异常值,如某值相对其它值过大的离群点"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
|
|
"<div>\n",
|
|
|
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
|
|
"</style>\n",
|
|
|
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
|
|
|
" <th>A1</th>\n",
|
|
|
|
|
|
|
|
" <th>A2</th>\n",
|
|
|
|
|
|
|
|
" <th>A3</th>\n",
|
|
|
|
|
|
|
|
" <th>A4</th>\n",
|
|
|
|
|
|
|
|
" <th>A6</th>\n",
|
|
|
|
|
|
|
|
" <th>A8</th>\n",
|
|
|
|
|
|
|
|
" <th>A10</th>\n",
|
|
|
|
|
|
|
|
" <th>A12</th>\n",
|
|
|
|
|
|
|
|
" <th>A13</th>\n",
|
|
|
|
|
|
|
|
" <th>A15</th>\n",
|
|
|
|
|
|
|
|
" <th>A17</th>\n",
|
|
|
|
|
|
|
|
" <th>A18</th>\n",
|
|
|
|
|
|
|
|
" <th>A19</th>\n",
|
|
|
|
|
|
|
|
" <th>A21</th>\n",
|
|
|
|
|
|
|
|
" <th>A22</th>\n",
|
|
|
|
|
|
|
|
" <th>A23</th>\n",
|
|
|
|
|
|
|
|
" <th>A27</th>\n",
|
|
|
|
|
|
|
|
" <th>B1</th>\n",
|
|
|
|
|
|
|
|
" <th>B2</th>\n",
|
|
|
|
|
|
|
|
" <th>B3</th>\n",
|
|
|
|
|
|
|
|
" <th>B6</th>\n",
|
|
|
|
|
|
|
|
" <th>B8</th>\n",
|
|
|
|
|
|
|
|
" <th>B12</th>\n",
|
|
|
|
|
|
|
|
" <th>B13</th>\n",
|
|
|
|
|
|
|
|
" <th>B14</th>\n",
|
|
|
|
|
|
|
|
" <th>收率</th>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>count</th>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>42.0</td>\n",
|
|
|
|
|
|
|
|
" <td>1354.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>149.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1393.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1393.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1386.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1394.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1394.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1395.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1395.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1395.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1396.000000</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>mean</th>\n",
|
|
|
|
|
|
|
|
" <td>298.853868</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>403.515510</td>\n",
|
|
|
|
|
|
|
|
" <td>705.974212</td>\n",
|
|
|
|
|
|
|
|
" <td>28.287751</td>\n",
|
|
|
|
|
|
|
|
" <td>78.818792</td>\n",
|
|
|
|
|
|
|
|
" <td>100.861032</td>\n",
|
|
|
|
|
|
|
|
" <td>102.641834</td>\n",
|
|
|
|
|
|
|
|
" <td>0.199907</td>\n",
|
|
|
|
|
|
|
|
" <td>103.829370</td>\n",
|
|
|
|
|
|
|
|
" <td>104.766905</td>\n",
|
|
|
|
|
|
|
|
" <td>0.199928</td>\n",
|
|
|
|
|
|
|
|
" <td>231.067335</td>\n",
|
|
|
|
|
|
|
|
" <td>48.707825</td>\n",
|
|
|
|
|
|
|
|
" <td>9.117120</td>\n",
|
|
|
|
|
|
|
|
" <td>5.002872</td>\n",
|
|
|
|
|
|
|
|
" <td>74.396848</td>\n",
|
|
|
|
|
|
|
|
" <td>334.452742</td>\n",
|
|
|
|
|
|
|
|
" <td>3.454412</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500072</td>\n",
|
|
|
|
|
|
|
|
" <td>72.065186</td>\n",
|
|
|
|
|
|
|
|
" <td>43.709677</td>\n",
|
|
|
|
|
|
|
|
" <td>1020.215054</td>\n",
|
|
|
|
|
|
|
|
" <td>0.149419</td>\n",
|
|
|
|
|
|
|
|
" <td>410.403295</td>\n",
|
|
|
|
|
|
|
|
" <td>0.923244</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>std</th>\n",
|
|
|
|
|
|
|
|
" <td>10.130552</td>\n",
|
|
|
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
" <td>13.348093</td>\n",
|
|
|
|
|
|
|
|
" <td>53.214754</td>\n",
|
|
|
|
|
|
|
|
" <td>6.742765</td>\n",
|
|
|
|
|
|
|
|
" <td>2.683920</td>\n",
|
|
|
|
|
|
|
|
" <td>0.905198</td>\n",
|
|
|
|
|
|
|
|
" <td>0.915387</td>\n",
|
|
|
|
|
|
|
|
" <td>0.002524</td>\n",
|
|
|
|
|
|
|
|
" <td>0.963639</td>\n",
|
|
|
|
|
|
|
|
" <td>1.401446</td>\n",
|
|
|
|
|
|
|
|
" <td>0.002676</td>\n",
|
|
|
|
|
|
|
|
" <td>50.478071</td>\n",
|
|
|
|
|
|
|
|
" <td>4.976531</td>\n",
|
|
|
|
|
|
|
|
" <td>0.369152</td>\n",
|
|
|
|
|
|
|
|
" <td>0.136638</td>\n",
|
|
|
|
|
|
|
|
" <td>3.044490</td>\n",
|
|
|
|
|
|
|
|
" <td>105.120753</td>\n",
|
|
|
|
|
|
|
|
" <td>0.388585</td>\n",
|
|
|
|
|
|
|
|
" <td>0.002678</td>\n",
|
|
|
|
|
|
|
|
" <td>9.161986</td>\n",
|
|
|
|
|
|
|
|
" <td>4.338396</td>\n",
|
|
|
|
|
|
|
|
" <td>205.920155</td>\n",
|
|
|
|
|
|
|
|
" <td>0.008213</td>\n",
|
|
|
|
|
|
|
|
" <td>26.018410</td>\n",
|
|
|
|
|
|
|
|
" <td>0.030880</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>min</th>\n",
|
|
|
|
|
|
|
|
" <td>200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>270.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>470.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>17.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>70.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>100.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>98.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.120000</td>\n",
|
|
|
|
|
|
|
|
" <td>100.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>89.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.100000</td>\n",
|
|
|
|
|
|
|
|
" <td>100.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>20.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>4.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.150000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>40.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>20.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>400.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.030000</td>\n",
|
|
|
|
|
|
|
|
" <td>40.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.624000</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>25%</th>\n",
|
|
|
|
|
|
|
|
" <td>300.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>405.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>700.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>24.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>100.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>102.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>103.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>104.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>50.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>9.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>5.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>73.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>320.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>65.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>800.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.150000</td>\n",
|
|
|
|
|
|
|
|
" <td>400.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.902000</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>50%</th>\n",
|
|
|
|
|
|
|
|
" <td>300.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>405.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>700.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>29.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>101.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>103.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>104.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>105.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>50.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>9.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>5.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>73.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>320.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>78.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.150000</td>\n",
|
|
|
|
|
|
|
|
" <td>400.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.925000</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>75%</th>\n",
|
|
|
|
|
|
|
|
" <td>300.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>405.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>700.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>30.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>102.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>103.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>104.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>105.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>300.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>50.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>9.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>5.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>77.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>330.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.500000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>45.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.150000</td>\n",
|
|
|
|
|
|
|
|
" <td>420.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.943000</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
|
|
" <th>max</th>\n",
|
|
|
|
|
|
|
|
" <td>300.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>125.0</td>\n",
|
|
|
|
|
|
|
|
" <td>405.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>980.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>97.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>82.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>103.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>107.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>109.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>108.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.200000</td>\n",
|
|
|
|
|
|
|
|
" <td>350.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>90.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>10.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>10.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.600000</td>\n",
|
|
|
|
|
|
|
|
" <td>3.600000</td>\n",
|
|
|
|
|
|
|
|
" <td>80.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>73.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1200.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>0.150000</td>\n",
|
|
|
|
|
|
|
|
" <td>460.000000</td>\n",
|
|
|
|
|
|
|
|
" <td>1.000800</td>\n",
|
|
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
|
|
|
"</table>\n",
|
|
|
|
|
|
|
|
"</div>"
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
|
|
" A1 A2 A3 A4 A6 A8 \\\n",
|
|
|
|
|
|
|
|
"count 1396.000000 42.0 1354.000000 1396.000000 1396.000000 149.000000 \n",
|
|
|
|
|
|
|
|
"mean 298.853868 125.0 403.515510 705.974212 28.287751 78.818792 \n",
|
|
|
|
|
|
|
|
"std 10.130552 0.0 13.348093 53.214754 6.742765 2.683920 \n",
|
|
|
|
|
|
|
|
"min 200.000000 125.0 270.000000 470.000000 17.000000 70.000000 \n",
|
|
|
|
|
|
|
|
"25% 300.000000 125.0 405.000000 700.000000 24.000000 80.000000 \n",
|
|
|
|
|
|
|
|
"50% 300.000000 125.0 405.000000 700.000000 29.000000 80.000000 \n",
|
|
|
|
|
|
|
|
"75% 300.000000 125.0 405.000000 700.000000 30.000000 80.000000 \n",
|
|
|
|
|
|
|
|
"max 300.000000 125.0 405.000000 980.000000 97.000000 82.000000 \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" A10 A12 A13 A15 A17 \\\n",
|
|
|
|
|
|
|
|
"count 1396.000000 1396.000000 1396.000000 1396.000000 1396.000000 \n",
|
|
|
|
|
|
|
|
"mean 100.861032 102.641834 0.199907 103.829370 104.766905 \n",
|
|
|
|
|
|
|
|
"std 0.905198 0.915387 0.002524 0.963639 1.401446 \n",
|
|
|
|
|
|
|
|
"min 100.000000 98.000000 0.120000 100.000000 89.000000 \n",
|
|
|
|
|
|
|
|
"25% 100.000000 102.000000 0.200000 103.000000 104.000000 \n",
|
|
|
|
|
|
|
|
"50% 101.000000 103.000000 0.200000 104.000000 105.000000 \n",
|
|
|
|
|
|
|
|
"75% 102.000000 103.000000 0.200000 104.000000 105.000000 \n",
|
|
|
|
|
|
|
|
"max 103.000000 107.000000 0.200000 109.000000 108.000000 \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" A18 A19 A21 A22 A23 \\\n",
|
|
|
|
|
|
|
|
"count 1396.000000 1396.000000 1393.000000 1396.000000 1393.000000 \n",
|
|
|
|
|
|
|
|
"mean 0.199928 231.067335 48.707825 9.117120 5.002872 \n",
|
|
|
|
|
|
|
|
"std 0.002676 50.478071 4.976531 0.369152 0.136638 \n",
|
|
|
|
|
|
|
|
"min 0.100000 100.000000 20.000000 3.500000 4.000000 \n",
|
|
|
|
|
|
|
|
"25% 0.200000 200.000000 50.000000 9.000000 5.000000 \n",
|
|
|
|
|
|
|
|
"50% 0.200000 200.000000 50.000000 9.000000 5.000000 \n",
|
|
|
|
|
|
|
|
"75% 0.200000 300.000000 50.000000 9.000000 5.000000 \n",
|
|
|
|
|
|
|
|
"max 0.200000 350.000000 90.000000 10.000000 10.000000 \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" A27 B1 B2 B3 B6 \\\n",
|
|
|
|
|
|
|
|
"count 1396.000000 1386.000000 1394.000000 1394.000000 1396.000000 \n",
|
|
|
|
|
|
|
|
"mean 74.396848 334.452742 3.454412 3.500072 72.065186 \n",
|
|
|
|
|
|
|
|
"std 3.044490 105.120753 0.388585 0.002678 9.161986 \n",
|
|
|
|
|
|
|
|
"min 45.000000 3.500000 0.150000 3.500000 40.000000 \n",
|
|
|
|
|
|
|
|
"25% 73.000000 320.000000 3.500000 3.500000 65.000000 \n",
|
|
|
|
|
|
|
|
"50% 73.000000 320.000000 3.500000 3.500000 78.000000 \n",
|
|
|
|
|
|
|
|
"75% 77.000000 330.000000 3.500000 3.500000 80.000000 \n",
|
|
|
|
|
|
|
|
"max 80.000000 1200.000000 3.600000 3.600000 80.000000 \n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
" B8 B12 B13 B14 收率 \n",
|
|
|
|
|
|
|
|
"count 1395.000000 1395.000000 1395.000000 1396.000000 1396.000000 \n",
|
|
|
|
|
|
|
|
"mean 43.709677 1020.215054 0.149419 410.403295 0.923244 \n",
|
|
|
|
|
|
|
|
"std 4.338396 205.920155 0.008213 26.018410 0.030880 \n",
|
|
|
|
|
|
|
|
"min 20.000000 400.000000 0.030000 40.000000 0.624000 \n",
|
|
|
|
|
|
|
|
"25% 45.000000 800.000000 0.150000 400.000000 0.902000 \n",
|
|
|
|
|
|
|
|
"50% 45.000000 1200.000000 0.150000 400.000000 0.925000 \n",
|
|
|
|
|
|
|
|
"75% 45.000000 1200.000000 0.150000 420.000000 0.943000 \n",
|
|
|
|
|
|
|
|
"max 73.000000 1200.000000 0.150000 460.000000 1.000800 "
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
],
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"# pd.set_option('display.max_rows',100)#设置最大可见100行\n",
|
|
|
|
|
|
|
|
"pd.set_option('display.max_columns',100) #给最大列设置为100列\n",
|
|
|
|
|
|
|
|
"df_trn.describe()"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
|
|
"**观测点:**\n",
|
|
|
|
|
|
|
|
"<ul>\n",
|
|
|
|
|
|
|
|
" <li>A5、A9等字段的describe没有了,而head()是有的,说明这些字段有问题\n",
|
|
|
|
|
|
|
|
" <li>理论上,std(方差也可以)越大表明特征间的差异越大,这样模型能学到区分性,但是过大可能是数据有离群值,B1、B12是需要关注的,再看其它值,B1里面最小值是3.5,25%/50%/75%都是320,3.5非常离群,而B12里最小值和中位数和最大值像是递进。"
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"execution_count": 5,
|
|
|
|