diff --git a/机器学习竞赛实战_优胜解决方案/工业化工生产预测/.ipynb_checkpoints/工业生产预测-checkpoint.ipynb b/机器学习竞赛实战_优胜解决方案/工业化工生产预测/.ipynb_checkpoints/工业生产预测-checkpoint.ipynb index de8a0a6..d2d70e4 100644 --- a/机器学习竞赛实战_优胜解决方案/工业化工生产预测/.ipynb_checkpoints/工业生产预测-checkpoint.ipynb +++ b/机器学习竞赛实战_优胜解决方案/工业化工生产预测/.ipynb_checkpoints/工业生产预测-checkpoint.ipynb @@ -334,9 +334,374 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "### 如何确定字段需要处理\n", "我们需要解决一些异常值,如某值相对其它值过大的离群点" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A1A2A3A4A6A8A10A12A13A15A17A18A19A21A22A23A27B1B2B3B6B8B12B13B14收率
count1396.00000042.01354.0000001396.0000001396.000000149.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001396.0000001393.0000001396.0000001393.0000001396.0000001386.0000001394.0000001394.0000001396.0000001395.0000001395.0000001395.0000001396.0000001396.000000
mean298.853868125.0403.515510705.97421228.28775178.818792100.861032102.6418340.199907103.829370104.7669050.199928231.06733548.7078259.1171205.00287274.396848334.4527423.4544123.50007272.06518643.7096771020.2150540.149419410.4032950.923244
std10.1305520.013.34809353.2147546.7427652.6839200.9051980.9153870.0025240.9636391.4014460.00267650.4780714.9765310.3691520.1366383.044490105.1207530.3885850.0026789.1619864.338396205.9201550.00821326.0184100.030880
min200.000000125.0270.000000470.00000017.00000070.000000100.00000098.0000000.120000100.00000089.0000000.100000100.00000020.0000003.5000004.00000045.0000003.5000000.1500003.50000040.00000020.000000400.0000000.03000040.0000000.624000
25%300.000000125.0405.000000700.00000024.00000080.000000100.000000102.0000000.200000103.000000104.0000000.200000200.00000050.0000009.0000005.00000073.000000320.0000003.5000003.50000065.00000045.000000800.0000000.150000400.0000000.902000
50%300.000000125.0405.000000700.00000029.00000080.000000101.000000103.0000000.200000104.000000105.0000000.200000200.00000050.0000009.0000005.00000073.000000320.0000003.5000003.50000078.00000045.0000001200.0000000.150000400.0000000.925000
75%300.000000125.0405.000000700.00000030.00000080.000000102.000000103.0000000.200000104.000000105.0000000.200000300.00000050.0000009.0000005.00000077.000000330.0000003.5000003.50000080.00000045.0000001200.0000000.150000420.0000000.943000
max300.000000125.0405.000000980.00000097.00000082.000000103.000000107.0000000.200000109.000000108.0000000.200000350.00000090.00000010.00000010.00000080.0000001200.0000003.6000003.60000080.00000073.0000001200.0000000.150000460.0000001.000800
\n", + "
" + ], + "text/plain": [ + " A1 A2 A3 A4 A6 A8 \\\n", + "count 1396.000000 42.0 1354.000000 1396.000000 1396.000000 149.000000 \n", + "mean 298.853868 125.0 403.515510 705.974212 28.287751 78.818792 \n", + "std 10.130552 0.0 13.348093 53.214754 6.742765 2.683920 \n", + "min 200.000000 125.0 270.000000 470.000000 17.000000 70.000000 \n", + "25% 300.000000 125.0 405.000000 700.000000 24.000000 80.000000 \n", + "50% 300.000000 125.0 405.000000 700.000000 29.000000 80.000000 \n", + "75% 300.000000 125.0 405.000000 700.000000 30.000000 80.000000 \n", + "max 300.000000 125.0 405.000000 980.000000 97.000000 82.000000 \n", + "\n", + " A10 A12 A13 A15 A17 \\\n", + "count 1396.000000 1396.000000 1396.000000 1396.000000 1396.000000 \n", + "mean 100.861032 102.641834 0.199907 103.829370 104.766905 \n", + "std 0.905198 0.915387 0.002524 0.963639 1.401446 \n", + "min 100.000000 98.000000 0.120000 100.000000 89.000000 \n", + "25% 100.000000 102.000000 0.200000 103.000000 104.000000 \n", + "50% 101.000000 103.000000 0.200000 104.000000 105.000000 \n", + "75% 102.000000 103.000000 0.200000 104.000000 105.000000 \n", + "max 103.000000 107.000000 0.200000 109.000000 108.000000 \n", + "\n", + " A18 A19 A21 A22 A23 \\\n", + "count 1396.000000 1396.000000 1393.000000 1396.000000 1393.000000 \n", + "mean 0.199928 231.067335 48.707825 9.117120 5.002872 \n", + "std 0.002676 50.478071 4.976531 0.369152 0.136638 \n", + "min 0.100000 100.000000 20.000000 3.500000 4.000000 \n", + "25% 0.200000 200.000000 50.000000 9.000000 5.000000 \n", + "50% 0.200000 200.000000 50.000000 9.000000 5.000000 \n", + "75% 0.200000 300.000000 50.000000 9.000000 5.000000 \n", + "max 0.200000 350.000000 90.000000 10.000000 10.000000 \n", + "\n", + " A27 B1 B2 B3 B6 \\\n", + "count 1396.000000 1386.000000 1394.000000 1394.000000 1396.000000 \n", + "mean 74.396848 334.452742 3.454412 3.500072 72.065186 \n", + "std 3.044490 105.120753 0.388585 0.002678 9.161986 \n", + "min 45.000000 3.500000 0.150000 3.500000 40.000000 \n", + "25% 73.000000 320.000000 3.500000 3.500000 65.000000 \n", + "50% 73.000000 320.000000 3.500000 3.500000 78.000000 \n", + "75% 77.000000 330.000000 3.500000 3.500000 80.000000 \n", + "max 80.000000 1200.000000 3.600000 3.600000 80.000000 \n", + "\n", + " B8 B12 B13 B14 收率 \n", + "count 1395.000000 1395.000000 1395.000000 1396.000000 1396.000000 \n", + "mean 43.709677 1020.215054 0.149419 410.403295 0.923244 \n", + "std 4.338396 205.920155 0.008213 26.018410 0.030880 \n", + "min 20.000000 400.000000 0.030000 40.000000 0.624000 \n", + "25% 45.000000 800.000000 0.150000 400.000000 0.902000 \n", + "50% 45.000000 1200.000000 0.150000 400.000000 0.925000 \n", + "75% 45.000000 1200.000000 0.150000 420.000000 0.943000 \n", + "max 73.000000 1200.000000 0.150000 460.000000 1.000800 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pd.set_option('display.max_rows',100)#设置最大可见100行\n", + "pd.set_option('display.max_columns',100) #给最大列设置为100列\n", + "df_trn.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**观测点:**\n", + "