diff --git a/机器学习竞赛实战_优胜解决方案/特征工程建模/Untitled.ipynb b/机器学习竞赛实战_优胜解决方案/特征工程建模/Untitled.ipynb deleted file mode 100644 index 7d7b3c0..0000000 --- a/机器学习竞赛实战_优胜解决方案/特征工程建模/Untitled.ipynb +++ /dev/null @@ -1,908 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import xgboost as xgb\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.metrics import roc_auc_score\n", - "from featexp import univariate_plotter # pip install featexp\n", - "from featexp import get_univariate_plots\n", - "from featexp import get_trend_stats" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " | SK_ID_CURR | \n", - "TARGET | \n", - "NAME_CONTRACT_TYPE | \n", - "CODE_GENDER | \n", - "FLAG_OWN_CAR | \n", - "FLAG_OWN_REALTY | \n", - "CNT_CHILDREN | \n", - "AMT_INCOME_TOTAL | \n", - "AMT_CREDIT | \n", - "AMT_ANNUITY | \n", - "AMT_GOODS_PRICE | \n", - "NAME_TYPE_SUITE | \n", - "NAME_INCOME_TYPE | \n", - "NAME_EDUCATION_TYPE | \n", - "NAME_FAMILY_STATUS | \n", - "NAME_HOUSING_TYPE | \n", - "REGION_POPULATION_RELATIVE | \n", - "DAYS_BIRTH | \n", - "DAYS_EMPLOYED | \n", - "DAYS_REGISTRATION | \n", - "DAYS_ID_PUBLISH | \n", - "OWN_CAR_AGE | \n", - "FLAG_MOBIL | \n", - "FLAG_EMP_PHONE | \n", - "FLAG_WORK_PHONE | \n", - "FLAG_CONT_MOBILE | \n", - "FLAG_PHONE | \n", - "FLAG_EMAIL | \n", - "OCCUPATION_TYPE | \n", - "CNT_FAM_MEMBERS | \n", - "REGION_RATING_CLIENT | \n", - "REGION_RATING_CLIENT_W_CITY | \n", - "WEEKDAY_APPR_PROCESS_START | \n", - "HOUR_APPR_PROCESS_START | \n", - "REG_REGION_NOT_LIVE_REGION | \n", - "REG_REGION_NOT_WORK_REGION | \n", - "LIVE_REGION_NOT_WORK_REGION | \n", - "REG_CITY_NOT_LIVE_CITY | \n", - "REG_CITY_NOT_WORK_CITY | \n", - "LIVE_CITY_NOT_WORK_CITY | \n", - "ORGANIZATION_TYPE | \n", - "EXT_SOURCE_1 | \n", - "EXT_SOURCE_2 | \n", - "EXT_SOURCE_3 | \n", - "APARTMENTS_AVG | \n", - "BASEMENTAREA_AVG | \n", - "YEARS_BEGINEXPLUATATION_AVG | \n", - "YEARS_BUILD_AVG | \n", - "COMMONAREA_AVG | \n", - "ELEVATORS_AVG | \n", - "ENTRANCES_AVG | \n", - "FLOORSMAX_AVG | \n", - "FLOORSMIN_AVG | \n", - "LANDAREA_AVG | \n", - "LIVINGAPARTMENTS_AVG | \n", - "LIVINGAREA_AVG | \n", - "NONLIVINGAPARTMENTS_AVG | \n", - "NONLIVINGAREA_AVG | \n", - "APARTMENTS_MODE | \n", - "BASEMENTAREA_MODE | \n", - "... | \n", - "ELEVATORS_MODE | \n", - "ENTRANCES_MODE | \n", - "FLOORSMAX_MODE | \n", - "FLOORSMIN_MODE | \n", - "LANDAREA_MODE | \n", - "LIVINGAPARTMENTS_MODE | \n", - "LIVINGAREA_MODE | \n", - "NONLIVINGAPARTMENTS_MODE | \n", - "NONLIVINGAREA_MODE | \n", - "APARTMENTS_MEDI | \n", - "BASEMENTAREA_MEDI | \n", - "YEARS_BEGINEXPLUATATION_MEDI | \n", - "YEARS_BUILD_MEDI | \n", - "COMMONAREA_MEDI | \n", - "ELEVATORS_MEDI | \n", - "ENTRANCES_MEDI | \n", - "FLOORSMAX_MEDI | \n", - "FLOORSMIN_MEDI | \n", - "LANDAREA_MEDI | \n", - "LIVINGAPARTMENTS_MEDI | \n", - "LIVINGAREA_MEDI | \n", - "NONLIVINGAPARTMENTS_MEDI | \n", - "NONLIVINGAREA_MEDI | \n", - "FONDKAPREMONT_MODE | \n", - "HOUSETYPE_MODE | \n", - "TOTALAREA_MODE | \n", - "WALLSMATERIAL_MODE | \n", - "EMERGENCYSTATE_MODE | \n", - "OBS_30_CNT_SOCIAL_CIRCLE | \n", - "DEF_30_CNT_SOCIAL_CIRCLE | \n", - "OBS_60_CNT_SOCIAL_CIRCLE | \n", - "DEF_60_CNT_SOCIAL_CIRCLE | \n", - "DAYS_LAST_PHONE_CHANGE | \n", - "FLAG_DOCUMENT_2 | \n", - "FLAG_DOCUMENT_3 | \n", - "FLAG_DOCUMENT_4 | \n", - "FLAG_DOCUMENT_5 | \n", - "FLAG_DOCUMENT_6 | \n", - "FLAG_DOCUMENT_7 | \n", - "FLAG_DOCUMENT_8 | \n", - "FLAG_DOCUMENT_9 | \n", - "FLAG_DOCUMENT_10 | \n", - "FLAG_DOCUMENT_11 | \n", - "FLAG_DOCUMENT_12 | \n", - "FLAG_DOCUMENT_13 | \n", - "FLAG_DOCUMENT_14 | \n", - "FLAG_DOCUMENT_15 | \n", - "FLAG_DOCUMENT_16 | \n", - "FLAG_DOCUMENT_17 | \n", - "FLAG_DOCUMENT_18 | \n", - "FLAG_DOCUMENT_19 | \n", - "FLAG_DOCUMENT_20 | \n", - "FLAG_DOCUMENT_21 | \n", - "AMT_REQ_CREDIT_BUREAU_HOUR | \n", - "AMT_REQ_CREDIT_BUREAU_DAY | \n", - "AMT_REQ_CREDIT_BUREAU_WEEK | \n", - "AMT_REQ_CREDIT_BUREAU_MON | \n", - "AMT_REQ_CREDIT_BUREAU_QRT | \n", - "AMT_REQ_CREDIT_BUREAU_YEAR | \n", - "DATE | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "145457 | \n", - "0 | \n", - "Cash loans | \n", - "M | \n", - "Y | \n", - "N | \n", - "0 | \n", - "288000.0 | \n", - "242595.0 | \n", - "10813.5 | \n", - "202500.0 | \n", - "Unaccompanied | \n", - "Pensioner | \n", - "Secondary / secondary special | \n", - "Married | \n", - "Municipal apartment | \n", - "0.046220 | \n", - "-22230 | \n", - "365243 | \n", - "-7689.0 | \n", - "-4096 | \n", - "5.0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "2.0 | \n", - "1 | \n", - "1 | \n", - "FRIDAY | \n", - "13 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "XNA | \n", - "NaN | \n", - "0.735594 | \n", - "0.413597 | \n", - "0.6113 | \n", - "0.3295 | \n", - "0.9871 | \n", - "0.8232 | \n", - "0.4761 | \n", - "0.96 | \n", - "0.4138 | \n", - "0.4583 | \n", - "0.5 | \n", - "0.142 | \n", - "0.453 | \n", - "0.6385 | \n", - "0.2085 | \n", - "0.4423 | \n", - "0.6229 | \n", - "0.342 | \n", - "... | \n", - "0.9667 | \n", - "0.4138 | \n", - "0.4583 | \n", - "0.5 | \n", - "0.1452 | \n", - "0.4949 | \n", - "0.6652 | \n", - "0.2101 | \n", - "0.4682 | \n", - "0.6173 | \n", - "0.3295 | \n", - "0.9871 | \n", - "0.8256 | \n", - "0.4791 | \n", - "0.96 | \n", - "0.4138 | \n", - "0.4583 | \n", - "0.5 | \n", - "0.1444 | \n", - "0.4609 | \n", - "0.65 | \n", - "0.2096 | \n", - "0.4516 | \n", - "org spec account | \n", - "block of flats | \n", - "0.8750 | \n", - "Stone, brick | \n", - "No | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "-1347.0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2.0 | \n", - "2018-05 | \n", - "
1 | \n", - "128979 | \n", - "0 | \n", - "Cash loans | \n", - "F | \n", - "N | \n", - "N | \n", - "0 | \n", - "94500.0 | \n", - "646920.0 | \n", - "20866.5 | \n", - "540000.0 | \n", - "Unaccompanied | \n", - "Pensioner | \n", - "Secondary / secondary special | \n", - "Married | \n", - "House / apartment | \n", - "0.018850 | \n", - "-20599 | \n", - "365243 | \n", - "-784.0 | \n", - "-2393 | \n", - "NaN | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "2.0 | \n", - "2 | \n", - "2 | \n", - "SUNDAY | \n", - "15 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "XNA | \n", - "NaN | \n", - "0.461944 | \n", - "0.326475 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "-1273.0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "3.0 | \n", - "2018-05 | \n", - "
2 | \n", - "145448 | \n", - "0 | \n", - "Cash loans | \n", - "M | \n", - "Y | \n", - "Y | \n", - "1 | \n", - "225000.0 | \n", - "183294.0 | \n", - "14823.0 | \n", - "153000.0 | \n", - "Unaccompanied | \n", - "Working | \n", - "Secondary / secondary special | \n", - "Married | \n", - "House / apartment | \n", - "0.020713 | \n", - "-11070 | \n", - "-1345 | \n", - "-3463.0 | \n", - "-3618 | \n", - "19.0 | \n", - "1 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "Drivers | \n", - "3.0 | \n", - "3 | \n", - "3 | \n", - "SATURDAY | \n", - "9 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "1 | \n", - "1 | \n", - "Self-employed | \n", - "NaN | \n", - "0.374592 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "12.0 | \n", - "0.0 | \n", - "11.0 | \n", - "0.0 | \n", - "-1127.0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "5.0 | \n", - "2018-03 | \n", - "
3 | \n", - "294475 | \n", - "0 | \n", - "Cash loans | \n", - "M | \n", - "Y | \n", - "N | \n", - "0 | \n", - "180000.0 | \n", - "260640.0 | \n", - "20169.0 | \n", - "225000.0 | \n", - "Family | \n", - "Working | \n", - "Secondary / secondary special | \n", - "Married | \n", - "House / apartment | \n", - "0.026392 | \n", - "-15901 | \n", - "-130 | \n", - "-7799.0 | \n", - "-4449 | \n", - "12.0 | \n", - "1 | \n", - "1 | \n", - "1 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "Security staff | \n", - "2.0 | \n", - "2 | \n", - "2 | \n", - "THURSDAY | \n", - "18 | \n", - "0 | \n", - "1 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "1 | \n", - "Business Entity Type 3 | \n", - "NaN | \n", - "0.712657 | \n", - "NaN | \n", - "0.1031 | \n", - "NaN | \n", - "0.9856 | \n", - "NaN | \n", - "NaN | \n", - "0.00 | \n", - "0.2069 | \n", - "0.1667 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.1050 | \n", - "NaN | \n", - "... | \n", - "0.0000 | \n", - "0.2069 | \n", - "0.1667 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "0.1041 | \n", - "NaN | \n", - "0.9856 | \n", - "NaN | \n", - "NaN | \n", - "0.00 | \n", - "0.2069 | \n", - "0.1667 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "block of flats | \n", - "0.0696 | \n", - "Stone, brick | \n", - "No | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "-1768.0 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "1.0 | \n", - "2018-04 | \n", - "
4 | \n", - "216609 | \n", - "0 | \n", - "Revolving loans | \n", - "M | \n", - "Y | \n", - "Y | \n", - "1 | \n", - "112500.0 | \n", - "180000.0 | \n", - "9000.0 | \n", - "180000.0 | \n", - "Unaccompanied | \n", - "State servant | \n", - "Higher education | \n", - "Married | \n", - "House / apartment | \n", - "0.007020 | \n", - "-10234 | \n", - "-1993 | \n", - "-4040.0 | \n", - "-2913 | \n", - "6.0 | \n", - "1 | \n", - "1 | \n", - "0 | \n", - "1 | \n", - "0 | \n", - "0 | \n", - "NaN | \n", - "3.0 | \n", - "2 | \n", - "2 | \n", - "WEDNESDAY | \n", - "11 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "Emergency | \n", - "0.405051 | \n", - "0.528879 | \n", - "0.604113 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "1.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "-429.0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "2018-03 | \n", - "
5 rows × 123 columns
\n", - "