{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 银行违约预测\n", "### 任务:预测是否违约" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import xgboost as xgb\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import roc_auc_score\n", "from featexp import univariate_plotter # pip install featexp\n", "from featexp import get_univariate_plots\n", "from featexp import get_trend_stats" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | SK_ID_CURR | \n", "TARGET | \n", "NAME_CONTRACT_TYPE | \n", "CODE_GENDER | \n", "FLAG_OWN_CAR | \n", "FLAG_OWN_REALTY | \n", "CNT_CHILDREN | \n", "AMT_INCOME_TOTAL | \n", "AMT_CREDIT | \n", "AMT_ANNUITY | \n", "... | \n", "FLAG_DOCUMENT_18 | \n", "FLAG_DOCUMENT_19 | \n", "FLAG_DOCUMENT_20 | \n", "FLAG_DOCUMENT_21 | \n", "AMT_REQ_CREDIT_BUREAU_HOUR | \n", "AMT_REQ_CREDIT_BUREAU_DAY | \n", "AMT_REQ_CREDIT_BUREAU_WEEK | \n", "AMT_REQ_CREDIT_BUREAU_MON | \n", "AMT_REQ_CREDIT_BUREAU_QRT | \n", "AMT_REQ_CREDIT_BUREAU_YEAR | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "100002 | \n", "1 | \n", "Cash loans | \n", "M | \n", "N | \n", "Y | \n", "0 | \n", "202500.0 | \n", "406597.5 | \n", "24700.5 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "
1 | \n", "100003 | \n", "0 | \n", "Cash loans | \n", "F | \n", "N | \n", "N | \n", "0 | \n", "270000.0 | \n", "1293502.5 | \n", "35698.5 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "100004 | \n", "0 | \n", "Revolving loans | \n", "M | \n", "Y | \n", "Y | \n", "0 | \n", "67500.0 | \n", "135000.0 | \n", "6750.0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "100006 | \n", "0 | \n", "Cash loans | \n", "F | \n", "N | \n", "Y | \n", "0 | \n", "135000.0 | \n", "312682.5 | \n", "29686.5 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "100007 | \n", "0 | \n", "Cash loans | \n", "M | \n", "N | \n", "Y | \n", "0 | \n", "121500.0 | \n", "513000.0 | \n", "21865.5 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
5 rows × 122 columns
\n", "\n", " | AMT_INCOME_TOTAL | \n", "Samples_in_bin | \n", "target_mean | \n", "AMT_INCOME_TOTAL_mean | \n", "
---|---|---|---|---|
0 | \n", "[25650.0, 81000.0] | \n", "22334 | \n", "0.082699 | \n", "66257.130366 | \n", "
1 | \n", "(81000.0, 99000.0] | \n", "20424 | \n", "0.081032 | \n", "91076.626816 | \n", "
2 | \n", "(99000.0, 112500.0] | \n", "24746 | \n", "0.086842 | \n", "111311.671092 | \n", "
3 | \n", "(112500.0, 135000.0] | \n", "32725 | \n", "0.085623 | \n", "131844.069914 | \n", "
4 | \n", "(135000.0, 144000.0] | \n", "2846 | \n", "0.087491 | \n", "142867.813598 | \n", "
5 | \n", "(144000.0, 162000.0] | \n", "20804 | \n", "0.087531 | \n", "157082.746126 | \n", "
6 | \n", "(162000.0, 180000.0] | \n", "20550 | \n", "0.086326 | \n", "178073.976044 | \n", "
7 | \n", "(180000.0, 225000.0] | \n", "29937 | \n", "0.077997 | \n", "212795.095082 | \n", "
8 | \n", "(225000.0, 270000.0] | \n", "13392 | \n", "0.070266 | \n", "259515.009845 | \n", "
9 | \n", "(270000.0, 9000000.0] | \n", "18274 | \n", "0.062110 | \n", "389590.875377 | \n", "
\n", " | Feature | \n", "Trend_changes | \n", "Trend_changes_test | \n", "Trend_correlation | \n", "
---|---|---|---|---|
0 | \n", "CNT_CHILDREN | \n", "2 | \n", "2 | \n", "0.975688 | \n", "
1 | \n", "AMT_INCOME_TOTAL | \n", "4 | \n", "3 | \n", "0.921382 | \n", "
2 | \n", "AMT_CREDIT | \n", "3 | \n", "3 | \n", "0.988779 | \n", "
3 | \n", "AMT_ANNUITY | \n", "4 | \n", "4 | \n", "0.972325 | \n", "
4 | \n", "AMT_GOODS_PRICE | \n", "7 | \n", "7 | \n", "0.994683 | \n", "
5 | \n", "REGION_POPULATION_RELATIVE | \n", "5 | \n", "5 | \n", "0.989683 | \n", "
6 | \n", "DAYS_BIRTH | \n", "0 | \n", "0 | \n", "0.992783 | \n", "
7 | \n", "DAYS_EMPLOYED | \n", "1 | \n", "1 | \n", "0.995426 | \n", "
8 | \n", "DAYS_REGISTRATION | \n", "2 | \n", "2 | \n", "0.976891 | \n", "
9 | \n", "DAYS_ID_PUBLISH | \n", "0 | \n", "2 | \n", "0.985101 | \n", "
10 | \n", "OWN_CAR_AGE | \n", "2 | \n", "1 | \n", "0.966213 | \n", "
11 | \n", "FLAG_MOBIL | \n", "0 | \n", "0 | \n", "0.000000 | \n", "
12 | \n", "FLAG_EMP_PHONE | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
13 | \n", "FLAG_WORK_PHONE | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
14 | \n", "FLAG_CONT_MOBILE | \n", "0 | \n", "0 | \n", "0.000000 | \n", "
15 | \n", "FLAG_PHONE | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
16 | \n", "FLAG_EMAIL | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
17 | \n", "CNT_FAM_MEMBERS | \n", "1 | \n", "1 | \n", "0.996885 | \n", "
18 | \n", "REGION_RATING_CLIENT | \n", "0 | \n", "0 | \n", "0.999736 | \n", "
19 | \n", "REGION_RATING_CLIENT_W_CITY | \n", "0 | \n", "0 | \n", "0.999113 | \n", "
20 | \n", "HOUR_APPR_PROCESS_START | \n", "2 | \n", "2 | \n", "0.945565 | \n", "
21 | \n", "REG_REGION_NOT_LIVE_REGION | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
22 | \n", "REG_REGION_NOT_WORK_REGION | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
23 | \n", "LIVE_REGION_NOT_WORK_REGION | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
24 | \n", "REG_CITY_NOT_LIVE_CITY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
25 | \n", "REG_CITY_NOT_WORK_CITY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
26 | \n", "LIVE_CITY_NOT_WORK_CITY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
27 | \n", "EXT_SOURCE_1 | \n", "2 | \n", "0 | \n", "0.998696 | \n", "
28 | \n", "EXT_SOURCE_2 | \n", "0 | \n", "0 | \n", "0.998221 | \n", "
29 | \n", "EXT_SOURCE_3 | \n", "0 | \n", "0 | \n", "0.998048 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
168 | \n", "OCCUPATION_TYPE_Waiters/barmen staff | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
169 | \n", "OCCUPATION_TYPE_nan | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
170 | \n", "WEEKDAY_APPR_PROCESS_START_FRIDAY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
171 | \n", "WEEKDAY_APPR_PROCESS_START_MONDAY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
172 | \n", "WEEKDAY_APPR_PROCESS_START_SATURDAY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
173 | \n", "WEEKDAY_APPR_PROCESS_START_SUNDAY | \n", "0 | \n", "0 | \n", "-1.000000 | \n", "
174 | \n", "WEEKDAY_APPR_PROCESS_START_THURSDAY | \n", "0 | \n", "0 | \n", "-1.000000 | \n", "
175 | \n", "WEEKDAY_APPR_PROCESS_START_TUESDAY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
176 | \n", "WEEKDAY_APPR_PROCESS_START_WEDNESDAY | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
177 | \n", "WEEKDAY_APPR_PROCESS_START_nan | \n", "0 | \n", "0 | \n", "0.000000 | \n", "
178 | \n", "FONDKAPREMONT_MODE_not specified | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
179 | \n", "FONDKAPREMONT_MODE_org spec account | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
180 | \n", "FONDKAPREMONT_MODE_reg oper account | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
181 | \n", "FONDKAPREMONT_MODE_reg oper spec account | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
182 | \n", "FONDKAPREMONT_MODE_nan | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
183 | \n", "HOUSETYPE_MODE_block of flats | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
184 | \n", "HOUSETYPE_MODE_specific housing | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
185 | \n", "HOUSETYPE_MODE_terraced house | \n", "0 | \n", "0 | \n", "-1.000000 | \n", "
186 | \n", "HOUSETYPE_MODE_nan | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
187 | \n", "WALLSMATERIAL_MODE_Block | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
188 | \n", "WALLSMATERIAL_MODE_Mixed | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
189 | \n", "WALLSMATERIAL_MODE_Monolithic | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
190 | \n", "WALLSMATERIAL_MODE_Others | \n", "0 | \n", "0 | \n", "-1.000000 | \n", "
191 | \n", "WALLSMATERIAL_MODE_Panel | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
192 | \n", "WALLSMATERIAL_MODE_Stone, brick | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
193 | \n", "WALLSMATERIAL_MODE_Wooden | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
194 | \n", "WALLSMATERIAL_MODE_nan | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
195 | \n", "EMERGENCYSTATE_MODE_No | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
196 | \n", "EMERGENCYSTATE_MODE_Yes | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
197 | \n", "EMERGENCYSTATE_MODE_nan | \n", "0 | \n", "0 | \n", "1.000000 | \n", "
198 rows × 4 columns
\n", "