|
|
|
@ -1,908 +0,0 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"import xgboost as xgb\n",
|
|
|
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
|
|
|
"from sklearn.ensemble import RandomForestClassifier\n",
|
|
|
|
|
"from sklearn.metrics import roc_auc_score\n",
|
|
|
|
|
"from featexp import univariate_plotter # pip install featexp\n",
|
|
|
|
|
"from featexp import get_univariate_plots\n",
|
|
|
|
|
"from featexp import get_trend_stats"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>SK_ID_CURR</th>\n",
|
|
|
|
|
" <th>TARGET</th>\n",
|
|
|
|
|
" <th>NAME_CONTRACT_TYPE</th>\n",
|
|
|
|
|
" <th>CODE_GENDER</th>\n",
|
|
|
|
|
" <th>FLAG_OWN_CAR</th>\n",
|
|
|
|
|
" <th>FLAG_OWN_REALTY</th>\n",
|
|
|
|
|
" <th>CNT_CHILDREN</th>\n",
|
|
|
|
|
" <th>AMT_INCOME_TOTAL</th>\n",
|
|
|
|
|
" <th>AMT_CREDIT</th>\n",
|
|
|
|
|
" <th>AMT_ANNUITY</th>\n",
|
|
|
|
|
" <th>AMT_GOODS_PRICE</th>\n",
|
|
|
|
|
" <th>NAME_TYPE_SUITE</th>\n",
|
|
|
|
|
" <th>NAME_INCOME_TYPE</th>\n",
|
|
|
|
|
" <th>NAME_EDUCATION_TYPE</th>\n",
|
|
|
|
|
" <th>NAME_FAMILY_STATUS</th>\n",
|
|
|
|
|
" <th>NAME_HOUSING_TYPE</th>\n",
|
|
|
|
|
" <th>REGION_POPULATION_RELATIVE</th>\n",
|
|
|
|
|
" <th>DAYS_BIRTH</th>\n",
|
|
|
|
|
" <th>DAYS_EMPLOYED</th>\n",
|
|
|
|
|
" <th>DAYS_REGISTRATION</th>\n",
|
|
|
|
|
" <th>DAYS_ID_PUBLISH</th>\n",
|
|
|
|
|
" <th>OWN_CAR_AGE</th>\n",
|
|
|
|
|
" <th>FLAG_MOBIL</th>\n",
|
|
|
|
|
" <th>FLAG_EMP_PHONE</th>\n",
|
|
|
|
|
" <th>FLAG_WORK_PHONE</th>\n",
|
|
|
|
|
" <th>FLAG_CONT_MOBILE</th>\n",
|
|
|
|
|
" <th>FLAG_PHONE</th>\n",
|
|
|
|
|
" <th>FLAG_EMAIL</th>\n",
|
|
|
|
|
" <th>OCCUPATION_TYPE</th>\n",
|
|
|
|
|
" <th>CNT_FAM_MEMBERS</th>\n",
|
|
|
|
|
" <th>REGION_RATING_CLIENT</th>\n",
|
|
|
|
|
" <th>REGION_RATING_CLIENT_W_CITY</th>\n",
|
|
|
|
|
" <th>WEEKDAY_APPR_PROCESS_START</th>\n",
|
|
|
|
|
" <th>HOUR_APPR_PROCESS_START</th>\n",
|
|
|
|
|
" <th>REG_REGION_NOT_LIVE_REGION</th>\n",
|
|
|
|
|
" <th>REG_REGION_NOT_WORK_REGION</th>\n",
|
|
|
|
|
" <th>LIVE_REGION_NOT_WORK_REGION</th>\n",
|
|
|
|
|
" <th>REG_CITY_NOT_LIVE_CITY</th>\n",
|
|
|
|
|
" <th>REG_CITY_NOT_WORK_CITY</th>\n",
|
|
|
|
|
" <th>LIVE_CITY_NOT_WORK_CITY</th>\n",
|
|
|
|
|
" <th>ORGANIZATION_TYPE</th>\n",
|
|
|
|
|
" <th>EXT_SOURCE_1</th>\n",
|
|
|
|
|
" <th>EXT_SOURCE_2</th>\n",
|
|
|
|
|
" <th>EXT_SOURCE_3</th>\n",
|
|
|
|
|
" <th>APARTMENTS_AVG</th>\n",
|
|
|
|
|
" <th>BASEMENTAREA_AVG</th>\n",
|
|
|
|
|
" <th>YEARS_BEGINEXPLUATATION_AVG</th>\n",
|
|
|
|
|
" <th>YEARS_BUILD_AVG</th>\n",
|
|
|
|
|
" <th>COMMONAREA_AVG</th>\n",
|
|
|
|
|
" <th>ELEVATORS_AVG</th>\n",
|
|
|
|
|
" <th>ENTRANCES_AVG</th>\n",
|
|
|
|
|
" <th>FLOORSMAX_AVG</th>\n",
|
|
|
|
|
" <th>FLOORSMIN_AVG</th>\n",
|
|
|
|
|
" <th>LANDAREA_AVG</th>\n",
|
|
|
|
|
" <th>LIVINGAPARTMENTS_AVG</th>\n",
|
|
|
|
|
" <th>LIVINGAREA_AVG</th>\n",
|
|
|
|
|
" <th>NONLIVINGAPARTMENTS_AVG</th>\n",
|
|
|
|
|
" <th>NONLIVINGAREA_AVG</th>\n",
|
|
|
|
|
" <th>APARTMENTS_MODE</th>\n",
|
|
|
|
|
" <th>BASEMENTAREA_MODE</th>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <th>ELEVATORS_MODE</th>\n",
|
|
|
|
|
" <th>ENTRANCES_MODE</th>\n",
|
|
|
|
|
" <th>FLOORSMAX_MODE</th>\n",
|
|
|
|
|
" <th>FLOORSMIN_MODE</th>\n",
|
|
|
|
|
" <th>LANDAREA_MODE</th>\n",
|
|
|
|
|
" <th>LIVINGAPARTMENTS_MODE</th>\n",
|
|
|
|
|
" <th>LIVINGAREA_MODE</th>\n",
|
|
|
|
|
" <th>NONLIVINGAPARTMENTS_MODE</th>\n",
|
|
|
|
|
" <th>NONLIVINGAREA_MODE</th>\n",
|
|
|
|
|
" <th>APARTMENTS_MEDI</th>\n",
|
|
|
|
|
" <th>BASEMENTAREA_MEDI</th>\n",
|
|
|
|
|
" <th>YEARS_BEGINEXPLUATATION_MEDI</th>\n",
|
|
|
|
|
" <th>YEARS_BUILD_MEDI</th>\n",
|
|
|
|
|
" <th>COMMONAREA_MEDI</th>\n",
|
|
|
|
|
" <th>ELEVATORS_MEDI</th>\n",
|
|
|
|
|
" <th>ENTRANCES_MEDI</th>\n",
|
|
|
|
|
" <th>FLOORSMAX_MEDI</th>\n",
|
|
|
|
|
" <th>FLOORSMIN_MEDI</th>\n",
|
|
|
|
|
" <th>LANDAREA_MEDI</th>\n",
|
|
|
|
|
" <th>LIVINGAPARTMENTS_MEDI</th>\n",
|
|
|
|
|
" <th>LIVINGAREA_MEDI</th>\n",
|
|
|
|
|
" <th>NONLIVINGAPARTMENTS_MEDI</th>\n",
|
|
|
|
|
" <th>NONLIVINGAREA_MEDI</th>\n",
|
|
|
|
|
" <th>FONDKAPREMONT_MODE</th>\n",
|
|
|
|
|
" <th>HOUSETYPE_MODE</th>\n",
|
|
|
|
|
" <th>TOTALAREA_MODE</th>\n",
|
|
|
|
|
" <th>WALLSMATERIAL_MODE</th>\n",
|
|
|
|
|
" <th>EMERGENCYSTATE_MODE</th>\n",
|
|
|
|
|
" <th>OBS_30_CNT_SOCIAL_CIRCLE</th>\n",
|
|
|
|
|
" <th>DEF_30_CNT_SOCIAL_CIRCLE</th>\n",
|
|
|
|
|
" <th>OBS_60_CNT_SOCIAL_CIRCLE</th>\n",
|
|
|
|
|
" <th>DEF_60_CNT_SOCIAL_CIRCLE</th>\n",
|
|
|
|
|
" <th>DAYS_LAST_PHONE_CHANGE</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_2</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_3</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_4</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_5</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_6</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_7</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_8</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_9</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_10</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_11</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_12</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_13</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_14</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_15</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_16</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_17</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_18</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_19</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_20</th>\n",
|
|
|
|
|
" <th>FLAG_DOCUMENT_21</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_HOUR</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_DAY</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_WEEK</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_MON</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_QRT</th>\n",
|
|
|
|
|
" <th>AMT_REQ_CREDIT_BUREAU_YEAR</th>\n",
|
|
|
|
|
" <th>DATE</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>145457</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Cash loans</td>\n",
|
|
|
|
|
" <td>M</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>N</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>288000.0</td>\n",
|
|
|
|
|
" <td>242595.0</td>\n",
|
|
|
|
|
" <td>10813.5</td>\n",
|
|
|
|
|
" <td>202500.0</td>\n",
|
|
|
|
|
" <td>Unaccompanied</td>\n",
|
|
|
|
|
" <td>Pensioner</td>\n",
|
|
|
|
|
" <td>Secondary / secondary special</td>\n",
|
|
|
|
|
" <td>Married</td>\n",
|
|
|
|
|
" <td>Municipal apartment</td>\n",
|
|
|
|
|
" <td>0.046220</td>\n",
|
|
|
|
|
" <td>-22230</td>\n",
|
|
|
|
|
" <td>365243</td>\n",
|
|
|
|
|
" <td>-7689.0</td>\n",
|
|
|
|
|
" <td>-4096</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>FRIDAY</td>\n",
|
|
|
|
|
" <td>13</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>XNA</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.735594</td>\n",
|
|
|
|
|
" <td>0.413597</td>\n",
|
|
|
|
|
" <td>0.6113</td>\n",
|
|
|
|
|
" <td>0.3295</td>\n",
|
|
|
|
|
" <td>0.9871</td>\n",
|
|
|
|
|
" <td>0.8232</td>\n",
|
|
|
|
|
" <td>0.4761</td>\n",
|
|
|
|
|
" <td>0.96</td>\n",
|
|
|
|
|
" <td>0.4138</td>\n",
|
|
|
|
|
" <td>0.4583</td>\n",
|
|
|
|
|
" <td>0.5</td>\n",
|
|
|
|
|
" <td>0.142</td>\n",
|
|
|
|
|
" <td>0.453</td>\n",
|
|
|
|
|
" <td>0.6385</td>\n",
|
|
|
|
|
" <td>0.2085</td>\n",
|
|
|
|
|
" <td>0.4423</td>\n",
|
|
|
|
|
" <td>0.6229</td>\n",
|
|
|
|
|
" <td>0.342</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0.9667</td>\n",
|
|
|
|
|
" <td>0.4138</td>\n",
|
|
|
|
|
" <td>0.4583</td>\n",
|
|
|
|
|
" <td>0.5</td>\n",
|
|
|
|
|
" <td>0.1452</td>\n",
|
|
|
|
|
" <td>0.4949</td>\n",
|
|
|
|
|
" <td>0.6652</td>\n",
|
|
|
|
|
" <td>0.2101</td>\n",
|
|
|
|
|
" <td>0.4682</td>\n",
|
|
|
|
|
" <td>0.6173</td>\n",
|
|
|
|
|
" <td>0.3295</td>\n",
|
|
|
|
|
" <td>0.9871</td>\n",
|
|
|
|
|
" <td>0.8256</td>\n",
|
|
|
|
|
" <td>0.4791</td>\n",
|
|
|
|
|
" <td>0.96</td>\n",
|
|
|
|
|
" <td>0.4138</td>\n",
|
|
|
|
|
" <td>0.4583</td>\n",
|
|
|
|
|
" <td>0.5</td>\n",
|
|
|
|
|
" <td>0.1444</td>\n",
|
|
|
|
|
" <td>0.4609</td>\n",
|
|
|
|
|
" <td>0.65</td>\n",
|
|
|
|
|
" <td>0.2096</td>\n",
|
|
|
|
|
" <td>0.4516</td>\n",
|
|
|
|
|
" <td>org spec account</td>\n",
|
|
|
|
|
" <td>block of flats</td>\n",
|
|
|
|
|
" <td>0.8750</td>\n",
|
|
|
|
|
" <td>Stone, brick</td>\n",
|
|
|
|
|
" <td>No</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>-1347.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>2018-05</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>128979</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Cash loans</td>\n",
|
|
|
|
|
" <td>F</td>\n",
|
|
|
|
|
" <td>N</td>\n",
|
|
|
|
|
" <td>N</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>94500.0</td>\n",
|
|
|
|
|
" <td>646920.0</td>\n",
|
|
|
|
|
" <td>20866.5</td>\n",
|
|
|
|
|
" <td>540000.0</td>\n",
|
|
|
|
|
" <td>Unaccompanied</td>\n",
|
|
|
|
|
" <td>Pensioner</td>\n",
|
|
|
|
|
" <td>Secondary / secondary special</td>\n",
|
|
|
|
|
" <td>Married</td>\n",
|
|
|
|
|
" <td>House / apartment</td>\n",
|
|
|
|
|
" <td>0.018850</td>\n",
|
|
|
|
|
" <td>-20599</td>\n",
|
|
|
|
|
" <td>365243</td>\n",
|
|
|
|
|
" <td>-784.0</td>\n",
|
|
|
|
|
" <td>-2393</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>SUNDAY</td>\n",
|
|
|
|
|
" <td>15</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>XNA</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.461944</td>\n",
|
|
|
|
|
" <td>0.326475</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>-1273.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>2018-05</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>145448</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Cash loans</td>\n",
|
|
|
|
|
" <td>M</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>225000.0</td>\n",
|
|
|
|
|
" <td>183294.0</td>\n",
|
|
|
|
|
" <td>14823.0</td>\n",
|
|
|
|
|
" <td>153000.0</td>\n",
|
|
|
|
|
" <td>Unaccompanied</td>\n",
|
|
|
|
|
" <td>Working</td>\n",
|
|
|
|
|
" <td>Secondary / secondary special</td>\n",
|
|
|
|
|
" <td>Married</td>\n",
|
|
|
|
|
" <td>House / apartment</td>\n",
|
|
|
|
|
" <td>0.020713</td>\n",
|
|
|
|
|
" <td>-11070</td>\n",
|
|
|
|
|
" <td>-1345</td>\n",
|
|
|
|
|
" <td>-3463.0</td>\n",
|
|
|
|
|
" <td>-3618</td>\n",
|
|
|
|
|
" <td>19.0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Drivers</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>SATURDAY</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>Self-employed</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.374592</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>12.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>11.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>-1127.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>2018-03</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>294475</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Cash loans</td>\n",
|
|
|
|
|
" <td>M</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>N</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>180000.0</td>\n",
|
|
|
|
|
" <td>260640.0</td>\n",
|
|
|
|
|
" <td>20169.0</td>\n",
|
|
|
|
|
" <td>225000.0</td>\n",
|
|
|
|
|
" <td>Family</td>\n",
|
|
|
|
|
" <td>Working</td>\n",
|
|
|
|
|
" <td>Secondary / secondary special</td>\n",
|
|
|
|
|
" <td>Married</td>\n",
|
|
|
|
|
" <td>House / apartment</td>\n",
|
|
|
|
|
" <td>0.026392</td>\n",
|
|
|
|
|
" <td>-15901</td>\n",
|
|
|
|
|
" <td>-130</td>\n",
|
|
|
|
|
" <td>-7799.0</td>\n",
|
|
|
|
|
" <td>-4449</td>\n",
|
|
|
|
|
" <td>12.0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Security staff</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>THURSDAY</td>\n",
|
|
|
|
|
" <td>18</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>Business Entity Type 3</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.712657</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.1031</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.9856</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.00</td>\n",
|
|
|
|
|
" <td>0.2069</td>\n",
|
|
|
|
|
" <td>0.1667</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.1050</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>0.0000</td>\n",
|
|
|
|
|
" <td>0.2069</td>\n",
|
|
|
|
|
" <td>0.1667</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.1041</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.9856</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>0.00</td>\n",
|
|
|
|
|
" <td>0.2069</td>\n",
|
|
|
|
|
" <td>0.1667</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>block of flats</td>\n",
|
|
|
|
|
" <td>0.0696</td>\n",
|
|
|
|
|
" <td>Stone, brick</td>\n",
|
|
|
|
|
" <td>No</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>-1768.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>2018-04</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>216609</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Revolving loans</td>\n",
|
|
|
|
|
" <td>M</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>Y</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>112500.0</td>\n",
|
|
|
|
|
" <td>180000.0</td>\n",
|
|
|
|
|
" <td>9000.0</td>\n",
|
|
|
|
|
" <td>180000.0</td>\n",
|
|
|
|
|
" <td>Unaccompanied</td>\n",
|
|
|
|
|
" <td>State servant</td>\n",
|
|
|
|
|
" <td>Higher education</td>\n",
|
|
|
|
|
" <td>Married</td>\n",
|
|
|
|
|
" <td>House / apartment</td>\n",
|
|
|
|
|
" <td>0.007020</td>\n",
|
|
|
|
|
" <td>-10234</td>\n",
|
|
|
|
|
" <td>-1993</td>\n",
|
|
|
|
|
" <td>-4040.0</td>\n",
|
|
|
|
|
" <td>-2913</td>\n",
|
|
|
|
|
" <td>6.0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>WEDNESDAY</td>\n",
|
|
|
|
|
" <td>11</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>Emergency</td>\n",
|
|
|
|
|
" <td>0.405051</td>\n",
|
|
|
|
|
" <td>0.528879</td>\n",
|
|
|
|
|
" <td>0.604113</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>-429.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>1.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>0.0</td>\n",
|
|
|
|
|
" <td>2018-03</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>5 rows × 123 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" SK_ID_CURR TARGET NAME_CONTRACT_TYPE ... AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR DATE\n",
|
|
|
|
|
"0 145457 0 Cash loans ... 0.0 2.0 2018-05\n",
|
|
|
|
|
"1 128979 0 Cash loans ... 0.0 3.0 2018-05\n",
|
|
|
|
|
"2 145448 0 Cash loans ... 0.0 5.0 2018-03\n",
|
|
|
|
|
"3 294475 0 Cash loans ... 0.0 1.0 2018-04\n",
|
|
|
|
|
"4 216609 0 Revolving loans ... 0.0 0.0 2018-03\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[5 rows x 123 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"application_raw = pd.read_csv('application_train.csv') # Bank loan data; the task is to predict the likelihood of default (0/1)\n",
|
|
|
|
|
"application_raw.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Data preprocessing\n",
"def get_nonull_dummy_data(application_train_raw, dummy_drop=['ORGANIZATION_TYPE']):\n",
"    \"\"\"Mean-impute numeric columns and one-hot encode categorical columns.\n",
"\n",
"    The input frame is copied first, so the caller's DataFrame is not modified.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    application_train_raw : pandas.DataFrame\n",
"        Raw application table.\n",
"    dummy_drop : list of str\n",
"        Columns removed before encoding; the list is only read, never mutated.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Copy of the input without the dummy_drop columns, with nulls in\n",
"        non-object columns replaced by the column mean and every object-dtype\n",
"        column expanded into indicator columns (including a NaN indicator).\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If a name in dummy_drop is not a column of the input.\n",
"    \"\"\"\n",
"    data = application_train_raw.copy()\n",
"\n",
"    # Number of missing values per column (Series indexed by column name).\n",
"    nulls = data.isnull().sum()\n",
"\n",
"    # Non-object columns with fewer than 3075 (but at least one) nulls: fill with the mean.\n",
"    less_nulls = nulls[(nulls < 3075) & (nulls != 0)].index\n",
"    less_nulls_float = [col for col in less_nulls if data[col].dtype != 'O']\n",
"    if less_nulls_float:\n",
"        data[less_nulls_float] = data[less_nulls_float].fillna(data[less_nulls_float].mean())\n",
"\n",
"    # Non-object columns with 3075 or more nulls: currently filled with the mean as well.\n",
"    more_nulls = nulls[nulls >= 3075].index\n",
"    more_nulls_float = [col for col in more_nulls if data[col].dtype != 'O']\n",
"    if more_nulls_float:\n",
"        data[more_nulls_float] = data[more_nulls_float].fillna(data[more_nulls_float].mean())\n",
"\n",
"    # Feature encoding: remove the excluded columns, then one-hot encode what is categorical.\n",
"    data = data.drop(columns=dummy_drop)\n",
"\n",
"    # Object-dtype columns are the categorical ones.\n",
"    cat_cols = [col for col in data.columns if data[col].dtype == 'O']\n",
"\n",
"    # dummy_na=True adds one extra indicator column per feature for missing values.\n",
"    return pd.get_dummies(data, columns=cat_cols, dummy_na=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def import_and_create_train_test_data(test_size=0.33, random_state=42):\n",
"    \"\"\"Load the training CSV, preprocess it and split it into train and test parts.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    test_size : float\n",
"        Forwarded to train_test_split as test_size.\n",
"    random_state : int\n",
"        Forwarded to train_test_split as random_state.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (X_train, X_test, y_train, y_test, train_users, test_users).\n",
"        X_train / X_test are the preprocessed features without TARGET (SK_ID_CURR\n",
"        is kept). train_users / test_users hold SK_ID_CURR and TARGET for the\n",
"        matching split, re-indexed from 0.\n",
"    \"\"\"\n",
"    application_raw = pd.read_csv('application_train.csv')\n",
"    application = get_nonull_dummy_data(application_raw, dummy_drop=['ORGANIZATION_TYPE'])\n",
"\n",
"    X = application.drop(columns=['TARGET'])\n",
"    y = application['TARGET']\n",
"    X_train, X_test, y_train, y_test = train_test_split(\n",
"        X, y, test_size=test_size, random_state=random_state\n",
"    )\n",
"\n",
"    # assign() returns a new frame (aligned on the index) instead of writing into a\n",
"    # slice of X_train / X_test, which is what triggered SettingWithCopyWarning.\n",
"    train_users = X_train[['SK_ID_CURR']].assign(TARGET=y_train).reset_index(drop=True)\n",
"    test_users = X_test[['SK_ID_CURR']].assign(TARGET=y_test).reset_index(drop=True)\n",
"\n",
"    return (X_train, X_test, y_train, y_test, train_users, test_users)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def import_and_create_TEST_data():\n",
"    \"\"\"Load 'test_data.csv' and run it through the same preprocessing as the training data.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (X, users): X is the preprocessed frame (SK_ID_CURR is kept) and users is\n",
"        its SK_ID_CURR column as a one-column DataFrame, re-indexed from 0.\n",
"    \"\"\"\n",
"    application_raw = pd.read_csv('test_data.csv')\n",
"    application = get_nonull_dummy_data(application_raw, dummy_drop=['ORGANIZATION_TYPE'])\n",
"\n",
"    X = application\n",
"    users = X[['SK_ID_CURR']].reset_index(drop=True)\n",
"\n",
"    # NOTE(review): the cell previously ended here without a return statement, so the\n",
"    # function yielded None. Returning (X, users) mirrors\n",
"    # import_and_create_train_test_data -- confirm this is the intended contract.\n",
"    return (X, users)"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.7.3"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|