|
|
|
@ -1903,15 +1903,42 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Your selected dataframe has 60 columns.\n",
|
|
|
|
|
"There are 46 columns that have missing values.\n",
|
|
|
|
|
"We will remove 11 columns.\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Get the columns with > 50% m\n",
|
|
|
|
|
"# Get the columns with > 50% missing\n",
|
|
|
|
|
"missing_df = missing_values_table(data);\n",
|
|
|
|
|
"missing_columns = list(missing_df[missing_df['% of Total Values']> 50].index)\n",
|
|
|
|
|
"print('We will remove %d columns.'% len(missing_columns))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Drop the columns\n",
|
|
|
|
|
"data = data.drop(columns = list(missing_columns))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|