|
|
|
|
@ -3919,8 +3919,11 @@
|
|
|
|
|
"except ImportError:\n",
|
|
|
|
|
" print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n",
|
|
|
|
|
"else:\n",
|
|
|
|
|
" # Calculate Z-scores for age\n",
|
|
|
|
|
" dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n",
|
|
|
|
|
" # Calculate Z-scores for age, handling NaN values\n",
|
|
|
|
|
" age_nonan = dirty_data['age'].dropna()\n",
|
|
|
|
|
" zscores = np.abs(stats.zscore(age_nonan))\n",
|
|
|
|
|
" dirty_data['age_zscore'] = np.nan\n",
|
|
|
|
|
" dirty_data.loc[age_nonan.index, 'age_zscore'] = zscores\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # Typically, Z-score > 3 indicates an outlier\n",
|
|
|
|
|
" print(\"Rows with age Z-score > 3:\")\n",
|
|
|
|
|
|