diff --git a/2-Working-With-Data/08-data-preparation/notebook.ipynb b/2-Working-With-Data/08-data-preparation/notebook.ipynb index 294f278c..ceff57fe 100644 --- a/2-Working-With-Data/08-data-preparation/notebook.ipynb +++ b/2-Working-With-Data/08-data-preparation/notebook.ipynb @@ -3919,8 +3919,11 @@ "except ImportError:\n", " print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n", "else:\n", - " # Calculate Z-scores for age\n", - " dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", + " # Calculate Z-scores for age, handling NaN values\n", + " age_nonan = dirty_data['age'].dropna()\n", + " zscores = np.abs(stats.zscore(age_nonan))\n", + " dirty_data['age_zscore'] = np.nan\n", + " dirty_data.loc[age_nonan.index, 'age_zscore'] = zscores\n", "\n", " # Typically, Z-score > 3 indicates an outlier\n", " print(\"Rows with age Z-score > 3:\")\n",