From 8e723abc2495f04c70611c8098ab56fece11bb6f Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Fri, 3 Oct 2025 14:58:12 +0100 Subject: [PATCH] Update 2-Working-With-Data/08-data-preparation/notebook.ipynb Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 2-Working-With-Data/08-data-preparation/notebook.ipynb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/2-Working-With-Data/08-data-preparation/notebook.ipynb b/2-Working-With-Data/08-data-preparation/notebook.ipynb index 294f278c..ceff57fe 100644 --- a/2-Working-With-Data/08-data-preparation/notebook.ipynb +++ b/2-Working-With-Data/08-data-preparation/notebook.ipynb @@ -3919,8 +3919,11 @@ "except ImportError:\n", " print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n", "else:\n", - " # Calculate Z-scores for age\n", - " dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", + " # Calculate Z-scores for age, handling NaN values\n", + " age_nonan = dirty_data['age'].dropna()\n", + " zscores = np.abs(stats.zscore(age_nonan))\n", + " dirty_data['age_zscore'] = np.nan\n", + " dirty_data.loc[age_nonan.index, 'age_zscore'] = zscores\n", "\n", " # Typically, Z-score > 3 indicates an outlier\n", " print(\"Rows with age Z-score > 3:\")\n",