From 95ded644faba6d4bd979b0ab8646b32cb303f423 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Fri, 3 Oct 2025 14:48:08 +0100 Subject: [PATCH] Update 2-Working-With-Data/08-data-preparation/notebook.ipynb Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../08-data-preparation/notebook.ipynb | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/2-Working-With-Data/08-data-preparation/notebook.ipynb b/2-Working-With-Data/08-data-preparation/notebook.ipynb index 22c9a757..bcdf44c7 100644 --- a/2-Working-With-Data/08-data-preparation/notebook.ipynb +++ b/2-Working-With-Data/08-data-preparation/notebook.ipynb @@ -3909,18 +3909,21 @@ "metadata": {}, "outputs": [], "source": [ - "from scipy import stats\n", - "\n", - "# Calculate Z-scores for age\n", - "dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", + "try:\n", + " from scipy import stats\n", + "except ImportError:\n", + " print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n", + "else:\n", + " # Calculate Z-scores for age\n", + " dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", "\n", - "# Typically, Z-score > 3 indicates an outlier\n", - "print(\"Rows with age Z-score > 3:\")\n", - "zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n", - "print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n", + " # Typically, Z-score > 3 indicates an outlier\n", + " print(\"Rows with age Z-score > 3:\")\n", + " zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n", + " print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n", "\n", - "# Clean up the temporary column\n", - "dirty_data = dirty_data.drop('age_zscore', axis=1)" + " # Clean up the temporary column\n", + " dirty_data = dirty_data.drop('age_zscore', axis=1)" ] }, {