diff --git a/2-Working-With-Data/08-data-preparation/notebook.ipynb b/2-Working-With-Data/08-data-preparation/notebook.ipynb
index 22c9a757..bcdf44c7 100644
--- a/2-Working-With-Data/08-data-preparation/notebook.ipynb
+++ b/2-Working-With-Data/08-data-preparation/notebook.ipynb
@@ -3909,18 +3909,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from scipy import stats\n",
-    "\n",
-    "# Calculate Z-scores for age\n",
-    "dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n",
+    "# scipy is treated as optional here: if it is missing, print install instructions instead of raising.\n",
+    "try:\n",
+    "    from scipy import stats\n",
+    "except ImportError:\n",
+    "    print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n",
+    "else:\n",
+    "    # Calculate absolute Z-scores for age. nan_policy='omit' leaves missing ages out of the mean/std;\n",
+    "    # with the default 'propagate', a single NaN age would turn every score into NaN and hide all outliers.\n",
+    "    dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age'], nan_policy='omit'))\n",
     "\n",
-    "# Typically, Z-score > 3 indicates an outlier\n",
-    "print(\"Rows with age Z-score > 3:\")\n",
-    "zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n",
-    "print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n",
+    "    # Typically, an absolute Z-score > 3 indicates an outlier\n",
+    "    print(\"Rows with age Z-score > 3:\")\n",
+    "    zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n",
+    "    print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n",
     "\n",
-    "# Clean up the temporary column\n",
-    "dirty_data = dirty_data.drop('age_zscore', axis=1)"
+    "    # Clean up the temporary column\n",
+    "    dirty_data = dirty_data.drop('age_zscore', axis=1)"
    ]
   },
   {