Update 2-Working-With-Data/08-data-preparation/notebook.ipynb

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
copilot/fix-e446e3a1-6b4c-4310-87d5-641ed6823a37
Lee Stott 2 months ago committed by GitHub
parent c7982edc02
commit 95ded644fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -3909,18 +3909,21 @@
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n",
"# Calculate Z-scores for age\n",
"dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n",
"try:\n",
"    from scipy import stats\n",
"except ImportError:\n",
"    print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n",
"else:\n",
"    # Absolute Z-score of each age: its distance from the mean in standard deviations.\n",
"    # nan_policy='omit' keeps missing ages out of the mean/std; with SciPy's default\n",
"    # ('propagate') a single NaN age turns every Z-score into NaN and hides all outliers.\n",
"    dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age'], nan_policy='omit'))\n",
"\n",
"# Typically, Z-score > 3 indicates an outlier\n",
"print(\"Rows with age Z-score > 3:\")\n",
"zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n",
"print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n",
"    # Typically, |Z-score| > 3 indicates an outlier; rows with a missing age have a NaN\n",
"    # score, which never compares greater than 3, so they are not reported here.\n",
"    print(\"Rows with age Z-score > 3:\")\n",
"    zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n",
"    print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n",
"\n",
"# Clean up the temporary column\n",
"dirty_data = dirty_data.drop('age_zscore', axis=1)"
"    # Clean up the temporary column so later cells see the original set of columns\n",
"    dirty_data = dirty_data.drop('age_zscore', axis=1)"
]
},
{

Loading…
Cancel
Save