diff --git a/2-Working-With-Data/08-data-preparation/notebook.ipynb b/2-Working-With-Data/08-data-preparation/notebook.ipynb index 1023b7f3..fe10a4f6 100644 --- a/2-Working-With-Data/08-data-preparation/notebook.ipynb +++ b/2-Working-With-Data/08-data-preparation/notebook.ipynb @@ -3815,24 +3815,29 @@ "metadata": {}, "outputs": [], "source": [ - "# rapidfuzz was already imported in an earlier cell\n", - "from rapidfuzz import process, fuzz\n", - "\n", - "\n", + "try:\n", + " from rapidfuzz import process, fuzz\n", + "except ImportError:\n", + " print(\"rapidfuzz is not installed. Please install it with 'pip install rapidfuzz' to use fuzzy matching.\")\n", + " process = None\n", + " fuzz = None\n", "\n", "# Get unique countries\n", "unique_countries = dirty_data['country'].unique()\n", "\n", "# For each country, find similar matches\n", - "print(\"Finding similar country names (similarity > 70%):\")\n", - "for country in unique_countries:\n", - " matches = process.extract(country, unique_countries, scorer=fuzz.ratio, limit=3)\n", - " # Filter matches with similarity > 70 and not identical\n", - " similar = [m for m in matches if m[1] > 70 and m[0] != country]\n", - " if similar:\n", - " print(f\"\\n'{country}' is similar to:\")\n", - " for match, score, _ in similar:\n", - " print(f\" - '{match}' (similarity: {score}%)\")" + "if process is not None and fuzz is not None:\n", + " print(\"Finding similar country names (similarity > 70%):\")\n", + " for country in unique_countries:\n", + " matches = process.extract(country, unique_countries, scorer=fuzz.ratio, limit=3)\n", + " # Filter matches with similarity > 70 and not identical\n", + " similar = [m for m in matches if m[1] > 70 and m[0] != country]\n", + " if similar:\n", + " print(f\"\\n'{country}' is similar to:\")\n", + " for match, score, _ in similar:\n", + " print(f\" - '{match}' (similarity: {score}%)\")\n", + "else:\n", + " print(\"Skipping fuzzy matching because rapidfuzz is not available.\")" ] }, {