From d8ec0fe1e5ebac21afe68c0563cc54ba3253e751 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 17 Feb 2026 07:58:29 +0000
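Subject: [PATCH] Rename _clean_wikipedia_content to clean_wikipedia_content
 for better notebook style

Also collapses the "source" array of the affected code cell in
1-Introduction/01-defining-data-science/notebook.ipynb from one string
per line into a single string.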

Co-authored-by: leestott <2511341+leestott@users.noreply.github.com>
---
 .../01-defining-data-science/notebook.ipynb   | 39 +------------------
 .../solution/notebook.ipynb                   |  2 +-
 2 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/1-Introduction/01-defining-data-science/notebook.ipynb b/1-Introduction/01-defining-data-science/notebook.ipynb
index 7b564420..35a1af9b 100644
--- a/1-Introduction/01-defining-data-science/notebook.ipynb
+++ b/1-Introduction/01-defining-data-science/notebook.ipynb
@@ -91,44 +91,7 @@
    "cell_type": "code",
    "execution_count": 64,
    "source": [
-    "from bs4 import BeautifulSoup\r\n",
-    "\r\n",
-    "# Parse the HTML content\r\n",
-    "soup = BeautifulSoup(text, 'html.parser')\r\n",
-    "\r\n",
-    "# Extract only the main article content from Wikipedia\r\n",
-    "# Wikipedia uses 'mw-parser-output' class for the main article content\r\n",
-    "content = soup.find('div', class_='mw-parser-output')\r\n",
-    "\r\n",
-    "def _clean_wikipedia_content(content_node):\r\n",
-    "    \"\"\"Remove common non-article elements from a Wikipedia content node.\"\"\"\r\n",
-    "    # Strip jump links, navboxes, reference lists/superscripts, edit sections, TOC, sidebars, etc.\r\n",
-    "    selectors = [\r\n",
-    "        '.mw-jump-link',\r\n",
-    "        '.navbox',\r\n",
-    "        '.reflist',\r\n",
-    "        'sup.reference',\r\n",
-    "        '.mw-editsection',\r\n",
-    "        '.hatnote',\r\n",
-    "        '.metadata',\r\n",
-    "        '.infobox',\r\n",
-    "        '#toc',\r\n",
-    "        '.toc',\r\n",
-    "        '.sidebar',\r\n",
-    "    ]\r\n",
-    "    for selector in selectors:\r\n",
-    "        for el in content_node.select(selector):\r\n",
-    "            el.decompose()\r\n",
-    "\r\n",
-    "if content:\r\n",
-    "    # Clean the content node to better approximate article text only.\r\n",
-    "    _clean_wikipedia_content(content)\r\n",
-    "    text = content.get_text(separator=' ', strip=True)\r\n",
-    "    print(text[:1000])\r\n",
-    "else:\r\n",
-    "    print(\"Could not find main content. Using full page text.\")\r\n",
-    "    text = soup.get_text(separator=' ', strip=True)\r\n",
-    "    print(text[:1000])"
+    "from bs4 import BeautifulSoup\r\n\r\n# Parse the HTML content\r\nsoup = BeautifulSoup(text, 'html.parser')\r\n\r\n# Extract only the main article content from Wikipedia\r\n# Wikipedia uses 'mw-parser-output' class for the main article content\r\ncontent = soup.find('div', class_='mw-parser-output')\r\n\r\ndef clean_wikipedia_content(content_node):\r\n    \"\"\"Remove common non-article elements from a Wikipedia content node.\"\"\"\r\n    # Strip jump links, navboxes, reference lists/superscripts, edit sections, TOC, sidebars, etc.\r\n    selectors = [\r\n        '.mw-jump-link',\r\n        '.navbox',\r\n        '.reflist',\r\n        'sup.reference',\r\n        '.mw-editsection',\r\n        '.hatnote',\r\n        '.metadata',\r\n        '.infobox',\r\n        '#toc',\r\n        '.toc',\r\n        '.sidebar',\r\n    ]\r\n    for selector in selectors:\r\n        for el in content_node.select(selector):\r\n            el.decompose()\r\n\r\nif content:\r\n    # Clean the content node to better approximate article text only.\r\n    clean_wikipedia_content(content)\r\n    text = content.get_text(separator=' ', strip=True)\r\n    print(text[:1000])\r\nelse:\r\n    print(\"Could not find main content. Using full page text.\")\r\n    text = soup.get_text(separator=' ', strip=True)\r\n    print(text[:1000])"
    ],
    "outputs": [
     {
diff --git a/1-Introduction/01-defining-data-science/solution/notebook.ipynb b/1-Introduction/01-defining-data-science/solution/notebook.ipynb
index 92e616b5..75e45a91 100644
--- a/1-Introduction/01-defining-data-science/solution/notebook.ipynb
+++ b/1-Introduction/01-defining-data-science/solution/notebook.ipynb
@@ -94,7 +94,7 @@
    "cell_type": "code",
    "execution_count": 4,
    "source": [
-    "from bs4 import BeautifulSoup\r\n\r\n# Parse the HTML content\r\nsoup = BeautifulSoup(text, 'html.parser')\r\n\r\n# Extract only the main article content from Wikipedia\r\n# Wikipedia uses 'mw-parser-output' class for the main article content\r\ncontent = soup.find('div', class_='mw-parser-output')\r\n\r\ndef _clean_wikipedia_content(content_node):\r\n    \"\"\"Remove common non-article elements from a Wikipedia content node.\"\"\"\r\n    # Strip jump links, navboxes, reference lists/superscripts, edit sections, TOC, sidebars, etc.\r\n    selectors = [\r\n        '.mw-jump-link',\r\n        '.navbox',\r\n        '.reflist',\r\n        'sup.reference',\r\n        '.mw-editsection',\r\n        '.hatnote',\r\n        '.metadata',\r\n        '.infobox',\r\n        '#toc',\r\n        '.toc',\r\n        '.sidebar',\r\n    ]\r\n    for selector in selectors:\r\n        for el in content_node.select(selector):\r\n            el.decompose()\r\n\r\nif content:\r\n    # Clean the content node to better approximate article text only.\r\n    _clean_wikipedia_content(content)\r\n    text = content.get_text(separator=' ', strip=True)\r\n    print(text[:1000])\r\nelse:\r\n    print(\"Could not find main content. Using full page text.\")\r\n    text = soup.get_text(separator=' ', strip=True)\r\n    print(text[:1000])"
+    "from bs4 import BeautifulSoup\r\n\r\n# Parse the HTML content\r\nsoup = BeautifulSoup(text, 'html.parser')\r\n\r\n# Extract only the main article content from Wikipedia\r\n# Wikipedia uses 'mw-parser-output' class for the main article content\r\ncontent = soup.find('div', class_='mw-parser-output')\r\n\r\ndef clean_wikipedia_content(content_node):\r\n    \"\"\"Remove common non-article elements from a Wikipedia content node.\"\"\"\r\n    # Strip jump links, navboxes, reference lists/superscripts, edit sections, TOC, sidebars, etc.\r\n    selectors = [\r\n        '.mw-jump-link',\r\n        '.navbox',\r\n        '.reflist',\r\n        'sup.reference',\r\n        '.mw-editsection',\r\n        '.hatnote',\r\n        '.metadata',\r\n        '.infobox',\r\n        '#toc',\r\n        '.toc',\r\n        '.sidebar',\r\n    ]\r\n    for selector in selectors:\r\n        for el in content_node.select(selector):\r\n            el.decompose()\r\n\r\nif content:\r\n    # Clean the content node to better approximate article text only.\r\n    clean_wikipedia_content(content)\r\n    text = content.get_text(separator=' ', strip=True)\r\n    print(text[:1000])\r\nelse:\r\n    print(\"Could not find main content. Using full page text.\")\r\n    text = soup.get_text(separator=' ', strip=True)\r\n    print(text[:1000])"
    ],
    "outputs": [
     {