"from bs4 import BeautifulSoup\r\n\r\n# Parse the HTML content\r\nsoup = BeautifulSoup(text, 'html.parser')\r\n\r\n# Extract only the main article content from Wikipedia\r\n# Wikipedia uses 'mw-parser-output' class for the main article content\r\ncontent = soup.find('div', class_='mw-parser-output')\r\n\r\nif content:\r\n # Get text from the content, excluding navigation, references, etc.\r\n text = content.get_text(separator=' ', strip=True)\r\n print(text[:1000])\r\nelse:\r\n print(\"Could not find main content. Using full page text.\")\r\n text = soup.get_text(separator=' ', strip=True)\r\n print(text[:1000])"
"from bs4 import BeautifulSoup\r\n",
"\r\n",
"# Parse the HTML content\r\n",
"soup = BeautifulSoup(text, 'html.parser')\r\n",
"\r\n",
"# Extract only the main article content from Wikipedia\r\n",
"# Wikipedia uses 'mw-parser-output' class for the main article content\r\n",