mirror of https://github.com/Black-Gold/Learn
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
4.8 KiB
128 lines
4.8 KiB
import time
|
|
import datetime
|
|
import re
|
|
import xlwt
|
|
from xlwt import Workbook
|
|
from selenium import webdriver
|
|
from selenium.webdriver.common.keys import Keys
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
|
|
class Bolagsverket:
    """Crawl announcement search results on poit.bolagsverket.se into an .xls file.

    A Firefox session is driven through the site's search form.  Every result
    is opened, four labelled fields are pulled out of its text with regular
    expressions, and the values are written to an xlwt workbook that gets one
    sheet per result page.
    """

    # Seconds to sleep after each action that makes the browser load a page.
    PAGE_DELAY = 5

    # XPath matching every row of the result table.
    RESULT_ROWS_XPATH = "//table/tbody/tr"

    # (column header, pattern) pairs in spreadsheet column order.  ``.`` never
    # matches a newline, so each capture stops at the end of its line whether
    # or not a newline follows (the last line of the text has none).
    _FIELD_PATTERNS = (
        ("Post Address", re.compile(r"Postadress:(.*),")),
        ("Bildat", re.compile(r"Bildat:(.*)")),
        ("Foretagsnamn", re.compile(r"Företagsnamn:(.*)")),
        ("Email", re.compile(r"E-post:(.*)")),
    )

    def __init__(self, executable_path="E:/geckodriver"):
        """Start the Firefox session used by the crawl.

        Args:
            executable_path: Location of the geckodriver binary.
        """
        self.bot = webdriver.Firefox(executable_path=executable_path)

    @classmethod
    def _extract_fields(cls, text):
        """Return the captured value of every field pattern, in column order.

        A field whose label does not occur in *text* yields ``None``; the
        other fields are still extracted.
        """
        values = []
        for _header, pattern in cls._FIELD_PATTERNS:
            match = pattern.search(text)
            values.append(match.group(1) if match else None)
        return values

    @staticmethod
    def _parse_page_count(pager_text):
        """Return the integer that follows the first "av" in *pager_text*.

        All whitespace is removed from the number before conversion.

        Raises:
            ValueError: If "av" is absent or is not followed by an integer.
        """
        _before, separator, after = pager_text.partition("av")
        if not separator:
            raise ValueError("no page count in pager text: %r" % pager_text)
        return int("".join(after.split()))

    def _select_option(self, wait, select_id, option_text):
        """Wait for the <select> with *select_id*, then click *option_text* in it."""
        select = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//select[@id='%s']" % select_id)
            )
        )
        select.find_element(
            By.XPATH,
            "//select[@id='%s']/option[text()='%s']" % (select_id, option_text),
        ).click()

    def _fill_input(self, wait, input_id, value):
        """Wait for the <input> with *input_id* to be clickable and type *value*."""
        field = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//input[@id='%s']" % input_id)
            )
        )
        field.send_keys(value)

    def navigate_and_crawl(
        self,
        date_from="2019-09-23",
        date_to="2019-09-24",
        output_path="emails.xls",
    ):
        """Run the search and write every result's fields to a workbook.

        Args:
            date_from: Text typed into the "from" date input.
            date_to: Text typed into the "tom" date input.
            output_path: File the workbook is saved to after each page.

        Raises:
            ValueError: If the pager text does not contain a page count.
        """
        bot = self.bot
        bot.get("https://poit.bolagsverket.se/poit/PublikPoitIn.do")
        time.sleep(self.PAGE_DELAY)
        bot.find_element(By.ID, "nav1-2").click()
        time.sleep(self.PAGE_DELAY)
        bot.find_element(By.TAG_NAME, "form").find_element(
            By.TAG_NAME, "a"
        ).click()
        time.sleep(self.PAGE_DELAY)

        wait = WebDriverWait(bot, 10)
        self._select_option(wait, "tidsperiod", "Annan period")
        self._fill_input(wait, "from", date_from)
        self._fill_input(wait, "tom", date_to)
        time.sleep(self.PAGE_DELAY)

        self._select_option(wait, "amnesomrade", "Bolagsverkets registreringar")
        time.sleep(self.PAGE_DELAY)
        self._select_option(wait, "kungorelserubrik", "Aktiebolagsregistret")
        time.sleep(self.PAGE_DELAY)
        self._select_option(wait, "underrubrik", "Nyregistreringar")

        # Search button
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "//input[@id='SokKungorelse']"))
        ).click()
        time.sleep(self.PAGE_DELAY)

        pager_text = bot.find_element(
            By.XPATH, "//div[@class='gotopagediv']/em[@class='gotopagebuttons']"
        ).text
        number_of_pages = self._parse_page_count(pager_text)

        workbook = Workbook()
        header_style = xlwt.easyxf("font: bold 1")
        for page in range(number_of_pages):
            sheet = workbook.add_sheet("Sheet" + str(page))
            for column, (header, _pattern) in enumerate(self._FIELD_PATTERNS):
                sheet.write(0, column, header, header_style)

            # Count the rows of *this* page; the last page may hold fewer
            # rows than the first one.
            row_count = len(bot.find_elements(By.XPATH, self.RESULT_ROWS_XPATH))
            for index in range(row_count):
                # The rows are looked up again on every pass because the
                # browser navigates away and back for each result, so element
                # handles from an earlier lookup no longer refer to this page.
                rows = bot.find_elements(By.XPATH, self.RESULT_ROWS_XPATH)
                link = rows[index].find_element(By.TAG_NAME, "a")
                bot.execute_script("arguments[0].click();", link)
                time.sleep(self.PAGE_DELAY)

                text = bot.find_element(By.CLASS_NAME, "kungtext").text
                values = self._extract_fields(text)
                for column, value in enumerate(values):
                    if value is None:
                        # Report the field that is actually missing and keep
                        # writing the remaining columns.
                        print(self._FIELD_PATTERNS[column][0] + " is null")
                        value = "null"
                    sheet.write(index + 1, column, value)
                if None not in values:
                    print(*values)

                bot.back()
                time.sleep(self.PAGE_DELAY)

            # Save after every page so finished pages survive a later failure.
            workbook.save(output_path)

            # Only page forward while another page remains to be read.
            if page + 1 < number_of_pages:
                print("Going to next page ...")
                wait.until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "//input[@id='movenextTop']")
                    )
                ).click()
                time.sleep(self.PAGE_DELAY)
|
|
|
|
|
|
# Script entry: start the browser session and run the crawl.
bot = Bolagsverket()
try:
    bot.navigate_and_crawl()
finally:
    # Always end the WebDriver session, even when the crawl raises, so the
    # browser and driver processes are not left running.
    bot.bot.quit()
|