5 years ago · 246e7cd5e9
parent 6aefff0af3
commit 246e7cd5e9
7 changed files with 387 additions and 82 deletions
--- a/Linux_man_cn/nohup.md
+++ b/Linux_man_cn/nohup.md
@ -1,26 +1,25 @@
-# **nohup**
+# **nohup**
-
+
-## 说明
+## 说明
-
+
-**nohup命令** 可以将程序以忽略挂起信号的方式运行起来，被运行的程序的输出信息将不会显示到终端
+**nohup命令** 可以将程序以忽略挂起信号的方式运行起来，被运行的程序的输出信息将不会显示到终端
-
+
-无论是否将 nohup 命令的输出重定向到终端，输出都将附加到当前目录的 nohup.out 文件中。如果当前目录的 nohup.out 文件不可写，输出重定向
+无论是否将 nohup 命令的输出重定向到终端，输出都将附加到当前目录的 nohup.out 文件中。如果当前目录的 nohup.out 文件不可写，输出重定向
-到`$HOME/nohup.out`文件中。如果没有文件能创建或打开以用于追加，那么 command 参数指定的命令不可调用。如果标准错误是一个终端，那么把
+到`$HOME/nohup.out`文件中。如果没有文件能创建或打开以用于追加，那么 command 参数指定的命令不可调用。如果标准错误是一个终端，那么把
-指定的命令写给标准错误的所有输出作为标准输出重定向到相同的文件描述符
+指定的命令写给标准错误的所有输出作为标准输出重定向到相同的文件描述符
-
+
-## 选项
+## 选项
-
+
-```markdown
+```markdown
-If standard input is a terminal, redirect it from /dev/null
+If standard input is a terminal, redirect it from /dev/null
-If standard output is a terminal, append output to 'nohup.out' if possible,'$HOME/nohup.out' otherwise
+If standard output is a terminal, append output to 'nohup.out' if possible, '$HOME/nohup.out' otherwise
-If standard error is a terminal, redirect it to standard output
+If standard error is a terminal, redirect it to standard output
-To save output to FILE, use 'nohup COMMAND > FILE
+To save output to FILE, use 'nohup COMMAND > FILE'
-```
+```
-
+
-## 实例
+## 实例
-
+
-```bash
+```bash
-nohup command > myout.file 2>&1 &
+nohup command > myout.file 2>&1 &
-
+
-```
+```
--- a/Py3Scripts/NumberToChinese.py
+++ b/Py3Scripts/NumberToChinese.py
@ -1,44 +1,45 @@
-def digital_to_chinese(digital):
+def digital_to_chinese(digital):
-    str_digital = str(digital)
+    str_digital = str(digital)
-    chinese = {'1': '壹', '2': '贰', '3': '叁', '4': '肆', '5': '伍', '6': '陆', '7': '柒', '8': '捌', '9': '玖', '0': '零'}
+    chinese = {'1': '壹', '2': '贰', '3': '叁', '4': '肆', '5': '伍', '6': '陆', '7': '柒', '8': '捌', '9': '玖', '0': '零'}
-    chinese2 = ['拾', '佰', '仟', '万', '厘', '分', '角']
+    chinese2 = ['拾', '佰', '仟', '万', '厘', '分', '角']
-    jiao = ''
+    jiao = ''
-    bs = str_digital.split('.')
+    bs = str_digital.split('.')
-    yuan = bs[0]
+    yuan = bs[0]
-    if len(bs) > 1:
+    if len(bs) > 1:
-        jiao = bs[1]
+        jiao = bs[1]
-    r_yuan = [i for i in reversed(yuan)]
+    r_yuan = [i for i in reversed(yuan)]
-    count = 0
+    count = 0
-    for i in range(len(yuan)):
+    for i in range(len(yuan)):
-        if i == 0:
+        if i == 0:
-            r_yuan[i] += '圆'
+            r_yuan[i] += '圆'
-            continue
+            continue
-        r_yuan[i] += chinese2[count]
+        r_yuan[i] += chinese2[count]
-        count += 1
+        count += 1
-        if count == 4:
+        if count == 4:
-            count = 0
+            count = 0
-            chinese2[3] = '亿'
+            chinese2[3] = '亿'
-
+
-    s_jiao = [i for i in jiao][:3]  # 去掉小于厘之后的
+    s_jiao = [i for i in jiao][:3]  # 去掉小于厘之后的
-
+
-    j_count = -1
+    j_count = -1
-    for i in range(len(s_jiao)):
+    for i in range(len(s_jiao)):
-        s_jiao[i] += chinese2[j_count]
+        s_jiao[i] += chinese2[j_count]
-        j_count -= 1
+        j_count -= 1
-    last = [i for i in reversed(r_yuan)] + s_jiao
+    last = [i for i in reversed(r_yuan)] + s_jiao
-
+
-    last_str = ''.join(last)
+    last_str = ''.join(last)
-    print(str_digital)
+    print(str_digital)
-    print(last_str)
+    print(last_str)
-    for i in range(len(last_str)):
+    for i in range(len(last_str)):
-        digital = last_str[i]
+        digital = last_str[i]
-        if digital in chinese:
+        if digital in chinese:
-            last_str = last_str.replace(digital, chinese[digital])
+            last_str = last_str.replace(digital, chinese[digital])
-    print(last_str)
+    print(last_str)
-    return last_str
+    return last_str
-
+
-
+
-number = float(input("输入需要转换的数字："))
+# number = float(input("输入需要转换的数字："))
-
+number = float(4650)
-if __name__ == '__main__':
+
-    digital_to_chinese(number)
+if __name__ == '__main__':
    digital_to_chinese(number)
--- a/Py3Scripts/py_extract_data.py
+++ b/Py3Scripts/py_extract_data.py
@ -0,0 +1,117 @@
 import re
 import csv
 import urllib.request
 from urllib.request import urlopen, Request
 from bs4 import BeautifulSoup
 import xlrd
 import time
 # Values read from the spreadsheet; the script later appends each one to the
 # query URL as the USDOT query string.
 dots = []
 def read_excel_file():
    """Fill the module-level ``dots`` list from the first sheet of ``dots.xls``.

    Rows 1 through 4 of column 0 are read. Each cell value is converted to a
    string, every ".0" substring is removed, and the result is appended to
    ``dots``.
    """
    workbook = xlrd.open_workbook("dots.xls")
    first_sheet = workbook.sheet_by_index(0)
    # The return value of this read is discarded, exactly as before.
    first_sheet.cell_value(0, 0)
    for row_index in (1, 2, 3, 4):
        cell_text = str(first_sheet.cell_value(row_index, 0))
        dots.append(cell_text.replace(".0", ""))
 def crawl_data(url):
    """Download one carrier snapshot page, extract its fields and write them to CSV.

    The "as of" date is taken from the page's <b> elements (the last matching
    element wins); the remaining fields are cut out of the text of the first
    <center> element by the labels that surround them. The extracted values
    are passed to ``write_csv``.

    Args:
        url: Address of the page to download.

    Raises:
        AttributeError: If the page has no <center> element, or one of the
            expected field labels is not found in its text.
    """
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req) as response:
        html = response.read()
    bs = BeautifulSoup(html, "html.parser")
    date_pattern = re.compile(
        r"The information below reflects the content of the FMCSA management information systems as of(.*)."
    )
    # Default value so the call to write_csv() below still works when no <b>
    # element carries the "as of" sentence; previously that case crashed
    # because ``date`` was never assigned.
    date = ""
    for b in bs.find_all("b"):
        match = date_pattern.search(b.get_text(strip=True, separator="  "))
        if match is None:
            continue
        date = match.group(1).strip()
        if len(date) > 11:
            # Keep only the part before the first period.
            date = date.split(".", 1)[0]
        print(date)
    information = bs.find("center").get_text(strip=True, separator="  ")
    def field(pattern):
        # Group 1 is the text between the two labels named in ``pattern``.
        return re.search(pattern, information).group(1).strip()
    operating = field(r"Operating Status:(.*)Out")
    legal_name = field(r"Legal Name:(.*)DBA")
    physical_address = field(r"Physical Address:(.*)Phone")
    mailing_address = field(r"Mailing Address:(.*)USDOT")
    usdot_address = field(r"USDOT Number:(.*)State Carrier ID Number")
    power_units = field(r"Power Units:(.*)Drivers")
    drivers = field(r"Drivers:(.*)MCS-150 Form Date")
    write_csv(
        date,
        operating,
        legal_name,
        physical_address,
        mailing_address,
        usdot_address,
        power_units,
        drivers,
    )
 def write_csv(
    date,
    operating,
    legal_name,
    physical_address,
    mailing_address,
    usdot_address,
    power_units,
    drivers,
 ):
    """Write one record to ``<usdot_address>.csv`` as a header row plus one data row.

    ``usdot_address`` only determines the file name; it is not written as a
    column. The file is created (or overwritten) in UTF-8.
    """
    # Insertion order of this dict is also the column order of the file.
    record = {
        "Date": date,
        "Operating Status": operating,
        "Legal_Name": legal_name,
        "Physical Address": physical_address,
        "Mailing Address": mailing_address,
        "Power Units": power_units,
        "Drivers": drivers,
    }
    target_path = usdot_address + ".csv"
    with open(target_path, mode="w", newline="", encoding="utf-8") as out_file:
        row_writer = csv.DictWriter(out_file, fieldnames=list(record))
        row_writer.writeheader()
        row_writer.writerow(record)
 # Snapshot query address; each value from ``dots`` is appended as the query string.
 SNAPSHOT_URL = "https://safer.fmcsa.dot.gov/query.asp?searchtype=ANY&query_type=queryCarrierSnapshot&query_param=USDOT&query_string="
 read_excel_file()
 print(dots)
 for dot in dots:
    crawl_data(SNAPSHOT_URL + dot)
    # Pause five seconds after every request.
    time.sleep(5)
--- a/Py3Scripts/py_multithread.py
+++ b/Py3Scripts/py_multithread.py
@ -0,0 +1,55 @@
 # Python 多线程示例：演示如何使用线程锁
 # 1. 定义 threading.Thread 的子类
 # 2. 实例化子类并启动线程
 # 3. 在线程的 run 方法中加锁
 import threading
 import datetime
 exitFlag = 0
 class myThread(threading.Thread):
    """Thread that calls ``print_date`` while holding the module-level ``threadLock``.

    Args:
        name: Label used in the start/exit messages and passed to ``print_date``.
        counter: Number stored as both ``threadID`` and ``counter``.
    """
    def __init__(self, name, counter):
        super().__init__()
        self.threadID = counter
        self.name = name
        self.counter = counter
    def run(self):
        """Print a start message, call ``print_date`` under the lock, then print an exit message."""
        print("\n开始 " + self.name)
        # ``with`` releases the lock even when print_date() raises; a manual
        # acquire()/release() pair would leave it held and block every other
        # thread waiting on it.
        with threadLock:
            print_date(self.name, self.counter)
        print("退出 " + self.name)
 def print_date(threadName, counter):
    """Print ``<threadName>[<counter>]: <today's date>`` to standard output."""
    current_date = datetime.date.today()
    print(f"{threadName}[{counter}]: {current_date}")
 # Lock shared by all worker threads.
 threadLock = threading.Lock()
 # Create both workers first, then start and join them in list order.
 thread1 = myThread("线程", 1)
 thread2 = myThread("线程", 2)
 threads = [thread1, thread2]
 for thread in threads:
    thread.start()
 # Block until every worker has finished before printing the final message.
 for thread in threads:
    thread.join()
 print("\n退出程序!!!")
--- a/Py3Scripts/pymongo_example.py
+++ b/Py3Scripts/pymongo_example.py
@ -39,6 +39,10 @@ collection = db.arc_AdminConf
 # 以product_id升序创建索引
 # create_index = collection.create_index([('product_id', pymongo.ASCENDING)], unique=True)
 # 打印集合索引
 for index in collection.list_indexes():
    pprint(index)
 # 打印集合索引信息
 # pprint(sorted(list(collection.index_information())))
--- a/Py3Scripts/selenium_examples.py
+++ b/Py3Scripts/selenium_examples.py
@ -171,7 +171,7 @@ WebDriver都将延迟 driver.get() 的响应或 driver.navigate().to() 的调用
 # normal默认加载策略
 # WebDriver等待整个页面的加载，设置为normal时，WebDriver保持等待直到返回load事件
 options = Options()
-options.page_load_strategy = 'normal'
+options.page_load_strategy = "normal"
 browser = webdriver.Chrome(options=options)
 browser.get("xxx.com")
 browser.quit()
@ -179,7 +179,7 @@ browser.quit()
 # eager加载策略
 # WebDriver保持等待并直到完全加载并解析了html文件，忽略css样式表、图片和subframes的加载
 # 设置为eager时，保持等待直到返回DOMContentLoaded事件
-options.page_load_strategy = 'eager'
+options.page_load_strategy = "eager"
 # none加载策略
 # WebDriver仅等待至初始页面下载完成
@ -200,7 +200,7 @@ search_box = search_src.find_element_by_name("q")
 search_box.send_keys("searchconent")
 # 从父元素的上下文查找匹配子webelement的列表
-element = browser.find_element_by_tag_name('div')
+element = browser.find_element_by_tag_name("div")
 # The plural find_elements_* call returns a list of matching children; the
 # singular form returns one WebElement, which the loop below cannot iterate.
 sub_elements = element.find_elements_by_tag_name("p")
 for ele in sub_elements:
    print(ele.text)
@ -252,25 +252,27 @@ webdriver.ActionChains(browser).move_to_element(gmailLink).perform()
 xOffset = 100
 yOffset = 100
 # 将鼠标移动到指定坐标位置，可移到窗口之外
-webdriver.ActionChains(browser).move_by_offset(xOffset,yOffset).perform()
+webdriver.ActionChains(browser).move_by_offset(xOffset, yOffset).perform()
 # 在一个元素点击并按住，然后移到另一个元素
 sourceEle = driver.find_element_by_id("draggable")
-targetEle  = driver.find_element_by_id("droppable")
+targetEle = driver.find_element_by_id("droppable")
 # 鼠标从sourceEle移动到targetEle元素
-webdriver.ActionChains(browser).drag_and_drop(sourceEle,targetEle).perform()
+webdriver.ActionChains(browser).drag_and_drop(sourceEle, targetEle).perform()
 # 在一个元素点击并按住，然后移动一定的偏移量
 targetEleXOffset = targetEle.location.get("x")
 targetEleYOffset = targetEle.location.get("y")
-webdriver.ActionChains(browser).drag_and_drop_by_offset(sourceEle, targetEleXOffset, targetEleYOffset).perform()
+webdriver.ActionChains(browser).drag_and_drop_by_offset(
    sourceEle, targetEleXOffset, targetEleYOffset
 ).perform()
 # 释放按下的鼠标左键，如果webelement移动了，将自动释放在给定webelement上按下的鼠标左键
 webdriver.ActionChains(browser).release().perform()
 # 添加cookies
 # 常用于将cookie添加到当前访问的上下文中. 添加Cookie仅接受一组已定义的可序列化JSON对象
-browser.get('xx.com')
+browser.get("xx.com")
 browser.add_cookie({"name": "key", "value": "value"})
 # 获取cookie
@ -285,9 +287,9 @@ browser.delete_all_cookies()
 # Lax,将Cookie sameSite属性设置为Lax时，该Cookie将与第三方网站发起的GET请求一起发送
 # 目前此功能已嵌入chrome80+,适用于selenium4+
-driver.add_cookie({"name": "foo", "value": "value", 'sameSite': 'Strict'})
+driver.add_cookie({"name": "foo", "value": "value", "sameSite": "Strict"})
-driver.add_cookie({"name": "foo1", "value": "value", 'sameSite': 'Lax'})
+driver.add_cookie({"name": "foo1", "value": "value", "sameSite": "Lax"})
-cookie1 = driver.get_cookie('foo')
+cookie1 = driver.get_cookie("foo")
-cookie2 = driver.get_cookie('foo1')
+cookie2 = driver.get_cookie("foo1")
 print(cookie1)
 print(cookie2)
--- a/Py3Scripts/selenium_upwork_data.py
+++ b/Py3Scripts/selenium_upwork_data.py
@ -0,0 +1,127 @@
 import time
 import datetime
 import re
 import xlwt
 from xlwt import Workbook
 from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
 class Bolagsverket:
    """Crawl notices from poit.bolagsverket.se and store four fields of each in an .xls file.

    The crawler drives Firefox through the search form, opens every result
    row on every result page, and writes one worksheet per result page.
    """
    # XPath matching the rows of the result table.
    ROWS_XPATH = "//table/tbody/tr"
    # (column header, regex) pairs; group 1 of each regex becomes the cell value.
    FIELDS = (
        ("Post Address", "Postadress:(.*),"),
        ("Bildat", "Bildat:(.*)\n"),
        ("Foretagsnamn", "Företagsnamn:(.*)\n"),
        ("Email", "E-post:(.*)\n"),
    )
    def __init__(self, executable_path="E:/geckodriver"):
        """Start a Firefox session.

        Args:
            executable_path: Location of the geckodriver binary.
        """
        self.bot = webdriver.Firefox(executable_path=executable_path)
    @staticmethod
    def _extract_field(pattern, text):
        """Return group 1 of ``pattern`` found in ``text``, or ``"null"`` when it is absent."""
        match = re.search(pattern, text)
        return "null" if match is None else str(match.group(1))
    @staticmethod
    def _parse_page_count(pager_text):
        """Return the integer that follows "av" in the pager label, ignoring whitespace."""
        return int("".join(pager_text.split("av", 1)[1].split()))
    def _choose_option(self, select_id, option_text, wait=None):
        """Click the <option> with text ``option_text`` inside ``<select id=select_id>``.

        When ``wait`` is given, first block until the <select> is clickable.
        """
        if wait is not None:
            wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, "//select[@id='%s']" % select_id)
                )
            )
        self.bot.find_element_by_xpath(
            "//select[@id='%s']/option[text()='%s']" % (select_id, option_text)
        ).click()
    def navigate_and_crawl(
        self,
        date_from="2019-09-23",
        date_to="2019-09-24",
        output_path="emails.xls",
    ):
        """Fill in the search form, then walk every result page and save the notices.

        Args:
            date_from: Text typed into the "from" date input.
            date_to: Text typed into the "tom" date input.
            output_path: Workbook file, re-saved after every processed notice.
        """
        bot = self.bot
        bot.get("https://poit.bolagsverket.se/poit/PublikPoitIn.do")
        time.sleep(5)
        bot.find_element_by_id("nav1-2").click()
        time.sleep(5)
        bot.find_element_by_tag_name("form").find_element_by_tag_name("a").click()
        time.sleep(5)
        self._choose_option("tidsperiod", "Annan period")
        wait = WebDriverWait(bot, 10)
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "//input[@id='from']"))
        ).send_keys(date_from)
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "//input[@id='tom']"))
        ).send_keys(date_to)
        time.sleep(5)
        self._choose_option("amnesomrade", "Bolagsverkets registreringar", wait)
        time.sleep(5)
        self._choose_option("kungorelserubrik", "Aktiebolagsregistret", wait)
        time.sleep(5)
        self._choose_option("underrubrik", "Nyregistreringar", wait)
        # Search button
        wait.until(
            EC.element_to_be_clickable((By.XPATH, "//input[@id='SokKungorelse']"))
        ).click()
        time.sleep(5)
        pager_text = bot.find_element_by_xpath(
            "//div[@class='gotopagediv']/em[@class='gotopagebuttons']"
        ).text
        # The cleaned-up value is now actually used; the old code discarded it.
        total_pages = self._parse_page_count(pager_text)
        header_style = xlwt.easyxf("font: bold 1")
        wb = Workbook()
        for page in range(total_pages):
            sheet = wb.add_sheet("Sheet" + str(page))
            for column, (header, _pattern) in enumerate(self.FIELDS):
                sheet.write(0, column, header, header_style)
            # Count the rows of the page currently shown; a count taken once
            # on the first page is wrong for a page with fewer rows.
            row_count = len(bot.find_elements_by_xpath(self.ROWS_XPATH))
            for i in range(row_count):
                # Look the row up again on every pass, after bot.back().
                result = bot.find_elements_by_xpath(self.ROWS_XPATH)[i]
                link = result.find_element_by_tag_name("a")
                bot.execute_script("arguments[0].click();", link)
                time.sleep(5)
                information = bot.find_element_by_class_name("kungtext").text
                # Every field is extracted independently, so one missing
                # field no longer suppresses the columns that follow it.
                values = [
                    self._extract_field(pattern, information)
                    for _header, pattern in self.FIELDS
                ]
                for column, value in enumerate(values):
                    sheet.write(i + 1, column, value)
                print(*values)
                if values[-1] == "null":
                    print("Email is null")
                bot.back()
                time.sleep(5)
                # Save after every notice so an interrupted run keeps its data.
                wb.save(output_path)
            # There is no further page to move to after the last one.
            if page + 1 < total_pages:
                print("Going to next page ...")
                wait.until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "//input[@id='movenextTop']")
                    )
                ).click()
                time.sleep(5)
 # Build the crawler and run it with no arguments when the script is executed.
 crawler = Bolagsverket()
 crawler.navigate_and_crawl()