add pypinyin tools

4 years ago · ab7aa43d15
parent e94da615a3
commit ab7aa43d15
141 changed files with 1252393 additions and 0 deletions
--- a/third_party/python-pinyin/.bumpversion.cfg
+++ b/third_party/python-pinyin/.bumpversion.cfg
@ -0,0 +1,6 @@
 [bumpversion]
 commit = True
 tag = True
 current_version = 0.41.0
 [bumpversion:file:pypinyin/__init__.py]
--- a/third_party/python-pinyin/.circleci/config.yml
+++ b/third_party/python-pinyin/.circleci/config.yml
@ -0,0 +1,153 @@
 # Python CircleCI 2.0 configuration file
 #
 # Check https://circleci.com/docs/2.0/language-python/ for more details
 #
 version: 2
 jobs:
  python3.8: &DEFAULT
    docker:
      - image: circleci/python:3.8
    environment:
      TOX_ENV: py38
      RUN_CHECK: 1
    working_directory: ~/repo
    steps:
      - checkout
      # Download and cache dependencies
 #      - restore_cache:
 #          keys:
 #          - v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
      - run:
          name: install dependencies
          command: |
            # pip install -U pip virtualenv --user
            if ! which virtualenv; then
              pip install 'virtualenv<=20.0.21' --user
            fi
            export PATH="~/.local/bin:$PATH"
            virtualenv venv
            . venv/bin/activate
            pip install codecov
            pip install tox
            if [[ $RUN_CHECK == 1 ]]; then
              pip install -U -r requirements_dev.txt
            fi
            if [[ $(python -c "import sys; print(sys.stdin.encoding)" |grep None) ]]; then
              export PYTHONIOENCODING=utf-8
            fi
 #
 #      - save_cache:
 #          paths:
 #            - ./venv
 #          key: v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
      - run:
          name: run tests
          command: |
            . venv/bin/activate
            if [[ $RUN_CHECK == 1 ]]; then
              pre-commit run --all-files
              mypy pypinyin
            fi
            tox -e $TOX_ENV
            python setup.py install
            pypinyin hello
            echo hello | pypinyin
            pypinyin < setup.cfg
            codecov
      - store_artifacts:
          path: test-reports
          destination: test-reports
  python3.9:
    <<: *DEFAULT
    docker:
      - image: circleci/python:3.9
    environment:
      TOX_ENV: py39
  python3.7:
    <<: *DEFAULT
    docker:
      - image: circleci/python:3.7
    environment:
      TOX_ENV: py37
  python3.6:
    <<: *DEFAULT
    docker:
      - image: circleci/python:3.6
    environment:
      TOX_ENV: py36
  python3.5:
    <<: *DEFAULT
    docker:
      - image: circleci/python:3.5
    environment:
      TOX_ENV: py35
  python3.4:
    <<: *DEFAULT
    docker:
      - image: circleci/python:3.4
    environment:
      TOX_ENV: py34
  python2.7:
    <<: *DEFAULT
    docker:
      - image: circleci/python:2.7
    environment:
      TOX_ENV: py27
  # python2.6:
  #   <<: *DEFAULT
  #   docker:
  #     - image: python:2.6
  #   environment:
  #     TOX_ENV: py33
  pypy2:
    <<: *DEFAULT
    docker:
      - image: pypy:2
    environment:
      TOX_ENV: pypy
  pypy3:
    <<: *DEFAULT
    docker:
      - image: pypy:3
    environment:
      TOX_ENV: pypy3
 workflows:
  version: 2
  testing:
    jobs:
      - python3.9
      - python3.8
      - python3.7
      - python3.6
      - python3.5
      - python3.4
      - python2.7
      - pypy2
      - pypy3
--- a/third_party/python-pinyin/.coveragerc
+++ b/third_party/python-pinyin/.coveragerc
@ -0,0 +1,15 @@
 [run]
 branch = True
 omit =
    # pypinyin/runner.py
    pypinyin/__main__.py
 [report]
 exclude_lines =
    pragma: no cover
    except NameError
    except ImportError
    pass
    def main
    if py3:
    if __name__ == .__main__.:
--- a/third_party/python-pinyin/.editorconfig
+++ b/third_party/python-pinyin/.editorconfig
@ -0,0 +1,25 @@
 # EditorConfig is awesome: http://EditorConfig.org
 # top-most EditorConfig file
 root = true
 # Unix-style newlines with a newline ending every file
 [*]
 charset = utf-8
 end_of_line = lf
 insert_final_newline = true
 trim_trailing_whitespace = true
 # Indentation
 [*.{py,rst}]
 indent_style = space
 indent_size = 4
 [Makefile]
 indent_style = tab
 indent_size = 4
 [*.{ini,yml}]
 indent_style = space
 indent_size = 2
 [*.md]
 trim_trailing_whitespace = false
--- a/third_party/python-pinyin/.flake8
+++ b/third_party/python-pinyin/.flake8
@ -0,0 +1,50 @@
 [flake8]
 ########## OPTIONS ##########
 # Set the maximum length that any line (with some exceptions) may be.
 max-line-length = 120
 ################### FILE PATTERNS ##########################
 # Provide a comma-separated list of glob patterns to exclude from checks.
 exclude =
    # git folder
    .git,
    # python cache
    __pycache__,
    third_party/,
 # Provide a comma-separated list of glob patterns to include for checks.
 filename =
    *.py
 ########## RULES ##########
 # ERROR CODES
 #
 # E/W  - PEP8 errors/warnings (pycodestyle)
 # F    - linting errors (pyflakes)
 # C    - McCabe complexity error (mccabe)
 #
 # W503 - line break before binary operator
 # Specify a list of codes to ignore.
 ignore =
    W503
    E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
    W291,W293,W605
    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
    # to line this up with executable bit
    EXE001,
    # these ignores are from flake8-bugbear; please fix!
    B007,B008,
    # these ignores are from flake8-comprehensions; please fix!
    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
 # Specify the list of error codes you wish Flake8 to report.
 select =
    E,
    W,
    F,
    C
--- a/third_party/python-pinyin/.github/CONTRIBUTING.md
+++ b/third_party/python-pinyin/.github/CONTRIBUTING.md
@ -0,0 +1,16 @@
 # Contributing
 * 如果是关于单个汉字的拼音有误的问题，麻烦前往 [pinyin-data][pinyin-data] 进行反馈。
 * 如果是关于词组的拼音有误的问题，麻烦前往 [phrase-pinyin-data][phrase-pinyin-data] 进行反馈。
 * 有任何疑问或建议欢迎创建 [issue][issue] 或提交 [PR][pr] 。
 * 项目代码开发方面的问题可以看看 [开发文档][开发文档] 。
 Thanks for contributing! :heart:
 [pinyin-data]: https://github.com/mozillazg/pinyin-data/issues
 [phrase-pinyin-data]: https://github.com/mozillazg/phrase-pinyin-data
 [issue]: https://github.com/mozillazg/python-pinyin/issues
 [pr]: https://github.com/mozillazg/python-pinyin/pulls
 [开发文档]: https://pypinyin.readthedocs.io/zh_CN/develop/develop.html
--- a/third_party/python-pinyin/.github/ISSUE_TEMPLATE.md
+++ b/third_party/python-pinyin/.github/ISSUE_TEMPLATE.md
@ -0,0 +1,21 @@
 ## 运行环境
 * 操作系统（Linux/macOS/Windows）：
 * Python 版本：
 * pypinyin 版本：
 <!--
 P.S. 可以通过 `python -V` 获取 Python 版本。
 P.S. 可以通过 `pypinyin -V` 或者 `pip freeze |grep pypinyin` 或 `pypinyin.__version__` 获取 pypinyin 版本信息。
 -->
 ## 问题描述
 ## 问题复现步骤
 <!--
 感谢反馈！❤️
 -->
--- a/third_party/python-pinyin/.github/PULL_REQUEST_TEMPLATE.md
+++ b/third_party/python-pinyin/.github/PULL_REQUEST_TEMPLATE.md
@ -0,0 +1,15 @@
 ## PR 描述
 ## 待办事项
 * [ ] 符合代码规范
 * [ ] 单元测试
 * [ ] 文档
 <!--
 感谢你的贡献！❤️
 P.S. 麻烦选择 `develop` 分支作为 PR 的目标分支，谢谢~
 -->
--- a/third_party/python-pinyin/.github/workflows/ci.yml
+++ b/third_party/python-pinyin/.github/workflows/ci.yml
@ -0,0 +1,29 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: CI
 on: [push, pull_request]
 jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [windows-latest]
        # python-version: [3.7, 3.8]
        python-version: [3.9]
        tox-env: [py37, py38, py39]
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install tox
    - name: Test with tox
      run: tox -e ${{ matrix.tox-env}}
--- a/third_party/python-pinyin/.github/workflows/codeql-analysis.yml
+++ b/third_party/python-pinyin/.github/workflows/codeql-analysis.yml
@ -0,0 +1,71 @@
 # For most projects, this workflow file will not need changing; you simply need
 # to commit it to your repository.
 #
 # You may wish to alter this file to override the set of languages analyzed,
 # or to provide custom queries or build logic.
 name: "CodeQL"
 on:
  push:
    branches: [master, develop]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [master, develop]
  schedule:
    - cron: '0 2 * * 6'
 jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # Override automatic language detection by changing the below list
        # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
        language: ['python']
        # Learn more...
        # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        # We must fetch at least the immediate parents so that if this is
        # a pull request then we can checkout the head.
        fetch-depth: 2
    # If this run was triggered by a pull request event, then checkout
    # the head of the pull request instead of the merge commit.
    - run: git checkout HEAD^2
      if: ${{ github.event_name == 'pull_request' }}
    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v1
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main
    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v1
    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
    #    and modify them (or add more) to build your code if your project
    #    uses a compiled language
    #- run: |
    #   make bootstrap
    #   make release
    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v1
--- a/third_party/python-pinyin/.gitignore
+++ b/third_party/python-pinyin/.gitignore
@ -0,0 +1,54 @@
 *.py[cod]
 *.sw[op]
 # C extensions
 *.so
 # Packages
 *.egg
 *.egg-info
 dist
 build
 eggs
 parts
 bin
 var
 sdist
 develop-eggs
 .installed.cfg
 lib
 lib64
 _build
 # Installer logs
 pip-log.txt
 # Unit test / coverage reports
 .coverage
 .tox
 nosetests.xml
 htmlcov
 # Translations
 *.mo
 # Mr Developer
 .mr.developer.cfg
 .project
 .pydevproject
 tools/words.txt
 *~
 tools/phrases_dict.txt
 venv
 .cache/
 2.7/
 .python-version
 venv2.7/
 venvPyInstaller/
 output.dat
 vocab.bin
 vocab.large.bin
 .mypy_cache/
 .pytest_cache/
 /pypinyin/phrases_dict_large.py
--- a/third_party/python-pinyin/.gitmodules
+++ b/third_party/python-pinyin/.gitmodules
@ -0,0 +1,6 @@
 [submodule "pinyin-data"]
 	path = pinyin-data
 	url = https://github.com/mozillazg/pinyin-data.git
 [submodule "phrase-pinyin-data"]
 	path = phrase-pinyin-data
 	url = https://github.com/mozillazg/phrase-pinyin-data.git
--- a/third_party/python-pinyin/.pre-commit-config.yaml
+++ b/third_party/python-pinyin/.pre-commit-config.yaml
@ -0,0 +1,29 @@
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks.git
    rev: v3.4.0
    hooks:
      - id: check-merge-conflict
      - id: debug-statements
        exclude: 'tools/|(pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py))'
      - id: double-quote-string-fixer
        exclude: 'pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)'
      - id: end-of-file-fixer
        exclude: '.bumpversion.cfg'
      - id: requirements-txt-fixer
      - id: trailing-whitespace
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.8.4
    hooks:
      - id: flake8
        exclude: 'tools|pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)|(docs/conf.py)'
  # - repo: https://github.com/pre-commit/mirrors-mypy
  #   rev: 'v0.812'
  #   hooks:
  #     - id: mypy
  #       files: 'pypinyin/'
  - repo: https://github.com/pre-commit/mirrors-yapf.git
    sha: v0.16.0
    hooks:
    -   id: yapf
        files: \.py$
        exclude: (?=phrase-pinyin-data|pinyin-data).*(\.py)$
--- a/third_party/python-pinyin/.style.yapf
+++ b/third_party/python-pinyin/.style.yapf
@ -0,0 +1,3 @@
 [style]
 based_on_style = pep8
 column_limit = 80
--- a/third_party/python-pinyin/.whitesource
+++ b/third_party/python-pinyin/.whitesource
@ -0,0 +1,12 @@
 {
  "scanSettings": {
    "baseBranches": []
  },
  "checkRunSettings": {
    "vulnerableCheckRunConclusionLevel": "failure",
    "displayMode": "diff"
  },
  "issueSettings": {
    "minSeverityLevel": "LOW"
  }
 }
--- a/third_party/python-pinyin/CHANGELOG.rst
+++ b/third_party/python-pinyin/CHANGELOG.rst
@ -0,0 +1,903 @@
 Changelog
 ---------
 `0.41.0`_ (2021-03-13)
 ++++++++++++++++++++++++
 * **[New]** 新增 ``pypinyin.contrib.tone_convert`` 模块，用于
  ``Style.TONE`` 、 ``Style.TONE2`` 、 ``Style.TONE3`` 、 ``Style.NORMAL`` 风格的拼音之间互相转换。
  详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/develop/contrib.html#tone-convert>`__
 * **[Improved]** 使用 `pinyin-data`_ v0.10.2 的拼音数据。
 `0.40.0`_ (2020-11-22)
 ++++++++++++++++++++++++
 * **[Improved]** 精简 phrases_dict, 删除 phrases_dict 中凡是能通过 pinyin_dict 得到相同结果的数据。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.5 的词语拼音数据。
 * **[Improved]** 使用 `pinyin-data`_ v0.10.1 的拼音数据。
 `0.39.1`_ (2020-10-08)
 ++++++++++++++++++++++++
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.4 的词语拼音数据。
 * **[Improved]** 使用 `pinyin-data`_ v0.10.0 的拼音数据。
 `0.39.0`_ (2020-08-16)
 ++++++++++++++++++++++++
 * **[New]** ``pinyin`` 和 ``lazy_pinyin`` 函数增加参数 ``v_to_u`` 和 ``neutral_tone_with_five``:
  * ``v_to_u=True`` 时在无声调相关拼音风格下使用 ``ü`` 代替原来的 ``v``
  .. code-block:: python
      >>> lazy_pinyin('战略')
      ['zhan', 'lve']
      >>> lazy_pinyin('战略', v_to_u=True)
      ['zhan', 'lüe']
  * ``neutral_tone_with_five=True`` 时在数字标识声调相关风格下使用 ``5`` 标识轻声
  .. code-block:: python
      >>> lazy_pinyin('衣裳', style=Style.TONE3)
      ['yi1', 'shang']
      >>> lazy_pinyin('衣裳', style=Style.TONE3, neutral_tone_with_five=True)
      ['yi1', 'shang5']
 `0.38.1`_ (2020-07-05)
 ++++++++++++++++++++++++
 * **[Improved]** 优化内置分词，处理前缀匹配导致无法正确识别尾部词语的问题。 Fixed `#205`_
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.3 的词语拼音数据。
 `0.38.0`_ (2020-06-07)
 ++++++++++++++++++++++++
 * **[Improved]** 优化内置分词，严格按照是否是词语来分词。 Fixed `#139`_
 * **[Improved]** 使用 `pinyin-data`_ v0.9.0 的拼音数据。
 `0.37.0`_ (2020-02-09)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复 ``NeutralToneWith5Mixin`` 在 ``TONE3`` 相关风格未把 5 标在预期的拼音末尾位置。
 * **[New]** 增加 Python 3.8 下的测试，正式支持 Python 3.8 。
 `0.36.0`_ (2019-10-27)
 +++++++++++++++++++++++
 * **[New]** 增加 ``V2UMixin`` 用于支持无声调相关拼音风格下的结果使用 ``ü`` 代替原来的 ``v`` 。
  详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/contrib.html#v2umixin>`__ 。
 * **[New]** 增加 ``NeutralToneWith5Mixin`` 用于支持使用数字表示声调的拼音风格下使用 5 标识轻声。
  详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/contrib.html#neutraltonewith5mixin>`__ 。
 * **[New]** 增加 ``Pinyin`` 和 ``DefaultConverter`` 类用于实现自定义处理过程和结果
  （实验性功能，绝大部分用户无需关心新增的这两个类）。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.2 的词语拼音数据。
 * **[Improved]** 使用 `pinyin-data`_ v0.8.1 的拼音数据。
 `0.35.4`_ (2019-07-13)
 +++++++++++++++++++++++
 * **[Bugfixed]** 修复 ``m̄`` ``ê̄``  ``ế`` ``ê̌`` ``ề`` 这几个音无法转换为不含声调结果的问题。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.1 的词语拼音数据。 Fixed `#174`_
 * **[Improved]** 使用 `pinyin-data`_ v0.8.0 的拼音数据。
 * **[Improved]** 修复一处参数注释错误。(via `#176`_ Thanks `@yangwe1`_)
 `0.35.3`_ (2019-05-11)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复鼻音 ``m̀`` 无法转换为不含声调结果的问题。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.10.0 的词语拼音数据。
  Fixed `#166`_ `#167`_ `#169`_ `#170`_
 * **[Improved]** Windows CI 增加在 x64 下跑测试 (via `#164`_ Thanks `@hanabi1224`_)
 `0.35.2`_ (2019-04-06)
 +++++++++++++++++++++++
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.9.2 的词语拼音数据。 Fixed `#159`_ `#160`_
 * **[Improved]** 使用 `pinyin-data`_ v0.7.0 的拼音数据。
 `0.35.1`_ (2019-03-02)
 +++++++++++++++++++++++
 * **[Bugfixed]** 修复 ``朝阳`` 在 ``heteronym=False`` 时输出了多个音的情况。
 `0.35.0`_ (2019-02-24)
 +++++++++++++++++++++++
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.9.0 的词语拼音数据。 Fixed `#154`_ `#149`_
 * **[New]** 支持 ``朝阳`` 这种一个词多个音（ ``'朝阳': [['zhāo', 'cháo'], ['yáng']]`` ）在多音字模式下输出多个音。 Fixed `#154`_
 `0.34.1`_ (2018-12-30)
 +++++++++++++++++++++++
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.8.5 的词语拼音数据。 Fixed `#151`_
 `0.34.0`_ (2018-12-08)
 +++++++++++++++++++++++
 不兼容旧版的变更
 ~~~~~~~~~~~~~~~~~~
 * **[Changed]** 当 ``errors`` 参数的值是个回调对象并且返回值是个 ``list`` 时，
  会使用这个 list 来 extend 结果 list (via `#147`_ . Thanks `@howl-anderson`_ ) ::
    # 更新前
    >>> pinyin('你好☆☆', errors=lambda x: ['star' for _ in x])
    [['nǐ'], ['hǎo'], ['star', 'star']]
    # 更新后
    >>> pinyin('你好☆☆', errors=lambda x: ['star' for _ in x])
    [['nǐ'], ['hǎo'], ['star'], ['star']]
 详见文档: https://pypinyin.readthedocs.io/zh_CN/develop/usage.html#handle-no-pinyin
 `0.33.2`_ (2018-11-03)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复 ``strict=True`` 时韵母相关风格下没有正确处理韵母 ``üan`` 的问题。
 `0.33.1`_ (2018-09-23)
 ++++++++++++++++++++++++
 * **[Improved]** 使用 `pinyin-data`_ v0.6.2 的拼音数据。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.8.4 的词语拼音数据。
 `0.33.0`_ (2018-08-05)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复命令行程序在 ``sys.stdin.encoding`` 为 ``None`` 时无法正常工作的问题。
 * **[Improved]** 使用 `pinyin-data`_ v0.6.1 的拼音数据。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.8.3 的词语拼音数据。
  * Fixed `#137`_
 * **[Changed]** 不再测试 Python 2.6 和 Python 3.3，增加测试 Python 3.7 和 PyPy3
  即不保证程序兼容 Python 2.6 和 Python 3.3。
 `0.32.0`_ (2018-07-28)
 ++++++++++++++++++++++++
 * **[Improved]** 使用 `pinyin-data`_ v0.6.0 的拼音数据。
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.8.2 的词语拼音数据。
 `0.31.0`_ (2018-06-10)
 ++++++++++++++++++++++++
 * **[New]** 增加 py.typed 标记文件，支持 `PEP 561`_ (via `#130`_)
 * **[Changed]** 使用 `phrase-pinyin-data`_ v0.7.3 的词语拼音数据。
  * fixed `#112`_ `#117`_ `#122`_ `#131`_
  * 精简词组拼音，删除部分数据有误的拼音数据
 `0.30.1`_ (2018-04-25)
 ++++++++++++++++++++++++
 * **[Improved]** 更新文档和测试。(via `7fa0b87 <https://github.com/mozillazg/python-pinyin/commit/7fa0b879df47e8a7e5af5edb5f243dd4ea645410>`_)
 * **[Improved]** 对用户传入的已进行分词处理的数据进行二次分词以便提高准确性。(via `#126`_)
 * **[Improved]** 使用 `pinyin-data`_ v0.5.1 的拼音数据。(via `#125`_)
 `0.30.0`_ (2018-02-03)
 ++++++++++++++++++++++++
 * **[New]** 支持有拼音的非汉字字符 ``〇`` (via `#119`_)。
 * **[Changed]** 修复之前无意中把 ``pinyin`` 函数中的 ``strict`` 参数的默认值修改为了 ``False`` ，
  现在把 ``strict`` 参数的默认值恢复为预期的 ``True`` (via `#121`_)。关于 ``strict`` 参数的影响详见文档： `strict 参数的影响`_
 `0.29.0`_ (2018-01-14)
 ++++++++++++++++++++++++
 * **[New]** 可以通过环境变量 ``PYPINYIN_NO_DICT_COPY`` 禁用代码内对 dict 的 copy 操作，节省内存(via `#115`_ thanks `@daya0576`_ )。
 `0.28.0`_ (2017-12-08)
 ++++++++++++++++++++++++
 * **[New]** 给代码增加类型注解(via `#110`_)。
 `0.27.0`_ (2017-10-28)
 ++++++++++++++++++++++++
 * **[New]** 命令行工具支持通过更简便的方式指定参数及拼音风格。
  (详见 `#105`_, Thanks `@wdscxsj`_ )
 * **[Improved]** 增加说明 ``strict`` 参数对结果有什么影响的文档。
 `0.26.1`_ (2017-10-25)
 ++++++++++++++++++++++++
 * **[Improved]** 使用 `phrase-pinyin-data`_ v0.5.1 的词语拼音数据。fixed `#106`_
 `0.26.0`_ (2017-10-12)
 +++++++++++++++++++++++
 * **[Changed]** 不再自动调用 jieba 分词模块，改为自动调用内置的正向最大匹配分词模块来分词。
  (via `#102`_)
 `0.25.0`_ (2017-10-01)
 +++++++++++++++++++++++
 * **[New]** 内置一个正向最大匹配分词模块，使用内置的词语拼音库来训练这个分词模块，
  解决自定义词语库有时可能不生效的问题（因为这个词语在 jieba 等分词模块中不是可用词）。(via `#81`_)
  获取拼音或自定义词库后使用：
  .. code-block:: python
      >>> from pypinyin import pinyin, load_phrases_dict
      >>> load_phrases_dict({'了局': [['liǎo'], ['jú']]})
      >>> pinyin('了局啊')   # 使用 jieba 分词
      Building prefix dict from the default dictionary ...
      Dumping model to file cache /var/folders/s6/z9r_07h53pj_d4x7qjszwmbw0000gn/T/jieba.cache
      Loading model cost 1.175 seconds.
      Prefix dict has been built succesfully.
      [['le'], ['jú'], ['a']]
      >>> from pypinyin.contrib.mmseg import seg, retrain
      >>> retrain(seg)   # 没有使用 load_phrases_dict 时可以不调用这个函数
      >>> pinyin(seg.cut('了局啊'))  # 使用内置的正向最大匹配分词
      [['liǎo'], ['jú'], ['a']]
      >>>
  单独使用:
  .. code-block:: python
        >>> from pypinyin.contrib.mmseg import seg
        >>> text = '你好，我是中国人，我爱我的祖国'
        >>> seg.cut(text)
        <generator object Seg.cut at 0x10b2df2b0>
        >>> list(seg.cut(text))
        ['你好', '，', '我', '是', '中国人', '，', '我', '爱',
         '我的', '祖', '国']
        >>> seg.train(['祖国', '我是'])
        >>> list(seg.cut(text))
        ['你好', '，', '我是', '中国人', '，', '我', '爱',
         '我的', '祖国']
        >>>
 `0.24.0`_ (2017-09-17)
 ++++++++++++++++++++++++
 * **[New]** 支持类似 pyinstaller 的打包工具对使用 pypinyin 的程序进行打包，
  不会出现跟打包前不一样的输出（比如： `#92`_ ）（via `#93`_ ）。
 `0.23.0`_ (2017-07-09)
 ++++++++++++++++++++++++
 * **[New]** 使用 `phrase-pinyin-data`_ v0.5.0 的词语拼音数据。
 `0.22.0`_ (2017-06-14)
 ++++++++++++++++++++++++
 * **[New]** 支持 IronPython (via `#86`_). Thanks `@LevyLession`_
 `0.21.1`_ (2017-05-29)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复在 Python 2 下通过 pip install 安装 wheel 格式的安装包后, 无法正常使用的问题。（Python 2 下没有自动安装依赖包）
 `0.21.0`_ (2017-05-14)
 ++++++++++++++++++++++++
 * **[New]** 重构各拼音风格实现，支持自定义拼音风格或覆盖已有拼音风格的实现.
  .. code-block:: python
      from pypinyin.style import register
      @register('style1')
      def func(pinyin, **kwargs):
          # pinyin = xxx   # convert to style1
          return pinyin
      def func(pinyin, **kwargs):
          # pinyin = xxx   # convert to style2
          return pinyin
      register('style2', func=func)
 `0.20.0`_ (2017-05-13)
 ++++++++++++++++++++++++
 * **[New]** 增加 ``strict`` 参数来控制处理声母和韵母时是否严格遵循 `《汉语拼音方案》 <http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`_ 标准。
  当 ``strict=True`` 时根据 `《汉语拼音方案》 <http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`_ 的如下规则处理声母、在韵母相关风格下还原正确的韵母：
   * 21 个声母： ``b p m f d t n l g k h j q x zh ch sh r z c s`` （**y, w 不是声母**）
   * i行的韵母，前面没有声母的时候，写成yi(衣)，ya(呀)，ye(耶)，yao(腰)，you(忧)，yan(烟)，yin(因)，yang(央)，ying(英)，yong(雍)。（**y 不是声母**）
   * u行的韵母，前面没有声母的时候，写成wu(乌)，wa(蛙)，wo(窝)，wai(歪)，wei(威)，wan(弯)，wen(温)，wang(汪)，weng(翁)。（**w 不是声母**）
   * ü行的韵母，前面没有声母的时候，写成yu(迂)，yue(约)，yuan(冤)，yun(晕)；ü上两点省略。（**韵母相关风格下还原正确的韵母 ü**）
   * ü行的韵母跟声母j，q，x拼的时候，写成ju(居)，qu(区)，xu(虚)，ü上两点也省略；
     但是跟声母n，l拼的时候，仍然写成nü(女)，lü(吕)。（**韵母相关风格下还原正确的韵母 ü**）
   * iou，uei，uen前面加声母的时候，写成iu，ui，un。例如niu(牛)，gui(归)，lun(论)。（**韵母相关风格下还原正确的韵母 iou，uei，uen**）
  具体差异可以查看 tests/test_standard.py 中的对比结果测试用例
 * **[Changed]** 改为使用 enum 定义拼音风格（兼容旧版本）
 `0.19.0`_ (2017-05-05)
 ++++++++++++++++++++++++
 * **[New]** 韵母风格下根据 `汉语拼音方案`_ 还原原始的 ``iou`` , ``uei`` , ``uen`` 韵母。
    iou，uei，uen前面加声母的时候，写成iu，ui，un。
    例如niu(牛)，gui(归)，lun(论)。即：
    * niu 的韵母是 iou
    * gui 的韵母是 uei
    * lun 的韵母是 uen
 * **[Fixed]** 修复韵母相关风格下没有正确处理 ``wu`` 的韵母的问题
  (比如: ``无`` 在 ``FINALS_TONE`` 风格下的结果是 ``uú`` 的问题) 。
 * **[Fixed]** 修复漏了 ǖ -> v1 的转换。
 `0.18.2`_ (2017-04-25)
 ++++++++++++++++++++++++
 * **[Fixed]** 使用 `phrase-pinyin-data`_ v0.4.1 的词语拼音数据, fixed `#72`_ 。
 `0.18.1`_ (2017-03-22)
 ++++++++++++++++++++++++
 * **[Improved]** PyPI 上传过程中出了点问题。
 `0.18.0`_ (2017-03-22)
 ++++++++++++++++++++++++
 * **[Changed]** 使用 `phrase-pinyin-data`_ v0.4.0 的词语拼音数据。
 `0.17.0`_ (2017-03-13)
 ++++++++++++++++++++++++
 * **[Changed]** 词语拼音数据改为使用来自 `phrase-pinyin-data`_ v0.3.1 的拼音数据。
 * **[Fixed]** 修正 ``斯事体大`` 的拼音。
 `0.16.1`_ (2017-02-12)
 ++++++++++++++++++++++++
 * **[Improved]** 使用 `pinyin-data`_ v0.4.1 的拼音数据. fixed `#58`_
 * **[Improved]** 更新 `厦门` 的拼音. fixed `#59`_
 `0.16.0`_ (2016-11-27)
 ++++++++++++++++++++++++
 * **[New]** Added new pinyin styles - ``CYRILLIC`` (汉语拼音与俄语字母对照表) and ``CYRILLIC_FIRST`` (via `#55`_ thanks `@tyrbonit`_)
  .. code-block:: python
      >>> pypinyin.pinyin('中心', style=pypinyin.CYRILLIC)
      [['чжун1'], ['синь1']]
      >>> pypinyin.pinyin('中心', style=pypinyin.CYRILLIC_FIRST)
      [['ч'], ['с']]
 * **[New]** Added Russian translation README (`README_ru.rst`_)
 * **[New]** Command-line tool supported the new pinyin styles: ``CYRILLIC, CYRILLIC_FIRST``
 `0.15.0`_ (2016-10-18)
 ++++++++++++++++++++++++
 * **[Changed]** 使用 `pinyin-data`_ v0.4.0 的拼音数据
 `0.14.0`_ (2016-09-24)
 ++++++++++++++++++++++++
 * **[New]** 新增注音 ``BOPOMOFO`` 及注音首字母 ``BOPOMOFO_FIRST`` 风格(via `#51`_ thanks `@gumblex`_ `@Artoria2e5`_)
  .. code-block:: python
      >>> pypinyin.pinyin('中心', style=pypinyin.BOPOMOFO)
      [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
      >>> pypinyin.pinyin('中心', style=pypinyin.BOPOMOFO_FIRST)
      [['ㄓ'], ['ㄒ']]
 * **[New]** 新增音调在拼音后的 ``TONE3`` 以及 ``FINALS_TONE3`` 风格(via `#51`_ thanks `@gumblex`_ `@Artoria2e5`_ )
  .. code-block:: python
      >>> pypinyin.pinyin('中心', style=pypinyin.TONE3)
      [['zhong1'], ['xin1']]
      >>> pypinyin.pinyin('中心', style=pypinyin.FINALS_TONE3)
      [['ong1'], ['in1']]
 * **[New]** 命令行程序支持新增的四个风格: ``TONE3, FINALS_TONE3, BOPOMOFO, BOPOMOFO_FIRST``
 * **[Bugfixed]** 修复 TONE2 中 ü 标轻声的问题（像 侵略 -> qi1n lv0e4），以及去除文档中 0 表示轻声(via `#51`_ thanks `@gumblex`_)
 * **[Changed]** 不再使用 0 表示轻声，轻声时没有数字(via `#51`_ thanks `@gumblex`_)
 `0.13.0`_ (2016-08-19)
 ++++++++++++++++++++++++
 * **[Changed]** 分离词组库中包含中文逗号的词语(via `f097b6a <https://github.com/mozillazg/python-pinyin/commit/f097b6ad7b9e2acbc1ecc214991be510f4f95d72>`_)
 * **[Changed]** 使用 `pinyin-data`_ v0.3.0 的拼音数据
 `0.12.1`_ (2016-05-11)
 ++++++++++++++++++++++++
 * **[Bugfixed]** 修复一些词语存在拼音粘连在一起的情况. (`#41`_ thanks `@jolly-tao`_ )
 `0.12.0`_ (2016-03-12)
 ++++++++++++++++++++++++
 * **[Changed]** 单个汉字的拼音数据改为使用来自 `pinyin-data`_ 的拼音数据。
 * **[New]** 命令行程序支持从标准输入读取汉字信息::
    $ echo "你好" | pypinyin
    nǐ hǎo
    $ pypinyin < hello.txt
    nǐ hǎo
 `0.11.1`_ (2016-02-17)
 +++++++++++++++++++++++
 * **[Bugfixed]** 更新 phrases_dict 修复类似 `#36`_ 的问题。thanks `@someus`_
 `0.11.0`_ (2016-01-16)
 +++++++++++++++++++++++
 * **[Changed]** 分割 ``__init__.py`` 为 ``compat.py``, ``constants.py``， ``core.py`` 和 ``utils.py``。
  影响: ``__init__.py`` 中只保留文档中提到过的 api, 如果使用了不在文档中的 api 则需要调整代码。
 `0.10.0`_ (2016-01-02)
 +++++++++++++++++++++++
 * **[New]** Python 3.3+ 以上版本默认支持 ``U+20000 ~ U+2FA1F`` 区间内的汉字(详见 `#33`_)
 `0.9.5`_ (2015-12-19)
 +++++++++++++++++++++++
 * **[Bugfixed]** 修复未正确处理鼻音（详见 `汉语拼音 - 维基百科`_ ）的问题(`#31`_ thanks `@xulin97`_ ):
  * ``ḿ、ń、ň、ǹ`` 对应 “呒”、“呣”、“唔”、“嗯”等字。
    这些字之前在各种风格下都输出原始的汉字而不是拼音。
 `0.9.4`_ (2015-11-27)
 +++++++++++++++++++++++
 * **[Improved]** 细微调整，主要是更新文档
 `0.9.3`_ (2015-11-15)
 +++++++++++++++++++++++
 * **[Bugfixed]** Fixed broken Python 3 compatibility.
 `0.9.2`_ (2015-11-15)
 +++++++++++++++++++++++
 * **[New]** ``load_single_dict`` 和 ``load_phrases_dict`` 增加 ``style`` 参数支持 TONE2 风格的拼音 ::
      load_single_dict({ord(u'啊'): 'a1'}, style='tone2')
       load_phrases_dict({u"阿爸": [[u"a1"], [u"ba4"]]}, style='tone2')
 * **[Improved]** Improved docs
 `0.9.1`_ (2015-10-17)
 +++++++++++++++++++++++
 * **[Bugfixed][Changed]** 修复 ``ju``, ``qu``, ``xu``, ``yu``, ``yi`` 和 ``wu`` 的韵母( `#26`_ ). Thanks `@MingStar`_ :
  * ``ju``, ``qu``, ``xu`` 的韵母应该是 ``v``
  * ``yi`` 的韵母是 ``i``
  * ``wu`` 的韵母是 ``u``
  * 从现在开始 ``y`` 既不是声母也不是韵母，详见 `汉语拼音方案`_
 `0.9.0`_ (2015-09-20)
 +++++++++++++++++++++++
 * **[Changed]** 将拼音词典库里的国际音标字母替换为 ASCII 字母. Thanks `@MingStar`_ :
  * ``ɑ -> a``
  * ``ɡ -> g``
 `0.8.5`_ (2015-08-23)
 +++++++++++++++++++++++
 * **[Bugfixed]** 修复 zh, ch, sh, z, c, s 顺序问题导致获取声母有误
 `0.8.4`_ (2015-08-23)
 +++++++++++++++++++++++
 * **[Changed]** ``y``, ``w`` 也不是声母. (`hotoo/pinyin#57 <https://github.com/hotoo/pinyin/issues/57>`__):
  * 以 ``y``, ``w`` 开头的拼音在声母(``INITIALS``)模式下将返回 ``['']``
 `0.8.3`_ (2015-08-20)
 +++++++++++++++++++++++
 * **[Improved]** 上传到 PyPI 出了点问题，但是又 `没法重新上传 <http://sourceforge.net/p/pypi/support-requests/468/>`__ ，只好新增一个版本
 `0.8.2`_ (2015-08-20)
 +++++++++++++++++++++++
 * **[Bugfixed][Changed]** 修复误把 yu 放入声母列表里的 BUG(`#22`_). Thanks `@MingStar`_
 `0.8.1`_ (2015-07-04)
 +++++++++++++++++++++++
 * **[Bugfixed]** 重构内置的分词功能，修复“无法正确处理包含空格的字符串的问题”
 `0.8.0`_ (2015-06-27)
 ++++++++++++++++++++++++
 * **[New]** 内置简单的分词功能，完善处理没有拼音的字符
  （如果不需要处理多音字问题, 现在可以不用安装 ``jieba`` 或其他分词模块了）::
        # 之前, 安装了结巴分词模块
        lazy_pinyin(u'你好abc☆☆')
        [u'ni', u'hao', 'a', 'b', 'c', u'\u2606', u'\u2606']
        # 现在, 无论是否安装结巴分词模块
        lazy_pinyin(u'你好abc☆☆')
        [u'ni', u'hao', u'abc\u2606\u2606']
 * | **[Changed]** 当 ``errors`` 参数是回调函数时，函数的参数由 ``单个字符`` 变更为 ``单个字符或词组`` 。
  | 即: 对于 ``abc`` 字符串, 之前将调用三次 ``errors`` 回调函数: ``func('a') ... func('b') ... func('c')``
  | 现在只调用一次: ``func('abc')`` 。
 * **[Changed]** 将英文字符也纳入 ``errors`` 参数的处理范围::
        # 之前
        lazy_pinyin(u'abc', errors='ignore')
        [u'abc']
        # 现在
        lazy_pinyin(u'abc', errors='ignore')
        []
 `0.7.0`_ (2015-06-20)
 ++++++++++++++++++++++++
 * **[Bugfixed]** Python 2 下无法使用 ``from pypinyin import *`` 的问题
 * **[New]** 支持以下环境变量:
  * ``PYPINYIN_NO_JIEBA=true``: 禁用“自动调用结巴分词模块”
  * ``PYPINYIN_NO_PHRASES=true``: 禁用内置的“词组拼音库”
 `0.6.0`_ (2015-06-10)
 ++++++++++++++++++++++++
 * **[New]** ``errors`` 参数支持回调函数(`#17`_): ::
    def foobar(char):
        return u'a'
    pinyin(u'あ', errors=foobar)
 `0.5.7`_ (2015-05-17)
 ++++++++++++++++++++++
 * **[Bugfixed]** 纠正包含 "便宜" 的一些词组的读音
 `0.5.6`_ (2015-02-26)
 ++++++++++++++++++++++
 * **[Bugfixed]** "苹果" pinyin error. `#11`__
 * **[Bugfixed]** 重复 import jieba 的问题
 * **[Improved]** 精简 phrases_dict
 * **[Improved]** 更新文档
 __ https://github.com/mozillazg/python-pinyin/issues/11
 `0.5.5`_ (2015-01-27)
 ++++++++++++++++++++++
 * **[Bugfixed]** phrases_dict error
 `0.5.4`_ (2014-12-26)
 ++++++++++++++++++++++
 * **[Bugfixed]** 无法正确处理由分词模块产生的中英文混合词组（比如：B超，维生素C）的问题.  `#8`__
 __ https://github.com/mozillazg/python-pinyin/issues/8
 `0.5.3`_ (2014-12-07)
 ++++++++++++++++++++++
 * **[Improved]** 更新拼音库
 `0.5.2`_ (2014-09-21)
 +++++++++++++++++++++
 * **[Improved]** 载入拼音库时，改为载入其副本。防止内置的拼音库被破坏
 * **[Bugfixed]** ``胜败乃兵家常事`` 的音标问题
 `0.5.1`_ (2014-03-09)
 +++++++++++++++++++++
 * **[New]** 参数 ``errors`` 用来控制如何处理没有拼音的字符:
  * ``'default'``: 保留原始字符
  * ``'ignore'``: 忽略该字符
  * ``'replace'``: 替换为去掉 ``\u`` 的 unicode 编码字符串(``u'\u90aa'`` => ``u'90aa'``)
  只处理 ``[^a-zA-Z0-9_]`` 字符。
 `0.5.0`_ (2014-03-01)
 +++++++++++++++++++++
 * **[Changed]** **使用新的单字拼音库内容和格式**
  | 新的格式：``{0x963F: u"ā,ē"}``
  | 旧的格式：``{u'啊': u"ā,ē"}``
 `0.4.4`_ (2014-01-16)
 +++++++++++++++++++++
 * **[Improved]** 清理命令行命令的输出结果，去除无关信息
 * **[Bugfixed]** “ImportError: No module named runner”
 `0.4.3`_ (2014-01-10)
 +++++++++++++++++++++
 * **[Bugfixed]** 命令行工具在 Python 3 下的兼容性问题
 `0.4.2`_ (2014-01-10)
 +++++++++++++++++++++
 * **[Changed]** 去除拼音风格前的 ``STYLE_`` 前缀（兼容包含 ``STYLE_`` 前缀的拼音风格）
 * **[New]** 命令行工具，具体用法请见： ``pypinyin -h``
 `0.4.1`_ (2014-01-04)
 +++++++++++++++++++++
 * **[New]** 支持自定义拼音库，方便用户修正程序结果(``load_single_dict``, ``load_phrases_dict``)
 `0.4.0`_ (2014-01-03)
 +++++++++++++++++++++
 * **[Changed]** 将 ``jieba`` 模块改为可选安装，用户可以选择使用自己喜爱的分词模块对汉字进行分词处理
 * **[New]** 支持 Python 3
 `0.3.1`_ (2013-12-24)
 +++++++++++++++++++++
 * **[New]** ``lazy_pinyin`` ::
    >>> lazy_pinyin(u'中心')
    ['zhong', 'xin']
 `0.3.0`_ (2013-09-26)
 +++++++++++++++++++++
 * **[Bugfixed]** 首字母风格无法正确处理只有韵母的汉字
 * **[New]** 三个拼音风格:
    * ``pypinyin.STYLE_FINALS`` ：       韵母风格1，只返回各个拼音的韵母部分，不带声调。如： ``ong uo``
    * ``pypinyin.STYLE_FINALS_TONE`` ：   韵母风格2，带声调，声调在韵母第一个字母上。如： ``ōng uó``
    * ``pypinyin.STYLE_FINALS_TONE2`` ：  韵母风格3，带声调，声调在各个拼音之后，用数字 [0-4] 进行表示。如： ``o1ng uo2``
 `0.2.0`_ (2013-09-22)
 +++++++++++++++++++++
 * **[Improved]** 完善对中英文混合字符串的支持::
    >>> pypinyin.pinyin(u'你好abc')
    [[u'n\u01d0'], [u'h\u01ceo'], [u'abc']]
 0.1.0 (2013-09-21)
 ++++++++++++++++++
 * **[New]** Initial Release
 .. _#17: https://github.com/mozillazg/python-pinyin/pull/17
 .. _#22: https://github.com/mozillazg/python-pinyin/pull/22
 .. _#26: https://github.com/mozillazg/python-pinyin/pull/26
 .. _@MingStar: https://github.com/MingStar
 .. _汉语拼音方案: https://zh.wiktionary.org/wiki/%E9%99%84%E5%BD%95:%E6%B1%89%E8%AF%AD%E6%8B%BC%E9%9F%B3%E6%96%B9%E6%A1%88
 .. _汉语拼音方案.pdf: http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html
 .. _汉语拼音 - 维基百科: https://zh.wikipedia.org/wiki/%E6%B1%89%E8%AF%AD%E6%8B%BC%E9%9F%B3#cite_ref-10
 .. _@xulin97: https://github.com/xulin97
 .. _#31: https://github.com/mozillazg/python-pinyin/issues/31
 .. _#33: https://github.com/mozillazg/python-pinyin/pull/33
 .. _#36: https://github.com/mozillazg/python-pinyin/issues/36
 .. _pinyin-data: https://github.com/mozillazg/pinyin-data
 .. _@someus: https://github.com/someus
 .. _#34: https://github.com/mozillazg/python-pinyin/issues/34
 .. _#41: https://github.com/mozillazg/python-pinyin/issues/41
 .. _@jolly-tao: https://github.com/jolly-tao
 .. _@gumblex: https://github.com/gumblex
 .. _@Artoria2e5: https://github.com/Artoria2e5
 .. _#51: https://github.com/mozillazg/python-pinyin/issues/51
 .. _#55: https://github.com/mozillazg/python-pinyin/pull/55
 .. _@tyrbonit: https://github.com/tyrbonit
 .. _README_ru.rst: https://github.com/mozillazg/python-pinyin/blob/master/README_ru.rst
 .. _#58: https://github.com/mozillazg/python-pinyin/issues/58
 .. _#59: https://github.com/mozillazg/python-pinyin/issues/59
 .. _#72: https://github.com/mozillazg/python-pinyin/issues/72
 .. _phrase-pinyin-data: https://github.com/mozillazg/phrase-pinyin-data
 .. _@LevyLession: https://github.com/LevyLession
 .. _#86: https://github.com/mozillazg/python-pinyin/issues/86
 .. _#92: https://github.com/mozillazg/python-pinyin/issues/92
 .. _#93: https://github.com/mozillazg/python-pinyin/issues/93
 .. _#81: https://github.com/mozillazg/python-pinyin/issues/81
 .. _#102: https://github.com/mozillazg/python-pinyin/issues/102
 .. _#105: https://github.com/mozillazg/python-pinyin/issues/105
 .. _#106: https://github.com/mozillazg/python-pinyin/issues/106
 .. _@wdscxsj: https://github.com/wdscxsj
 .. _#110: https://github.com/mozillazg/python-pinyin/pull/110
 .. _#115: https://github.com/mozillazg/python-pinyin/pull/115
 .. _#119: https://github.com/mozillazg/python-pinyin/pull/119
 .. _@daya0576: https://github.com/daya0576
 .. _#121: https://github.com/mozillazg/python-pinyin/pull/121
 .. _#125: https://github.com/mozillazg/python-pinyin/pull/125
 .. _#126: https://github.com/mozillazg/python-pinyin/pull/126
 .. _#112: https://github.com/mozillazg/python-pinyin/issues/112
 .. _#117: https://github.com/mozillazg/python-pinyin/issues/117
 .. _#122: https://github.com/mozillazg/python-pinyin/issues/122
 .. _#131: https://github.com/mozillazg/python-pinyin/issues/131
 .. _#130: https://github.com/mozillazg/python-pinyin/pull/130
 .. _PEP 561: https://www.python.org/dev/peps/pep-0561/
 .. _#137: https://github.com/mozillazg/python-pinyin/issues/137
 .. _#147: https://github.com/mozillazg/python-pinyin/pull/147
 .. _@howl-anderson: https://github.com/howl-anderson
 .. _#151: https://github.com/mozillazg/python-pinyin/issues/151
 .. _#154: https://github.com/mozillazg/python-pinyin/issues/154
 .. _#149: https://github.com/mozillazg/python-pinyin/issues/149
 .. _#159: https://github.com/mozillazg/python-pinyin/issues/159
 .. _#160: https://github.com/mozillazg/python-pinyin/issues/160
 .. _strict 参数的影响: https://pypinyin.readthedocs.io/zh_CN/master/usage.html#strict
 .. _#166: https://github.com/mozillazg/python-pinyin/issues/166
 .. _#167: https://github.com/mozillazg/python-pinyin/issues/167
 .. _#169: https://github.com/mozillazg/python-pinyin/issues/169
 .. _#170: https://github.com/mozillazg/python-pinyin/issues/170
 .. _#174: https://github.com/mozillazg/python-pinyin/issues/174
 .. _#139: https://github.com/mozillazg/python-pinyin/issues/139
 .. _#205: https://github.com/mozillazg/python-pinyin/issues/205
 .. _#164: https://github.com/mozillazg/python-pinyin/pull/164
 .. _#176: https://github.com/mozillazg/python-pinyin/pull/176
 .. _@hanabi1224: https://github.com/hanabi1224
 .. _@yangwe1: https://github.com/yangwe1
 .. _0.2.0: https://github.com/mozillazg/python-pinyin/compare/v0.1.0...v0.2.0
 .. _0.3.0: https://github.com/mozillazg/python-pinyin/compare/v0.2.0...v0.3.0
 .. _0.3.1: https://github.com/mozillazg/python-pinyin/compare/v0.3.0...v0.3.1
 .. _0.4.0: https://github.com/mozillazg/python-pinyin/compare/v0.3.1...v0.4.0
 .. _0.4.1: https://github.com/mozillazg/python-pinyin/compare/v0.4.0...v0.4.1
 .. _0.4.2: https://github.com/mozillazg/python-pinyin/compare/v0.4.1...v0.4.2
 .. _0.4.3: https://github.com/mozillazg/python-pinyin/compare/v0.4.2...v0.4.3
 .. _0.4.4: https://github.com/mozillazg/python-pinyin/compare/v0.4.3...v0.4.4
 .. _0.5.0: https://github.com/mozillazg/python-pinyin/compare/v0.4.4...v0.5.0
 .. _0.5.1: https://github.com/mozillazg/python-pinyin/compare/v0.5.0...v0.5.1
 .. _0.5.2: https://github.com/mozillazg/python-pinyin/compare/v0.5.1...v0.5.2
 .. _0.5.3: https://github.com/mozillazg/python-pinyin/compare/v0.5.2...v0.5.3
 .. _0.5.4: https://github.com/mozillazg/python-pinyin/compare/v0.5.3...v0.5.4
 .. _0.5.5: https://github.com/mozillazg/python-pinyin/compare/v0.5.4...v0.5.5
 .. _0.5.6: https://github.com/mozillazg/python-pinyin/compare/v0.5.5...v0.5.6
 .. _0.5.7: https://github.com/mozillazg/python-pinyin/compare/v0.5.6...v0.5.7
 .. _0.6.0: https://github.com/mozillazg/python-pinyin/compare/v0.5.7...v0.6.0
 .. _0.7.0: https://github.com/mozillazg/python-pinyin/compare/v0.6.0...v0.7.0
 .. _0.8.0: https://github.com/mozillazg/python-pinyin/compare/v0.7.0...v0.8.0
 .. _0.8.1: https://github.com/mozillazg/python-pinyin/compare/v0.8.0...v0.8.1
 .. _0.8.2: https://github.com/mozillazg/python-pinyin/compare/v0.8.1...v0.8.2
 .. _0.8.3: https://github.com/mozillazg/python-pinyin/compare/v0.8.2...v0.8.3
 .. _0.8.4: https://github.com/mozillazg/python-pinyin/compare/v0.8.3...v0.8.4
 .. _0.8.5: https://github.com/mozillazg/python-pinyin/compare/v0.8.4...v0.8.5
 .. _0.9.0: https://github.com/mozillazg/python-pinyin/compare/v0.8.5...v0.9.0
 .. _0.9.1: https://github.com/mozillazg/python-pinyin/compare/v0.9.0...v0.9.1
 .. _0.9.2: https://github.com/mozillazg/python-pinyin/compare/v0.9.1...v0.9.2
 .. _0.9.3: https://github.com/mozillazg/python-pinyin/compare/v0.9.2...v0.9.3
 .. _0.9.4: https://github.com/mozillazg/python-pinyin/compare/v0.9.3...v0.9.4
 .. _0.9.5: https://github.com/mozillazg/python-pinyin/compare/v0.9.4...v0.9.5
 .. _0.10.0: https://github.com/mozillazg/python-pinyin/compare/v0.9.5...v0.10.0
 .. _0.11.0: https://github.com/mozillazg/python-pinyin/compare/v0.10.0...v0.11.0
 .. _0.11.1: https://github.com/mozillazg/python-pinyin/compare/v0.11.0...v0.11.1
 .. _0.12.0: https://github.com/mozillazg/python-pinyin/compare/v0.11.1...v0.12.0
 .. _0.12.1: https://github.com/mozillazg/python-pinyin/compare/v0.12.0...v0.12.1
 .. _0.13.0: https://github.com/mozillazg/python-pinyin/compare/v0.12.1...v0.13.0
 .. _0.14.0: https://github.com/mozillazg/python-pinyin/compare/v0.13.0...v0.14.0
 .. _0.15.0: https://github.com/mozillazg/python-pinyin/compare/v0.14.0...v0.15.0
 .. _0.16.0: https://github.com/mozillazg/python-pinyin/compare/v0.15.0...v0.16.0
 .. _0.16.1: https://github.com/mozillazg/python-pinyin/compare/v0.16.0...v0.16.1
 .. _0.17.0: https://github.com/mozillazg/python-pinyin/compare/v0.16.1...v0.17.0
 .. _0.18.0: https://github.com/mozillazg/python-pinyin/compare/v0.17.0...v0.18.0
 .. _0.18.1: https://github.com/mozillazg/python-pinyin/compare/v0.18.0...v0.18.1
 .. _0.18.2: https://github.com/mozillazg/python-pinyin/compare/v0.18.1...v0.18.2
 .. _0.19.0: https://github.com/mozillazg/python-pinyin/compare/v0.18.2...v0.19.0
 .. _0.20.0: https://github.com/mozillazg/python-pinyin/compare/v0.19.0...v0.20.0
 .. _0.21.0: https://github.com/mozillazg/python-pinyin/compare/v0.20.0...v0.21.0
 .. _0.21.1: https://github.com/mozillazg/python-pinyin/compare/v0.21.0...v0.21.1
 .. _0.22.0: https://github.com/mozillazg/python-pinyin/compare/v0.21.1...v0.22.0
 .. _0.23.0: https://github.com/mozillazg/python-pinyin/compare/v0.22.0...v0.23.0
 .. _0.24.0: https://github.com/mozillazg/python-pinyin/compare/v0.23.0...v0.24.0
 .. _0.25.0: https://github.com/mozillazg/python-pinyin/compare/v0.24.0...v0.25.0
 .. _0.26.0: https://github.com/mozillazg/python-pinyin/compare/v0.25.0...v0.26.0
 .. _0.26.1: https://github.com/mozillazg/python-pinyin/compare/v0.26.0...v0.26.1
 .. _0.27.0: https://github.com/mozillazg/python-pinyin/compare/v0.26.1...v0.27.0
 .. _0.28.0: https://github.com/mozillazg/python-pinyin/compare/v0.27.0...v0.28.0
 .. _0.29.0: https://github.com/mozillazg/python-pinyin/compare/v0.28.0...v0.29.0
 .. _0.30.0: https://github.com/mozillazg/python-pinyin/compare/v0.29.0...v0.30.0
 .. _0.30.1: https://github.com/mozillazg/python-pinyin/compare/v0.30.0...v0.30.1
 .. _0.31.0: https://github.com/mozillazg/python-pinyin/compare/v0.30.1...v0.31.0
 .. _0.32.0: https://github.com/mozillazg/python-pinyin/compare/v0.31.0...v0.32.0
 .. _0.33.0: https://github.com/mozillazg/python-pinyin/compare/v0.32.0...v0.33.0
 .. _0.33.1: https://github.com/mozillazg/python-pinyin/compare/v0.33.0...v0.33.1
 .. _0.33.2: https://github.com/mozillazg/python-pinyin/compare/v0.33.1...v0.33.2
 .. _0.34.0: https://github.com/mozillazg/python-pinyin/compare/v0.33.2...v0.34.0
 .. _0.34.1: https://github.com/mozillazg/python-pinyin/compare/v0.34.0...v0.34.1
 .. _0.35.0: https://github.com/mozillazg/python-pinyin/compare/v0.34.1...v0.35.0
 .. _0.35.1: https://github.com/mozillazg/python-pinyin/compare/v0.35.0...v0.35.1
 .. _0.35.2: https://github.com/mozillazg/python-pinyin/compare/v0.35.1...v0.35.2
 .. _0.35.3: https://github.com/mozillazg/python-pinyin/compare/v0.35.2...v0.35.3
 .. _0.35.4: https://github.com/mozillazg/python-pinyin/compare/v0.35.3...v0.35.4
 .. _0.36.0: https://github.com/mozillazg/python-pinyin/compare/v0.35.4...v0.36.0
 .. _0.37.0: https://github.com/mozillazg/python-pinyin/compare/v0.36.0...v0.37.0
 .. _0.38.0: https://github.com/mozillazg/python-pinyin/compare/v0.37.0...v0.38.0
 .. _0.38.1: https://github.com/mozillazg/python-pinyin/compare/v0.38.0...v0.38.1
 .. _0.39.0: https://github.com/mozillazg/python-pinyin/compare/v0.38.1...v0.39.0
 .. _0.39.1: https://github.com/mozillazg/python-pinyin/compare/v0.39.0...v0.39.1
 .. _0.40.0: https://github.com/mozillazg/python-pinyin/compare/v0.39.1...v0.40.0
 .. _0.41.0: https://github.com/mozillazg/python-pinyin/compare/v0.40.0...v0.41.0
--- a/third_party/python-pinyin/CODE_OF_CONDUCT.md
+++ b/third_party/python-pinyin/CODE_OF_CONDUCT.md
@ -0,0 +1,46 @@
 # Contributor Covenant Code of Conduct
 ## Our Pledge
 In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 ## Our Standards
 Examples of behavior that contributes to creating a positive environment include:
 * Using welcoming and inclusive language
 * Being respectful of differing viewpoints and experiences
 * Gracefully accepting constructive criticism
 * Focusing on what is best for the community
 * Showing empathy towards other community members
 Examples of unacceptable behavior by participants include:
 * The use of sexualized language or imagery and unwelcome sexual attention or advances
 * Trolling, insulting/derogatory comments, and personal or political attacks
 * Public or private harassment
 * Publishing others' private information, such as a physical or electronic address, without explicit permission
 * Other conduct which could reasonably be considered inappropriate in a professional setting
 ## Our Responsibilities
 Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
 Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
 ## Scope
 This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
 ## Enforcement
 Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at mozillazg101@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
 Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
 ## Attribution
 This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
 [homepage]: http://contributor-covenant.org
 [version]: http://contributor-covenant.org/version/1/4/
--- a/third_party/python-pinyin/LICENSE.txt
+++ b/third_party/python-pinyin/LICENSE.txt
@ -0,0 +1,20 @@
 The MIT License (MIT)
 Copyright (c) 2016 mozillazg, 闲耘 <hotoo.cn@gmail.com>
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 the Software, and to permit persons to whom the Software is furnished to do so,
 subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/third_party/python-pinyin/MANIFEST.in
+++ b/third_party/python-pinyin/MANIFEST.in
@ -0,0 +1,2 @@
 include README.rst LICENSE.txt CHANGELOG.rst
 recursive-include pypinyin *.pyi py.typed
--- a/third_party/python-pinyin/Makefile
+++ b/third_party/python-pinyin/Makefile
@ -0,0 +1,97 @@
 # Developer task runner for pypinyin. Run `make help` for a summary of the
 # main targets. Recursive invocations use $(MAKE) so that command-line flags
 # (-n, -j, -k, ...) propagate to the sub-make.
 .PHONY: help
 help:
 	@echo "test             run test"
 	@echo "publish          publish to PyPI"
 	@echo "publish_test     publish to TestPyPI"
 	@echo "docs_html        make html docs"
 	@echo "docs_serve       serve docs"
 	@echo "gen_data         gen pinyin data"
 	@echo "gen_pinyin_dict  gen single hanzi pinyin dict"
 	@echo "gen_phrases_dict gen phrase hanzi pinyin dict"
 	@echo "lint             run lint"
 	@echo "clean - remove all build, test, coverage and Python artifacts"
 	@echo "clean-build - remove build artifacts"
 	@echo "clean-pyc - remove Python file artifacts"
 	@echo "clean-test - remove test and coverage artifacts"
 # Lint first, then run the test suite.
 .PHONY: test
 test: lint
 	@echo "run test"
 	$(MAKE) testonly
 # Run py.test over tests/ and pypinyin/ in random order with coverage,
 # without running the linters.
 .PHONY: testonly
 testonly:
 	py.test --random-order --cov pypinyin tests/ pypinyin/
 # Build an sdist and a wheel from a clean tree and upload them with twine.
 .PHONY: publish
 publish: clean
 	@echo "publish to pypi"
 	python setup.py sdist
 	python setup.py bdist_wheel
 	twine upload dist/*
 # Same as `publish`, but uploads to the twine repository named "test"
 # (presumably a TestPyPI entry in ~/.pypirc -- verify locally).
 .PHONY: publish_test
 publish_test: clean
 	@echo "publish to test pypi"
 	python setup.py sdist
 	python setup.py bdist_wheel
 	twine upload --repository test dist/*
 # Build the HTML documentation via docs/Makefile.
 .PHONY: docs_html
 docs_html:
 	$(MAKE) -C docs html
 # Build the HTML documentation, then serve it with Python's http.server.
 .PHONY: docs_serve
 docs_serve: docs_html
 	cd docs/_build/html && python -m http.server
 # Regenerate both the single-character and the phrase pinyin dictionaries.
 .PHONY: gen_data
 gen_data: gen_pinyin_dict gen_phrases_dict
 .PHONY: gen_pinyin_dict
 gen_pinyin_dict:
 	python gen_pinyin_dict.py pinyin-data/pinyin.txt pypinyin/pinyin_dict.py
 .PHONY: gen_phrases_dict
 gen_phrases_dict:
 	python gen_phrases_dict.py phrase-pinyin-data/pinyin.txt pypinyin/phrases_dict_large.py
 	python tidy_phrases_dict.py
 # Run the pre-commit hooks on every file, then type-check in strict mode.
 .PHONY: lint
 lint:
 	pre-commit run --all-files
 	mypy --strict pypinyin
 # Cleanup targets are phony so that a stray file named e.g. `clean` cannot
 # make them appear up to date.
 .PHONY: clean clean-build clean-pyc clean-test
 clean: clean-build clean-pyc clean-test
 clean-build:
 	rm -fr build/
 	rm -fr dist/
 	rm -fr .eggs/
 	find . -name '*.egg-info' -exec rm -fr {} +
 	find . -name '*.egg' -exec rm -f {} +
 clean-pyc:
 	find . -name '*.pyc' -exec rm -f {} +
 	find . -name '*.pyo' -exec rm -f {} +
 	find . -name '*~' -exec rm -f {} +
 	find . -name '__pycache__' -exec rm -fr {} +
 clean-test:
 	rm -fr .tox/
 	rm -f .coverage
 	rm -fr htmlcov/
 # Release workflow helpers (git branch handling and version bumps).
 .PHONY: rebase_master merge_dev bump_patch bump_minor start_next
 rebase_master:
 	git fetch origin && git rebase origin/master
 merge_dev:
 	git merge --no-ff origin/develop
 bump_patch:
 	bumpversion --verbose patch
 bump_minor:
 	bumpversion --verbose minor
 start_next:
 	git push && git push --tags && git checkout develop && git rebase master && git push
--- a/third_party/python-pinyin/README.md
+++ b/third_party/python-pinyin/README.md
@ -0,0 +1,8 @@
 # Modified from
 * [python-pinyin](https://github.com/mozillazg/python-pinyin.git)
 commit: 55e524aa1b7b8eec3d15c5306043c6cdd5938b03
 licence: MIT
 ## Features
 * only supports Python 3
 * `.pyi` stub files removed
--- a/third_party/python-pinyin/README.rst
+++ b/third_party/python-pinyin/README.rst
@ -0,0 +1,210 @@
 汉字拼音转换工具（Python 版）
 =============================
 |Build| |GitHubAction| |Coverage| |PyPI version| |DOI|
 将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。
 基于 `hotoo/pinyin <https://github.com/hotoo/pinyin>`__ 开发。
 * Documentation: http://pypinyin.rtfd.io/
 * GitHub: https://github.com/mozillazg/python-pinyin
 * License: MIT license
 * PyPI: https://pypi.org/project/pypinyin
 * Python version: 2.7, pypy, pypy3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
 .. contents::
 特性
 ----
 * 根据词组智能匹配最正确的拼音。
 * 支持多音字。
 * 简单的繁体支持, 注音支持。
 * 支持多种不同拼音/注音风格。
 安装
 ----
 .. code-block:: bash
    $ pip install pypinyin
 使用示例
 --------
 Python 3(Python 2 下把 ``'中心'`` 替换为 ``u'中心'`` 即可):
 .. code-block:: python
    >>> from pypinyin import pinyin, lazy_pinyin, Style
    >>> pinyin('中心')
    [['zhōng'], ['xīn']]
    >>> pinyin('中心', heteronym=True)  # 启用多音字模式
    [['zhōng', 'zhòng'], ['xīn']]
    >>> pinyin('中心', style=Style.FIRST_LETTER)  # 设置拼音风格
    [['z'], ['x']]
    >>> pinyin('中心', style=Style.TONE2, heteronym=True)
    [['zho1ng', 'zho4ng'], ['xi1n']]
    >>> pinyin('中心', style=Style.TONE3, heteronym=True)
    [['zhong1', 'zhong4'], ['xin1']]
    >>> pinyin('中心', style=Style.BOPOMOFO)  # 注音风格
    [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    >>> lazy_pinyin('中心')  # 不考虑多音字的情况
    ['zhong', 'xin']
    >>> lazy_pinyin('战略', v_to_u=True)  # 不使用 v 表示 ü
    ['zhan', 'lüe']
    # 使用 5 标识轻声
    >>> lazy_pinyin('衣裳', style=Style.TONE3, neutral_tone_with_five=True)
    ['yi1', 'shang5']
 **注意事项** ：
 * 默认情况下拼音结果不会标明哪个韵母是轻声，轻声的韵母没有声调或数字标识（可以通过参数 ``neutral_tone_with_five=True`` 开启使用 ``5`` 标识轻声 ）。
 * 默认情况下无声调相关拼音风格下的结果会使用 ``v`` 表示 ``ü`` （可以通过参数 ``v_to_u=True`` 开启使用 ``ü`` 代替 ``v`` ）。
 * 默认情况下会原样输出没有拼音的字符（自定义处理没有拼音的字符的方法见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#handle-no-pinyin>`__ ）。
 命令行工具：
 .. code-block:: console
    $ pypinyin 音乐
    yīn yuè
    $ pypinyin -h
 文档
 --------
 详细文档请访问：http://pypinyin.rtfd.io/ 。
 项目代码开发方面的问题可以看看 `开发文档`_ 。
 FAQ
 ---------
 词语中的多音字拼音有误？
 +++++++++++++++++++++++++++++
 目前是通过词组拼音库的方式来解决多音字问题的。如果出现拼音有误的情况，
 可以自定义词组拼音来调整词语中的拼音：
 .. code-block:: python
    >>> from pypinyin import Style, pinyin, load_phrases_dict
    >>> pinyin('步履蹒跚')
    [['bù'], ['lǚ'], ['mán'], ['shān']]
    >>> load_phrases_dict({'步履蹒跚': [['bù'], ['lǚ'], ['pán'], ['shān']]})
    >>> pinyin('步履蹒跚')
    [['bù'], ['lǚ'], ['pán'], ['shān']]
 详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#custom-dict>`__ 。
 如果是分词导致的拼音有误的话，可以先使用其他的分词模块对数据进行分词处理，
 然后将分词后的词组结果列表作为函数的参数即可:
 .. code-block:: python
    >>> # 使用其他分词模块分词，比如 jieba 之类，
    >>> #或者基于 phrases_dict.py 里的词语数据使用其他分词算法分词
    >>> words = list(jieba.cut('每股24.67美元的确定性协议'))
    >>> pinyin(words)
 为什么没有 y, w, yu 几个声母？
 ++++++++++++++++++++++++++++++++++++++++++++
 .. code-block:: python
    >>> from pypinyin import Style, pinyin
    >>> pinyin('下雨天', style=Style.INITIALS)
    [['x'], [''], ['t']]
 因为根据 `《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ，
 y，w，ü (yu) 都不是声母。
    声母风格（INITIALS）下，“雨”、“我”、“圆”等汉字返回空字符串，因为根据
    `《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ，
    y，w，ü (yu) 都不是声母，在某些特定韵母无声母时，才加上 y 或 w，而 ü 也有其特定规则。    —— @hotoo
    **如果你觉得这个给你带来了麻烦，那么也请小心一些无声母的汉字（如“啊”、“饿”、“按”、“昂”等）。
    这时候你也许需要的是首字母风格（FIRST_LETTER）**。    —— @hotoo
    参考: `hotoo/pinyin#57 <https://github.com/hotoo/pinyin/issues/57>`__,
    `#22 <https://github.com/mozillazg/python-pinyin/pull/22>`__,
    `#27 <https://github.com/mozillazg/python-pinyin/issues/27>`__,
    `#44 <https://github.com/mozillazg/python-pinyin/issues/44>`__
 如果觉得这个行为不是你想要的，就是想把 y 当成声母的话，可以指定 ``strict=False`` ，
 这个可能会符合你的预期：
 .. code-block:: python
    >>> from pypinyin import Style, pinyin
    >>> pinyin('下雨天', style=Style.INITIALS)
    [['x'], [''], ['t']]
    >>> pinyin('下雨天', style=Style.INITIALS, strict=False)
    [['x'], ['y'], ['t']]
 详见 `strict 参数的影响`_ 。
 如何减少内存占用
 ++++++++++++++++++++
 如果对拼音的准确性不是特别在意的话，可以通过设置环境变量 ``PYPINYIN_NO_PHRASES``
 和 ``PYPINYIN_NO_DICT_COPY`` 来节省内存。
 详见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/faq.html#no-phrases>`__
 更多 FAQ 详见文档中的
 `FAQ <https://pypinyin.readthedocs.io/zh_CN/master/faq.html>`__ 部分。
 .. _#13 : https://github.com/mozillazg/python-pinyin/issues/113
 .. _strict 参数的影响: https://pypinyin.readthedocs.io/zh_CN/master/usage.html#strict
 拼音数据
 ---------
 * 单个汉字的拼音使用 `pinyin-data`_ 的数据
 * 词组的拼音使用 `phrase-pinyin-data`_ 的数据
 * 声母和韵母使用 `《汉语拼音方案》 <http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ 的数据
 Related Projects
 -----------------
 * `hotoo/pinyin`__: 汉字拼音转换工具 Node.js/JavaScript 版。
 * `mozillazg/go-pinyin`__: 汉字拼音转换工具 Go 版。
 * `mozillazg/rust-pinyin`__: 汉字拼音转换工具 Rust 版。
 __ https://github.com/hotoo/pinyin
 __ https://github.com/mozillazg/go-pinyin
 __ https://github.com/mozillazg/rust-pinyin
 .. |Build| image:: https://img.shields.io/circleci/project/github/mozillazg/python-pinyin/master.svg
   :target: https://circleci.com/gh/mozillazg/python-pinyin
 .. |GitHubAction| image:: https://github.com/mozillazg/python-pinyin/workflows/CI/badge.svg
   :target: https://github.com/mozillazg/python-pinyin/actions
 .. |Coverage| image:: https://img.shields.io/codecov/c/github/mozillazg/python-pinyin/master.svg
   :target: https://codecov.io/gh/mozillazg/python-pinyin
 .. |PyPI version| image:: https://img.shields.io/pypi/v/pypinyin.svg
   :target: https://pypi.org/project/pypinyin/
 .. |DOI| image:: https://zenodo.org/badge/12830126.svg
   :target: https://zenodo.org/badge/latestdoi/12830126
 .. _Russian translation: https://github.com/mozillazg/python-pinyin/blob/master/README_ru.rst
 .. _pinyin-data: https://github.com/mozillazg/pinyin-data
 .. _phrase-pinyin-data: https://github.com/mozillazg/phrase-pinyin-data
 .. _开发文档: https://pypinyin.readthedocs.io/zh_CN/develop/develop.html
--- a/third_party/python-pinyin/docs/CHANGELOG.rst
+++ b/third_party/python-pinyin/docs/CHANGELOG.rst
@ -0,0 +1 @@
 ../CHANGELOG.rst
--- a/third_party/python-pinyin/docs/Makefile
+++ b/third_party/python-pinyin/docs/Makefile
@ -0,0 +1,177 @@
 # Makefile for Sphinx documentation
 #
 # Each target below runs sphinx-build with one builder (-b <name>) and writes
 # its output to $(BUILDDIR)/<name>. Run `make help` for the list of targets.
 #
 # You can set these variables from the command line.
 SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = _build
 # User-friendly check for sphinx-build
 ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 endif
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
 ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 # the i18n builder cannot share the environment and doctrees with the others
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 # Every target defined in this file is phony: none of them produces a file
 # with the target's own name.
 .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
 	@echo "  dirhtml    to make HTML files named index.html in directories"
 	@echo "  singlehtml to make a single large HTML file"
 	@echo "  pickle     to make pickle files"
 	@echo "  json       to make JSON files"
 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 	@echo "  qthelp     to make HTML files and a qthelp project"
 	@echo "  devhelp    to make HTML files and a Devhelp project"
 	@echo "  epub       to make an epub"
 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 	@echo "  text       to make text files"
 	@echo "  man        to make manual pages"
 	@echo "  texinfo    to make Texinfo files"
 	@echo "  info       to make Texinfo files and run them through makeinfo"
 	@echo "  gettext    to make PO message catalogs"
 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 	@echo "  xml        to make Docutils-native XML files"
 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 	@echo "  linkcheck  to check all external links for integrity"
 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 clean:
 	rm -rf $(BUILDDIR)/*
 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 dirhtml:
 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 singlehtml:
 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 	@echo
 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 pickle:
 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 	@echo
 	@echo "Build finished; now you can process the pickle files."
 json:
 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 	@echo
 	@echo "Build finished; now you can process the JSON files."
 htmlhelp:
 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 	@echo
 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 # NOTE(review): the .qhcp/.qhc base name below assumes Sphinx names the
 # qthelp output after the project ("pypinyin") -- confirm against conf.py.
 qthelp:
 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 	@echo
 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pypinyin.qhcp"
 	@echo "To view the help file:"
 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pypinyin.qhc"
 devhelp:
 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 	@echo
 	@echo "Build finished."
 	@echo "To view the help file:"
 	@echo "# mkdir -p $$HOME/.local/share/devhelp/pypinyin"
 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pypinyin"
 	@echo "# devhelp"
 epub:
 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 	@echo
 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 latex:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo
 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
 	      "(use \`make latexpdf' here to do that automatically)."
 latexpdf:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through pdflatex..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 latexpdfja:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through platex and dvipdfmx..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 text:
 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
 	@echo
 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
 man:
 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
 	@echo
 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
 texinfo:
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo
 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
 	@echo "Run \`make' in that directory to run these through makeinfo" \
 	      "(use \`make info' here to do that automatically)."
 # Uses $(MAKE), like latexpdf above, so flags such as -n and -j reach the
 # sub-make.
 info:
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo "Running Texinfo files through makeinfo..."
 	$(MAKE) -C $(BUILDDIR)/texinfo info
 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
 gettext:
 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
 	@echo
 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
 changes:
 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
 	@echo
 	@echo "The overview file is in $(BUILDDIR)/changes."
 linkcheck:
 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
 	@echo
 	@echo "Link check complete; look for any errors in the above output " \
 	      "or in $(BUILDDIR)/linkcheck/output.txt."
 doctest:
 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
 	@echo "Testing of doctests in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/doctest/output.txt."
 xml:
 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
 	@echo
 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
 pseudoxml:
 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
 	@echo
 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
--- a/third_party/python-pinyin/docs/api.rst
+++ b/third_party/python-pinyin/docs/api.rst
@ -0,0 +1,43 @@
 API
 ====
 .. _style:
 拼音风格
 -----------
 .. autoclass:: pypinyin.Style
   :members:
   :undoc-members:
   :member-order: bysource
 .. _core_api:
 核心 API
 -------------
 .. autofunction:: pypinyin.pinyin
 .. autofunction:: pypinyin.lazy_pinyin
 .. autofunction:: pypinyin.load_single_dict
 .. autofunction:: pypinyin.load_phrases_dict
 .. autofunction:: pypinyin.slug
 .. _convert_style:
 注册新的拼音风格
 -----------------
 .. autofunction:: pypinyin.style.register
 .. _seg:
 .. _#27: https://github.com/mozillazg/python-pinyin/issues/27
--- a/third_party/python-pinyin/docs/conf.py
+++ b/third_party/python-pinyin/docs/conf.py
@ -0,0 +1,309 @@
 # -*- coding: utf-8 -*-
 #
 # pypinyin documentation build configuration file, created by
 # sphinx-quickstart on Fri Sep 06 22:22:13 2013.
 #
 # This file is execfile()d with the current directory set to its containing dir.
 #
 # Note that not all possible configuration values are present in this
 # autogenerated file.
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 import sys, os
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #sys.path.insert(0, os.path.abspath('.'))
 sys.path.insert(0, os.path.abspath('.'))
 sys.path.insert(0, os.path.abspath('..'))
 # sys.path.insert(0, os.path.abspath('../pypinyin'))
 # -- General configuration -----------------------------------------------------
 # If your documentation needs a minimal Sphinx version, state it here.
 #needs_sphinx = '1.0'
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.extlinks',
    'sphinx.ext.todo',
 ]
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 # The suffix of source filenames.
 source_suffix = '.rst'
 # The encoding of source files.
 #source_encoding = 'utf-8-sig'
 # The master toctree document.
 master_doc = 'index'
 import pypinyin
 # General information about the project.
 project = pypinyin.__title__
 copyright = pypinyin.__copyright__
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
 version = pypinyin.__version__
 # The full version, including alpha/beta/rc tags.
 release = pypinyin.__version__
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 language = 'zh_CN'
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
 #today = ''
 # Else, today_fmt is used as the format for a strftime call.
 #today_fmt = '%B %d, %Y'
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 exclude_patterns = ['_build']
 # The reST default role (used for this markup: `text`) to use for all documents.
 #default_role = None
 # If true, '()' will be appended to :func: etc. cross-reference text.
 #add_function_parentheses = True
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
 #add_module_names = True
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
 #show_authors = False
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
 # A list of ignored prefixes for module index sorting.
 #modindex_common_prefix = []
 # If true, keep warnings as "system message" paragraphs in the built documents.
 #keep_warnings = False
 # -- Options for HTML output ---------------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 html_theme = 'nature'
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
 #html_theme_options = {}
 # Add any paths that contain custom themes here, relative to this directory.
 #html_theme_path = []
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
 #html_title = None
 # A shorter title for the navigation bar.  Default is the same as html_title.
 #html_short_title = None
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #html_logo = None
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
 #html_favicon = None
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
 #html_last_updated_fmt = '%b %d, %Y'
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
 #html_use_smartypants = True
 # Custom sidebar templates, maps document names to template names.
 #html_sidebars = {}
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
 #html_additional_pages = {}
 # If false, no module index is generated.
 #html_domain_indices = True
 # If false, no index is generated.
 #html_use_index = True
 # If true, the index is split into individual pages for each letter.
 #html_split_index = False
 # If true, links to the reST sources are added to the pages.
 #html_show_sourcelink = True
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #html_show_sphinx = True
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
 #html_show_copyright = True
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
 #html_use_opensearch = ''
 # This is the file name suffix for HTML files (e.g. ".xhtml").
 #html_file_suffix = None
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'pypinyindoc'
 # -- Options for LaTeX output --------------------------------------------------
 latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
 }
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
    ('index', 'pypinyin.tex', 'pypinyin Documentation', 'mozillazg', 'manual'),
 ]
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
 #latex_logo = None
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
 #latex_use_parts = False
 # If true, show page references after internal links.
 #latex_show_pagerefs = False
 # If true, show URL addresses after external links.
 #latex_show_urls = False
 # Documents to append as an appendix to all manuals.
 #latex_appendices = []
 # If false, no module index is generated.
 #latex_domain_indices = True
 # -- Options for manual page output --------------------------------------------
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [('index', 'pypinyin', 'pypinyin Documentation', ['mozillazg'], 1)]
 # If true, show URL addresses after external links.
 #man_show_urls = False
 # -- Options for Texinfo output ------------------------------------------------
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
    ('index', 'pypinyin', 'pypinyin Documentation', 'mozillazg', 'pypinyin',
     'One line description of project.', 'Miscellaneous'),
 ]
 # Documents to append as an appendix to all manuals.
 #texinfo_appendices = []
 # If false, no module index is generated.
 #texinfo_domain_indices = True
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
 #texinfo_show_urls = 'footnote'
 # If true, do not generate a @detailmenu in the "Top" node's menu.
 #texinfo_no_detailmenu = False
 # -- Options for Epub output ---------------------------------------------------
 # Bibliographic Dublin Core info.
 epub_title = 'pypinyin'
 epub_author = 'mozillazg'
 epub_publisher = 'mozillazg'
 epub_copyright = '2016 mozillazg'
 # The language of the text. It defaults to the language option
 # or en if the language is not set.
 #epub_language = ''
 # The scheme of the identifier. Typical schemes are ISBN or URL.
 #epub_scheme = ''
 # The unique identifier of the text. This can be a ISBN number
 # or the project homepage.
 #epub_identifier = ''
 # A unique identification for the text.
 #epub_uid = ''
 # A tuple containing the cover image and cover page html template filenames.
 #epub_cover = ()
 # A sequence of (type, uri, title) tuples for the guide element of content.opf.
 #epub_guide = ()
 # HTML files that should be inserted before the pages created by sphinx.
 # The format is a list of tuples containing the path and title.
 #epub_pre_files = []
 # HTML files that should be inserted after the pages created by sphinx.
 # The format is a list of tuples containing the path and title.
 #epub_post_files = []
 # A list of files that should not be packed into the epub file.
 #epub_exclude_files = []
 # The depth of the table of contents in toc.ncx.
 #epub_tocdepth = 3
 # Allow duplicate toc entries.
 #epub_tocdup = True
 # Fix unsupported image types using the PIL.
 #epub_fix_images = False
 # Scale large images.
 #epub_max_image_width = 0
 # If 'no', URL addresses will not be shown.
 #epub_show_urls = 'inline'
 # If false, no index is generated.
 #epub_use_index = True
 # Example configuration for intersphinx: refer to the Python standard library.
 # Written in the named ``{name: (target, inventory)}`` form; the older
 # ``{uri: inventory}`` form is no longer accepted by current Sphinx releases.
 # NOTE: this only takes effect once 'sphinx.ext.intersphinx' is listed in
 # ``extensions``.
 intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
--- a/third_party/python-pinyin/docs/contrib.rst
+++ b/third_party/python-pinyin/docs/contrib.rst
@ -0,0 +1,38 @@
 .. _contrib:
 contrib
 ========
 .. _tone_convert:
 拼音转换
 --------
 .. autofunction:: pypinyin.contrib.tone_convert.to_normal
 .. autofunction:: pypinyin.contrib.tone_convert.to_tone
 .. autofunction:: pypinyin.contrib.tone_convert.to_tone2
 .. autofunction:: pypinyin.contrib.tone_convert.to_tone3
 .. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal
 .. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2
 .. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone3
 .. autofunction:: pypinyin.contrib.tone_convert.tone2_to_normal
 .. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone
 .. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone3
 .. autofunction:: pypinyin.contrib.tone_convert.tone3_to_normal
 .. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone
 .. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone2
 V2UMixin
 ---------
 .. autoclass:: pypinyin.contrib.uv.V2UMixin
 NeutralToneWith5Mixin
 -----------------------
 .. autoclass:: pypinyin.contrib.neutral_tone.NeutralToneWith5Mixin
--- a/third_party/python-pinyin/docs/develop.rst
+++ b/third_party/python-pinyin/docs/develop.rst
@ -0,0 +1,127 @@
 .. _develop:
 开发文档
 ========
 准备开发环境
 -------------
 ::
    $ virtualenv venv
    $ . venv/bin/activate
    (venv) $ pip install -U -r requirements_dev.txt
    (venv) $ pip install -e .
    (venv) $ pre-commit install
 TODO: 把这个步骤放到一个 make 命令中。
 .. note::
    推荐在 Python 3.6+ 环境下进行开发。
 测试
 ------
 可以通过 ``make test`` 命令在当前 Python 版本下运行单元测试: ::
    (venv) $ make test
 可以通过 ``tox`` 测试程序在多个 Python 版本下的单元测试结果（这一步也可以在提 PR 的时候通过 CI 来运行）: ::
    (venv) $ tox
 .. note::
    如果对测试有疑问或者有些测试实在无法通过，可以先提交 PR 大家一起来看看。
 目录结构
 --------
 关键文件和目录 ::
    $ tree -L 2
    .
    ├── CHANGELOG.rst        # 更新日志
    ├── Makefile
    ├── README.rst
    ├── docs                 # 文档
    ├── gen_phrases_dict.py  # 生成 phrases_dict.py 的脚本
    ├── gen_pinyin_dict.py   # 生成 pinyin_dict.py 的脚本
    ├── phrase-pinyin-data   # gen_phrases_dict.py 使用的数据源
    ├── pinyin-data          # gen_pinyin_dict.py 使用的数据源
    ├── pypinyin             # pypinyin 模块源代码
    │   ├── __init__.py
    │   ├── __main__.py      # 命令行程序的入口
    │   ├── compat.py
    │   ├── constants.py
    │   ├── contrib          # 目前包含了一个分词模块
    │   ├── core.py          # pypinyin 模块的核心逻辑
    │   ├── phonetic_symbol.py
    │   ├── phrases_dict.py   # 词组的拼音数据，由 gen_phrases_dict.py 生成
    │   ├── pinyin_dict.py    # 单个汉字的拼音数据，由 gen_pinyin_dict.py 生成
    │   ├── runner.py         # 命令行程序的主逻辑
    │   ├── standard.py       # strict=True 时的拼音转换逻辑
    │   ├── style             # 各种拼音风格在 style 目录下实现
    │   ├── utils.py
    ├── pytest.ini
    ├── requirements_dev.txt
    ├── setup.cfg
    ├── setup.py
    ├── tests
    ├── tox.ini
 实现思路/主逻辑
 ----------------
 主逻辑:
 1. 对输入的字符串按是否是汉字进行分词（``seg``）
 2. 对分词结果的每个词条进行获取词条拼音的逻辑
   1. 检查词条是否是汉字，不是汉字则走处理没有拼音数据的逻辑（``handle_nopinyin``）
   2. 检查词条是否在 ``PHRASES_DICT`` 中，如果在直接取 ``PHRASES_DICT`` 中这个词条的拼音数据
   3. 如果词条不在 ``PHRASES_DICT`` 中，遍历词条包含的字符，每个字符进行 ``single_pinyin`` 逻辑处理
 3. ``single_pinyin`` 的逻辑：
   1. 检查字符是否在 ``PINYIN_DICT`` 中，如果在的话，取 ``PINYIN_DICT`` 中这个字符的拼音数据
   2. 如果不在的话，走 ``handle_nopinyin`` 逻辑
 4. ``handle_nopinyin`` 逻辑: 根据 ``errors`` 参数的值返回不同的结果。
 5. 对上面的步骤获得的拼音数据按指定的拼音风格进行转换。
 * ``PHRASES_DICT``：词组拼音数据
 * ``PINYIN_DICT``: 单个汉字的拼音数据
 TODO: 画流程图
 发布新版本
 ----------
 1. 切换到 develop 分支
 2. rebase master 分支的代码: ``make rebase_master``
 3. 通过 ``make gen_data`` 生成最新的数据文件
 4. 通过 ``make test`` 跑测试
 5. 更新 CHANGELOG
 6. 提交代码
 7. 检查 develop 分支的 CI 结果
 8. 切换到 master 分支
 9. 合并 develop 分支代码: ``make merge_dev``
 10. 更新版本号:
    * 大改动(1.1.x -> 1.2.x)：``make bump_minor``
    * 小改动(1.1.1 -> 1.1.2)：``make bump_patch``
 11. 发布到 test pypi: ``make publish_test``
 12. 安装和测试发布到 test pypi 上的版本
 13. 发布到 pypi: ``make publish``
 14. 安装和测试发布到 pypi 上的版本
 15. 提交 master 分支代码，更新 develop 分支代码，进入下一个开发阶段：``make start_next``
--- a/third_party/python-pinyin/docs/faq.rst
+++ b/third_party/python-pinyin/docs/faq.rst
@ -0,0 +1,61 @@
 FAQ
 -----
 .. _no_phrases:
 如何禁用内置的“词组拼音库”
 ++++++++++++++++++++++++++++++++
 设置环境变量 ``PYPINYIN_NO_PHRASES=true`` 即可
 .. _no_dict_copy:
 如何禁用默认的“拼音库”copy 操作
 +++++++++++++++++++++++++++++++++++++++++++
 设置环境变量 ``PYPINYIN_NO_DICT_COPY=true`` 即可.
 副作用: 用户的自定义拼音库出现问题时, 无法回退到自带的拼音库.
 .. _limit_memory:
 如何减少内存占用
 +++++++++++++++++++++
 如果对拼音正确性不在意的话，可以按照上面所说的设置环境变量 ``PYPINYIN_NO_PHRASES``
 和 ``PYPINYIN_NO_DICT_COPY`` 详见 `#13`_
 .. _initials_problem:
 ``INITIALS`` 声母风格下，以 ``y``, ``w``, ``yu`` 开头的汉字返回空字符串
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 比如：
  .. code:: python
      pinyin('火影忍者', style=Style.INITIALS)
      [['h'], [''], ['r'], ['zh']]
 因为 ``y``, ``w``, ``yu`` 都不是声母。参考:
 `hotoo/pinyin#57 <https://github.com/hotoo/pinyin/issues/57>`__,
 `#22 <https://github.com/mozillazg/python-pinyin/pull/22>`__,
 `#27 <https://github.com/mozillazg/python-pinyin/issues/27>`__,
 `#44 <https://github.com/mozillazg/python-pinyin/issues/44>`__
  声母风格（INITIALS）下，“雨”、“我”、“圆”等汉字返回空字符串，因为根据
  `《汉语拼音方案》 <http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html>`__ ，
  y，w，ü (yu) 都不是声母，在某些特定韵母无声母时，才加上 y 或 w，而 ü 也有其特定规则。
  如果你觉得这个给你带来了麻烦，那么也请小心一些无声母的汉字（如“啊”、“饿”、“按”、“昂”等）。
  这时候你也许需要的是首字母风格（FIRST_LETTER）。    —— @hotoo
 如果觉得这个行为不是你想要的，就是想把 y 当成声母的话，可以指定 ``strict=False`` ， 这个可能会符合你的预期。详见 `strict 参数的影响`_
 .. _#13: https://github.com/mozillazg/python-pinyin/issues/113
 .. _strict 参数的影响: https://pypinyin.readthedocs.io/zh_CN/master/usage.html#strict
--- a/third_party/python-pinyin/docs/index.rst
+++ b/third_party/python-pinyin/docs/index.rst
@ -0,0 +1,64 @@
 .. pypinyin documentation master file, created by
   sphinx-quickstart on Fri Sep 06 22:22:13 2013.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.
 汉字拼音转换工具（Python 版）
 =============================
 |Build| |Coverage| |PyPI version|
 将汉字转为拼音。可以用于汉字注音、排序、检索(`Russian translation`_) 。
 基于 `hotoo/pinyin <https://github.com/hotoo/pinyin>`__ 开发。
 * Documentation: http://pypinyin.rtfd.io
 * GitHub: https://github.com/mozillazg/python-pinyin
 * License: MIT license
 * PyPI: https://pypi.org/project/pypinyin
 * Python version: 2.7, pypy, pypy3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
 特性
 ----
 * 根据词组智能匹配最正确的拼音。
 * 支持多音字。
 * 简单的繁体支持, 注音支持。
 * 支持多种不同拼音风格。
 .. |Build| image:: https://img.shields.io/circleci/project/github/mozillazg/python-pinyin/master.svg
   :target: https://circleci.com/gh/mozillazg/python-pinyin
 .. |Coverage| image:: https://img.shields.io/codecov/c/github/mozillazg/python-pinyin/master.svg
   :target: https://codecov.io/gh/mozillazg/python-pinyin
 .. |PyPI version| image:: https://img.shields.io/pypi/v/pypinyin.svg
   :target: https://pypi.org/project/pypinyin/
 .. |PyPI downloads| image:: https://img.shields.io/pypi/dm/pypinyin.svg
   :target: https://pypi.org/project/pypinyin/
 .. _Russian translation: https://github.com/mozillazg/python-pinyin/blob/master/README_ru.rst
 Contents
 --------
 .. toctree::
    :maxdepth: 4
    installation
    usage
    api
    contrib
    develop
    faq
    related
    CHANGELOG
 Indices and tables
 ------------------
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
--- a/third_party/python-pinyin/docs/installation.rst
+++ b/third_party/python-pinyin/docs/installation.rst
@ -0,0 +1,20 @@
 安装
 ======
 可以使用 pip 进行安装:
 .. code-block:: bash
    $ pip install pypinyin
 easy_install 安装:
 .. code-block:: bash
    $ easy_install pypinyin
 源码安装:
 .. code-block:: bash
    $ python setup.py install
--- a/third_party/python-pinyin/docs/make.bat
+++ b/third_party/python-pinyin/docs/make.bat
@ -0,0 +1,242 @@
@ECHO OFF
 REM Command file for Sphinx documentation
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 set BUILDDIR=_build
 set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 set I18NSPHINXOPTS=%SPHINXOPTS% .
 if NOT "%PAPER%" == "" (
 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 )
 if "%1" == "" goto help
 if "%1" == "help" (
 	:help
 	echo.Please use `make ^<target^>` where ^<target^> is one of
 	echo.  html       to make standalone HTML files
 	echo.  dirhtml    to make HTML files named index.html in directories
 	echo.  singlehtml to make a single large HTML file
 	echo.  pickle     to make pickle files
 	echo.  json       to make JSON files
 	echo.  htmlhelp   to make HTML files and a HTML help project
 	echo.  qthelp     to make HTML files and a qthelp project
 	echo.  devhelp    to make HTML files and a Devhelp project
 	echo.  epub       to make an epub
 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 	echo.  text       to make text files
 	echo.  man        to make manual pages
 	echo.  texinfo    to make Texinfo files
 	echo.  gettext    to make PO message catalogs
 	echo.  changes    to make an overview over all changed/added/deprecated items
 	echo.  xml        to make Docutils-native XML files
 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 	echo.  linkcheck  to check all external links for integrity
 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 	goto end
 )
 if "%1" == "clean" (
 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 	del /q /s %BUILDDIR%\*
 	goto end
 )
 %SPHINXBUILD% 2> nul
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 if "%1" == "html" (
 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 	goto end
 )
 if "%1" == "dirhtml" (
 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 	goto end
 )
 if "%1" == "singlehtml" (
 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 	goto end
 )
 if "%1" == "pickle" (
 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can process the pickle files.
 	goto end
 )
 if "%1" == "json" (
 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can process the JSON files.
 	goto end
 )
 if "%1" == "htmlhelp" (
 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can run HTML Help Workshop with the ^
 .hhp project file in %BUILDDIR%/htmlhelp.
 	goto end
 )
 REM qthelp: build HTML plus a Qt help project, then print the follow-up
 REM commands. Sphinx names the generated .qhcp/.qhc files after the qthelp
 REM basename, which defaults to the project name (assumed "pypinyin" here;
 REM confirm against pypinyin.__title__ / conf.py).
 if "%1" == "qthelp" (
 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
 .qhcp project file in %BUILDDIR%/qthelp, like this:
 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pypinyin.qhcp
 	echo.To view the help file:
 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pypinyin.qhc
 	goto end
 )
 if "%1" == "devhelp" (
 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished.
 	goto end
 )
 if "%1" == "epub" (
 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
 	goto end
 )
 if "%1" == "latex" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
 	goto end
 )
 REM latexpdf: build the LaTeX sources, then run "make all-pdf" inside the
 REM LaTeX output directory to produce the PDF files.
 if "%1" == "latexpdf" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	REM Stop here if sphinx-build failed, like the other targets do.
 	if errorlevel 1 exit /b 1
 	cd %BUILDDIR%/latex
 	make all-pdf
 	REM Go back to the directory containing this script. The previous
 	REM "cd %BUILDDIR%/.." was resolved relative to %BUILDDIR%/latex and
 	REM therefore did not return to the documentation directory.
 	cd %~dp0
 	echo.
 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
 	goto end
 )
 REM latexpdfja: build the LaTeX sources, then run "make all-pdf-ja" inside
 REM the LaTeX output directory to produce the PDF files.
 if "%1" == "latexpdfja" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	REM Stop here if sphinx-build failed, like the other targets do.
 	if errorlevel 1 exit /b 1
 	cd %BUILDDIR%/latex
 	make all-pdf-ja
 	REM Go back to the directory containing this script. The previous
 	REM "cd %BUILDDIR%/.." was resolved relative to %BUILDDIR%/latex and
 	REM therefore did not return to the documentation directory.
 	cd %~dp0
 	echo.
 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
 	goto end
 )
 if "%1" == "text" (
 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The text files are in %BUILDDIR%/text.
 	goto end
 )
 if "%1" == "man" (
 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
 	goto end
 )
 if "%1" == "texinfo" (
 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
 	goto end
 )
 if "%1" == "gettext" (
 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
 	goto end
 )
 if "%1" == "changes" (
 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.The overview file is in %BUILDDIR%/changes.
 	goto end
 )
 if "%1" == "linkcheck" (
 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Link check complete; look for any errors in the above output ^
 or in %BUILDDIR%/linkcheck/output.txt.
 	goto end
 )
 if "%1" == "doctest" (
 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Testing of doctests in the sources finished, look at the ^
 results in %BUILDDIR%/doctest/output.txt.
 	goto end
 )
 if "%1" == "xml" (
 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
 	goto end
 )
 if "%1" == "pseudoxml" (
 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
 	goto end
 )
 :end
--- a/third_party/python-pinyin/docs/related.rst
+++ b/third_party/python-pinyin/docs/related.rst
@ -0,0 +1,10 @@
 Related Projects
 ===================
 * `hotoo/pinyin`__: 汉字拼音转换工具 Node.js/JavaScript 版。
 * `mozillazg/go-pinyin`__: 汉字拼音转换工具 Go 版。
 * `mozillazg/rust-pinyin`__: 汉字拼音转换工具 Rust 版。
 __ https://github.com/hotoo/pinyin
 __ https://github.com/mozillazg/go-pinyin
 __ https://github.com/mozillazg/rust-pinyin
--- a/third_party/python-pinyin/docs/usage.rst
+++ b/third_party/python-pinyin/docs/usage.rst
@ -0,0 +1,247 @@
 使用
 ======
 .. _example:
 示例
 -------
 .. code-block:: python
    >>> from pypinyin import pinyin, lazy_pinyin, Style
    >>> pinyin('中心')
    [['zhōng'], ['xīn']]
    >>> pinyin('中心', heteronym=True)  # 启用多音字模式
    [['zhōng', 'zhòng'], ['xīn']]
    >>> pinyin('中心', style=Style.FIRST_LETTER)  # 设置拼音风格
    [['z'], ['x']]
    >>> pinyin('中心', style=Style.TONE2, heteronym=True)
    [['zho1ng', 'zho4ng'], ['xi1n']]
    >>> lazy_pinyin('中心')  # 不考虑多音字的情况
    ['zhong', 'xin']
 **注意事项** ：
 * 默认情况下拼音结果不会标明哪个韵母是轻声，轻声的韵母没有声调或数字标识（可以通过参数 ``neutral_tone_with_five=True`` 开启使用 ``5`` 标识轻声 ）。
 * 默认情况下无声调相关拼音风格下的结果会使用 ``v`` 表示 ``ü`` （可以通过参数 ``v_to_u=True`` 开启使用 ``ü`` 代替 ``v`` ）。
 * 默认情况下会原样输出没有拼音的字符（自定义处理没有拼音的字符的方法见 `文档 <https://pypinyin.readthedocs.io/zh_CN/master/usage.html#handle-no-pinyin>`__ ）。
 .. _handle_no_pinyin:
 处理不包含拼音的字符
 ---------------------
 当程序遇到不包含拼音的字符(串)时，会根据 ``errors`` 参数的值做相应的处理:
 * ``default`` (默认行为): 不做任何处理，原样返回::
      pinyin('你好☆☆')
      [['nǐ'], ['hǎo'], ['☆☆']]
 * ``ignore`` : 忽略该字符 ::
      pinyin('你好☆☆', errors='ignore')
      [['nǐ'], ['hǎo']]
 * ``replace`` : 替换为去掉 ``\u`` 的 unicode 编码::
      pinyin('你好☆☆', errors='replace')
      [['nǐ'], ['hǎo'], ['26062606']]
 * callable 对象 : 提供一个回调函数，接受无拼音字符(串)作为参数,
  支持的返回值类型: ``unicode`` 或 ``list`` 或 ``None`` 。::
      pinyin('你好☆☆', errors=lambda x: 'star')
      [['nǐ'], ['hǎo'], ['star']]
      pinyin('你好☆☆', errors=lambda x: None)
      [['nǐ'], ['hǎo']]
  返回值类型为 ``list`` 时，会自动 expand list ::
      pinyin('你好☆☆', errors=lambda x: ['star' for _ in x])
      [['nǐ'], ['hǎo'], ['star'], ['star']]
      # 指定多音字
      pinyin('你好☆☆', heteronym=True, errors=lambda x: [['star', '☆'] for _ in x])
      [['nǐ'], ['hǎo'], ['star', '☆'], ['star', '☆']]
 .. _custom_dict:
 自定义拼音库
 ------------
 如果对结果不满意，可以通过
 :py:func:`~pypinyin.load_single_dict` 或
 :py:func:`~pypinyin.load_phrases_dict`
 以自定义拼音库的方式修正结果：
 .. code-block:: python
    >>> from pypinyin import lazy_pinyin, load_phrases_dict, Style, load_single_dict
    >>> hans = '桔子'
    >>> lazy_pinyin(hans, style=Style.TONE2)
    ['jie2', 'zi3']
    >>> load_phrases_dict({'桔子': [['jú'], ['zǐ']]})  # 增加 "桔子" 词组
    >>> lazy_pinyin(hans, style=Style.TONE2)
    ['ju2', 'zi3']
    >>>
    >>> hans = '还没'
    >>> lazy_pinyin(hans, style=Style.TONE2)
    ['hua2n', 'me2i']
    >>> load_single_dict({ord('还'): 'hái,huán'})  # 调整 "还" 字的拼音顺序
    >>> lazy_pinyin('还没', style=Style.TONE2)
    ['ha2i', 'me2i']
 .. _custom_style:
 自定义拼音风格
 ----------------
 可以通过 :py:func:`~pypinyin.style.register` 来实现自定义拼音风格的需求：
 .. code-block:: python
    In [1]: from pypinyin import lazy_pinyin
    In [2]: from pypinyin.style import register
    In [3]: @register('kiss')
       ...: def kiss(pinyin, **kwargs):
       ...:     return '😘 {0}'.format(pinyin)
       ...:
    In [4]: lazy_pinyin('么么', style='kiss')
    Out[4]: ['😘 me', '😘 me']
 .. _strict:
 ``strict`` 参数的影响
 -------------------------------
 ``strict`` 参数用于控制处理声母和韵母时是否严格遵循 `《汉语拼音方案》`_ 标准：
 .. code-block:: python
    In [1]: from pypinyin import Style, lazy_pinyin
    In [2]: lazy_pinyin('乌', style=Style.TONE)
    Out[2]: ['wū']
    In [3]: lazy_pinyin('乌', style=Style.INITIALS)
    Out[3]: ['']
    In [4]: lazy_pinyin('乌', style=Style.INITIALS, strict=False)
    Out[4]: ['w']
    In [5]: lazy_pinyin('迂', style=Style.TONE)
    Out[5]: ['yū']
    In [6]: lazy_pinyin('迂', style=Style.FINALS_TONE)
    Out[6]: ['ǖ']
    In [7]: lazy_pinyin('迂', style=Style.FINALS_TONE, strict=False)
    Out[7]: ['ū']
 当 ``strict=True`` 时根据 `《汉语拼音方案》`_ 的如下规则处理声母、在韵母相关风格下还原正确的韵母
 （只对只获取声母或只获取韵母相关拼音风格有效，不影响其他获取完整拼音信息的拼音风格的结果）：
 * 21 个声母： ``b p m f d t n l g k h j q x zh ch sh r z c s`` （**y, w 不是声母**）
 * i行的韵母，前面没有声母的时候，写成yi(衣)，ya(呀)，ye(耶)，yao(腰)，you(忧)，yan(烟)，
  yin(因)，yang(央)，ying(英)，yong(雍)。（**y 不是声母**）
 * u行的韵母，前面没有声母的时候，写成wu(乌)，wa(蛙)，wo(窝)，wai(歪)，wei(威)，wan(弯)，
  wen(温)，wang(汪)，weng(翁)。（**w 不是声母**）
 * ü行的韵母，前面没有声母的时候，写成yu(迂)，yue(约)，yuan(冤)，yun(晕)；ü上两点省略。
  （**韵母相关风格下还原正确的韵母 ü**）
 * ü行的韵跟声母j，q，x拼的时候，写成ju(居)，qu(区)，xu(虚)，ü上两点也省略；
  但是跟声母n，l拼的时候，仍然写成nü(女)，lü(吕)。（**韵母相关风格下还原正确的韵母 ü**）
 * iou，uei，uen前面加声母的时候，写成iu，ui，un。例如niu(牛)，gui(归)，lun(论)。
  （**韵母相关风格下还原正确的韵母 iou，uei，uen**）
 当 ``strict=False`` 时就是不遵守上面的规则来处理声母和韵母，
 比如：``y``, ``w`` 会被当做声母，yu(迂) 的韵母就是一般认为的 ``u`` 等。
 具体差异可以查看 `tests/test_standard.py <https://github.com/mozillazg/python-pinyin/blob/master/tests/test_standard.py>`_ 中的对比结果测试用例
 .. _cli:
 命令行工具
 ------------
 程序内置了一个命令行工具 ``pypinyin`` :
 .. code-block:: console
    $ pypinyin 音乐
    yīn yuè
    $ pypinyin -h
 命令行工具支持如下参数：
 .. code-block:: console
    $ pypinyin -h
    usage: pypinyin [-h] [-V] [-f {pinyin,slug}]
                    [-s {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}]
                    [-p SEPARATOR] [-e {default,ignore,replace}] [-m]
                    hans
    convert chinese to pinyin.
    positional arguments:
      hans                  chinese string
    optional arguments:
      -h, --help            show this help message and exit
      -V, --version         show program's version number and exit
      -f {pinyin,slug}, --func {pinyin,slug}
                            function name (default: "pinyin")
      -s {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}, --style {NORMAL,zhao,TONE,zh4ao,TONE2,zha4o,TONE3,zhao4,INITIALS,zh,FIRST_LETTER,z,FINALS,ao,FINALS_TONE,4ao,FINALS_TONE2,a4o,FINALS_TONE3,ao4,BOPOMOFO,BOPOMOFO_FIRST,CYRILLIC,CYRILLIC_FIRST}
                            pinyin style (default: "zh4ao")
      -p SEPARATOR, --separator SEPARATOR
                            slug separator (default: "-")
      -e {default,ignore,replace}, --errors {default,ignore,replace}
                            how to handle none-pinyin string (default: "default")
      -m, --heteronym       enable heteronym
 ``-s``, ``--style`` 参数可以选值的含义如下：
 ================== =========================================
 -s 或 --style 的值 对应的拼音风格
 ================== =========================================
 zhao               :py:attr:`~pypinyin.Style.NORMAL`
 zh4ao              :py:attr:`~pypinyin.Style.TONE`
 zha4o              :py:attr:`~pypinyin.Style.TONE2`
 zhao4              :py:attr:`~pypinyin.Style.TONE3`
 zh                 :py:attr:`~pypinyin.Style.INITIALS`
 z                  :py:attr:`~pypinyin.Style.FIRST_LETTER`
 ao                 :py:attr:`~pypinyin.Style.FINALS`
 4ao                :py:attr:`~pypinyin.Style.FINALS_TONE`
 a4o                :py:attr:`~pypinyin.Style.FINALS_TONE2`
 ao4                :py:attr:`~pypinyin.Style.FINALS_TONE3`
 NORMAL             :py:attr:`~pypinyin.Style.NORMAL`
 TONE               :py:attr:`~pypinyin.Style.TONE`
 TONE2              :py:attr:`~pypinyin.Style.TONE2`
 TONE3              :py:attr:`~pypinyin.Style.TONE3`
 INITIALS           :py:attr:`~pypinyin.Style.INITIALS`
 FIRST_LETTER       :py:attr:`~pypinyin.Style.FIRST_LETTER`
 FINALS             :py:attr:`~pypinyin.Style.FINALS`
 FINALS_TONE        :py:attr:`~pypinyin.Style.FINALS_TONE`
 FINALS_TONE2       :py:attr:`~pypinyin.Style.FINALS_TONE2`
 FINALS_TONE3       :py:attr:`~pypinyin.Style.FINALS_TONE3`
 BOPOMOFO           :py:attr:`~pypinyin.Style.BOPOMOFO`
 BOPOMOFO_FIRST     :py:attr:`~pypinyin.Style.BOPOMOFO_FIRST`
 CYRILLIC           :py:attr:`~pypinyin.Style.CYRILLIC`
 CYRILLIC_FIRST     :py:attr:`~pypinyin.Style.CYRILLIC_FIRST`
 ================== =========================================
 .. _《汉语拼音方案》: http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html
--- a/third_party/python-pinyin/gen_phrases_dict.py
+++ b/third_party/python-pinyin/gen_phrases_dict.py
@ -0,0 +1,60 @@
 import sys
 def remove_dup_items(lst):
    """Return a new list with the duplicate items of ``lst`` removed.

    The first occurrence of every item is kept and the original order is
    preserved.  Membership is checked against a list, so items only need to
    support equality; they do not have to be hashable.  ``lst`` itself is
    not modified.
    """
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
 def parse(fp):
    """Parse phrase pinyin data into a dict.

    Each data line looks like ``中国: zhōng guó``.  Blank lines and lines
    starting with ``#`` are skipped, and everything after a ``#`` on a data
    line is ignored.

    :param fp: iterable of text lines (e.g. an open text file).
    :return: dict mapping each phrase to a list with one entry per
             syllable; every entry is the list of readings collected for
             that position, e.g. ``{'中国': [['zhōng'], ['guó']]}``.
    :raises ValueError: if a data line does not contain exactly one ``:``.
    :raises IndexError: if a repeated phrase has fewer syllables than its
                        first occurrence.
    """
    phrases_dict = {}
    # Read from the ``fp`` argument.  The previous code read the global
    # ``in_fp``, which only exists when the module is run as a script.
    for line in fp:
        line = line.strip()
        if line.startswith('#') or not line:
            continue
        # 中国: zhōng guó
        data = line.split('#')[0]
        hanzi, pinyin = data.strip().split(':')
        hanzi = hanzi.strip()
        # [[zhōng], [guó]]
        pinyin_list = [[s] for s in pinyin.split()]
        if hanzi not in phrases_dict:
            phrases_dict[hanzi] = pinyin_list
        else:
            # The phrase was seen before: add the new readings position by
            # position, keeping each position free of duplicates.
            for index, value in enumerate(phrases_dict[hanzi]):
                value.extend(pinyin_list[index])
                phrases_dict[hanzi][index] = remove_dup_items(value)
    return phrases_dict
 def main(in_fp, out_fp):
    """Write a Python module defining ``phrases_dict`` to ``out_fp``.

    The phrase data is read from ``in_fp`` via ``parse`` and written as one
    dict entry per phrase, ordered by phrase.
    """
    out_fp.write('''# Warning: Auto-generated file, don't edit.
 phrases_dict = {
 ''')
    phrases = parse(in_fp)
    # Sorting the keys gives the same order as sorting the items by key.
    for phrase in sorted(phrases):
        #     中国: [[zhōng], [guó]]
        entry = "    '{hanzi}': {pinyin_list},\n".format(
            hanzi=phrase.strip(), pinyin_list=phrases[phrase])
        out_fp.write(entry)
    out_fp.write('}\n')
 if __name__ == '__main__':
    # Both INPUT and OUTPUT are required.  Checking only for "no arguments
    # at all" let a single-argument call crash with IndexError on
    # sys.argv[2] instead of printing the usage line.
    if len(sys.argv) < 3:
        print('python gen_phrases_dict.py INPUT OUTPUT')
        sys.exit(1)
    in_f = sys.argv[1]
    out_f = sys.argv[2]
    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
--- a/third_party/python-pinyin/gen_pinyin_dict.py
+++ b/third_party/python-pinyin/gen_pinyin_dict.py
@ -0,0 +1,37 @@
 import sys
 def main(in_fp, out_fp):
    """Write a Python module defining ``pinyin_dict`` to ``out_fp``.

    Every data line of ``in_fp`` (``U+4E2D: zhōng,zhòng  # 中``) becomes one
    dict entry (``0x4E2D: 'zhōng,zhòng',``).  Blank lines, lines starting
    with ``#`` and trailing ``#`` comments are dropped.
    """
    out_fp.write('''# Warning: Auto-generated file, don't edit.
 pinyin_dict = {
 ''')
    for raw in in_fp.readlines():
        stripped = raw.strip()
        # Blank lines and full-line comments carry no data.
        if not stripped or stripped.startswith('#'):
            continue
        # "U+4E2D: zhōng,zhòng  # 中" -> "U+4E2D: zhōng,zhòng"
        entry = stripped.split('#')[0].strip()
        # -> "0x4E2D: 'zhōng,zhòng"
        entry = entry.replace('U+', '0x').replace(': ', ": '")
        # -> "    0x4E2D: 'zhōng,zhòng',\n"
        out_fp.write("    {new_line}',\n".format(new_line=entry))
    out_fp.write('}\n')
 if __name__ == '__main__':
    # Both INPUT and OUTPUT are required.  Checking only for "no arguments
    # at all" let a single-argument call crash with IndexError on
    # sys.argv[2] instead of printing the usage line.
    if len(sys.argv) < 3:
        print('python gen_pinyin_dict.py INPUT OUTPUT')
        sys.exit(1)
    in_f = sys.argv[1]
    out_f = sys.argv[2]
    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
--- a/third_party/python-pinyin/phrase-pinyin-data/.bumpversion.cfg
+++ b/third_party/python-pinyin/phrase-pinyin-data/.bumpversion.cfg
@ -0,0 +1,11 @@
 [bumpversion]
 commit = True
 tag = True
 current_version = 0.10.5
 [bumpversion:file:merge.py]
 [bumpversion:file:pinyin.txt]
 [bumpversion:file:large_pinyin.txt]
--- a/third_party/python-pinyin/phrase-pinyin-data/.gitignore
+++ b/third_party/python-pinyin/phrase-pinyin-data/.gitignore
@ -0,0 +1,92 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 env/
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 *.egg-info/
 .installed.cfg
 *.egg
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *,cover
 .hypothesis/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 # IPython Notebook
 .ipynb_checkpoints
 # pyenv
 .python-version
 # celery beat schedule file
 celerybeat-schedule
 # dotenv
 .env
 # virtualenv
 venv/
 ENV/
 # Spyder project settings
 .spyderproject
 # Rope project settings
 .ropeproject
 new.txt
 cc-cedict.zip
 cedict_ts.u8
--- a/third_party/python-pinyin/phrase-pinyin-data/.travis.yml
+++ b/third_party/python-pinyin/phrase-pinyin-data/.travis.yml
@ -0,0 +1,6 @@
 language: python
 python:
  - 3.6
 script:
  - make merge
--- a/third_party/python-pinyin/phrase-pinyin-data/CHANGELOG.md
+++ b/third_party/python-pinyin/phrase-pinyin-data/CHANGELOG.md
@ -0,0 +1,213 @@
 # ChangeLog
 ## [0.10.5] (2020-11-22)
 * 增加 `还君明珠` 的拼音。
 ## [0.10.4] (2020-10-08)
 * 纠正一些词语的拼音。
 ## [0.10.3] (2020-07-05)
 * 增加 `还珠` 的拼音。
 ## [0.10.2] (2019-10-26)
 * 纠正一些词语的拼音。
 ## [0.10.1] (2019-07-06)
 * 修正部分拼音数据。
 ## [0.10.0] (2019-05-10)
 * 新增 `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据。Thanks [@hanabi1224]
 * 纠正一些词语的拼音
 ## [0.9.2] (2019-04-06)
 * 修复部分词语的拼音声调标错了位置的问题
 ## [0.9.1] (2019-03-31)
 * 纠正一批词语的拼音：
  * `鸟事`
  * `虮虱相吊`
  * `别鹤离鸾`
  * `年华垂暮`
  * `本枝百世`
  * `操戈同室`
  * 部分词语中 `丢` 的拼音
 ## [0.9.0] (2019-02-23)
 * 新增 `腌臢: ā zā`
 * `朝阳` 增加 `cháo yáng` 这个音
 * 新增 `土地`、`领地`、`基地`
 ## [0.8.5] (2018-12-26)
 * 纠正 `油炸`、`洗发` 的拼音
 ## [0.8.4] (2018-09-16)
 * 纠正 `步履蹒跚` 的拼音
 * 纠正部分词语中 `长` 的拼音
 ## [0.8.3] (2018-08-04)
 * 纠正部分 `查`、`大` 的读音 (via [ee1ded4])
 ## [0.8.2] (2018-07-28)
 * 纠正 `有一只` 的读音 (via [330b348])
 ## [0.8.1] (2018-07-28)
 * 纠正几个 `一` 的读音 (via [6e3b9eb])
 * 修复部分拼音包含 `xh` 的问题 (via [ae12df98])
 ## [0.8.0] (2018-07-08)
 * 纠正 `称雨道晴` 的拼音 (via [67412ab])
 * 纠正部分词语中 `干` 的拼音 (via [38474cb])
 * 增加 `时长` 的拼音 (via [c40b965])
 ## [0.7.3] (2018-06-10)
 * 纠正 `一语中的`, `一语中人` 的拼音 (via [3b62ed3])
 ## [0.7.2] (2018-06-10)
 * 纠正部分拼音数据 (via [af5d783])
 ## [0.7.1] (2018-06-04)
 * 纠正 `负债累累` `经纶济世` 的拼音 (via [#16])
 ## [0.7.0] (2018-05-27)
 * 新增 zdic_cibs.txt 和 zdic_cybs.txt (via [#13])
  * `zdic_cibs.txt`: [汉典网](http://www.zdic.net) 汉语词典拼音数据
  * `zdic_cybs.txt`: [汉典网](http://www.zdic.net) 成语词典拼音数据
 * 增加基于 zdic_cibs.txt 和 zdic_cybs.txt 的 large_pinyin.txt (via [#13])
 * 纠正部分读音(via [#10],[#11], [#15])
 ## [0.6.0] (2018-03-11)
 * Revert [#3](https://github.com/mozillazg/phrase-pinyin-data/pull/3) 增加的拼音数据（错误有点多）
 ## [0.5.1] (2017-10-25)
 * 修正一批缺少 ā 和 dī 不对的词语(via [#7][#7])
 ## [0.5.0] (2017-07-09)
 * 增加 `还贷` 的拼音(Thanks [@zhuangh](https://github.com/zhuangh))
 ## [0.4.1] (2017-04-10)
 * 纠正 `朝阳`, `昂昂自若` 的拼音(via [e6d6d27][e6d6d27], [6e7ea16][6e7ea16])
 ## [0.4.0] (2017-03-22)
 * 新增2万多个词组拼音数据(via [fc50fcd][fc50fcd], 感谢 [@onsunsl][@onsunsl] 分享他/她收集的43400个拼音数据: [#3][#3] ).
 ## [0.3.1] (2017-03-13)
 * 纠正 `斯事体大` 的拼音
 ## [0.3.0] (2017-03-12)
 * 增加 overwrite.txt 用于新增/纠正拼音数据
 * 纠正 `便宜`, `所长`, `打开天窗说亮话` 的拼音数据
 * 增加 `朝阳区`
 ## [0.2.0] (2017-03-04)
 * 添加一批拼音(via [04de9f7][04de9f7])。
 ## 0.1.0 (2017-03-04)
 * Initial Release
 [0.10.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.3...v0.10.4
 [0.10.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.2...v0.10.3
 [0.10.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.1...v0.10.2
 [0.10.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.0...v0.10.1
 [0.10.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.2...v0.10.0
 [0.9.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.1...v0.9.2
 [0.9.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.0...v0.9.1
 [0.9.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.5...v0.9.0
 [0.8.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.4...v0.8.5
 [0.8.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.3...v0.8.4
 [0.8.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.2...v0.8.3
 [0.8.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.1...v0.8.2
 [0.8.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.0...v0.8.1
 [0.8.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.3...v0.8.0
 [0.7.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.2...v0.7.3
 [0.7.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.1...v0.7.2
 [0.7.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.0...v0.7.1
 [0.7.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.6.0...v0.7.0
 [0.6.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.1...v0.6.0
 [0.5.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.0...v0.5.1
 [0.5.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.1...v0.5.0
 [0.4.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.0...v0.4.1
 [0.4.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.1...v0.4.0
 [0.3.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.0...v0.3.1
 [0.3.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.2.0...v0.3.0
 [0.2.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.1.0...v0.2.0
 [04de9f7]: https://github.com/mozillazg/phrase-pinyin-data/commit/04de9f7f520e2f2188cb4c468c30d6fb811a20ba
 [fc50fcd]: https://github.com/mozillazg/phrase-pinyin-data/commit/fc50fcd7faa94205096d582fc7a1b31265943a85
 [@onsunsl]: https://github.com/onsunsl
 [#3]: https://github.com/mozillazg/phrase-pinyin-data/pull/3
 [e6d6d27]: https://github.com/mozillazg/phrase-pinyin-data/commit/e6d6d270900fdca32ccbe9a414ea4642e537e522
 [6e7ea16]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e7ea167dee0c812514f0bf9701ff5c103a566af
 [#7]: https://github.com/mozillazg/phrase-pinyin-data/pull/7
 [#10]: https://github.com/mozillazg/phrase-pinyin-data/pull/10
 [#11]: https://github.com/mozillazg/phrase-pinyin-data/pull/11
 [#13]: https://github.com/mozillazg/phrase-pinyin-data/pull/13
 [#15]: https://github.com/mozillazg/phrase-pinyin-data/pull/15
 [#16]: https://github.com/mozillazg/phrase-pinyin-data/pull/16
 [af5d783]: https://github.com/mozillazg/phrase-pinyin-data/commit/af5d7831b0e84e4a5306e304b3b2da3268e35f17
 [3b62ed3]: https://github.com/mozillazg/phrase-pinyin-data/commit/3b62ed303f129868c7ccee4f2d5e44dcea7d30d4
 [67412ab]: https://github.com/mozillazg/phrase-pinyin-data/commit/67412abbf8570ac80a41dc012f228c0864823a62
 [38474cb]: https://github.com/mozillazg/phrase-pinyin-data/commit/38474cb91dedd27b3d51b39811704f3d045837b1
 [c40b965]: https://github.com/mozillazg/phrase-pinyin-data/commit/c40b9653ea2ab066d1c0606e9e07dd4225ff2485
 [6e3b9eb]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e3b9eb805ed3e3a5955c179e752ec5e1293216f
 [ae12df98]: https://github.com/mozillazg/phrase-pinyin-data/commit/ae12df98438a508249bdf591334b6415bb5ccf8d
 [330b348]: https://github.com/mozillazg/phrase-pinyin-data/commit/330b3481ba350de07b580991a5a8b7a83aaefde9
 [ee1ded4]: https://github.com/mozillazg/phrase-pinyin-data/commit/ee1ded4938624ac4ce3dc7991ab370e09dbd745c
 [@hanabi1224]: https://github.com/hanabi1224
 [0.10.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.4...v0.10.5
--- a/third_party/python-pinyin/phrase-pinyin-data/LICENSE
+++ b/third_party/python-pinyin/phrase-pinyin-data/LICENSE
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2017 mozillazg
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/third_party/python-pinyin/phrase-pinyin-data/Makefile
+++ b/third_party/python-pinyin/phrase-pinyin-data/Makefile
@ -0,0 +1,42 @@
 .PHONY: help
 help:
 	@echo "merge          update pinyin.txt and large_pinyin.txt"
 	@echo "er             find r"
 	@echo "check          check unexpected char"
 	@echo "cedict_get     get latest cedict data"
 	@echo "cedict         parse latest cedict data"
 .PHONY: merge
 merge:
 	python merge.py pinyin.txt overwrite.txt > new.txt && mv new.txt pinyin.txt
 	python merge.py zdic_cibs.txt zdic_cybs.txt cc_cedict.txt pinyin.txt overwrite.txt > new.txt && mv new.txt large_pinyin.txt
 .PHONY: er
 er:
 	cat overwrite.txt|grep 儿|grep -v ér|grep -v er
 .PHONY: tone_mark
 tone_mark:
 	ls *.txt | xargs -L 1 sed -i 's/ùo/uò/g'
 	ls *.txt | xargs -L 1 sed -i 's/oǔ/ǒu/g'
 	ls *.txt | xargs -L 1 sed -i 's/ùi/uì/g'
 	ls *.txt | xargs -L 1 sed -i 's/íe/ié/g'
 	ls *.txt | xargs -L 1 sed -i 's/ùi/uì/g'
 	ls *.txt | xargs -L 1 sed -i 's/ǐe/iě/g'
 	ls *.txt | xargs -L 1 sed -i 's/aō/āo/g'
 	ls *.txt | xargs -L 1 sed -i 's/ìan/iàn/g'
 	ls *.txt | xargs -L 1 sed -i 's/īan/iān/g'
 .PHONY: check
 check: tone_mark
 	-rg 'ɡ|ɑ'
 .PHONY: cedict_get
 cedict_get:
 	python -m pip install -U -r requirements_dev.txt
 	python get_latest_cc_cedict.py
 .PHONY: cedict
 cedict:
 	python -m pip install -U -r requirements_dev.txt
 	python parse_latest_cc_cedict.py
--- a/third_party/python-pinyin/phrase-pinyin-data/README.md
+++ b/third_party/python-pinyin/phrase-pinyin-data/README.md
@ -0,0 +1,54 @@
 # phrase-pinyin-data [![Build Status](https://travis-ci.org/mozillazg/phrase-pinyin-data.svg?branch=master)](https://travis-ci.org/mozillazg/phrase-pinyin-data)
 词语拼音数据。
 ## 数据介绍
 拼音数据的格式：
 ```
 {phrase}: {pinyin}
 ```
 * 以 `#` 开头的行是注释
 * 行尾的 `#` 也是注释
 * `{phrase}` 汉字词语
 * `{pinyin}` 词语的拼音，使用空格分隔每个汉字的拼音
 * 一行一个词语的读音，有多个音的词语会出现在多行
 * 示例：
  ```
  # 注释
  中国: zhōng guó
  北京: běi jīng  # 注释
  ```
 文件说明:
 * `overwrite.txt`: 手工纠正的拼音数据
 * `pinyin.txt`: `pinyin.txt + overwrite.txt` 后的拼音数据
 * `zdic_cibs.txt`: [汉典网](http://www.zdic.net/) 汉语词典拼音数据
 * `zdic_cybs.txt`: [汉典网](http://www.zdic.net/) 成语词典拼音数据
 * `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据
 * `large_pinyin.txt`: `zdic_cibs.txt + zdic_cybs.txt + cc_cedict.txt + pinyin.txt + overwrite.txt` 后的拼音数据
 ## 修改数据
 * 修改 `pinyin.txt` 或 `overwrite.txt` 都可以了
 * 执行 `make merge` 命令可以按照合并规则生成最新的 `pinyin.txt` 和 `large_pinyin.txt`
 ## 参考资料
 * 初始数据基于 [phrases-dict.js](https://github.com/hotoo/pinyin/blob/05f74496c34ccb32db1a0fd0b358a798a22a51e5/data/phrases-dict.js) 和 [phrases_dict.py](https://github.com/mozillazg/python-pinyin/blob/366de0363ff1fb9a718ce668448bea59de09a4bf/pypinyin/phrases_dict.py)
 * [汉典 zdic.net](http://www.zdic.net/)
 * [字海网，叶典网](http://zisea.com/)
 * [国学大师_国学网](http://www.guoxuedashi.com/)
 * [CC-CEDICT download - MDBG English to Chinese dictionary](http://www.mdbg.net/chindict/chindict.php?page=cc-cedict)
 * [漢語大詞典](http://www.ivantsoi.com/hydcd/search.html)
 ## 相关项目
 * [mozillazg/pinyin-data](https://github.com/mozillazg/pinyin-data): 汉字拼音数据
--- a/third_party/python-pinyin/phrase-pinyin-data/cc_cedict.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/cc_cedict.txt
--- a/third_party/python-pinyin/phrase-pinyin-data/get_latest_cc_cedict.py
+++ b/third_party/python-pinyin/phrase-pinyin-data/get_latest_cc_cedict.py
@ -0,0 +1,26 @@
 # -*- coding: utf-8 -*-
 import os
 import io
 import shutil
 import codecs
 import zipfile
 import requests
 ROOT = os.path.dirname(os.path.realpath(__file__))
 if __name__ == '__main__':
    # Download the latest CC-CEDICT export and unpack it next to this script.
    DOWNLOAD_URL = 'https://cc-cedict.org/editor/editor_export_cedict.php?c=zip'
    zip_file_path = os.path.join(ROOT, 'cc-cedict.zip')
    # Issue the request before opening the output file so that a failed
    # download neither truncates an existing archive nor stores an HTTP
    # error page as if it were the zip file.
    response = requests.get(DOWNLOAD_URL, stream=True)
    response.raise_for_status()
    # ``response.raw`` yields the bytes as sent on the wire; ask urllib3 to
    # undo any Content-Encoding (gzip/deflate) before they are written.
    response.raw.decode_content = True
    with open(zip_file_path, 'wb') as f:
        shutil.copyfileobj(response.raw, f)
    # Use ZipFile as a context manager so the archive handle is closed.
    with zipfile.ZipFile(zip_file_path) as z:
        z.extractall(ROOT)
--- a/third_party/python-pinyin/phrase-pinyin-data/large_pinyin.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/large_pinyin.txt
--- a/third_party/python-pinyin/phrase-pinyin-data/merge.py
+++ b/third_party/python-pinyin/phrase-pinyin-data/merge.py
@ -0,0 +1,58 @@
 # -*- coding: utf-8 -*-
 import sys
 import codecs
 def parse(lines):
    """
    Walk over phrase-pinyin lines, skipping blank lines and lines that
    start with ``#``.  Each remaining line is split at its first ``:``;
    both halves are stripped of surrounding whitespace.

    :yield: hanzi, others
    """
    for raw in lines:
        entry = raw.strip()
        if not entry or entry.startswith('#'):
            continue
        # A line without ':' yields a single field and fails to unpack
        # with ValueError.
        hanzi, others = [field.strip() for field in entry.split(':', 1)]
        yield hanzi, others
 def merge(pinyin_d_list):
    """
    Combine the given dicts into a single new dict.  They are applied in
    order, so an entry from a later dict replaces the entry with the same
    key from an earlier one.

    :rtype: dict
    """
    merged = {}
    for layer in pinyin_d_list:
        merged.update(layer)
    return merged
 def sort(pinyin_d):
    """
    Return the ``(key, value)`` pairs of ``pinyin_d`` ordered by key.

    :rtype: list
    """
    pairs = list(pinyin_d.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
 def output(pinyin_s):
    """
    Print the two header comment lines followed by one ``hanzi: pinyin``
    line per pair of ``pinyin_s`` to standard output.
    """
    print('# version: 0.10.5')
    print('# source: https://github.com/mozillazg/phrase-pinyin-data')
    for key, pinyin in pinyin_s:
        # Only the part of the key before its first "_" is printed, so a
        # key such as "朝阳_2" is emitted as "朝阳".
        hanzi = key.partition('_')[0]
        print('{hanzi}: {pinyin}'.format(hanzi=hanzi, pinyin=pinyin))
 def main(files):
    """
    Read every file named in ``files`` (UTF-8, optional BOM), merge their
    entries and print the sorted result.

    Within one file the first entry for a key wins; across files the
    per-file dicts are passed to ``merge`` in the order given.
    """
    pinyin_d_list = []
    for name in files:
        per_file = {}
        with codecs.open(name, 'r', 'utf-8-sig') as fp:
            for hanzi, pinyin in parse(fp):
                # Keep the first entry seen for a key inside one file.
                if hanzi not in per_file:
                    per_file[hanzi] = pinyin
        pinyin_d_list.append(per_file)
    output(sort(merge(pinyin_d_list)))
 if __name__ == '__main__':
    # Every command-line argument is the path of an input file; they are
    # processed in the order given.
    main(sys.argv[1:])
--- a/third_party/python-pinyin/phrase-pinyin-data/overwrite.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/overwrite.txt
@ -0,0 +1,7 @@
 # 新增或纠正的拼音数据
 # 升级版本的时候会合并回 pinyin.txt
 # 示例
 斯事体大: sī shì tǐ dà
 朝阳: zhāo yáng
 朝阳_2: cháo yáng
 还君明珠: huán jūn míng zhū
--- a/third_party/python-pinyin/phrase-pinyin-data/parse_latest_cc_cedict.py
+++ b/third_party/python-pinyin/phrase-pinyin-data/parse_latest_cc_cedict.py
@ -0,0 +1,74 @@
 # -*- coding: utf-8 -*-
 import os
 import io
 import re
 import codecs
 from pypinyin.phonetic_symbol import phonetic_symbol
 from pypinyin.pinyin_dict import pinyin_dict
 from pypinyin.style.tone import ToneConverter
 # Directory containing this script; input and output files live beside it.
 ROOT = os.path.dirname(os.path.realpath(__file__))
 tone_converter = ToneConverter()
 # Lookup table: lower-cased tone3 spelling (as produced by
 # ``tone_converter.to_tone3``) -> the reading as stored in ``pinyin_dict``.
 tone3_2_tone_dict = {}
 for k, v in pinyin_dict.items():
    # A dict value holds one or more comma separated readings.
    for part in v.split(','):
        part = part.strip()
        if not part:
            continue
        tone3 = tone_converter.to_tone3(part).strip().lower()
        if not tone3:
            continue
        tone3_2_tone_dict[tone3] = part
 def tone3_to_tone1(tone3):
    """Convert one syllable in tone-number notation to pinyin.

    The input is stripped and lower-cased first.  ``r5`` becomes ``er``.
    Any other syllable containing ``5`` is returned without the digit, and
    everything else is looked up in ``tone3_2_tone_dict``.

    :param tone3: a syllable such as ``zhong1``, ``de5`` or ``lu:4``.
    :return: the converted syllable.
    :raises KeyError: if the syllable is not in ``tone3_2_tone_dict``.
    """
    tone3 = tone3.strip().lower()
    # erhua (r-suffix)
    if tone3 == 'r5':
        return 'er'
    # "u:" stands for ü (e.g. 律); the lookup table spells it "v".
    # Normalise before the neutral-tone branch, which previously returned
    # the raw "u:" spelling for inputs such as "nu:5".
    tone3 = tone3.replace('u:', 'v')
    # neutral tone: no tone mark, just drop the digit
    if '5' in tone3:
        new = tone3.replace('5', '')
        if new:
            return new.replace('v', 'ü')
    return tone3_2_tone_dict[tone3]
 if __name__ == '__main__':
    # Convert cedict_ts.u8 (CC-CEDICT dump) into cc_cedict.txt, one
    # "phrase: pinyin" line per usable entry.
    # Entry layout: "<zht> <zhs> [<py> <py> ...] ..."
    LINE_PARTS_RE = re.compile(
        r'(?P<zht>\w+)\s+(?P<zhs>\w+)\s+\[(?P<py>.+?)\]')
    LETTER_DIGIT_RE = re.compile(r'[a-zA-Z0-9]')
    cnt = 0
    with codecs.open(os.path.join(ROOT, 'cc_cedict.txt'), 'w', 'utf-8-sig') as fpw:
        with codecs.open(os.path.join(ROOT, 'cedict_ts.u8'), 'r', 'utf-8-sig') as fpr:
            for line in fpr:
                line_stripped = line.strip()
                # Test the stripped text: ``line`` still carries its
                # newline, so a blank line used to pass ``not line`` and
                # then fail with IndexError on ``line_stripped[0]``.
                if not line_stripped or line_stripped.startswith(('#', '%')):
                    continue
                parts = LINE_PARTS_RE.match(line_stripped)
                if not parts:
                    continue
                zhs = parts.group('zhs')
                py = parts.group('py').split()
                try:
                    tone1 = [tone3_to_tone1(i) for i in py]
                except KeyError as e:
                    # Unknown syllable: report it and skip the entry.
                    print(e)
                    continue
                # Skip entries whose phrase contains Latin letters or digits.
                if LETTER_DIGIT_RE.search(zhs):
                    continue
                # Single characters are not phrases.
                if len(zhs) < 2:
                    continue
                fpw.write(f'{zhs}: {" ".join(tone1)}\n')
                cnt += 1
                if cnt % 10000 == 0:
                    print(f'{cnt} lines processed...')
--- a/third_party/python-pinyin/phrase-pinyin-data/pinyin.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/pinyin.txt
--- a/third_party/python-pinyin/phrase-pinyin-data/requirements_dev.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/requirements_dev.txt
@ -0,0 +1,2 @@
 requests
 pypinyin
--- a/third_party/python-pinyin/phrase-pinyin-data/zdic_cibs.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/zdic_cibs.txt
--- a/third_party/python-pinyin/phrase-pinyin-data/zdic_cybs.txt
+++ b/third_party/python-pinyin/phrase-pinyin-data/zdic_cybs.txt
--- a/third_party/python-pinyin/pinyin-data/.bumpversion.cfg
+++ b/third_party/python-pinyin/pinyin-data/.bumpversion.cfg
@ -0,0 +1,9 @@
 [bumpversion]
 commit = True
 tag = True
 current_version = 0.10.2
 [bumpversion:file:merge_unihan.py]
 [bumpversion:file:pinyin.txt]
--- a/third_party/python-pinyin/pinyin-data/.github/workflows/python-app.yml
+++ b/third_party/python-pinyin/pinyin-data/.github/workflows/python-app.yml
@ -0,0 +1,31 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 name: Python application
 on:
  push:
    branches: [ ]
  pull_request:
    branches: [ ]
 jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        python-version: 3.9
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: try merge_unihan
      run: |
        make merge_unihan
--- a/third_party/python-pinyin/pinyin-data/.gitignore
+++ b/third_party/python-pinyin/pinyin-data/.gitignore
@ -0,0 +1,62 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 env/
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 *.egg-info/
 .installed.cfg
 *.egg
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *,cover
 .hypothesis/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 #Ipython Notebook
 .ipynb_checkpoints
--- a/third_party/python-pinyin/pinyin-data/.travis.yml
+++ b/third_party/python-pinyin/pinyin-data/.travis.yml
@ -0,0 +1,6 @@
 language: python
 python:
  - "3.5"
 script:
  - make merge_unihan
--- a/third_party/python-pinyin/pinyin-data/CHANGELOG.md
+++ b/third_party/python-pinyin/pinyin-data/CHANGELOG.md
@ -0,0 +1,134 @@
 # ChangeLog
 ## [0.10.2] (2021-03-13)
 * 修改 `帧` 的最常用读音为 `zhēn`
 * 修复 `zdic.txt` 中两个拼音字母 `è í` 使用不当的问题. Thanks [@Ace-Who](https://github.com/Ace-Who)
 ## [0.10.1] (2020-11-22)
 * 调整 `地` 和 `謦` 的拼音顺序
 ## [0.10.0] (2020-10-07)
 * 新增 `kTGHZ2013.txt`: [Unihan Database][unihan] 中 [kTGHZ2013](http://www.unicode.org/reports/tr38/#kTGHZ2013) 部分的拼音数据（来源于《通用规范汉字字典》的拼音数据）
 * 修正部分拼音的读音
 * 生成 `pinyin.txt` 时合并来自 `kTGHZ2013.txt` 的拼音数据
 ## [0.9.0] (2020-06-06)
 * 更新 Unihan 数据版本为 13.0.0
 ## [0.8.1] (2019-10-26)
 * 修正 `迹` 和 `分` 的读音。
 ## [0.8.0] (2019-06-01)
 * 增加 `kanji.txt` 日本自造汉字的拼音数据 via [#32]. Thanks [@LuoZijun](https://github.com/LuoZijun)
 * 去掉几个有误的轻声数据
 ## [0.7.0] (2019-03-31)
 * 更新 Unihan 数据版本为 12.0.0
 ## [0.6.2] (2018-09-16)
 * 修改 `蹒` 的最常用读音为 `pán`
 ## [0.6.1] (2018-08-04)
 * 修改 `著` 的默认读音为 `zhù` via [8802f31]
 ## [0.6.0] (2018-07-08)
 * 更新 Unihan 数据版本为 11.0.0 via [68dc169]
 ## [0.5.1] (2018-04-19)
 * 更正 `卓`、`啥` 的拼音数据 via [#26] 。Thanks [@shibingli](https://github.com/shibingli)
 * 更新 `〇` 的拼音数据 via [#27]
 ## [0.5.0] (2018-03-18)
 * 更新 Unihan 数据版本为 10.0.0 via [#19][#19]
 * 新增 kMandarin_overwrite.txt 用于手工纠正 kMandarin.txt 中有误的拼音数据 via [#21][#21]
 * 更正 `讽`、`识` 的最常用读音 via [#20][#20]
 * 更正 埔,彷,珖,U+275C8 的常用发音 [635b238c4](https://github.com/mozillazg/pinyin-data/commit/635b238c4d21e55d8fd66299c8da3ae555253b3a)
 ## [0.4.1] (2017-02-12)
 * `妳` 的最常用拼音调整为 `nǐ` via [eb08200](https://github.com/mozillazg/pinyin-data/commit/eb08200d0a203c57ecc62ec7a118765518430238)
 * `钭` 的拼音更新为 `tǒu,dǒu` via [fb9e64e](https://github.com/mozillazg/pinyin-data/commit/fb9e64e6c0a20eb0e792e8a402dffbf8cc2dfa57)
 ## [0.4.0] (2016-10-17)
 * Update PUA.txt 详见 [#7](https://github.com/mozillazg/pinyin-data/issues/7) thanks [@Artoria2e5][@Artoria2e5]
 * Rename PUA.txt to GBK_PUA.txt 详见 [#7](https://github.com/mozillazg/pinyin-data/issues/7)
 * Add kMandarin_8105.txt (《通用规范汉字表》里 8105 个汉字最常用的一个读音) [#9][#9] [#11][#11]
 * Update pinyin.txt with latest data
 ## [0.3.0] (2016-08-19)
 * Fixed format of zdic.txt via [b8e4394](https://github.com/mozillazg/pinyin-data/commit/b8e439490d2c6e8c711652983db52fb69136919b).
 * Fixed some pinyin: 罗 via [468ffaa](https://github.com/mozillazg/pinyin-data/commit/468ffaa8eb678637c7565a02e6836255bd0df06c).
 * Support Chinese characters in the PUA ([Private Use Area](https://en.wikipedia.org/wiki/Private_Use_Areas)) via [#2](https://github.com/mozillazg/pinyin-data/pull/2).
 * pinyin.txt: add line comments that start with `#` via [9944f79](https://github.com/mozillazg/pinyin-data/commit/9944f795e191fb3606d65ada84b6fad5665f8776).
 ## [0.2.0] (2016-07-19)
 * Update to the latest version of [Unihan Database](http://www.unicode.org/charts/unihan.html):
  > Date: 2016-06-01 07:01:48 GMT [JHJ]       
  > Unicode version: 9.0.0
 ## 0.1.0 (2016-03-11)
 * Initial Release
 [@Artoria2e5]: https://github.com/Artoria2e5
 [#9]: https://github.com/mozillazg/pinyin-data/pull/9
 [#11]: https://github.com/mozillazg/pinyin-data/pull/11
 [#19]: https://github.com/mozillazg/pinyin-data/pull/19
 [#20]: https://github.com/mozillazg/pinyin-data/pull/20
 [#21]: https://github.com/mozillazg/pinyin-data/pull/21
 [#26]: https://github.com/mozillazg/pinyin-data/pull/26
 [#27]: https://github.com/mozillazg/pinyin-data/pull/27
 [68dc169]: https://github.com/mozillazg/pinyin-data/commit/68dc169c3f0f02cb9bf53290edab2d2d2463e0c5
 [8802f31]: https://github.com/mozillazg/pinyin-data/commit/8802f31e0e65c6e34a497adb55993425741a9d41
 [#32]: https://github.com/mozillazg/pinyin-data/pull/32
 [unihan]: http://www.unicode.org/charts/unihan.html
 [0.2.0]: https://github.com/mozillazg/pinyin-data/compare/v0.1.0...v0.2.0
 [0.3.0]: https://github.com/mozillazg/pinyin-data/compare/v0.2.0...v0.3.0
 [0.4.0]: https://github.com/mozillazg/pinyin-data/compare/v0.3.0...v0.4.0
 [0.4.1]: https://github.com/mozillazg/pinyin-data/compare/v0.4.0...v0.4.1
 [0.5.0]: https://github.com/mozillazg/pinyin-data/compare/v0.4.1...v0.5.0
 [0.5.1]: https://github.com/mozillazg/pinyin-data/compare/v0.5.0...v0.5.1
 [0.6.0]: https://github.com/mozillazg/pinyin-data/compare/v0.5.1...v0.6.0
 [0.6.1]: https://github.com/mozillazg/pinyin-data/compare/v0.6.0...v0.6.1
 [0.6.2]: https://github.com/mozillazg/pinyin-data/compare/v0.6.1...v0.6.2
 [0.7.0]: https://github.com/mozillazg/pinyin-data/compare/v0.6.2...v0.7.0
 [0.8.0]: https://github.com/mozillazg/pinyin-data/compare/v0.7.0...v0.8.0
 [0.8.1]: https://github.com/mozillazg/pinyin-data/compare/v0.8.0...v0.8.1
 [0.9.0]: https://github.com/mozillazg/pinyin-data/compare/v0.8.1...v0.9.0
 [0.10.0]: https://github.com/mozillazg/pinyin-data/compare/v0.9.0...v0.10.0
 [0.10.1]: https://github.com/mozillazg/pinyin-data/compare/v0.10.0...v0.10.1
 [0.10.2]: https://github.com/mozillazg/pinyin-data/compare/v0.10.1...v0.10.2
--- a/third_party/python-pinyin/pinyin-data/GBK_PUA.txt
+++ b/third_party/python-pinyin/pinyin-data/GBK_PUA.txt
@ -0,0 +1,82 @@
 # GBK/GB 18030 PUA 映射
 # 详见：https://zh.wikipedia.org/wiki/GB_18030#PUA
 # U+E815:   #   Unihan: U+2E81 ⺁
 U+E816: zuǒ  #   Unihan: U+20087 𠂇
 # U+E817:   #   Unihan: U+20089 𠂉
 U+E818: gǔn  #   Unihan: U+200CC 𠃌
 # U+E819:   #   Unihan: U+2E84 ⺄
 U+E81A: zhòu,zhū  #   Unihan: U+3473 㑳
 U+E81B: zhòu  #   Unihan: U+3447 㑇
 # U+E81C:   #   Unihan: U+2E88 ⺈
 # U+E81D:   #   Unihan: U+2E8B ⺋
 # U+E81E:   #   Unihan: U+9FB4 龴
 U+E81F: wāi  #   Unihan: U+359E 㖞
 U+E820: hǎn  #   Unihan: U+361A 㘚
 U+E821: hǎn  #   Unihan: U+360E 㘎
 # U+E822:   #   Unihan: U+2E8C ⺌
 # U+E823:   #   Unihan: U+2E97 ⺗
 U+E824: zhòu,chǎo  #   Unihan: U+396E 㥮
 U+E825: zhòu  #   Unihan: U+3918 㤘
 # U+E826:   #   Unihan: U+9FB5 龵
 U+E827: gāng  #   Unihan: U+39CF 㧏
 U+E828: kuǎi  #   Unihan: U+39DF 㧟
 U+E829: sǒng  #   Unihan: U+3A73 㩳
 U+E82A: sǒng  #   Unihan: U+39D0 㧐
 # U+E82B:   #   Unihan: U+9FB6 龶
 # U+E82C:   #   Unihan: U+9FB7 龷
 U+E82D: gāng  #   Unihan: U+3B4E 㭎
 U+E82E: kuài  #   Unihan: U+3C6E 㱮
 U+E82F: tà  #   Unihan: U+3CE0 㳠
 # U+E830:   #   Unihan: U+2EA7 ⺧
 U+E831: pěng  #   Unihan: U+215D7 𡗗
 # U+E832:   #   Unihan: U+9FB8 龸
 # U+E833:   #   Unihan: U+2EAA ⺪
 U+E834: lōu  #   Unihan: U+4056 䁖
 U+E835: cǎn  #   Unihan: U+415F 䅟
 # U+E836:   #   Unihan: U+2EAE ⺮
 U+E837: chōu,chóu  #   Unihan: U+4337 䌷
 # U+E838:   #   Unihan: U+2EB3 ⺳
 # U+E839:   #   Unihan: U+2EB6 ⺶
 # U+E83A:   #   Unihan: U+2EB7 ⺷
 U+E83B: zāi  #   Unihan: U+2298F 𢦏
 U+E83C: bà,bēi  #   Unihan: U+43B1 䎱
 U+E83D: bà  #   Unihan: U+43AC 䎬
 # U+E83E:   #   Unihan: U+2EBB ⺻
 U+E83F: zhuān  #   Unihan: U+43DD 䏝
 U+E840: qióng  #   Unihan: U+44D6 䓖
 U+E841: kuì,huì  #   Unihan: U+4661 䙡
 U+E842: kuì  #   Unihan: U+464C 䙌
 # U+E843:   #   Unihan: U+9FB9 龹
 U+E844: xīn  #   Unihan: U+4723 䜣
 U+E845: yàn  #   Unihan: U+4729 䜩
 U+E846: jìng,qíng  #   Unihan: U+477C 䝼
 U+E847: qíng  #   Unihan: U+478D 䞍
 # U+E848:   #   Unihan: U+2ECA ⻊
 U+E849: shàn  #   Unihan: U+4947 䥇
 U+E84A: yé  #   Unihan: U+497A 䥺
 U+E84B: pō  #   Unihan: U+497D 䥽
 U+E84C: shàn  #   Unihan: U+4982 䦂
 U+E84D: zhuō  #   Unihan: U+4983 䦃
 U+E84E: shàn  #   Unihan: U+4985 䦅
 U+E84F: jué  #   Unihan: U+4986 䦆
 U+E850: wěn,chuài  #   Unihan: U+499F 䦟
 U+E851: zhèng  #   Unihan: U+499B 䦛
 U+E852: chuài  #   Unihan: U+49B7 䦷
 U+E853: zhèng  #   Unihan: U+49B6 䦶
 # U+E854:   #   Unihan: U+9FBA 龺
 U+E855: yíng  #   Unihan: U+241FE 𤇾
 U+E856: yú  #   Unihan: U+4CA3 䲣
 U+E857: yìn  #   Unihan: U+4C9F 䲟
 U+E858: chūn  #   Unihan: U+4CA0 䲠
 U+E859: qiū  #   Unihan: U+4CA1 䲡
 U+E85A: yú  #   Unihan: U+4C77 䱷
 U+E85B: téng  #   Unihan: U+4CA2 䲢
 U+E85C: shī  #   Unihan: U+4D13 䴓
 U+E85D: jiāo  #   Unihan: U+4D14 䴔
 U+E85E: liè  #   Unihan: U+4D15 䴕
 U+E85F: jīng  #   Unihan: U+4D16 䴖
 U+E860: jú  #   Unihan: U+4D17 䴗
 U+E861: tī  #   Unihan: U+4D18 䴘
 U+E862: pì  #   Unihan: U+4D19 䴙
 U+E863: yǎn  #   Unihan: U+4DAE 䶮
 # U+E864:   #   Unihan: U+9FBB 龻
--- a/third_party/python-pinyin/pinyin-data/LICENSE
+++ b/third_party/python-pinyin/pinyin-data/LICENSE
@ -0,0 +1,21 @@
 The MIT License (MIT)
 Copyright (c) 2016 mozillazg
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/third_party/python-pinyin/pinyin-data/Makefile
+++ b/third_party/python-pinyin/pinyin-data/Makefile
@ -0,0 +1,17 @@
 .PHONY: help
 help:
 	@echo "merge_unihan          merge Unihan data"
 	@echo "pua                   generate PUA"
 	@echo "check                 check unexpected char"
 .PHONY: merge_unihan
 merge_unihan: check
 	python merge_unihan.py
 .PHONY: pua
 pua:
 	python tools/gen_gb_pua.py > GBK_PUA.txt
 .PHONY: check
 check:
 	-rg 'ɡ|ɑ|í|è'
--- a/third_party/python-pinyin/pinyin-data/README.md
+++ b/third_party/python-pinyin/pinyin-data/README.md
@ -0,0 +1,68 @@
 # pinyin-data [![Build Status](https://travis-ci.org/mozillazg/pinyin-data.svg?branch=master)](https://travis-ci.org/mozillazg/pinyin-data)
 汉字拼音数据。
 ## 数据介绍
 拼音数据的格式：
    {code point}: {pinyins}  # {hanzi} {comments}
 * 以 `#` 开头的行是注释，行内 `#` 后面的字符也是注释
 * `{pinyins}` 中使用逗号分隔多个拼音
 * 示例：
        # 注释
        U+4E2D: zhōng,zhòng  # 中
 [Unihan Database][unihan] 数据版本：
 > Date: 2020-02-18 18:27:33 GMT [JHJ]     
 > Unicode version: 13.0.0
 * `kTGHZ2013.txt`: [Unihan Database][unihan] 中 [kTGHZ2013](http://www.unicode.org/reports/tr38/#kTGHZ2013) 部分的拼音数据（来源于《通用规范汉字字典》的拼音数据）
 * `kHanyuPinyin.txt`: [Unihan Database][unihan] 中 [kHanyuPinyin](http://www.unicode.org/reports/tr38/#kHanyuPinyin) 部分的拼音数据（来源于《漢語大字典》的拼音数据）
 * `kXHC1983.txt`: [Unihan Database][unihan] 中 [kXHC1983](http://www.unicode.org/reports/tr38/#kXHC1983) 部分的拼音数据（来源于《现代汉语词典》的拼音数据）
 * `kHanyuPinlu.txt`: [Unihan Database][unihan] 中 [kHanyuPinlu](http://www.unicode.org/reports/tr38/#kHanyuPinlu) 部分的拼音数据（来源于《現代漢語頻率詞典》的拼音数据）
 * `kMandarin.txt`: [Unihan Database][unihan] 中 [kMandarin](http://www.unicode.org/reports/tr38/#kMandarin) 部分的拼音数据（普通话中最常用的一个读音。zh-CN 为主，如果 zh-CN 中没有则使用 zh-TW 中的拼音）
 * `kMandarin_overwrite.txt`: 手工纠正 `kMandarin.txt` 中有误的拼音数据（**可以修改**）
 * `GBK_PUA.txt`: [Private Use Area](https://en.wikipedia.org/wiki/Private_Use_Areas) 中有拼音的汉字，参考 [GB 18030 - 维基百科，自由的百科全书](https://zh.wikipedia.org/wiki/GB_18030#PUA) （**可以修改**）
 * `nonCJKUI.txt`: 不属于 [CJK Unified Ideograph](https://en.wikipedia.org/wiki/CJK_Unified_Ideographs) 但是却有拼音的字符（**可以修改**）
 * `kanji.txt`: [日本自造汉字](https://zh.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97#7_%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97%E7%9A%84%E6%B1%89%E8%AF%AD%E6%99%AE%E9%80%9A%E8%AF%9D%E8%A7%84%E8%8C%83%E8%AF%BB%E9%9F%B3%E8%A1%A8) 的拼音数据 （**可以修改**）
 * `kMandarin_8105.txt`: [《通用规范汉字表》](https://zh.wikipedia.org/wiki/通用规范汉字表)(2013 年版)里 8105 个汉字最常用的一个读音 (**可以修改**)
 * `overwrite.txt`: 手工纠正的拼音数据（**可以修改**）
 * `pinyin.txt`: 合并上述文件后的拼音数据
 * `zdic.txt`: [汉典网](http://zdic.net) 的拼音数据（**可以修改**）
 ## 修改数据
 * 上面标注了 **可以修改** 字样的文件都可以直接修改
 * 如果汉字的拼音不需要修改，只是调整第一个读音的话，可以直接修改 `kMandarin_8105.txt` 这个文件
 * 执行 `make merge_unihan` 命令可以按照合并规则生成最新的 `pinyin.txt` 文件
 * 进入 unihan 目录，执行 `make update` 命令可以更新最新的 Unihan 数据
 ## 参考资料
 * [汉语拼音方案](http://www.moe.edu.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html)
 * [Unihan Database Lookup](http://www.unicode.org/charts/unihan.html)
 * [汉典 zdic.net](http://www.zdic.net/)
 * [字海网，叶典网](http://zisea.com/)
 * [国学大师_国学网](http://www.guoxuedashi.com/)
 * [Unicode、GB2312、GBK和GB18030中的汉字](http://www.fmddlmyy.cn/text24.html)
 * [GB 18030 - 维基百科，自由的百科全书](https://zh.wikipedia.org/wiki/GB_18030#PUA)
 * [通用规范汉字表 - 维基百科，自由的百科全书](https://zh.wikipedia.org/wiki/%E9%80%9A%E7%94%A8%E8%A7%84%E8%8C%83%E6%B1%89%E5%AD%97%E8%A1%A8)
 * [China’s 通用规范汉字表 (Tōngyòng Guīfàn Hànzìbiǎo)](https://blogs.adobe.com/CCJKType/2014/03/china-8105.html)
 * [日本汉字的汉语读音规范](http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/201001/t20100115_75698.html)
 * [日本汉字的汉语普通话规范读音表- 维基百科](https://zh.wikipedia.org/wiki/%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97#7_%E6%97%A5%E6%9C%AC%E6%B1%89%E5%AD%97%E7%9A%84%E6%B1%89%E8%AF%AD%E6%99%AE%E9%80%9A%E8%AF%9D%E8%A7%84%E8%8C%83%E8%AF%BB%E9%9F%B3%E8%A1%A8)
 * [漢語大字典（第二版）](http://www.ivantsoi.com/hydzd/index.html)
 [unihan]: http://www.unicode.org/charts/unihan.html
 ## 相关项目
 * [mozillazg/phrase-pinyin-data](https://github.com/mozillazg/phrase-pinyin-data): 词语拼音数据
--- a/third_party/python-pinyin/pinyin-data/kHanyuPinlu.txt
+++ b/third_party/python-pinyin/pinyin-data/kHanyuPinlu.txt
--- a/third_party/python-pinyin/pinyin-data/kHanyuPinyin.txt
+++ b/third_party/python-pinyin/pinyin-data/kHanyuPinyin.txt
--- a/third_party/python-pinyin/pinyin-data/kMandarin.txt
+++ b/third_party/python-pinyin/pinyin-data/kMandarin.txt
--- a/third_party/python-pinyin/pinyin-data/kMandarin_8105.txt
+++ b/third_party/python-pinyin/pinyin-data/kMandarin_8105.txt
--- a/third_party/python-pinyin/pinyin-data/kMandarin_overwrite.txt
+++ b/third_party/python-pinyin/pinyin-data/kMandarin_overwrite.txt
@ -0,0 +1,67 @@
 U+389C: kāng  # 㢜
 U+60B7: lì  # 悷
 U+417F: huá  # 䅿
 U+46BE: rén  # 䚾
 U+4B78: fù  # 䭸
 U+4B7B: fēn  # 䭻
 U+4CC9: dōng  # 䳉
 U+4D7B: huì  # 䵻
 U+57D4: pǔ  # 埔
 U+5A47: cǎi  # 婇
 U+5F6F: piāo  # 彯
 U+5F77: páng  # 彷
 U+60B7: lì  # 悷
 U+65FD: tūn  # 旽
 U+6A0B: tōng  # 樋
 U+6ADA: lǘ  # 櫚
 U+6E5E: zhēn  # 湞
 U+73D6: guāng  # 珖
 U+77A1: guī  # 瞡
 U+7BC9: zhù  # 築
 U+815C: méi  # 腜
 U+816C: róu  # 腬
 U+8192: ōu  # 膒
 U+8491: yīn  # 蒑
 U+8A09: fàn  # 訉
 U+90D8: lǚ  # 郘
 U+9D24: zhōng  # 鴤
 U+2031A: nòng  # 𠌚
 U+2141D: fú  # 𡐝
 U+21594: nuó  # 𡖔
 U+2199D: xiāo  # 𡦝
 U+21B0D: mí  # 𡬍
 U+21B10: yí  # 𡬐
 U+21B15: lóng  # 𡬕
 U+2243F: rǎng  # 𢐿
 U+2273D: kuí  # 𢜽
 U+22741: hōng  # 𢝁
 U+22892: sū  # 𢢒
 U+22A10: jí  # 𢨐
 U+245ED: xià  # 𤗭
 U+24704: huái  # 𤜄
 U+247AE: zhài  # 𤞮
 U+24856: yán  # 𤡖
 U+248B5: lài  # 𤢵
 U+249EB: jīn  # 𤧫
 U+2546B: kān  # 𥑫
 U+2588D: hù  # 𥢍
 U+2588F: diàn  # 𥢏
 U+25C1F: yuán  # 𥰟
 U+272D5: kùn  # 𧋕
 U+2757A: shuāng  # 𧕺
 U+275C8: nú  # 𧗈
 U+27956: lí  # 𧥖
 U+280A2: jí  # 𨂢
 U+2824B: tuō  # 𨉋
 U+284A8: hài  # 𨒨
 U+28ABF: liú  # 𨪿
 U+28DED: chán  # 𨷭
 U+28E30: jú  # 𨸰
 U+293CF: wéi  # 𩏏
 U+295F5: zhēng  # 𩗵
 U+29B5D: wǒ  # 𩭝
 U+2A048: zhuāng  # 𪁈
 U+2A2A2: shí  # 𪊢
 U+8B9D: zhán  # 讝
 U+3D14: jí  # 㴔
 U+8B26: qǐng  # 謦
--- a/third_party/python-pinyin/pinyin-data/kTGHZ2013.txt
+++ b/third_party/python-pinyin/pinyin-data/kTGHZ2013.txt
--- a/third_party/python-pinyin/pinyin-data/kXHC1983.txt
+++ b/third_party/python-pinyin/pinyin-data/kXHC1983.txt
--- a/third_party/python-pinyin/pinyin-data/kanji.txt
+++ b/third_party/python-pinyin/pinyin-data/kanji.txt
@ -0,0 +1,32 @@
 U+5302: yún  # 匂 yún 为日本汉字读音; xiōng 为现代汉语读音;
 U+4E3C: dǎn  # 丼 dǎn 为日本汉字读音; jǐng 为现代汉语读音;
 U+8FBB: shí  # 辻
 U+8FBC: rù  # 込
 U+51E7: jīn  # 凧
 U+6763: shān  # 杣
 U+67A0: zá  # 枠
 U+7551: tián  # 畑
 U+6803: lì  # 栃
 U+6802: méi  # 栂
 U+5CE0: kǎ  # 峠
 U+4FE3: yǔ  # 俣
 U+7C7E: rèn  # 籾
 U+7560: tián  # 畠
 U+96EB: xià  # 雫
 U+7B39: shì  # 笹
 U+5840: píng  # 塀
 U+6919: chāng  # 椙
 U+7872: yù  # 硲
 U+86EF: lǎo  # 蛯
 U+55B0: cān  # 喰
 U+643E: zhà  # 搾
 U+698A: shén  # 榊
 U+50CD: dòng  # 働
 U+7CC0: huā  # 糀
 U+9786: bǐng  # 鞆
 U+69C7: zhēn  # 槇
 U+6A2B: jiān  # 樫
 U+9D2B: tián  # 鴫
 U+567A: xīn  # 噺
 U+7C17: liáng  # 簗
 U+9EBF: mó  # 麿
--- a/third_party/python-pinyin/pinyin-data/merge_unihan.py
+++ b/third_party/python-pinyin/pinyin-data/merge_unihan.py
@ -0,0 +1,123 @@
 # -*- coding: utf-8 -*-
 import collections
 def code_to_hanzi(code):
    """Return the character named by a ``U+XXXX`` code point label.

    :param code: code point written as ``U+`` followed by hexadecimal
                 digits, e.g. ``'U+4E2D'``.
    :return: the one-character string for that code point.
    """
    hex_literal = code.replace('U+', '0x')
    return chr(int(hex_literal, 16))
 def sort_pinyin_dict(pinyin_dict):
    """Return an ``OrderedDict`` of the entries ordered by code point value.

    Keys are ``U+XXXX`` labels and are compared by their numeric value, not
    as text, so ``U+20000`` sorts after ``U+4E2D``.
    """
    def numeric_code(entry):
        label = entry[0]
        return int(label.replace('U+', '0x'), 16)
    ordered_entries = sorted(pinyin_dict.items(), key=numeric_code)
    return collections.OrderedDict(ordered_entries)
 def remove_dup_items(lst):
    """Return a new list without repeated items, keeping first-seen order."""
    unique_items = []
    for candidate in lst:
        if candidate in unique_items:
            continue
        unique_items.append(candidate)
    return unique_items
 def parse_pinyins(fp):
    """Parse pinyin data lines into a ``{code: [pinyin, ...]}`` dict.

    A data line looks like ``U+4E2D: zhōng,zhòng  # 中``.  Blank lines and
    lines starting with ``#`` are ignored, and everything after an inline
    ``#`` is treated as a comment.

    :param fp: iterable of text lines (for example an open text file).
    :return: dict mapping the code label (``'U+4E2D'``) to its list of
             pinyin strings in file order.  A later line for the same code
             replaces an earlier one.  Codes without any reading are
             left out.
    :raises ValueError: if the part before the comment does not contain
                        exactly one ``:``.
    """
    pinyin_map = {}
    for line in fp:
        line = line.strip()
        if line.startswith('#') or not line:
            continue
        code, pinyin = line.split('#')[0].split(':')
        # Commas and whitespace both separate readings.  Splitting this way
        # never produces empty items, so "a, b" gives ['a', 'b'] instead of
        # ['a', '', 'b'].
        pinyins = pinyin.replace(',', ' ').split()
        if not pinyins:
            # Nothing to record for this code; do not store [''].
            continue
        pinyin_map[code.strip()] = pinyins
    return pinyin_map
 def merge(raw_pinyin_map, adjust_pinyin_map, overwrite_pinyin_map):
    """Combine the three pinyin sources into one de-duplicated mapping.

    Only codes present in ``raw_pinyin_map`` appear in the result:

    * a code listed in ``overwrite_pinyin_map`` takes that list as it is;
    * otherwise a code listed in ``adjust_pinyin_map`` gets the adjust
      readings first, followed by the raw readings;
    * any other code keeps its raw readings.

    Repeated readings are dropped, keeping the first occurrence.
    """
    merged = {}
    for code in raw_pinyin_map:
        if code in overwrite_pinyin_map:
            readings = overwrite_pinyin_map[code]
        elif code in adjust_pinyin_map:
            readings = adjust_pinyin_map[code] + raw_pinyin_map[code]
        else:
            readings = raw_pinyin_map[code]
        merged[code] = remove_dup_items(readings)
    return merged
 def save_data(pinyin_map, writer):
    """Write each entry to ``writer`` as a ``U+XXXX: a,b  # 字`` line.

    :param pinyin_map: mapping of ``U+XXXX`` labels to lists of pinyin.
    :param writer: object with a ``write(str)`` method.
    """
    template = '{code}: {pinyin}  # {hanzi}\n'
    for code, pinyins in pinyin_map.items():
        hanzi = code_to_hanzi(code)
        joined = ','.join(pinyins)
        writer.write(template.format(code=code, pinyin=joined, hanzi=hanzi))
 def extend_pinyins(old_map, new_map, only_no_exists=False):
    """Merge the readings of ``new_map`` into ``old_map`` in place.

    :param old_map: dict ``{code: [pinyin, ...]}`` that gets updated.
    :param new_map: dict with the readings to add.  Its lists are never
                    shared with ``old_map``.
    :param only_no_exists: when true, only codes missing from ``old_map``
                           are added and existing entries are left alone.
                           When false, the new readings are appended to any
                           existing ones (duplicates are not removed here).
    """
    for code, pinyins in new_map.items():
        if only_no_exists:   # only add codes that are not present yet
            if code not in old_map:
                # Store a copy: sharing the list object would let a later
                # extend() on old_map silently grow new_map's entry too.
                old_map[code] = list(pinyins)
        else:
            old_map.setdefault(code, []).extend(pinyins)
 if __name__ == '__main__':
    # Build pinyin.txt from the individual data files in the current
    # directory.  The files hold tone-marked pinyin and CJK characters, so
    # they are read and written as UTF-8 explicitly instead of depending on
    # the locale's default encoding.
    raw_pinyin_map = {}
    with open('kHanyuPinyin.txt', encoding='utf-8') as fp:
        khanyupinyin = parse_pinyins(fp)
        raw_pinyin_map.update(khanyupinyin)
    with open('kXHC1983.txt', encoding='utf-8') as fp:
        kxhc1983 = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, kxhc1983)
    with open('nonCJKUI.txt', encoding='utf-8') as fp:
        noncjkui = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, noncjkui)
    # adjust_pinyin_map collects the readings that merge() puts in front of
    # the raw readings; it is grown step by step by the files below.
    with open('kMandarin_8105.txt', encoding='utf-8') as fp:
        adjust_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin_overwrite.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kTGHZ2013.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kHanyuPinlu.txt', encoding='utf-8') as fp:
        khanyupinyinlu = parse_pinyins(fp)
        # NOTE(review): ``_map`` still holds the kTGHZ2013 data here, so the
        # kHanyuPinlu readings parsed above are only used by the sanity
        # check further down and never merged.  This looks like a
        # copy-paste slip, but changing it would alter the generated
        # pinyin.txt, so it is left as is -- confirm the intent.
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('GBK_PUA.txt', encoding='utf-8') as fp:
        pua_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, pua_pinyin_map)
    with open('kanji.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        # kanji readings are only used for codes no other source covers.
        extend_pinyins(raw_pinyin_map, _map, only_no_exists=True)
    with open('overwrite.txt', encoding='utf-8') as fp:
        overwrite_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, overwrite_pinyin_map)
    new_pinyin_map = merge(raw_pinyin_map, adjust_pinyin_map,
                           overwrite_pinyin_map)
    new_pinyin_map = sort_pinyin_dict(new_pinyin_map)
    # Sanity checks: no code of any source may be missing from the result.
    assert len(new_pinyin_map) == len(raw_pinyin_map)
    code_set = set(new_pinyin_map.keys())
    assert set(khanyupinyin.keys()) - code_set == set()
    assert set(khanyupinyinlu.keys()) - code_set == set()
    assert set(kxhc1983.keys()) - code_set == set()
    assert set(adjust_pinyin_map.keys()) - code_set == set()
    assert set(overwrite_pinyin_map.keys()) - code_set == set()
    assert set(pua_pinyin_map.keys()) - code_set == set()
    with open('pinyin.txt', 'w', encoding='utf-8') as fp:
        fp.write('# version: 0.10.2\n')
        fp.write('# source: https://github.com/mozillazg/pinyin-data\n')
        save_data(new_pinyin_map, fp)
--- a/third_party/python-pinyin/pinyin-data/nonCJKUI.txt
+++ b/third_party/python-pinyin/pinyin-data/nonCJKUI.txt
@ -0,0 +1 @@
 U+3007: líng,yuán,xīng  # 〇
--- a/third_party/python-pinyin/pinyin-data/overwrite.txt
+++ b/third_party/python-pinyin/pinyin-data/overwrite.txt
@ -0,0 +1,63 @@
 # 手工纠正错误的拼音数据
 # 井号开头的行将会被忽略，可以用作注释
 # 数据格式：{code point}: {pinyins}  # {hanzi}
 # 示例：
 # U+4E2D: zhōng,zhòng  # 中
 U+5353: zhuó,zhuō  # 卓
 U+5565: shá,shà  # 啥
 U+5666: yuě,huì  # 噦
 U+59B3: nǐ,nǎi  # 妳
 U+8BB8: xǔ,hǔ  # 许
 U+94AD: tǒu,dǒu  # 钭
 U+9E00: chǔ,zhú,chù  # 鸀
 U+E815: yè  # 
 U+E816: zuǒ,yǒu  # 
 U+E81B: zhòu,zhū  # 
 U+E81D: jié,jiē  # 
 U+E824: zhòu  # 
 U+E826: shǒu  # 
 U+E82B: fēng  # 
 U+E82C: gòng  # 
 U+E82E: huì,kuì  # 
 U+E830: jiān  # 
 U+E831: ēn  # 
 U+E832: xiǎo  # 
 U+E834: lóu,lǘ  # 
 U+E835: cǎn,shān,cēn  # 
 U+E836: zhú  # 
 U+E838: wǎng  # 
 U+E83A: yáng,xiáng  # 
 U+E83D: bà,bēi  # 
 U+E83F: zhuān,zhuán,chuǎn,chún  # 
 U+E842: kuì,huì  # 
 U+E843: juǎn  # 
 U+E846: qíng  # 
 U+E84A: yé,yá  # 
 U+E850: chuài  # 
 U+E854: zhuó  # 
 U+E864: luán  # 
 U+241FE: yíng  # 𤇾
 U+275C8: nú  # 𧗈
 U+47C1: xiāo,chāo  # 䟁
 U+9EBF: mí  # 麿
 U+7C17: zhù  # 簗
 U+8279: cǎo  # 艹
 U+88CF: lǐ  # 裏
 U+88E1: lǐ  # 裡
 U+5206: fēn,fèn,fén  # 分
 U+208E1: fèng  # 𠣡
 U+2589F: hù  # 𥢟
 U+258F9: ràn  # 𥣹
 U+287B3: qú  # 𨞳
 U+2A008: yuān  # 𪀈
 U+9EFE: mǐn,miǎn,měng  # 黾
 U+55A3: xǔ  # 喣
 U+529A: zhú  # 劚
 U+532E: kuì,guì  # 匮
 U+9400: kuì,guì  # 鐀
 U+87AB: shì,zhē  # 螫
 U+5C82: qǐ,kǎi  # 岂
 U+534E: huá,huà,huā  # 华
 U+5455: ǒu,ōu,òu  # 呕
 U+4ECE: cóng,zòng  # 从
 U+513F: ér,er,rén  # 儿
--- a/third_party/python-pinyin/pinyin-data/pinyin.txt
+++ b/third_party/python-pinyin/pinyin-data/pinyin.txt
--- a/third_party/python-pinyin/pinyin-data/tools/china-8105-06062014.txt
+++ b/third_party/python-pinyin/pinyin-data/tools/china-8105-06062014.txt
--- a/third_party/python-pinyin/pinyin-data/tools/gen_8105.py
+++ b/third_party/python-pinyin/pinyin-data/tools/gen_8105.py
@ -0,0 +1,48 @@
 # -*- coding: utf-8 -*-
 """生成初始的 kMandarin_8105.txt"""
 from merge_unihan import parse_pinyins, code_to_hanzi
 def parse_china_x():
    """Yield the first field of every data line of the 8105 character list.

    Reads ``tools/china-8105-06062014.txt`` relative to the current working
    directory.  Blank lines and lines starting with ``#`` are skipped; for
    the remaining lines the first whitespace-separated field is yielded.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open('tools/china-8105-06062014.txt', encoding='utf-8') as fp:
        for line in fp:
            line = line.strip()
            if line.startswith('#') or not line:
                continue
            yield line.split()[0]
 def parse_zdic():
    """Return the pinyin mapping parsed from ``zdic.txt``.

    The file is looked up in the current working directory and decoded as
    UTF-8 regardless of the platform's default encoding.
    """
    with open('zdic.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
 def parse_kmandain():
    """Return the pinyin mapping parsed from ``pinyin.txt``.

    The file is looked up in the current working directory and decoded as
    UTF-8 regardless of the platform's default encoding.
    """
    with open('pinyin.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
 def diff(kmandarin, zdic, commons):
    """Yield one output line per code in ``commons``.

    For every code the first reading of ``kmandarin`` is preferred.  When
    ``zdic`` has a different first reading, it is appended to the comment
    as ``-> reading``.  Codes only known to ``zdic`` use its first reading.
    Codes known to neither are emitted as a commented-out line whose
    reading is the placeholder ``<-``.

    :param kmandarin: mapping of code label to list of readings.
    :param zdic: mapping of code label to list of readings.
    :param commons: iterable of code labels to report on.
    """
    plain = '{0}: {1}  # {2}'
    for key in commons:
        hanzi = code_to_hanzi(key)
        if key not in kmandarin:
            if key in zdic:
                yield plain.format(key, zdic[key][0], hanzi)
            else:
                yield '# {0}: {1}  # {2}'.format(key, '<-', hanzi)
            continue
        value = kmandarin[key][0]
        if key in zdic and value != zdic[key][0]:
            yield '{0}: {1}  # {2} -> {3}'.format(
                key, value, hanzi, zdic[key][0]
            )
        else:
            yield plain.format(key, value, hanzi)
 if __name__ == '__main__':
    # Load both reading sources and the character list, then print the
    # generated lines to standard output.
    zdic = parse_zdic()
    kmandarin = parse_kmandain()
    commons = parse_china_x()
    for entry in diff(kmandarin, zdic, commons):
        print(entry)
--- a/third_party/python-pinyin/pinyin-data/tools/gen_gb_pua.py
+++ b/third_party/python-pinyin/pinyin-data/tools/gen_gb_pua.py
@ -0,0 +1,166 @@
 # -*- coding: utf-8 -*-
 import re
 import sys
 sys.path.append('.')
 from merge_unihan import parse_pinyins
 def get_pinyins(file_path):
    """Return the pinyin mapping parsed from the file at ``file_path``.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding.
    """
    with open(file_path, encoding='utf-8') as fp:
        return parse_pinyins(fp)
 def get_pua_map():
    text = '''
 # A6D9 		E78D () 	FE10 (︐)
 # A6DA 		E78E () 	FE12 (︒)
 # A6DB 		E78F () 	FE11 (︑)
 # A6DC 		E790 () 	FE13 (︓)
 # A6DD 		E791 () 	FE14 (︔)
 # A6DE 		E792 () 	FE15 (︕)
 # A6DF 		E793 () 	FE16 (︖)
 # A6EC 		E794 () 	FE17 (︗)
 # A6ED 		E795 () 	FE18 (︘)
 # A8BC 	E7C7 () 	1E3F (ḿ) 	1E3F (ḿ)
 # A8BF 	E7C8 () 	01F9 (ǹ) 	01F9 (ǹ)
 # A989 	E7E7 () 	303E (〾) 	303E (〾)
 # A98A 	E7E8 () 	2FF0 (⿰) 	2FF0 (⿰)
 # A98B 	E7E9 () 	2FF1 (⿱) 	2FF1 (⿱)
 # A98C 	E7EA () 	2FF2 (⿲) 	2FF2 (⿲)
 # A98D 	E7EB () 	2FF3 (⿳) 	2FF3 (⿳)
 # A98E 	E7EC () 	2FF4 (⿴) 	2FF4 (⿴)
 # A98F 	E7ED () 	2FF5 (⿵) 	2FF5 (⿵)
 # A990 	E7EE () 	2FF6 (⿶) 	2FF6 (⿶)
 # A991 	E7EF () 	2FF7 (⿷) 	2FF7 (⿷)
 # A992 	E7F0 () 	2FF8 (⿸) 	2FF8 (⿸)
 # A993 	E7F1 () 	2FF9 (⿹) 	2FF9 (⿹)
 # A994 	E7F2 () 	2FFA (⿺) 	2FFA (⿺)
 # A995 	E7F3 () 	2FFB (⿻) 	2FFB (⿻)
 FE50 	E815 () 	2E81 (⺁) 	2E81 (⺁)
 FE51 	E816 () 	E816 () 	20087 (𠂇)
 FE52 	E817 () 	E817 () 	20089 (𠂉)
 FE53 	E818 () 	E818 () 	200CC (𠃌)
 FE54 	E819 () 	2E84 (⺄) 	2E84 (⺄)
 FE55 	E81A () 	3473 (㑳) 	3473 (㑳)
 FE56 	E81B () 	3447 (㑇) 	3447 (㑇)
 FE57 	E81C () 	2E88 (⺈) 	2E88 (⺈)
 FE58 	E81D () 	2E8B (⺋) 	2E8B (⺋)
 FE59 	E81E () 	E81E () 	9FB4 (龴)
 FE5A 	E81F () 	359E (㖞) 	359E (㖞)
 FE5B 	E820 () 	361A (㘚) 	361A (㘚)
 FE5C 	E821 () 	360E (㘎) 	360E (㘎)
 FE5D 	E822 () 	2E8C (⺌) 	2E8C (⺌)
 FE5E 	E823 () 	2E97 (⺗) 	2E97 (⺗)
 FE5F 	E824 () 	396E (㥮) 	396E (㥮)
 FE60 	E825 () 	3918 (㤘) 	3918 (㤘)
 FE61 	E826 () 	E826 () 	9FB5 (龵)
 FE62 	E827 () 	39CF (㧏) 	39CF (㧏)
 FE63 	E828 () 	39DF (㧟) 	39DF (㧟)
 FE64 	E829 () 	3A73 (㩳) 	3A73 (㩳)
 FE65 	E82A () 	39D0 (㧐) 	39D0 (㧐)
 FE66 	E82B () 	E82B () 	9FB6 (龶)
 FE67 	E82C () 	E82C () 	9FB7 (龷)
 FE68 	E82D () 	3B4E (㭎) 	3B4E (㭎)
 FE69 	E82E () 	3C6E (㱮) 	3C6E (㱮)
 FE6A 	E82F () 	3CE0 (㳠) 	3CE0 (㳠)
 FE6B 	E830 () 	2EA7 (⺧) 	2EA7 (⺧)
 FE6C 	E831 () 	E831 () 	215D7 (𡗗)
 FE6D 	E832 () 	E832 () 	9FB8 (龸)
 FE6E 	E833 () 	2EAA (⺪) 	2EAA (⺪)
 FE6F 	E834 () 	4056 (䁖) 	4056 (䁖)
 FE70 	E835 () 	415F (䅟) 	415F (䅟)
 FE71 	E836 () 	2EAE (⺮) 	2EAE (⺮)
 FE72 	E837 () 	4337 (䌷) 	4337 (䌷)
 FE73 	E838 () 	2EB3 (⺳) 	2EB3 (⺳)
 FE74 	E839 () 	2EB6 (⺶) 	2EB6 (⺶)
 FE75 	E83A () 	2EB7 (⺷) 	2EB7 (⺷)
 FE76 	E83B () 	E83B () 	2298F (𢦏)
 FE77 	E83C () 	43B1 (䎱) 	43B1 (䎱)
 FE78 	E83D () 	43AC (䎬) 	43AC (䎬)
 FE79 	E83E () 	2EBB (⺻) 	2EBB (⺻)
 FE7A 	E83F () 	43DD (䏝) 	43DD (䏝)
 FE7B 	E840 () 	44D6 (䓖) 	44D6 (䓖)
 FE7C 	E841 () 	4661 (䙡) 	4661 (䙡)
 FE7D 	E842 () 	464C (䙌) 	464C (䙌)
 FE7E 	E843 () 	E843 () 	9FB9 (龹)
 FE80 	E844 () 	4723 (䜣) 	4723 (䜣)
 FE81 	E845 () 	4729 (䜩) 	4729 (䜩)
 FE82 	E846 () 	477C (䝼) 	477C (䝼)
 FE83 	E847 () 	478D (䞍) 	478D (䞍)
 FE84 	E848 () 	2ECA (⻊) 	2ECA (⻊)
 FE85 	E849 () 	4947 (䥇) 	4947 (䥇)
 FE86 	E84A () 	497A (䥺) 	497A (䥺)
 FE87 	E84B () 	497D (䥽) 	497D (䥽)
 FE88 	E84C () 	4982 (䦂) 	4982 (䦂)
 FE89 	E84D () 	4983 (䦃) 	4983 (䦃)
 FE8A 	E84E () 	4985 (䦅) 	4985 (䦅)
 FE8B 	E84F () 	4986 (䦆) 	4986 (䦆)
 FE8C 	E850 () 	499F (䦟) 	499F (䦟)
 FE8D 	E851 () 	499B (䦛) 	499B (䦛)
 FE8E 	E852 () 	49B7 (䦷) 	49B7 (䦷)
 FE8F 	E853 () 	49B6 (䦶) 	49B6 (䦶)
 FE90 	E854 () 	E854 () 	9FBA (龺)
 FE91 	E855 () 	E855 () 	241FE (𤇾)
 FE92 	E856 () 	4CA3 (䲣) 	4CA3 (䲣)
 FE93 	E857 () 	4C9F (䲟) 	4C9F (䲟)
 FE94 	E858 () 	4CA0 (䲠) 	4CA0 (䲠)
 FE95 	E859 () 	4CA1 (䲡) 	4CA1 (䲡)
 FE96 	E85A () 	4C77 (䱷) 	4C77 (䱷)
 FE97 	E85B () 	4CA2 (䲢) 	4CA2 (䲢)
 FE98 	E85C () 	4D13 (䴓) 	4D13 (䴓)
 FE99 	E85D () 	4D14 (䴔) 	4D14 (䴔)
 FE9A 	E85E () 	4D15 (䴕) 	4D15 (䴕)
 FE9B 	E85F () 	4D16 (䴖) 	4D16 (䴖)
 FE9C 	E860 () 	4D17 (䴗) 	4D17 (䴗)
 FE9D 	E861 () 	4D18 (䴘) 	4D18 (䴘)
 FE9E 	E862 () 	4D19 (䴙) 	4D19 (䴙)
 FE9F 	E863 () 	4DAE (䶮) 	4DAE (䶮)
 FEA0 	E864 () 	E864 () 	9FBB (龻)
 '''.strip()
    for line in text.split('\n'):
        if line.startswith('#'):
            continue
        gb, gbk, gb_18030, unicode_4_1 = line.split('\t')
        # print(gb, gbk, gb_18030, unicode_4_1)
        # print(get_han_point(gbk), get_han_point(unicode_4_1))
        yield get_han_point(gbk), get_han_point(unicode_4_1)
 def get_han_point(text):
    """Split a ``CODE (char)`` table cell into a ``(code, char)`` pair.

    :param text: cell text such as ``'4CA3 (䲣)'``.
    :return: tuple of the code and the text inside the parentheses for the
             first such pair in ``text``; ``('', '')`` for empty input.
    :raises IndexError: if ``text`` is non-empty but contains no pair.
    """
    if text:
        cell_pattern = r'(?P<point>[A-Z0-9]+) \((?P<han>[^\)]+)\)'
        return re.findall(cell_pattern, text)[0]
    return '', ''
 def point_to_u_point(point):
    """Return ``point`` upper-cased and prefixed with ``U+`` if needed."""
    upper = point.upper()
    return upper if upper.startswith('U+') else 'U+' + upper
 def gen_pua_data(gbk, unicode_4_1, pinyin_map):
    """Format one output line for a PUA code point.

    :param gbk: ``(code, char)`` pair of the PUA code point.
    :param unicode_4_1: ``(code, char)`` pair of the matching standard
                        code point.
    :param pinyin_map: mapping of ``U+XXXX`` labels to lists of pinyin; the
                       readings are looked up under the standard code.
    :return: the formatted line; it is prefixed with ``# `` when no reading
             was found.
    """
    gbk_code, gbk_char = gbk
    gbk_label = point_to_u_point(gbk_code)
    std_code, std_char = unicode_4_1
    std_label = point_to_u_point(std_code)
    readings = ','.join(pinyin_map.get(std_label, []))
    marker = '' if readings else '# '
    template = (
        '{prefix}{gbk_point}: {pinyins}  # {gbk_han}  '
        'Unihan: {unicode_4_1_point} {unicode_4_1_han}'
    )
    return template.format(
        prefix=marker,
        gbk_point=gbk_label,
        pinyins=readings,
        gbk_han=gbk_char,
        unicode_4_1_point=std_label,
        unicode_4_1_han=std_char,
    )
 if __name__ == '__main__':
    # Print a two-line comment header followed by one line per table row.
    pinyin_map = get_pinyins('pinyin.txt')
    header = ('# GBK/GB 18030 PUA 映射\n'
              '# 详见：https://zh.wikipedia.org/wiki/GB_18030#PUA')
    print(header)
    for gbk, unicode_4_1 in get_pua_map():
        print(gen_pua_data(gbk, unicode_4_1, pinyin_map))
--- a/third_party/python-pinyin/pinyin-data/tools/improve_8105.py
+++ b/third_party/python-pinyin/pinyin-data/tools/improve_8105.py
@ -0,0 +1,88 @@
 # -*- coding: utf-8 -*-
 """补充 8105 中汉字的拼音数据"""
 from collections import namedtuple
 import re
 import sys
 from pyquery import PyQuery
 import requests
 # Captures the run of non-whitespace characters between the literal label
 # "拼音：" and the following space.
 re_pinyin = re.compile(r'拼音：(?P<pinyin>\S+) ')
 # Captures the run of non-whitespace characters after "统一码" (optionally
 # followed by one word character) and "：", up to the following space.
 re_code = re.compile(r'统一码\w?：(?P<code>\S+) ')
 # Captures the first run of non-whitespace characters that follows
 # "异体字：" and at least one whitespace character.
 re_alternate = re.compile(r'异体字：\s+?(?P<alternate>\S+)')
 # Record built by fetch_info() and parse_hanzi() below.
 HanziInfo = namedtuple('HanziInfo', 'pinyin code alternate')
 def fetch_html(url, params, timeout=30):
    """Fetch ``url`` with HTTP GET and return the raw response body.

    :param url: address to request.
    :param params: query-string parameters handed to ``requests.get``.
    :param timeout: seconds to wait for the server.  Without a timeout
                    ``requests`` would wait indefinitely on a stalled
                    connection.
    :return: the response body as bytes (``response.content``).
    :raises requests.exceptions.Timeout: if the server does not answer
                                         within ``timeout`` seconds.
    """
    response = requests.get(url, params=params, timeout=timeout)
    return response.content
 def fetch_info(hanzi):
    """Look up ``hanzi`` on guoxuedashi.com and scrape the result page.

    :param hanzi: search key sent as the ``sokeyzi`` query parameter.
    :return: ``HanziInfo(pinyin, code, alternate)``; ``pinyin`` and
             ``alternate`` are ``''`` when the page text has no match.
    :raises AttributeError: if the page text contains no code point match.
    """
    html = fetch_html(
        'http://www.guoxuedashi.com/zidian/so.php',
        {'sokeyzi': hanzi, 'kz': 1, 'submit': ''},
    )
    page = PyQuery(html)
    # Continue with the second cell matched by the selector.
    cell = PyQuery(page('table.zui td')[1])
    text = cell('tr').text()
    # NOTE(review): this calls the cell-level PyQuery object with the raw
    # HTML, exactly as the original code did; presumably that re-parses the
    # whole page -- confirm against the PyQuery documentation.
    text_alternate = cell(html)('.info_txt2')('em').text()
    pinyin_match = re_pinyin.search(text)
    pinyin = '' if pinyin_match is None else pinyin_match.group('pinyin')
    code = re_code.search(text).group('code')
    alternate_match = re_alternate.search(text_alternate)
    if alternate_match is None:
        alternate = ''
    else:
        alternate = alternate_match.group('alternate')
    return HanziInfo(pinyin, code, alternate)
 def parse_hanzi(hanzi):
    """Fetch the info for ``hanzi``, plus its variant's info if needed.

    The variant is only looked up when the character itself has no pinyin
    but names a variant form.

    :return: ``HanziInfo`` whose ``alternate`` field is the variant's
             ``HanziInfo``, or ``''`` when no second lookup was made.
    """
    info = fetch_info(hanzi)
    needs_fallback = (not info.pinyin) and info.alternate
    alternate = fetch_info(info.alternate) if needs_fallback else ''
    return HanziInfo(info.pinyin, info.code, alternate)
 def main(lines):
    """Yield ``lines`` with placeholder entries filled in where possible.

    A line is a placeholder when it starts with ``# U+`` and contains
    ``<-``.  For such a line the code point is looked up online.  If a
    pinyin is found, the leading ``# `` is dropped and ``<-`` is replaced
    by the pinyin.  When the pinyin came from a variant form, a
    ``=> U+code`` note (and ``?->`` with any further readings) is appended.
    All other lines, and placeholders without a result, pass through.
    Every yielded line is stripped of surrounding whitespace.
    """
    for line in lines:
        is_placeholder = line.startswith('# U+') and '<-' in line
        if not is_placeholder:
            yield line.strip()
            continue
        # '# U+xxx: ...' -> 'U+xxx' -> 'xxx'
        hex_code = line.split(':')[0].strip('# ')[2:]
        info = parse_hanzi(hex_code)
        pinyin = info.pinyin
        suffix = ''
        if (not pinyin) and info.alternate:
            fallback = info.alternate
            pinyin = fallback.pinyin
            suffix = '  => U+{0}'.format(fallback.code)
            if ',' in pinyin:
                # Keep the first reading; list the rest for manual review.
                pinyin, remaining = pinyin.split(',', 1)
                suffix += '  ?-> ' + remaining
        if pinyin:
            # Drop the leading '# ' and fill in the placeholder.
            line = line.strip()[2:].replace('<-', pinyin) + suffix
        yield line.strip()
 if __name__ == '__main__':
    # Usage: pass the path of the file to improve as the first argument;
    # the result is printed to standard output.
    args = sys.argv[1:]
    input_file = args[0]
    # The input holds pinyin and CJK text, so decode it as UTF-8 explicitly
    # rather than with the platform's default encoding.
    with open(input_file, encoding='utf-8') as fp:
        for line in main(fp):
            print(line)
--- a/third_party/python-pinyin/pinyin-data/tools/requirements.txt
+++ b/third_party/python-pinyin/pinyin-data/tools/requirements.txt
@ -0,0 +1,2 @@
 pyquery==1.2.13
 requests==2.20.0
--- a/third_party/python-pinyin/pinyin-data/unihan/.gitignore
+++ b/third_party/python-pinyin/pinyin-data/unihan/.gitignore
@ -0,0 +1 @@
 Unihan*
--- a/third_party/python-pinyin/pinyin-data/unihan/Makefile
+++ b/third_party/python-pinyin/pinyin-data/unihan/Makefile
@ -0,0 +1,20 @@
 .PHONY: help
 help:
 	@echo "parse		parse Unihan database "
 	@echo "update		update Unihan database"
 	@echo "diff		diff between Unihan data and parsed data"
 .PHONY:parse
 parse:
 	@python parse_pinyin.py
 .PHONY:update
 update:
 	-rm Unihan*
 	wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip -O Unihan.zip
 	unzip Unihan.zip
 	python parse_pinyin.py
 .PHONY:diff
 diff:
 	@bash diff.sh
--- a/third_party/python-pinyin/pinyin-data/unihan/README.md
+++ b/third_party/python-pinyin/pinyin-data/unihan/README.md
@ -0,0 +1,9 @@
 # Unihan Database
 http://www.unicode.org/charts/unihan.html
 Update Unihan database:
 ```
 make update
 ```
--- a/third_party/python-pinyin/pinyin-data/unihan/diff.sh
+++ b/third_party/python-pinyin/pinyin-data/unihan/diff.sh
@ -0,0 +1,14 @@
 #!/usr/bin/env bash
 set -euo pipefail
 IFS=$'\n\t'
 function main() {
    # Print, for every reading field, the number of lines in the parsed
    # <kind>.txt file next to the number of non-comment lines of
    # Unihan_Readings.txt that mention that field.
    printf '%-14s  %-8s  %-8s\n' '' 'parsed' 'Unihan'
    # Same field list as parse_pinyin.py, which also writes kTGHZ2013.txt.
    for kind in 'kHanyuPinyin' 'kMandarin' 'kHanyuPinlu' 'kXHC1983' 'kTGHZ2013'
    do
        # grep reads the files directly; no pager is needed in a pipeline.
        unihanCount=$(grep -v '^#' Unihan_Readings.txt | grep -c "$kind")
        parsedCount=$(grep -c "" "$kind".txt)
        printf '%-14s  %-8s  %-8s\n' "$kind" "$parsedCount" "$unihanCount"
    done
 }
 main
--- a/third_party/python-pinyin/pinyin-data/unihan/kHanyuPinlu.txt
+++ b/third_party/python-pinyin/pinyin-data/unihan/kHanyuPinlu.txt
@ -0,0 +1 @@
 ../kHanyuPinlu.txt
--- a/third_party/python-pinyin/pinyin-data/unihan/kHanyuPinyin.txt
+++ b/third_party/python-pinyin/pinyin-data/unihan/kHanyuPinyin.txt
@ -0,0 +1 @@
 ../kHanyuPinyin.txt
--- a/third_party/python-pinyin/pinyin-data/unihan/kMandarin.txt
+++ b/third_party/python-pinyin/pinyin-data/unihan/kMandarin.txt
@ -0,0 +1 @@
 ../kMandarin.txt
--- a/third_party/python-pinyin/pinyin-data/unihan/kTGHZ2013.txt
+++ b/third_party/python-pinyin/pinyin-data/unihan/kTGHZ2013.txt
@ -0,0 +1 @@
 ../kTGHZ2013.txt
--- a/third_party/python-pinyin/pinyin-data/unihan/kXHC1983.txt
+++ b/third_party/python-pinyin/pinyin-data/unihan/kXHC1983.txt
@ -0,0 +1 @@
 ../kXHC1983.txt
--- a/third_party/python-pinyin/pinyin-data/unihan/parse_pinyin.py
+++ b/third_party/python-pinyin/pinyin-data/unihan/parse_pinyin.py
@ -0,0 +1,102 @@
 # -*- coding: utf-8 -*-
 import functools
 import operator
 import re
 from typing import Iterator
 from typing import Tuple
 def re_match_pinyin_line(kind):
    """Compile the regex for a tab-separated ``U+code<TAB>kind<TAB>value`` line.

    :param kind: field name inserted verbatim into the pattern.
    :return: compiled pattern with the named groups ``code`` and ``pinyin``.
    """
    template = r'^U\+(?P<code>[0-9A-Z]+)\t{}\t(?P<pinyin>.+)$'
    return re.compile(template.format(kind))
 # One pinyin: a run of characters that are not digits, dots or commas.
 PINYIN = r'[^\d\.,]+'
 # kHanyuPinyin value: one or more comma-separated NNNNN.NN0 positions, a
 # colon, then comma-separated pinyins.  Group 1 holds every pinyin except
 # the last (each with its trailing comma); group 2 holds the last one.
 re_khanyupinyin = re.compile(r'''
    (?:\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:
    ((?:%(pinyin)s,)*)
    (%(pinyin)s)
 ''' % ({'pinyin': PINYIN}), re.X)
 # kMandarin value: bare pinyin.  The two empty groups in front put the
 # pinyin into the third group.
 re_kmandarin = re.compile(r'''
    ()()
    ({pinyin})
 '''.format(pinyin=PINYIN), re.X)
 # kXHC1983 value: comma-separated NNNN.NNN positions (each optionally
 # followed by ``*``), a colon, then one pinyin in the third group.
 re_kxhc1983 = re.compile(r'''
    ()()[0-9]{4}\.[0-9]{3}\*?
    (?:,[0-9]{4}\.[0-9]{3}\*?)*:
    (%(pinyin)s)
 ''' % ({'pinyin': PINYIN}), re.X)
 # kHanyuPinlu value: a pinyin (third group) followed by a number in
 # parentheses.
 re_khanyupinlu = re.compile(r'''
    ()()({pinyin})\([0-9]+\)
 '''.format(pinyin=PINYIN), re.X)
 # kTGHZ2013 value: comma-separated NNN.NNN positions, a colon, then one
 # pinyin in the third group.
 re_ktghz2013 = re.compile(r'''
    ()()[0-9]{3}\.[0-9]{3}
    (?:,[0-9]{3}\.[0-9]{3})*:
    (%(pinyin)s)
 ''' % ({'pinyin': PINYIN}), re.X)
 # Field name -> pattern used by parse() to pull pinyins out of the value.
 re_kinds_map = {
    'kHanyuPinyin': re_khanyupinyin,
    'kMandarin': re_kmandarin,
    'kXHC1983': re_kxhc1983,
    'kHanyuPinlu': re_khanyupinlu,
    'kTGHZ2013': re_ktghz2013,
 }
 def remove_dup_items(lst):
    """Return a new list without repeated items, keeping first-seen order."""
    unique_items = []
    for candidate in lst:
        if candidate in unique_items:
            continue
        unique_items.append(candidate)
    return unique_items
 def parse(lines, kind='kHanyuPinyin',
           ignore_prefix='#') -> Iterator[Tuple[str, str]]:
    """Yield ``(code, pinyin)`` for every line of ``lines`` of one field.

    :param lines: iterable of text lines in ``Unihan_Readings.txt`` format.
    :param kind: field name to extract; must be a key of ``re_kinds_map``.
    :param ignore_prefix: lines starting with this text are skipped.
    :return: generator of ``(code, pinyin)`` pairs, where ``code`` is the
             hexadecimal code point without ``U+`` and ``pinyin`` is the
             comma-joined list of distinct pinyins in order of appearance.
    :raises KeyError: if ``kind`` is not a key of ``re_kinds_map``.
    """
    re_line = re_match_pinyin_line(kind)
    re_pinyin = re_kinds_map[kind]
    for line in lines:
        line = line.strip()
        if line.startswith(ignore_prefix):
            continue
        match = re_line.match(line)
        if match is None:
            continue
        code = match.group('code')
        raw_pinyin = match.group('pinyin')
        matches = re_pinyin.findall(raw_pinyin)
        if not matches:
            # The value holds nothing the field's pattern recognizes.
            # Skip the line; reducing an empty list used to raise TypeError.
            continue
        # findall() returns one tuple of groups per match, for example
        # [(' xī,', 'lǔ '), (' lǔ,', 'xī')] or [('shú,dú,', 'tù')]; a group
        # may itself hold several comma-separated pinyins.
        pinyins = []
        for groups in matches:
            for group in groups:
                pinyins.extend(group.split(','))
        pinyins = [x.strip() for x in pinyins if x.strip()]
        pinyins = remove_dup_items(pinyins)
        pinyin = ','.join(pinyins)
        yield code, pinyin
 def save_data(pinyins, writer):
    """Write ``(code, pinyin)`` pairs as ``U+XXXX: pinyin  # 字`` lines.

    :param pinyins: iterable of ``(code, pinyin)`` pairs, where ``code`` is
                    the hexadecimal code point without the ``U+`` prefix.
    :param writer: object with a ``write(str)`` method.
    :raises ValueError: if a code is not a valid hexadecimal code point.
    """
    for code, pinyin in pinyins:
        # Convert the hex text directly.  The earlier exec() of a formatted
        # source string gave the same result for well-formed codes but
        # would run whatever a malformed code contained.
        hanzi = chr(int(code, 16))
        line = 'U+{code}: {pinyin}  # {hanzi}\n'.format(
            code=code, pinyin=pinyin, hanzi=hanzi
        )
        writer.write(line)
 if __name__ == '__main__':
    # Read the Unihan readings once and write one <kind>.txt per field.
    # Both sides are UTF-8 text, so the encoding is given explicitly
    # instead of depending on the platform's default.
    with open('Unihan_Readings.txt', encoding='utf-8') as fp:
        lines = fp.readlines()
    for kind in ('kHanyuPinyin', 'kMandarin',
                 'kHanyuPinlu', 'kXHC1983', 'kTGHZ2013'):
        with open('{}.txt'.format(kind), 'w', encoding='utf-8') as writer:
            pinyins = parse(lines, kind=kind)
            save_data(pinyins, writer)
--- a/third_party/python-pinyin/pinyin-data/zdic.txt
+++ b/third_party/python-pinyin/pinyin-data/zdic.txt
--- a/third_party/python-pinyin/pypinyin/__init__.py
+++ b/third_party/python-pinyin/pypinyin/__init__.py
@ -0,0 +1,52 @@
 """汉字拼音转换工具."""
 from pypinyin.constants import BOPOMOFO
 from pypinyin.constants import BOPOMOFO_FIRST
 from pypinyin.constants import CYRILLIC
 from pypinyin.constants import CYRILLIC_FIRST
 from pypinyin.constants import FINALS
 from pypinyin.constants import FINALS_TONE
 from pypinyin.constants import FINALS_TONE2
 from pypinyin.constants import FINALS_TONE3
 from pypinyin.constants import FIRST_LETTER
 from pypinyin.constants import INITIALS
 from pypinyin.constants import NORMAL
 from pypinyin.constants import Style
 from pypinyin.constants import STYLE_BOPOMOFO
 from pypinyin.constants import STYLE_BOPOMOFO_FIRST
 from pypinyin.constants import STYLE_CYRILLIC
 from pypinyin.constants import STYLE_CYRILLIC_FIRST
 from pypinyin.constants import STYLE_FINALS
 from pypinyin.constants import STYLE_FINALS_TONE
 from pypinyin.constants import STYLE_FINALS_TONE2
 from pypinyin.constants import STYLE_FINALS_TONE3
 from pypinyin.constants import STYLE_FIRST_LETTER
 from pypinyin.constants import STYLE_INITIALS
 from pypinyin.constants import STYLE_NORMAL
 from pypinyin.constants import STYLE_TONE
 from pypinyin.constants import STYLE_TONE2
 from pypinyin.constants import STYLE_TONE3
 from pypinyin.constants import TONE
 from pypinyin.constants import TONE2
 from pypinyin.constants import TONE3
 from pypinyin.core import lazy_pinyin
 from pypinyin.core import load_phrases_dict
 from pypinyin.core import load_single_dict
 from pypinyin.core import pinyin
 from pypinyin.core import slug
 __all__ = [
    'pinyin', 'lazy_pinyin', 'slug', 'load_single_dict', 'load_phrases_dict',
    'Style', 'STYLE_NORMAL', 'NORMAL', 'STYLE_TONE', 'TONE', 'STYLE_TONE2',
    'TONE2', 'STYLE_TONE3', 'TONE3', 'STYLE_INITIALS', 'INITIALS',
    'STYLE_FINALS', 'FINALS', 'STYLE_FINALS_TONE', 'FINALS_TONE',
    'STYLE_FINALS_TONE2', 'FINALS_TONE2', 'STYLE_FINALS_TONE3', 'FINALS_TONE3',
    'STYLE_FIRST_LETTER', 'FIRST_LETTER', 'STYLE_BOPOMOFO', 'BOPOMOFO',
    'STYLE_BOPOMOFO_FIRST', 'BOPOMOFO_FIRST', 'STYLE_CYRILLIC', 'CYRILLIC',
    'STYLE_CYRILLIC_FIRST', 'CYRILLIC_FIRST'
 ]
 __title__ = 'pypinyin'
 __version__ = '0.41.0'
 __license__ = 'MIT'
 __author__ = 'Hui Zhang'
 __copyright__ = 'Copyright (c) 2021 Hui Zhang'
--- a/third_party/python-pinyin/pypinyin/__main__.py
+++ b/third_party/python-pinyin/pypinyin/__main__.py
@ -0,0 +1,5 @@
 #!/usr/bin/env python3
 from pypinyin.runner import main
 # Call the runner's main() only when this file is executed directly, not
 # when it is imported.
 if __name__ == '__main__':
    main()
--- a/third_party/python-pinyin/pypinyin/constants.py
+++ b/third_party/python-pinyin/pypinyin/constants.py
@ -0,0 +1,99 @@
 import os
 import re
 from enum import IntEnum
 from enum import unique
 from pypinyin import pinyin_dict
 # True when a character outside the Basic Multilingual Plane has length 1,
 # i.e. the interpreter does not represent it as two code units.
 SUPPORT_UCS4 = len('\U00020000') == 1
 # Phrase pinyin dictionary.  Setting the PYPINYIN_NO_PHRASES environment
 # variable skips importing it and leaves the dictionary empty.
 if os.environ.get('PYPINYIN_NO_PHRASES'):
    PHRASES_DICT = {}
 else:
    from pypinyin import phrases_dict
    PHRASES_DICT = phrases_dict.phrases_dict  # type: Dict[Text, List[List[Text]]]
 # Single-character pinyin dictionary.
 PINYIN_DICT = pinyin_dict.pinyin_dict  # type: Dict[int, Text]
 # Setting the PYPINYIN_NO_DICT_COPY environment variable skips the copies
 # below (for use when no custom pinyin dictionaries are loaded) to reduce
 # memory usage.
 if not os.environ.get('PYPINYIN_NO_DICT_COPY'):
    PINYIN_DICT = PINYIN_DICT.copy()
    PHRASES_DICT = PHRASES_DICT.copy()
 # Regular expression matching a trailing letter followed by the digit
 # (1-4) that marks its tone.
 RE_TONE2 = re.compile(r'([aeoiuvnm])([1-4])$')
 # Han characters that have pinyin.
 # https://www.qqxiuzi.cn/zh/hanzi-unicode-bianma.php
 # https://developer.mozilla.org/zh-CN/docs/Web/JavaScript/Guide/Regular_Expressions
 # NOTE(review): two ranges in the pattern start later than the nominal
 # block ranges written next to them (U+2A703 vs 2A700, U+2F80A vs 2F800)
 # -- confirm this is intentional.
 if SUPPORT_UCS4:
    RE_HANS = re.compile(r'^(?:['
                         r'\u3007'  # 〇
                         r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
                         r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
                         r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
                         r'\U00020000-\U0002A6DF'  # CJK Extension B: [20000-2A6DF]
                         r'\U0002A703-\U0002B73F'  # CJK Extension C: [2A700-2B73F]
                         r'\U0002B740-\U0002B81D'  # CJK Extension D: [2B740-2B81D]
                         r'\U0002F80A-\U0002FA1F'  # CJK Compatibility Supplement: [2F800-2FA1F]
                         r'])+$')
 else:
    RE_HANS = re.compile(  # pragma: no cover
        r'^(?:['
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK basic: [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
        r'])+$')
@unique
 class Style(IntEnum):
    """Pinyin output styles."""
    #: Plain style, without tones. E.g. 中国 -> ``zhong guo``
    NORMAL = 0
    #: Standard tone style: the tone mark sits on the first letter of the
    #: final (default style). E.g. 中国 -> ``zhōng guó``
    TONE = 1
    #: Tone style 2: the tone is written as a digit [1-4] right after the
    #: final's marked letter. E.g. 中国 -> ``zho1ng guo2``
    TONE2 = 2
    #: Tone style 3: the tone is written as a digit [1-4] after the whole
    #: syllable. E.g. 中国 -> ``zhong1 guo2``
    TONE3 = 8
    #: Initials style: only the initial of each syllable is returned (note:
    #: some syllables have no initial, see `#27`_). E.g. 中国 -> ``zh g``
    INITIALS = 3
    #: First-letter style: only the first letter of each syllable.
    #: E.g. 中国 -> ``z g``
    FIRST_LETTER = 4
    #: Finals style: only the final of each syllable, without tones.
    #: E.g. 中国 -> ``ong uo``
    FINALS = 5
    #: Standard finals style with tones; the tone mark sits on the first
    #: letter of the final. E.g. 中国 -> ``ōng uó``
    FINALS_TONE = 6
    #: Finals style 2 with tones; the tone is a digit [1-4] right after the
    #: final's marked letter. E.g. 中国 -> ``o1ng uo2``
    FINALS_TONE2 = 7
    #: Finals style 3 with tones; the tone is a digit [1-4] after the whole
    #: final. E.g. 中国 -> ``ong1 uo2``
    FINALS_TONE3 = 9
    #: Bopomofo (Zhuyin) style with tones; the first tone is left unmarked.
    #: E.g. 中国 -> ``ㄓㄨㄥ ㄍㄨㄛˊ``
    BOPOMOFO = 10
    #: Bopomofo (Zhuyin) style, first symbol only. E.g. 中国 -> ``ㄓ ㄍ``
    BOPOMOFO_FIRST = 11
    #: Pinyin-to-Cyrillic style; the tone is a digit [1-4] after each
    #: syllable. E.g. 中国 -> ``чжун1 го2``
    CYRILLIC = 12
    #: Pinyin-to-Cyrillic style, first letter only. E.g. 中国 -> ``ч г``
    CYRILLIC_FIRST = 13
 # Module-level aliases: every Style member is also exposed under its bare
 # name and under a ``STYLE_``-prefixed name.
 NORMAL = STYLE_NORMAL = Style.NORMAL
 TONE = STYLE_TONE = Style.TONE
 TONE2 = STYLE_TONE2 = Style.TONE2
 TONE3 = STYLE_TONE3 = Style.TONE3
 INITIALS = STYLE_INITIALS = Style.INITIALS
 FIRST_LETTER = STYLE_FIRST_LETTER = Style.FIRST_LETTER
 FINALS = STYLE_FINALS = Style.FINALS
 FINALS_TONE = STYLE_FINALS_TONE = Style.FINALS_TONE
 FINALS_TONE2 = STYLE_FINALS_TONE2 = Style.FINALS_TONE2
 FINALS_TONE3 = STYLE_FINALS_TONE3 = Style.FINALS_TONE3
 BOPOMOFO = STYLE_BOPOMOFO = Style.BOPOMOFO
 BOPOMOFO_FIRST = STYLE_BOPOMOFO_FIRST = Style.BOPOMOFO_FIRST
 CYRILLIC = STYLE_CYRILLIC = Style.CYRILLIC
 CYRILLIC_FIRST = STYLE_CYRILLIC_FIRST = Style.CYRILLIC_FIRST
--- a/third_party/python-pinyin/pypinyin/contrib/_tone_rule.py
+++ b/third_party/python-pinyin/pypinyin/contrib/_tone_rule.py
@ -0,0 +1,44 @@
 from typing import Optional
 from typing import Text
 def right_mark_index(pinyin_no_tone: Text) -> Optional[int]:
    """Return the index of the letter that should carry the tone mark.

    Tone placement rules, checked in this order:

    1. If there is an ``a`` it gets the mark; otherwise ``o``, then ``e``.
    2. When ``i`` and ``u`` appear together (``iu`` / ``ui``) the mark goes
       on whichever letter comes second.
    3. Otherwise the first of ``i``, ``u``, ``v``, ``ü`` that is present.
    4. Otherwise the first of ``n``, ``m``, ``ê`` that is present.

    References:
    http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
    https://www.zhihu.com/question/23655297
    https://github.com/mozillazg/python-pinyin/issues/160
    http://www.pinyin.info/rules/where.html

    :param pinyin_no_tone: pinyin without any tone mark or tone digit
    :return: index of the letter to mark, or ``None`` when no candidate
             letter is found
    """
    # Rule 1: ``a`` always wins, then ``o``, then ``e``.
    for vowel in ('a', 'o', 'e'):
        position = pinyin_no_tone.find(vowel)
        if position != -1:
            return position
    # Rule 2: for ``iu`` / ``ui`` the second letter of the pair is marked.
    for pair in ('iu', 'ui'):
        position = pinyin_no_tone.find(pair)
        if position != -1:
            return position + 1
    # Rules 3 and 4: remaining vowels first, then the syllabic n, m, ê.
    for letter in ('i', 'u', 'v', 'ü', 'n', 'm', 'ê'):
        position = pinyin_no_tone.find(letter)
        if position != -1:
            return position
    return None
--- a/third_party/python-pinyin/pypinyin/contrib/neutral_tone.py
+++ b/third_party/python-pinyin/pypinyin/contrib/neutral_tone.py
@ -0,0 +1,68 @@
 import re
 from typing import Any
 from typing import Optional
 from typing import Text
 from typing import Tuple
 from pypinyin import Style
 from pypinyin.contrib._tone_rule import right_mark_index
 # Matches a single digit; used to detect a tone number already present in
 # a pinyin string.
 _re_number = re.compile(r'\d')
 class NeutralToneWith5Mixin():
    """Mark the neutral tone with ``5`` in the number-based tone styles.

    Usage::

        from pypinyin import lazy_pinyin, Style
        from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
        from pypinyin.converter import DefaultConverter
        from pypinyin.core import Pinyin
        # The original result does not mark the neutral tone
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le']
        class MyConverter(NeutralToneWith5Mixin, DefaultConverter):
            pass
        my_pinyin = Pinyin(MyConverter())
        pinyin = my_pinyin.pinyin
        lazy_pinyin = my_pinyin.lazy_pinyin
        # The new result marks the neutral tone with ``5``
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le5']
        print(pinyin('好了', style=Style.TONE2))
        # Output: [['ha3o'], ['le5']]
    """
    # Styles in which the tone is written as a digit.
    NUMBER_TONE = (Style.TONE2, Style.TONE3, Style.FINALS_TONE2,
                   Style.FINALS_TONE3)  # type: Tuple[Style, ...]
    # Subset of NUMBER_TONE where the digit goes at the very end.
    NUMBER_AT_END = (Style.TONE3, Style.FINALS_TONE3)  # type: Tuple[Style, ...]
    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: Style,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Append or insert ``5`` when the converted pinyin has no tone digit.

        The next ``post_convert_style`` in the MRO runs first. For styles
        outside ``NUMBER_TONE`` its result is returned unchanged.

        :param han: the Han character being converted
        :param orig_pinyin: the original pinyin of ``han``
        :param converted_pinyin: the pinyin after style conversion
        :param style: the target pinyin style
        :param strict: forwarded unchanged to the parent hook
        :param kwargs: forwarded unchanged to the parent hook
        :return: the parent result for non-number styles, otherwise the
                 pinyin with a tone digit (``5`` added when none exists)
        """
        pre_data = super().post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)
        if style not in self.NUMBER_TONE:
            return pre_data
        if pre_data is not None:
            converted_pinyin = pre_data
        # Already carries a tone digit: nothing to add.
        if _re_number.search(converted_pinyin):
            return converted_pinyin
        if style in self.NUMBER_AT_END:
            return '{}5'.format(converted_pinyin)
        # Find the letter that should carry the tone and put ``5`` after it.
        mark_index = right_mark_index(converted_pinyin)
        if mark_index is None:
            # No letter can carry the mark (e.g. an empty final): fall back
            # to the end of the string instead of failing on ``None + 1``.
            mark_index = len(converted_pinyin) - 1
        before = converted_pinyin[:mark_index + 1]
        after = converted_pinyin[mark_index + 1:]
        return '{}5{}'.format(before, after)
--- a/third_party/python-pinyin/pypinyin/contrib/tone_convert.py
+++ b/third_party/python-pinyin/pypinyin/contrib/tone_convert.py
@ -0,0 +1,341 @@
 import re
 from typing import Optional
 from typing import Text
 from pypinyin.contrib._tone_rule import right_mark_index
 from pypinyin.style._constants import RE_TONE3
 from pypinyin.style.tone import converter
 from pypinyin.utils import _replace_tone2_style_dict_to_default
 # Matches a single digit; used to find or strip tone numbers in pinyin.
 _re_number = re.compile(r'\d')
 def _v_to_u(pinyin: Text, replace: bool=False) -> Text:
    """replace v to u
    Args:
        pinyin (Text): pinyin
        replace (bool, optional): True, v to u; False, v as it is. Defaults to False.
    Returns:
        Text: new pinyin
    """
    if not replace:
        return pinyin
    return pinyin.replace('v', 'ü')
 def _fix_v_u(origin_py: Text, new_py: Text, v_to_u: bool) -> Text:
    """ fix v u
    Args:
        origin_py (Text): origin pinyin
        new_py (Text): new pinyin
        v_to_u (bool): True, replace v to u; False, v as it is.
    Returns:
        Text:
    """
    if not v_to_u:
        if 'ü' in new_py and 'ü' not in origin_py:
            return new_py.replace('ü', 'v')
    return _v_to_u(new_py, replace=True)
 def _get_number_from_pinyin(pinyin: Text) -> Optional[Text]:
    """Return the first digit found in ``pinyin``.

    Args:
        pinyin (Text): pinyin that may contain a tone digit

    Returns:
        Optional[Text]: the first digit as a one-character string (it is
        not converted to ``int``), or None when there is no digit
    """
    # Only the first digit is needed, so stop at the first match.
    match = _re_number.search(pinyin)
    if match is None:
        return None
    return match.group(0)
 def _improve_tone3(tone3: Text, neutral_tone_with_5: bool=False) -> Text:
    """Append ``5`` to a TONE3 pinyin that carries no tone digit, on request.

    Args:
        tone3 (Text): pinyin in TONE3 style
        neutral_tone_with_5 (bool, optional): True marks the neutral tone
            with ``5``. Defaults to False.

    Returns:
        Text: ``tone3`` with ``5`` appended when it has no digit and
        ``neutral_tone_with_5`` is true; otherwise ``tone3`` unchanged
    """
    lacks_number = _get_number_from_pinyin(tone3) is None
    if lacks_number and neutral_tone_with_5:
        return '{}5'.format(tone3)
    return tone3
 def tone_to_tone3(tone: Text,
                  v_to_u: bool=False,
                  neutral_tone_with_5: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE` style pinyin to
    :py:attr:`~pypinyin.Style.TONE3` style pinyin.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_tone3
        >>> tone_to_tone3('zhōng')
        'zhong1'
        >>> tone_to_tone3('shang', neutral_tone_with_5=True)
        'shang5'
        >>> tone_to_tone3('lüè', v_to_u=True)
        'lüe4'
    """
    numbered = _improve_tone3(
        converter.to_tone3(tone), neutral_tone_with_5=neutral_tone_with_5)
    return _v_to_u(numbered, v_to_u)
 def tone_to_tone2(tone: Text,
                  v_to_u: bool=False,
                  neutral_tone_with_5: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE` style pinyin to
    :py:attr:`~pypinyin.Style.TONE2` style pinyin.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_tone2
        >>> tone_to_tone2('zhōng')
        'zho1ng'
        >>> tone_to_tone2('shang', neutral_tone_with_5=True)
        'sha5ng'
        >>> tone_to_tone2('lüè', v_to_u=True)
        'lüe4'
    """
    # Go through TONE3 first, then move the digit next to the marked letter.
    as_tone3 = tone_to_tone3(
        tone, v_to_u=v_to_u, neutral_tone_with_5=neutral_tone_with_5)
    return _v_to_u(tone3_to_tone2(as_tone3), v_to_u)
 def tone_to_normal(tone: Text, v_to_u: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE` style pinyin to
    :py:attr:`~pypinyin.Style.NORMAL` style pinyin.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_normal
        >>> tone_to_normal('zhōng')
        'zhong'
        >>> tone_to_normal('lüè', v_to_u=True)
        'lüe'
    """
    # Convert to the digit-based TONE2 form, then drop the digits.
    with_digits = tone_to_tone2(tone, v_to_u=v_to_u)
    without_digits = _re_number.sub('', with_digits)
    return _v_to_u(without_digits, v_to_u)
 def tone2_to_normal(tone2: Text, v_to_u: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE2` style pinyin to
    :py:attr:`~pypinyin.Style.NORMAL` style pinyin.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in Style.NORMAL style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_normal
        >>> tone2_to_normal('zho1ng')
        'zhong'
        >>> tone2_to_normal('lüe4', v_to_u=True)
        'lüe'
    """
    # Removing every digit removes the tone.
    return _v_to_u(_re_number.sub('', tone2), v_to_u)
 def tone2_to_tone(tone2: Text) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE2` style pinyin to
    :py:attr:`~pypinyin.Style.TONE` style pinyin.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :return: pinyin in Style.TONE style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_tone
        >>> tone2_to_tone('zho1ng')
        'zhōng'
    """
    with_marks = _replace_tone2_style_dict_to_default(tone2)
    return with_marks
 def tone2_to_tone3(tone2: Text) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE2` style pinyin to
    :py:attr:`~pypinyin.Style.TONE3` style pinyin.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_tone3
        >>> tone2_to_tone3('zho1ng')
        'zhong1'
    """
    # RE_TONE3's groups 2 and 3 are swapped by the replacement template.
    return RE_TONE3.sub(r'\1\3\2', tone2)
 def tone3_to_normal(tone3: Text, v_to_u: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.NORMAL` style pinyin.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_normal
        >>> tone3_to_normal('zhong1')
        'zhong'
        >>> tone3_to_normal('lüe4', v_to_u=True)
        'lüe'
    """
    # Removing every digit removes the tone.
    digits_removed = _re_number.sub('', tone3)
    return _v_to_u(digits_removed, v_to_u)
 def tone3_to_tone(tone3: Text) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.TONE` style pinyin.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_tone
        >>> tone3_to_tone('zhong1')
        'zhōng'
    """
    # TONE3 -> TONE2 -> TONE.
    return tone2_to_tone(tone3_to_tone2(tone3))
 def tone3_to_tone2(tone3: Text) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.TONE2` style pinyin.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style; ``tone3``
             itself when it contains no digit

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_tone2
        >>> tone3_to_tone2('zhong1')
        'zho1ng'
    """
    number = _get_number_from_pinyin(tone3)
    if number is None:
        # No tone digit to relocate.
        return tone3
    bare = tone3_to_normal(tone3)
    mark_index = right_mark_index(bare)
    # The digit goes right after the marked letter; without such a letter
    # it goes at the end of the string.
    split_at = len(bare) if mark_index is None else mark_index + 1
    return '{}{}{}'.format(bare[:split_at], number, bare[split_at:])
 def to_normal(pinyin: Text, v_to_u: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE`,
    :py:attr:`~pypinyin.Style.TONE2` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.NORMAL` style pinyin.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE`,
                   :py:attr:`~pypinyin.Style.TONE2` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``.
                   True, v to u; False, v as it is.
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_normal
        >>> to_normal('zhōng')
        'zhong'
        >>> to_normal('zho1ng')
        'zhong'
        >>> to_normal('zhong1')
        'zhong'
        >>> to_normal('lüè', v_to_u=True)
        'lüe'
    """
    # Convert with ``ü`` throughout, then let _fix_v_u settle the final
    # v / ü spelling against the caller's input.
    plain = tone2_to_normal(tone_to_tone2(pinyin, v_to_u=True))
    return _fix_v_u(pinyin, plain, v_to_u)
 def to_tone(pinyin: Text) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE2` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.TONE` style pinyin.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE2` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone
        >>> to_tone('zho1ng')
        'zhōng'
        >>> to_tone('zhong1')
        'zhōng'
    """
    # Without any digit there is no tone number to convert.
    if _re_number.search(pinyin) is None:
        return pinyin
    return tone2_to_tone(tone_to_tone2(pinyin))
 def to_tone2(pinyin: Text, v_to_u: bool=False,
             neutral_tone_with_5: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to
    :py:attr:`~pypinyin.Style.TONE2` style pinyin.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone2
        >>> to_tone2('zhōng')
        'zho1ng'
        >>> to_tone2('zhong1')
        'zho1ng'
        >>> to_tone2('shang', neutral_tone_with_5=True)
        'sha5ng'
        >>> to_tone2('lüè', v_to_u=True)
        'lüe4'
    """
    # Convert with ``ü`` throughout, then let _fix_v_u settle the final
    # v / ü spelling against the caller's input.
    as_tone3 = tone_to_tone3(
        pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5)
    return _fix_v_u(pinyin, tone3_to_tone2(as_tone3), v_to_u)
 def to_tone3(pinyin: Text, v_to_u: bool=False,
             neutral_tone_with_5: bool=False) -> Text:
    """Convert :py:attr:`~pypinyin.Style.TONE` or
    :py:attr:`~pypinyin.Style.TONE2` style pinyin to
    :py:attr:`~pypinyin.Style.TONE3` style pinyin.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE` or
                   :py:attr:`~pypinyin.Style.TONE2` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone3
        >>> to_tone3('zhōng')
        'zhong1'
        >>> to_tone3('zho1ng')
        'zhong1'
        >>> to_tone3('shang', neutral_tone_with_5=True)
        'shang5'
        >>> to_tone3('lüè', v_to_u=True)
        'lüe4'
    """
    # Convert with ``ü`` throughout, then let _fix_v_u settle the final
    # v / ü spelling against the caller's input.
    s = tone_to_tone2(
        pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5)
    s = tone2_to_tone3(s)
    return _fix_v_u(pinyin, s, v_to_u)
--- a/third_party/python-pinyin/pypinyin/contrib/uv.py
+++ b/third_party/python-pinyin/pypinyin/contrib/uv.py
@ -0,0 +1,44 @@
 from typing import Any
 from typing import Optional
 from typing import Text
 from pypinyin.constants import Style
 class V2UMixin():
    """Use ``ü`` instead of ``v`` in the converted pinyin.

    Usage::

        from pypinyin import lazy_pinyin, Style
        from pypinyin.contrib.uv import V2UMixin
        from pypinyin.converter import DefaultConverter
        from pypinyin.core import Pinyin
        # The original result uses ``v`` to stand for ``ü``
        print(lazy_pinyin('战略'))
        # Output: ['zhan', 'lve']
        class MyConverter(V2UMixin, DefaultConverter):
            pass
        my_pinyin = Pinyin(MyConverter())
        pinyin = my_pinyin.pinyin
        lazy_pinyin = my_pinyin.lazy_pinyin
        # The new result uses ``ü`` in place of ``v``
        print(lazy_pinyin('战略'))
        # Output: ['zhan', 'lüe']
        print(pinyin('战略', style=Style.NORMAL))
        # Output: [['zhan'], ['lüe']]
    """
    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: Style,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Replace every ``v`` with ``ü`` in the final converted pinyin.

        The next ``post_convert_style`` in the MRO runs first; when it
        returns a value, that value is used instead of ``converted_pinyin``.

        :param han: the Han character being converted
        :param orig_pinyin: the original pinyin of ``han``
        :param converted_pinyin: the pinyin after style conversion
        :param style: the target pinyin style
        :param strict: forwarded unchanged to the parent hook
        :param kwargs: forwarded unchanged to the parent hook
        :return: the pinyin with ``v`` replaced by ``ü``
        """
        parent_result = super().post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)
        final = converted_pinyin if parent_result is None else parent_result
        return final.replace('v', 'ü')
--- a/third_party/python-pinyin/pypinyin/converter.py
+++ b/third_party/python-pinyin/pypinyin/converter.py
@ -0,0 +1,459 @@
 from copy import deepcopy
 from typing import Any
 from typing import Callable
 from typing import List
 from typing import Optional
 from typing import Text
 from typing import Union
 from pypinyin.constants import PHRASES_DICT
 from pypinyin.constants import PINYIN_DICT
 from pypinyin.constants import RE_HANS
 from pypinyin.constants import Style
 from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
 from pypinyin.contrib.uv import V2UMixin
 from pypinyin.style import auto_discover
 from pypinyin.style import convert as convert_style
 from pypinyin.utils import _remove_dup_items
 # Type aliases used in the converter signatures in this module.
 TStyle = Style
 # ``errors`` is either a callable that receives the text, or a string.
 TErrors = Union[Callable[[Text], Text], Text]
 # A list of lists of pinyin strings.
 TPinyinResult = List[List[Text]]
 TErrorResult = Union[Text, List[Text], None]
 # What the no-pinyin hooks may return.
 TNoPinyinResult = Union[TPinyinResult, List[Text], Text, None]
 # Called once at import time.
 # NOTE(review): presumably registers the style converters in
 # pypinyin.style - confirm against that module.
 auto_discover()
 class Converter():
    """Base class for pinyin converters; subclasses implement ``convert``."""
    def convert(self,
                words: Text,
                style: TStyle,
                heteronym: bool,
                errors: TErrors,
                strict: bool=True,
                **kwargs: Any) -> TPinyinResult:
        """Convert ``words`` to pinyin in the requested style.

        :param words: the text to convert
        :param style: pinyin style
        :param heteronym: whether to return multiple readings
        :param errors: how to handle characters without pinyin
        :param strict: strict handling of initials and finals; the default
                       is ``True`` (a real ``bool`` instead of ``...``)
        :param kwargs: extra keyword arguments
        :raises NotImplementedError: always; subclasses must override
        """
        # TODO: use ``abc`` module
        raise NotImplementedError  # pragma: no cover
 class DefaultConverter(Converter):
    def __init__(self, **kwargs: Any) -> None:
        """Accept and ignore any keyword arguments; no state is set up."""
        pass
    def post_pinyin(self,
                    han: Text,
                    heteronym: bool,
                    pinyin: TPinyinResult,
                    **kwargs: Any) -> Union[TPinyinResult, None]:
        """Hook called after the pinyin of ``han`` has been looked up.

        When the return value is not ``None`` it is used as the pinyin
        data of ``han``. The default implementation returns ``None``.

        :param han: a single Han character or a phrase
        :param heteronym: whether multiple readings are requested
        :param pinyin: pinyin data of the character, or the pinyin list of
                       the phrase
        :type pinyin: list
        :param kwargs: other keyword arguments, currently unused, reserved
                       for future extension
        :return: ``None``, or a pinyin list that replaces ``pinyin``
        """
        return None
    def _single_pinyin(self,
                       han: Text,
                       style: TStyle,
                       heteronym: bool,
                       errors: TErrors,
                       strict: bool) -> TPinyinResult:
        """Convert a single character to pinyin.

        :param han: a single Han character
        :param style: pinyin style
        :param heteronym: whether to return every reading of the character
        :param errors: how to handle characters without pinyin, see
                       :py:func:`~pypinyin.pinyin`
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :return: a pinyin list; a heteronym has several pinyin items
        :rtype: list
        """
        num = ord(han)
        # Characters without pinyin go through the no-pinyin handling.
        if num not in PINYIN_DICT:
            return self.handle_nopinyin(
                han,
                style=style,
                errors=errors,
                heteronym=heteronym,
                strict=strict)
        pys = PINYIN_DICT[num].split(',')  # readings of the character
        post_data = self.post_pinyin(han, heteronym, [pys])
        if post_data is not None:
            pys = post_data[0]
        if not heteronym:
            # Only the first reading is used.
            return [[
                self.convert_style(
                    han, pys[0], style=style, strict=strict)
            ]]
        # Return every reading. Readings already emitted are remembered so
        # that converting to a tone-less style does not produce duplicates.
        # TODO: add test for cache
        seen = set()
        pinyins = []
        for orig_pinyin in pys:
            py = self.convert_style(
                han, orig_pinyin, style=style, strict=strict)
            if py in seen:
                continue
            seen.add(py)
            pinyins.append(py)
        return [pinyins]
    def _phrase_pinyin(self,
                       phrase: Text,
                       style: TStyle,
                       heteronym: bool,
                       errors: TErrors,
                       strict: bool) -> TPinyinResult:
        """Convert a phrase to pinyin.

        :param phrase: the phrase
        :param style: pinyin style
        :param heteronym: whether to return every reading of a character
        :param errors: how to handle characters without pinyin
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :return: pinyin list
        :rtype: list
        """
        if phrase not in PHRASES_DICT:
            # Unknown phrase: convert character by character.
            collected = []
            for char in phrase:
                single = self._single_pinyin(
                    char,
                    style=style,
                    heteronym=heteronym,
                    errors=errors,
                    strict=strict)
                if single:
                    collected.extend(single)
            return collected
        # Known phrase: work on a deep copy so the dictionary entry itself
        # is not modified.
        py = deepcopy(PHRASES_DICT[phrase])
        post_data = self.post_pinyin(phrase, heteronym, py)
        if post_data is not None:
            py = post_data
        for idx, readings in enumerate(py):
            han = phrase[idx]
            if not heteronym:
                # Only the first reading is used.
                py[idx] = [
                    self.convert_style(
                        han,
                        orig_pinyin=readings[0],
                        style=style,
                        strict=strict)
                ]
                continue
            converted = [
                self.convert_style(
                    han, orig_pinyin=x, style=style, strict=strict)
                for x in readings
            ]
            py[idx] = _remove_dup_items(converted)
        return py
    def convert(self,
                words: Text,
                style: TStyle,
                heteronym: bool,
                errors: TErrors,
                strict: bool=True,
                **kwargs: Any) -> TPinyinResult:
        """Convert Han characters to pinyin in the requested style.

        :param words: string of Han characters
        :type words: unicode
        :param style: pinyin style
        :param heteronym: whether to enable multiple readings
        :type heteronym: bool
        :param errors: how to handle characters without pinyin
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`. The default is ``True`` (a real
                       ``bool`` instead of ``...``).
        :type strict: bool
        :return: the pinyin result converted to the requested style
        :rtype: list
        """
        pys = []
        # First filter: text matching RE_HANS is converted as a phrase.
        if RE_HANS.match(words):
            pys = self._phrase_pinyin(
                words,
                style=style,
                heteronym=heteronym,
                errors=errors,
                strict=strict)
            return pys
        # Anything else goes through the no-pinyin handling.
        py = self.handle_nopinyin(
            words,
            style=style,
            errors=errors,
            heteronym=heteronym,
            strict=strict)
        if py:
            pys.extend(py)
        return pys
    def pre_convert_style(self,
                          han: Text,
                          orig_pinyin: Text,
                          style: TStyle,
                          strict: bool,
                          **kwargs: Any) -> Optional[Text]:
        """Hook called before the original toned pinyin is converted to
        the requested style.

        When the return value is not ``None`` it replaces ``orig_pinyin``
        in the style conversion that follows. The default implementation
        returns ``None``.

        :param han: the Han character being processed
        :param orig_pinyin: the original toned pinyin of the character
        :param style: the target pinyin style
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :param kwargs: other keyword arguments, currently unused, reserved
                       for future extension
        :return: ``None``, or the pinyin string to convert instead of
                 ``orig_pinyin``
        """
        return None
    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: TStyle,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Hook called after the original toned pinyin has been converted
        to the requested style.

        When the return value is not ``None`` it replaces
        ``converted_pinyin`` as the final result of the style conversion.
        The default implementation returns ``None``.

        :param han: the Han character being processed
        :param orig_pinyin: the original toned pinyin of the character
        :param converted_pinyin: the pinyin after style conversion
        :param style: the target pinyin style
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :param kwargs: other keyword arguments, currently unused, reserved
                       for future extension
        :return: ``None``, or the pinyin that replaces
                 ``converted_pinyin``
        """
        return None
    def _convert_style(self,
                       han: Text,
                       pinyin: Text,
                       style: TStyle,
                       strict: bool,
                       default: Text,
                       **kwargs: Any) -> Text:
        """Delegate the style conversion to ``pypinyin.style.convert``.

        ``han`` is accepted but not forwarded to the style converter.
        """
        converted = convert_style(
            pinyin, style, strict, default=default, **kwargs)
        return converted
    def convert_style(self,
                      han: Text,
                      orig_pinyin: Text,
                      style: TStyle,
                      strict: bool,
                      **kwargs: Any) -> Text:
        """Convert ``orig_pinyin`` according to ``style``.

        ``pre_convert_style`` is called before the conversion and
        ``post_convert_style`` after it.

        :param han: the Han character being processed
        :param orig_pinyin: the original toned pinyin of the character
        :param style: pinyin style
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :param kwargs: other keyword arguments, currently unused, reserved
                       for future extension
        :return: the pinyin converted to the requested style
        """
        # The pre-hook may substitute the pinyin that gets converted.
        pre_data = self.pre_convert_style(
            han, orig_pinyin, style=style, strict=strict)
        pinyin = orig_pinyin if pre_data is None else pre_data
        converted_pinyin = self._convert_style(
            han, pinyin, style=style, strict=strict, default=pinyin)
        # The post-hook may substitute the final result.
        post_data = self.post_convert_style(
            han, pinyin, converted_pinyin, style=style, strict=strict)
        return converted_pinyin if post_data is None else post_data
    def pre_handle_nopinyin(self,
                            chars: Text,
                            style: TStyle,
                            heteronym: bool,
                            errors: TErrors,
                            strict: bool) -> TNoPinyinResult:
        """Hook called before a string without pinyin is processed.

        When the return value is not ``None`` it is used as the result of
        processing the string and the built-in handling is skipped. The
        default implementation returns ``None``.

        :param chars: the string without pinyin to process
        :param style: pinyin style
        :param errors: how to handle the string
        :param heteronym: whether multiple readings are requested
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals
        :return: ``None``, or a pinyin string or pinyin result list that
                 is used instead of processing ``chars``
        """
        return None
    def post_handle_nopinyin(self,
                             chars: Text,
                             style: Style,
                             heteronym: bool,
                             errors: TErrors,
                             strict: bool,
                             pinyin: TNoPinyinResult,
                             **kwargs: Any) -> TNoPinyinResult:
        """Hook called after a string without pinyin has been processed.

        When the return value is not ``None`` it is used as the result of
        processing the string. The default implementation returns
        ``None``.

        :param chars: the string without pinyin that was processed
        :param style: pinyin style
        :param errors: how to handle the string
        :param heteronym: whether multiple readings are requested
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :param pinyin: the processed pinyin information: an empty list or
                       a list holding pinyin information
        :param kwargs: other keyword arguments, currently unused, reserved
                       for future extension
        :return: ``None``, or the value that replaces ``pinyin`` as the
                 result
        """
        return None
    def _convert_nopinyin_chars(self,
                                chars: Text,
                                style: TStyle,
                                heteronym: bool,
                                errors: TErrors,
                                strict: bool) -> TNoPinyinResult:
        """Convert characters that have no pinyin.

        :param chars: the characters without pinyin
        :param style: pinyin style (not used here)
        :param heteronym: whether multiple readings are requested (not
                          used here)
        :param errors: a callable, which is called with ``chars`` and
                       whose result is returned, or one of ``'default'``
                       (return ``chars`` unchanged), ``'ignore'`` (return
                       ``None``) or ``'replace'`` (return the hexadecimal
                       code points of ``chars`` joined together)
        :param strict: not used here
        :return: the converted value; ``None`` for ``'ignore'`` and for
                 any unrecognised ``errors`` value
        """
        if callable(errors):
            return errors(chars)
        if errors == 'default':
            return chars
        if errors == 'replace':
            # One join covers any length, including the empty string, for
            # which ``ord(chars)`` would raise TypeError.
            return ''.join('%x' % ord(x) for x in chars)
        # 'ignore', or an unrecognised strategy name.
        return None
    def handle_nopinyin(self,
                        chars: Text,
                        style: TStyle,
                        heteronym: bool,
                        errors: TErrors,
                        strict: bool,
                        **kwargs: Any) -> TPinyinResult:
        """Process a string that has no pinyin.

        ``pre_handle_nopinyin`` is called before processing and
        ``post_handle_nopinyin`` after it.

        :param chars: the string without pinyin to process
        :param style: pinyin style
        :param errors: how to handle the string
        :param heteronym: whether multiple readings are requested
        :param strict: for the initials-only / finals-only styles, whether
                       to follow the official Hanyu Pinyin scheme strictly
                       when splitting initials and finals, see
                       :ref:`strict`
        :return: the processed pinyin result; an empty list means the
                 string is ignored
        :rtype: list
        """
        pre_data = self.pre_handle_nopinyin(
            chars, style, errors=errors, heteronym=heteronym, strict=strict)
        if pre_data is None:
            # The pre-hook did not take over: use the built-in handling.
            py = self._convert_nopinyin_chars(
                chars,
                style,
                errors=errors,
                heteronym=heteronym,
                strict=strict)
        else:
            py = pre_data
        post_data = self.post_handle_nopinyin(
            chars,
            style,
            errors=errors,
            heteronym=heteronym,
            strict=strict,
            pinyin=py)
        if post_data is not None:
            py = post_data
        if not py:
            return []
        # Normalise the result to a list of lists.
        if not isinstance(py, list):
            return [[py]]
        if not isinstance(py[0], list):
            return [[i] for i in py]
        # Nested lists carry heteronym information.
        if heteronym:
            return py
        # [[a, b], [c, d]] -> [[a], [c]]
        return [[x[0]] for x in py]
 class _v2UConverter(V2UMixin, DefaultConverter):
    """DefaultConverter combined with V2UMixin."""
    pass
 class _neutralToneWith5Converter(NeutralToneWith5Mixin, DefaultConverter):
    """DefaultConverter combined with NeutralToneWith5Mixin."""
    pass
 class _neutralToneWith5AndV2UConverter(NeutralToneWith5Mixin, V2UMixin,
                                       DefaultConverter):
    """DefaultConverter combined with NeutralToneWith5Mixin and V2UMixin."""
    pass
 class _mixConverter(DefaultConverter):
    """Converter whose post-conversion step is chosen by two flags.

    ``v_to_u`` and ``neutral_tone_with_five`` decide which of the helper
    converters built in ``__init__`` handles ``post_convert_style``.
    """
    def __init__(self, v_to_u=False, neutral_tone_with_five=False, **kwargs):
        """Store the two flags and build one helper converter per
        flag combination.

        :param v_to_u: use the ``v`` -> ``ü`` post-processing
        :param neutral_tone_with_five: use the neutral-tone ``5``
                                       post-processing
        :param kwargs: forwarded to ``DefaultConverter.__init__``
        """
        super().__init__(**kwargs)
        self._v_to_u = v_to_u
        self._neutral_tone_with_five = neutral_tone_with_five
        self._v2uconverter = _v2UConverter()
        self._neutraltonewith5converter = _neutralToneWith5Converter()
        self._neutraltonewith5andv2uconverter = \
            _neutralToneWith5AndV2UConverter()
    def post_convert_style(self, han, orig_pinyin, converted_pinyin, style,
                           strict, **kwargs):
        """Delegate to the helper converter matching the enabled flags.

        With neither flag set, the ``DefaultConverter`` hook is used.
        """
        if self._v_to_u:
            if self._neutral_tone_with_five:
                delegate = self._neutraltonewith5andv2uconverter
            else:
                delegate = self._v2uconverter
        elif self._neutral_tone_with_five:
            delegate = self._neutraltonewith5converter
        else:
            return super().post_convert_style(
                han, orig_pinyin, converted_pinyin, style, strict, **kwargs)
        return delegate.post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)
--- a/third_party/python-pinyin/pypinyin/core.py
+++ b/third_party/python-pinyin/pypinyin/core.py
@ -0,0 +1,333 @@
 from itertools import chain
 from typing import Any
 from typing import Callable
 from typing import Dict
 from typing import List
 from typing import Optional
 from typing import Text
 from typing import Union
 from pypinyin.constants import PHRASES_DICT
 from pypinyin.constants import PINYIN_DICT
 from pypinyin.constants import Style
 from pypinyin.converter import _mixConverter
 from pypinyin.converter import Converter
 from pypinyin.converter import DefaultConverter
 from pypinyin.seg import mmseg
 from pypinyin.seg import simpleseg
 from pypinyin.utils import (_replace_tone2_style_dict_to_default)
 # Alias of ``Style`` used for the ``style`` parameter annotations below.
 TStyle = Style
 # Type of the ``errors`` parameter: a text value or a callable that takes
 # text and returns text.
 TErrors = Union[Callable[[Text], Text], Text]
 # Return annotation of ``Pinyin.pinyin``: a list of lists of text.
 TPinyinResult = List[List[Text]]
 def load_single_dict(pinyin_dict: Dict[int, Text], style: str='default'):
    """Load a user-supplied single-character pinyin dictionary.

    The entries are written into the module-wide ``PINYIN_DICT`` and
    ``mmseg.retrain(mmseg.seg)`` is called afterwards.

    :param pinyin_dict: single-character pinyin data,
                        e.g. ``{0x963F: u"ā,ē"}``
    :param style: notation used by the values of ``pinyin_dict``.
                  Supported: 'default', 'tone2'. With 'tone2' every value is
                  passed through ``_replace_tone2_style_dict_to_default``
                  before it is stored.
    :type pinyin_dict: dict
    """
    if style != 'tone2':
        # Values are stored exactly as given.
        PINYIN_DICT.update(pinyin_dict)
    else:
        for code_point, readings in pinyin_dict.items():
            PINYIN_DICT[code_point] = \
                _replace_tone2_style_dict_to_default(readings)
    # presumably so that segmentation picks up the new entries; confirm
    # against mmseg.retrain
    mmseg.retrain(mmseg.seg)
 def load_phrases_dict(phrases_dict: Dict[Text, List[List[Text]]],
                      style: str='default'):
    """Load a user-supplied phrase pinyin dictionary.

    The entries are written into the module-wide ``PHRASES_DICT`` and
    ``mmseg.retrain(mmseg.seg)`` is called afterwards.

    :param phrases_dict: phrase pinyin data,
                         e.g. ``{u"阿爸": [[u"ā"], [u"bà"]]}``
    :param style: notation used by the values of ``phrases_dict``.
                  Supported: 'default', 'tone2'. With 'tone2' every pinyin
                  string is passed through
                  ``_replace_tone2_style_dict_to_default`` before it is
                  stored.
    :type phrases_dict: dict
    """
    if style != 'tone2':
        # Values are stored exactly as given.
        PHRASES_DICT.update(phrases_dict)
    else:
        for phrase, per_char in phrases_dict.items():
            PHRASES_DICT[phrase] = [
                [_replace_tone2_style_dict_to_default(py) for py in readings]
                for readings in per_char
            ]
    # presumably so that segmentation picks up the new entries; confirm
    # against mmseg.retrain
    mmseg.retrain(mmseg.seg)
 class Pinyin():
    """Segment Chinese text and convert every segment with a converter."""

    def __init__(self, converter: Converter=None, **kwargs: Any):
        """Remember the converter to use.

        :param converter: converter instance; a ``DefaultConverter`` is
                          created when this is ``None`` (or otherwise falsy)
        :param kwargs: accepted but not used by this implementation
        """
        if not converter:
            converter = DefaultConverter()
        self._converter = converter

    def pinyin(self,
               hans: Union[List[Text], Text],
               style: TStyle=Style.TONE,
               heteronym: bool=False,
               errors: TErrors='default',
               strict: bool=True,
               **kwargs: Any) -> TPinyinResult:
        """Convert Chinese characters to pinyin and return a list of readings.

        :param hans: a string of Chinese characters ( ``'你好吗'`` ) or a
                     list of strings ( ``['你好', '吗']`` ).
                     You may segment the text with a tool of your choice and
                     pass the resulting list of strings here.
        :type hans: unicode string or list of strings
        :param style: pinyin style, :py:attr:`~pypinyin.Style.TONE` by
                      default. See :class:`~pypinyin.Style` for all styles.
        :param errors: how to handle characters without pinyin.
                       See :ref:`handle_no_pinyin`
                       * ``'default'``: keep the original character
                       * ``'ignore'``: drop the character
                       * ``'replace'``: replace it with its unicode code
                         point string without the leading ``\\u``
                         (``'\\u90aa'`` => ``'90aa'``)
                       * callable: a callback or any other callable object.
        :param heteronym: whether to enable heteronym (multi-reading) mode
        :param strict: for styles that return only the initial or only the
                       final, whether to follow the Hanyu Pinyin scheme
                       strictly when splitting initials and finals,
                       see :ref:`strict`
        :return: list of pinyin
        :rtype: list
        """
        # Segment the input first. For a list input every element is
        # segmented before any conversion starts.
        if isinstance(hans, str):
            segments = self.seg(hans)
        else:
            segments = chain.from_iterable([self.seg(item) for item in hans])
        result = []
        for segment in segments:
            converted = self._converter.convert(
                segment, style, heteronym, errors, strict=strict)
            result.extend(converted)
        return result

    def lazy_pinyin(self,
                    hans: Union[List[Text], Text],
                    style: TStyle=Style.NORMAL,
                    errors: TErrors='default',
                    strict: bool=True,
                    **kwargs: Any) -> List[Text]:
        """Convert Chinese characters to a flat list of pinyin strings.

        Unlike :py:func:`~pypinyin.pinyin`, heteronym mode is always off
        here and the nested result is flattened, so each entry of the
        returned list is a string.

        :param hans: Chinese characters
        :type hans: unicode or list
        :param style: pinyin style, :py:attr:`~pypinyin.Style.NORMAL` by
                      default. See :class:`~pypinyin.Style` for all styles.
        :param errors: how to handle characters without pinyin, see
                       :py:func:`~pypinyin.pinyin`
        :param strict: for styles that return only the initial or only the
                       final, whether to follow the Hanyu Pinyin scheme
                       strictly when splitting initials and finals,
                       see :ref:`strict`
        :return: list of pinyin (e.g. ``['zhong', 'guo', 'ren']``)
        :rtype: list
        """
        nested = self.pinyin(
            hans,
            style=style,
            heteronym=False,
            errors=errors,
            strict=strict)
        return [reading for readings in nested for reading in readings]

    def pre_seg(self, hans: Text, **kwargs: Any) -> Optional[List[Text]]:
        """Hook called by ``seg`` on the raw string before segmentation.

        This default implementation returns ``None``, so ``seg`` goes on to
        call the segmentation function.

        If an override returns a ``list``, ``seg`` treats it as an already
        segmented result and does not call the segmentation function.

        :param hans: the string before segmentation
        :return: ``None`` or ``list``
        """
        return None

    def post_seg(self, hans: Text, seg_data: List[Text],
                 **kwargs: Any) -> Optional[List[Text]]:
        """Hook called by ``seg`` on the segmentation result.

        This default implementation returns ``None``, so ``seg`` returns
        ``seg_data`` as it is.

        If an override returns a ``list``, ``seg`` returns that list
        instead of ``seg_data``.

        :param hans: the string before segmentation
        :param seg_data: the segmentation result
        :type seg_data: list
        :return: ``None`` or ``list``
        """
        return None

    def seg(self, hans: Text, **kwargs: Any) -> List[Text]:
        """Segment a string of Chinese characters.

        ``pre_seg`` is called before segmentation and ``post_seg`` after it.

        :param hans: the string to segment
        :return: the segmentation result
        """
        prepared = self.pre_seg(hans)
        # A list from pre_seg replaces the call to the segmentation function.
        words = prepared if isinstance(prepared, list) \
            else self.get_seg()(hans)
        adjusted = self.post_seg(hans, words)
        # A list from post_seg overrides the segmentation result.
        return adjusted if isinstance(adjusted, list) else words

    def get_seg(self, **kwargs: Any) -> Callable[[Text], List[Text]]:
        """Return the segmentation function.

        :return: the segmentation function (``simpleseg.seg`` here)
        """
        return simpleseg.seg
 # Module-level converter and the ``Pinyin`` instance built on it, created
 # once at import time; ``slug`` uses ``_default_pinyin``.
 _default_convert = DefaultConverter()
 _default_pinyin = Pinyin(_default_convert)
 def pinyin(hans: Union[List[Text], Text],
           style: TStyle=Style.TONE,
           heteronym: bool=False,
           errors: TErrors='default',
           strict: bool=True,
           v_to_u: bool=False,
           neutral_tone_with_five: bool=False) -> List[List[Text]]:
    """Convert Chinese characters to pinyin and return a list of readings.

    A new ``Pinyin`` object backed by a ``_mixConverter`` configured with
    ``v_to_u`` and ``neutral_tone_with_five`` is created on every call.

    :param hans: a string of Chinese characters ( ``'你好吗'`` ) or a list
                 of strings ( ``['你好', '吗']`` ).
                 You may segment the text with a tool of your choice and
                 pass the resulting list of strings here.
    :type hans: unicode string or list of strings
    :param style: pinyin style, :py:attr:`~pypinyin.Style.TONE` by default.
                  See :class:`~pypinyin.Style` for all styles.
    :param errors: how to handle characters without pinyin.
                   See :ref:`handle_no_pinyin`
                   * ``'default'``: keep the original character
                   * ``'ignore'``: drop the character
                   * ``'replace'``: replace it with its unicode code point
                     string without the leading ``\\u``
                     (``'\\u90aa'`` => ``'90aa'``)
                   * callable: a callback or any other callable object.
    :param heteronym: whether to enable heteronym (multi-reading) mode
    :param strict: for styles that return only the initial or only the
                   final, whether to follow the Hanyu Pinyin scheme strictly
                   when splitting initials and finals, see :ref:`strict`
    :param v_to_u: in styles without tone marks, whether to use ``ü`` in
                   place of ``v`` in the result
    :type v_to_u: bool
    :param neutral_tone_with_five: in styles that write tones as digits,
                                   whether to mark the neutral tone with 5
    :type neutral_tone_with_five: bool
    :return: list of pinyin
    :rtype: list

    NOTE(review): earlier documentation listed ``AssertionError`` for
    non-unicode input; no assertion is visible in this function, so confirm
    against the converter before relying on it.

    Usage::
      >>> from pypinyin import pinyin, Style
      >>> import pypinyin
      >>> pinyin('中心')
      [['zhōng'], ['xīn']]
      >>> pinyin('中心', heteronym=True)  # enable heteronym mode
      [['zhōng', 'zhòng'], ['xīn']]
      >>> pinyin('中心', style=Style.FIRST_LETTER)  # choose a pinyin style
      [['z'], ['x']]
      >>> pinyin('中心', style=Style.TONE2)
      [['zho1ng'], ['xi1n']]
      >>> pinyin('中心', style=Style.CYRILLIC)
      [['чжун1'], ['синь1']]
      >>> pinyin('战略', v_to_u=True, style=Style.NORMAL)
      [['zhan'], ['lüe']]
      >>> pinyin('衣裳', style=Style.TONE3, neutral_tone_with_five=True)
      [['yi1'], ['shang5']]
    """
    converter = _mixConverter(
        v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five)
    return Pinyin(converter).pinyin(
        hans, style=style, heteronym=heteronym, errors=errors, strict=strict)
 def slug(hans: Union[List[Text], Text],
         style: TStyle=Style.NORMAL,
         heteronym: bool=False,
         separator: Text='-',
         errors: TErrors='default',
         strict: bool=True) -> Text:
    """Convert Chinese characters to pinyin and build a slug string.

    The conversion is done by the module-level ``_default_pinyin``; all
    readings it returns are flattened and joined with ``separator``.

    :param hans: Chinese characters
    :type hans: unicode or list
    :param style: pinyin style, :py:attr:`~pypinyin.Style.NORMAL` by
                  default. See :class:`~pypinyin.Style` for all styles.
    :param heteronym: whether to enable heteronym (multi-reading) mode
    :param separator: separator/connector placed between two pinyin
    :param errors: how to handle characters without pinyin, see
                   :py:func:`~pypinyin.pinyin`
    :param strict: for styles that return only the initial or only the
                   final, whether to follow the Hanyu Pinyin scheme strictly
                   when splitting initials and finals, see :ref:`strict`
    :return: the slug string.

    NOTE(review): earlier documentation listed ``AssertionError`` for
    non-unicode input; no assertion is visible in this function, so confirm
    against the converter before relying on it.

    ::
      >>> import pypinyin
      >>> from pypinyin import Style
      >>> pypinyin.slug('中国人')
      'zhong-guo-ren'
      >>> pypinyin.slug('中国人', separator=' ')
      'zhong guo ren'
      >>> pypinyin.slug('中国人', style=Style.FIRST_LETTER)
      'z-g-r'
      >>> pypinyin.slug('中国人', style=Style.CYRILLIC)
      'чжун1-го2-жэнь2'
    """
    per_char = _default_pinyin.pinyin(
        hans,
        style=style,
        heteronym=heteronym,
        errors=errors,
        strict=strict)
    return separator.join(chain.from_iterable(per_char))
 def lazy_pinyin(hans: Union[List[Text], Text],
                style: TStyle=Style.NORMAL,
                errors: TErrors='default',
                strict: bool=True,
                v_to_u: bool=False,
                neutral_tone_with_five: bool=False) -> List[Text]:
    """Convert Chinese characters to a flat list of pinyin strings.

    Unlike :py:func:`~pypinyin.pinyin`, each returned entry is a string and
    heteronym mode is not used. A new ``Pinyin`` object backed by a
    ``_mixConverter`` configured with ``v_to_u`` and
    ``neutral_tone_with_five`` is created on every call.

    :param hans: Chinese characters
    :type hans: unicode or list
    :param style: pinyin style, :py:attr:`~pypinyin.Style.NORMAL` by
                  default. See :class:`~pypinyin.Style` for all styles.
    :param errors: how to handle characters without pinyin, see
                   :py:func:`~pypinyin.pinyin`
    :param strict: for styles that return only the initial or only the
                   final, whether to follow the Hanyu Pinyin scheme strictly
                   when splitting initials and finals, see :ref:`strict`
    :param v_to_u: in styles without tone marks, whether to use ``ü`` in
                   place of ``v`` in the result
    :type v_to_u: bool
    :param neutral_tone_with_five: in styles that write tones as digits,
                                   whether to mark the neutral tone with 5
    :type neutral_tone_with_five: bool
    :return: list of pinyin (e.g. ``['zhong', 'guo', 'ren']``)
    :rtype: list

    NOTE(review): earlier documentation listed ``AssertionError`` for
    non-unicode input; no assertion is visible in this function, so confirm
    against the converter before relying on it.

    Usage::
      >>> from pypinyin import lazy_pinyin, Style
      >>> import pypinyin
      >>> lazy_pinyin('中心')
      ['zhong', 'xin']
      >>> lazy_pinyin('中心', style=Style.TONE)
      ['zhōng', 'xīn']
      >>> lazy_pinyin('中心', style=Style.FIRST_LETTER)
      ['z', 'x']
      >>> lazy_pinyin('中心', style=Style.TONE2)
      ['zho1ng', 'xi1n']
      >>> lazy_pinyin('中心', style=Style.CYRILLIC)
      ['чжун1', 'синь1']
      >>> lazy_pinyin('战略', v_to_u=True)
      ['zhan', 'lüe']
      >>> lazy_pinyin('衣裳', style=Style.TONE3, neutral_tone_with_five=True)
      ['yi1', 'shang5']
    """
    converter = _mixConverter(
        v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five)
    engine = Pinyin(converter)
    return engine.lazy_pinyin(hans, style=style, errors=errors, strict=strict)
--- a/third_party/python-pinyin/pypinyin/phonetic_symbol.py
+++ b/third_party/python-pinyin/pypinyin/phonetic_symbol.py
@ -0,0 +1,41 @@
 # Characters carrying a tone mark, mapped to the plain letter followed by the
 # tone digit (the bare "ü" maps to "v" with no digit).
 phonetic_symbol = {
    "ā": "a1",
    "á": "a2",
    "ǎ": "a3",
    "à": "a4",
    "ē": "e1",
    "é": "e2",
    "ě": "e3",
    "è": "e4",
    "ō": "o1",
    "ó": "o2",
    "ǒ": "o3",
    "ò": "o4",
    "ī": "i1",
    "í": "i2",
    "ǐ": "i3",
    "ì": "i4",
    "ū": "u1",
    "ú": "u2",
    "ǔ": "u3",
    "ù": "u4",
    # üe
    "ü": "v",
    "ǖ": "v1",
    "ǘ": "v2",
    "ǚ": "v3",
    "ǜ": "v4",
    "ń": "n2",
    "ň": "n3",
    "ǹ": "n4",
    "m̄": "m1",  # len('m̄') == 2
    "ḿ": "m2",
    "m̀": "m4",  # len("m̀") == 2
    "ê̄": "ê1",  # len('ê̄') == 2
    "ế": "ê2",
    "ê̌": "ê3",  # len('ê̌') == 2
    "ề": "ê4",
 }
 # Inverse lookup: letter-plus-digit form back to the tone-marked character.
 phonetic_symbol_reverse = {v: k for k, v in phonetic_symbol.items()}
--- a/Show More
+++ b/Show More
		`@ -0,0 +1,2 @@`
							`include README.rst LICENSE.txt CHANGELOG.rst`
							`recursive-include pypinyin *.pyi py.typed`