parent
e94da615a3
commit
ab7aa43d15
@ -0,0 +1,6 @@
|
|||||||
|
[bumpversion]
|
||||||
|
commit = True
|
||||||
|
tag = True
|
||||||
|
current_version = 0.41.0
|
||||||
|
|
||||||
|
[bumpversion:file:pypinyin/__init__.py]
|
@ -0,0 +1,153 @@
|
|||||||
|
# Python CircleCI 2.0 configuration file
|
||||||
|
#
|
||||||
|
# Check https://circleci.com/docs/2.0/language-python/ for more details
|
||||||
|
#
|
||||||
|
version: 2
|
||||||
|
jobs:
|
||||||
|
python3.8: &DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.8
|
||||||
|
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py38
|
||||||
|
RUN_CHECK: 1
|
||||||
|
|
||||||
|
working_directory: ~/repo
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
|
||||||
|
# Download and cache dependencies
|
||||||
|
# - restore_cache:
|
||||||
|
# keys:
|
||||||
|
# - v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
|
||||||
|
|
||||||
|
- run:
|
||||||
|
name: install dependencies
|
||||||
|
command: |
|
||||||
|
# pip install -U pip virtualenv --user
|
||||||
|
if ! which virtualenv; then
|
||||||
|
pip install 'virtualenv<=20.0.21' --user
|
||||||
|
fi
|
||||||
|
export PATH="~/.local/bin:$PATH"
|
||||||
|
|
||||||
|
virtualenv venv
|
||||||
|
. venv/bin/activate
|
||||||
|
|
||||||
|
pip install codecov
|
||||||
|
pip install tox
|
||||||
|
|
||||||
|
if [[ $RUN_CHECK == 1 ]]; then
|
||||||
|
pip install -U -r requirements_dev.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $(python -c "import sys; print(sys.stdin.encoding)" |grep None) ]]; then
|
||||||
|
export PYTHONIOENCODING=utf-8
|
||||||
|
fi
|
||||||
|
#
|
||||||
|
# - save_cache:
|
||||||
|
# paths:
|
||||||
|
# - ./venv
|
||||||
|
# key: v1-dependencies-{{ .Environment.TOX_ENV }}-{{ checksum "requirements_dev.txt" }}
|
||||||
|
|
||||||
|
- run:
|
||||||
|
name: run tests
|
||||||
|
command: |
|
||||||
|
. venv/bin/activate
|
||||||
|
|
||||||
|
if [[ $RUN_CHECK == 1 ]]; then
|
||||||
|
pre-commit run --all-files
|
||||||
|
mypy pypinyin
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
tox -e $TOX_ENV
|
||||||
|
|
||||||
|
python setup.py install
|
||||||
|
pypinyin hello
|
||||||
|
echo hello | pypinyin
|
||||||
|
pypinyin < setup.cfg
|
||||||
|
|
||||||
|
codecov
|
||||||
|
|
||||||
|
- store_artifacts:
|
||||||
|
path: test-reports
|
||||||
|
destination: test-reports
|
||||||
|
|
||||||
|
python3.9:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.9
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py39
|
||||||
|
|
||||||
|
python3.7:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.7
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py37
|
||||||
|
|
||||||
|
python3.6:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.6
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py36
|
||||||
|
|
||||||
|
python3.5:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.5
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py35
|
||||||
|
|
||||||
|
python3.4:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:3.4
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py34
|
||||||
|
|
||||||
|
python2.7:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: circleci/python:2.7
|
||||||
|
environment:
|
||||||
|
TOX_ENV: py27
|
||||||
|
|
||||||
|
# python2.6:
|
||||||
|
# <<: *DEFAULT
|
||||||
|
# docker:
|
||||||
|
# - image: python:2.6
|
||||||
|
# environment:
|
||||||
|
# TOX_ENV: py33
|
||||||
|
|
||||||
|
pypy2:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: pypy:2
|
||||||
|
environment:
|
||||||
|
TOX_ENV: pypy
|
||||||
|
|
||||||
|
pypy3:
|
||||||
|
<<: *DEFAULT
|
||||||
|
docker:
|
||||||
|
- image: pypy:3
|
||||||
|
environment:
|
||||||
|
TOX_ENV: pypy3
|
||||||
|
|
||||||
|
|
||||||
|
workflows:
|
||||||
|
version: 2
|
||||||
|
testing:
|
||||||
|
jobs:
|
||||||
|
- python3.9
|
||||||
|
- python3.8
|
||||||
|
- python3.7
|
||||||
|
- python3.6
|
||||||
|
- python3.5
|
||||||
|
- python3.4
|
||||||
|
- python2.7
|
||||||
|
- pypy2
|
||||||
|
- pypy3
|
@ -0,0 +1,15 @@
|
|||||||
|
[run]
|
||||||
|
branch = True
|
||||||
|
omit =
|
||||||
|
# pypinyin/runner.py
|
||||||
|
pypinyin/__main__.py
|
||||||
|
|
||||||
|
[report]
|
||||||
|
exclude_lines =
|
||||||
|
pragma: no cover
|
||||||
|
except NameError
|
||||||
|
except ImportError
|
||||||
|
pass
|
||||||
|
def main
|
||||||
|
if py3:
|
||||||
|
if __name__ == .__main__.:
|
@ -0,0 +1,25 @@
|
|||||||
|
# EditorConfig is awesome: http://EditorConfig.org
|
||||||
|
|
||||||
|
# top-most EditorConfig file
|
||||||
|
root = true
|
||||||
|
|
||||||
|
# Unix-style newlines with a newline ending every file
|
||||||
|
[*]
|
||||||
|
charset = utf-8
|
||||||
|
end_of_line = lf
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
|
||||||
|
# Indentation
|
||||||
|
[*.{py,rst}]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
||||||
|
[Makefile]
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 4
|
||||||
|
[*.{ini,yml}]
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 2
|
||||||
|
|
||||||
|
[*.md]
|
||||||
|
trim_trailing_whitespace = false
|
@ -0,0 +1,50 @@
|
|||||||
|
[flake8]
|
||||||
|
|
||||||
|
########## OPTIONS ##########
|
||||||
|
# Set the maximum length that any line (with some exceptions) may be.
|
||||||
|
max-line-length = 120
|
||||||
|
|
||||||
|
|
||||||
|
################### FILE PATTERNS ##########################
|
||||||
|
# Provide a comma-separated list of glob patterns to exclude from checks.
|
||||||
|
exclude =
|
||||||
|
# git folder
|
||||||
|
.git,
|
||||||
|
# python cache
|
||||||
|
__pycache__,
|
||||||
|
third_party/,
|
||||||
|
# Provide a comma-separated list of glob patterns to include for checks.
|
||||||
|
filename =
|
||||||
|
*.py
|
||||||
|
|
||||||
|
|
||||||
|
########## RULES ##########
|
||||||
|
|
||||||
|
# ERROR CODES
|
||||||
|
#
|
||||||
|
# E/W - PEP8 errors/warnings (pycodestyle)
|
||||||
|
# F - linting errors (pyflakes)
|
||||||
|
# C - McCabe complexity error (mccabe)
|
||||||
|
#
|
||||||
|
# W503 - line break before binary operator
|
||||||
|
|
||||||
|
# Specify a list of codes to ignore.
|
||||||
|
ignore =
|
||||||
|
W503
|
||||||
|
E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
|
||||||
|
W291,W293,W605
|
||||||
|
E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
|
||||||
|
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
|
||||||
|
# to line this up with executable bit
|
||||||
|
EXE001,
|
||||||
|
# these ignores are from flake8-bugbear; please fix!
|
||||||
|
B007,B008,
|
||||||
|
# these ignores are from flake8-comprehensions; please fix!
|
||||||
|
C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
|
||||||
|
|
||||||
|
# Specify the list of error codes you wish Flake8 to report.
|
||||||
|
select =
|
||||||
|
E,
|
||||||
|
W,
|
||||||
|
F,
|
||||||
|
C
|
@ -0,0 +1,16 @@
|
|||||||
|
# Contributing
|
||||||
|
|
||||||
|
|
||||||
|
* 如果是关于单个汉字的拼音有误的问题,麻烦前往 [pinyin-data][pinyin-data] 进行反馈。
|
||||||
|
* 如果是关于词组的拼音有误的问题,麻烦前往 [phrase-pinyin-data][phrase-pinyin-data] 进行反馈。
|
||||||
|
* 有任何疑问或建议欢迎创建 [issue][issue] 或提交 [PR][pr] 。
|
||||||
|
* 项目代码开发方面的问题可以看看 [开发文档][开发文档] 。
|
||||||
|
|
||||||
|
Thanks for contributing! :heart:
|
||||||
|
|
||||||
|
|
||||||
|
[pinyin-data]: https://github.com/mozillazg/pinyin-data/issues
|
||||||
|
[phrase-pinyin-data]: https://github.com/mozillazg/phrase-pinyin-data
|
||||||
|
[issue]: https://github.com/mozillazg/python-pinyin/issues
|
||||||
|
[pr]: https://github.com/mozillazg/python-pinyin/pulls
|
||||||
|
[开发文档]: https://pypinyin.readthedocs.io/zh_CN/develop/develop.html
|
@ -0,0 +1,15 @@
|
|||||||
|
## PR 描述
|
||||||
|
|
||||||
|
|
||||||
|
## 待办事项
|
||||||
|
|
||||||
|
* [ ] 符合代码规范
|
||||||
|
* [ ] 单元测试
|
||||||
|
* [ ] 文档
|
||||||
|
|
||||||
|
|
||||||
|
<!--
|
||||||
|
感谢你的贡献!❤️
|
||||||
|
|
||||||
|
P.S. 麻烦选择 `develop` 分支作为 PR 的目标分支,谢谢~
|
||||||
|
-->
|
@ -0,0 +1,29 @@
|
|||||||
|
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
||||||
|
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||||
|
|
||||||
|
name: CI
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [windows-latest]
|
||||||
|
# python-version: [3.7, 3.8]
|
||||||
|
python-version: [3.9]
|
||||||
|
tox-env: [py37, py38, py39]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install tox
|
||||||
|
- name: Test with tox
|
||||||
|
run: tox -e ${{ matrix.tox-env}}
|
@ -0,0 +1,54 @@
|
|||||||
|
*.py[cod]
|
||||||
|
*.sw[op]
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Packages
|
||||||
|
*.egg
|
||||||
|
*.egg-info
|
||||||
|
dist
|
||||||
|
build
|
||||||
|
eggs
|
||||||
|
parts
|
||||||
|
bin
|
||||||
|
var
|
||||||
|
sdist
|
||||||
|
develop-eggs
|
||||||
|
.installed.cfg
|
||||||
|
lib
|
||||||
|
lib64
|
||||||
|
_build
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
.coverage
|
||||||
|
.tox
|
||||||
|
nosetests.xml
|
||||||
|
htmlcov
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
|
||||||
|
# Mr Developer
|
||||||
|
.mr.developer.cfg
|
||||||
|
.project
|
||||||
|
.pydevproject
|
||||||
|
|
||||||
|
tools/words.txt
|
||||||
|
*~
|
||||||
|
tools/phrases_dict.txt
|
||||||
|
venv
|
||||||
|
.cache/
|
||||||
|
2.7/
|
||||||
|
.python-version
|
||||||
|
venv2.7/
|
||||||
|
venvPyInstaller/
|
||||||
|
output.dat
|
||||||
|
vocab.bin
|
||||||
|
vocab.large.bin
|
||||||
|
.mypy_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
/pypinyin/phrases_dict_large.py
|
@ -0,0 +1,6 @@
|
|||||||
|
[submodule "pinyin-data"]
|
||||||
|
path = pinyin-data
|
||||||
|
url = https://github.com/mozillazg/pinyin-data.git
|
||||||
|
[submodule "phrase-pinyin-data"]
|
||||||
|
path = phrase-pinyin-data
|
||||||
|
url = https://github.com/mozillazg/phrase-pinyin-data.git
|
@ -0,0 +1,29 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks.git
|
||||||
|
rev: v3.4.0
|
||||||
|
hooks:
|
||||||
|
- id: check-merge-conflict
|
||||||
|
- id: debug-statements
|
||||||
|
exclude: 'tools/|(pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py))'
|
||||||
|
- id: double-quote-string-fixer
|
||||||
|
exclude: 'pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)'
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
exclude: '.bumpversion.cfg'
|
||||||
|
- id: requirements-txt-fixer
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- repo: https://gitlab.com/pycqa/flake8
|
||||||
|
rev: 3.8.4
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
exclude: 'tools|pypinyin/(phrases_dict.py|pinyin_dict.py|phonetic_symbol.py)|(docs/conf.py)'
|
||||||
|
# - repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
# rev: 'v0.812'
|
||||||
|
# hooks:
|
||||||
|
# - id: mypy
|
||||||
|
# files: 'pypinyin/'
|
||||||
|
- repo: https://github.com/pre-commit/mirrors-yapf.git
|
||||||
|
sha: v0.16.0
|
||||||
|
hooks:
|
||||||
|
- id: yapf
|
||||||
|
files: \.py$
|
||||||
|
exclude: (?=phrase-pinyin-data|pinyin-data).*(\.py)$
|
@ -0,0 +1,3 @@
|
|||||||
|
[style]
|
||||||
|
based_on_style = pep8
|
||||||
|
column_limit = 80
|
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"scanSettings": {
|
||||||
|
"baseBranches": []
|
||||||
|
},
|
||||||
|
"checkRunSettings": {
|
||||||
|
"vulnerableCheckRunConclusionLevel": "failure",
|
||||||
|
"displayMode": "diff"
|
||||||
|
},
|
||||||
|
"issueSettings": {
|
||||||
|
"minSeverityLevel": "LOW"
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,46 @@
|
|||||||
|
# Contributor Covenant Code of Conduct
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to creating a positive environment include:
|
||||||
|
|
||||||
|
* Using welcoming and inclusive language
|
||||||
|
* Being respectful of differing viewpoints and experiences
|
||||||
|
* Gracefully accepting constructive criticism
|
||||||
|
* Focusing on what is best for the community
|
||||||
|
* Showing empathy towards other community members
|
||||||
|
|
||||||
|
Examples of unacceptable behavior by participants include:
|
||||||
|
|
||||||
|
* The use of sexualized language or imagery and unwelcome sexual attention or advances
|
||||||
|
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||||
|
* Public or private harassment
|
||||||
|
* Publishing others' private information, such as a physical or electronic address, without explicit permission
|
||||||
|
* Other conduct which could reasonably be considered inappropriate in a professional setting
|
||||||
|
|
||||||
|
## Our Responsibilities
|
||||||
|
|
||||||
|
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
|
||||||
|
|
||||||
|
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
|
||||||
|
|
||||||
|
## Enforcement
|
||||||
|
|
||||||
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at mozillazg101@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
|
||||||
|
|
||||||
|
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
|
||||||
|
|
||||||
|
[homepage]: http://contributor-covenant.org
|
||||||
|
[version]: http://contributor-covenant.org/version/1/4/
|
@ -0,0 +1,20 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2016 mozillazg, 闲耘 <hotoo.cn@gmail.com>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@ -0,0 +1,2 @@
|
|||||||
|
include README.rst LICENSE.txt CHANGELOG.rst
|
||||||
|
recursive-include pypinyin *.pyi py.typed
|
@ -0,0 +1,97 @@
|
|||||||
|
help:
|
||||||
|
@echo "test run test"
|
||||||
|
@echo "publish publish to PyPI"
|
||||||
|
@echo "publish_test publish to TestPyPI"
|
||||||
|
@echo "docs_html make html docs"
|
||||||
|
@echo "docs_serve serve docs"
|
||||||
|
@echo "gen_data gen pinyin data"
|
||||||
|
@echo "gen_pinyin_dict gen single hanzi pinyin dict"
|
||||||
|
@echo "gen_phrases_dict gen phrase hanzi pinyin dict"
|
||||||
|
@echo "lint run lint"
|
||||||
|
@echo "clean - remove all build, test, coverage and Python artifacts"
|
||||||
|
@echo "clean-build - remove build artifacts"
|
||||||
|
@echo "clean-pyc - remove Python file artifacts"
|
||||||
|
@echo "clean-test - remove test and coverage artifacts"
|
||||||
|
|
||||||
|
.PHONY: test
|
||||||
|
test: lint
|
||||||
|
@echo "run test"
|
||||||
|
make testonly
|
||||||
|
|
||||||
|
.PHONY: testonly
|
||||||
|
testonly:
|
||||||
|
py.test --random-order --cov pypinyin tests/ pypinyin/
|
||||||
|
|
||||||
|
.PHONY: publish
|
||||||
|
publish: clean
|
||||||
|
@echo "publish to pypi"
|
||||||
|
python setup.py sdist
|
||||||
|
python setup.py bdist_wheel
|
||||||
|
twine upload dist/*
|
||||||
|
|
||||||
|
.PHONY: publish_test
|
||||||
|
publish_test: clean
|
||||||
|
@echo "publish to test pypi"
|
||||||
|
python setup.py sdist
|
||||||
|
python setup.py bdist_wheel
|
||||||
|
twine upload --repository test dist/*
|
||||||
|
|
||||||
|
.PHONY: docs_html
|
||||||
|
docs_html:
|
||||||
|
cd docs && make html
|
||||||
|
|
||||||
|
.PHONY: docs_serve
|
||||||
|
docs_serve: docs_html
|
||||||
|
cd docs/_build/html && python -m http.server
|
||||||
|
|
||||||
|
.PHONY: gen_data
|
||||||
|
gen_data: gen_pinyin_dict gen_phrases_dict
|
||||||
|
|
||||||
|
.PHONY: gen_pinyin_dict
|
||||||
|
gen_pinyin_dict:
|
||||||
|
python gen_pinyin_dict.py pinyin-data/pinyin.txt pypinyin/pinyin_dict.py
|
||||||
|
|
||||||
|
.PHONY: gen_phrases_dict
|
||||||
|
gen_phrases_dict:
|
||||||
|
python gen_phrases_dict.py phrase-pinyin-data/pinyin.txt pypinyin/phrases_dict_large.py
|
||||||
|
python tidy_phrases_dict.py
|
||||||
|
|
||||||
|
.PHONY: lint
|
||||||
|
lint:
|
||||||
|
pre-commit run --all-files
|
||||||
|
mypy --strict pypinyin
|
||||||
|
|
||||||
|
clean: clean-build clean-pyc clean-test
|
||||||
|
|
||||||
|
clean-build:
|
||||||
|
rm -fr build/
|
||||||
|
rm -fr dist/
|
||||||
|
rm -fr .eggs/
|
||||||
|
find . -name '*.egg-info' -exec rm -fr {} +
|
||||||
|
find . -name '*.egg' -exec rm -f {} +
|
||||||
|
|
||||||
|
clean-pyc:
|
||||||
|
find . -name '*.pyc' -exec rm -f {} +
|
||||||
|
find . -name '*.pyo' -exec rm -f {} +
|
||||||
|
find . -name '*~' -exec rm -f {} +
|
||||||
|
find . -name '__pycache__' -exec rm -fr {} +
|
||||||
|
|
||||||
|
clean-test:
|
||||||
|
rm -fr .tox/
|
||||||
|
rm -f .coverage
|
||||||
|
rm -fr htmlcov/
|
||||||
|
|
||||||
|
rebase_master:
|
||||||
|
git fetch origin && git rebase origin/master
|
||||||
|
|
||||||
|
merge_dev:
|
||||||
|
git merge --no-ff origin/develop
|
||||||
|
|
||||||
|
bump_patch:
|
||||||
|
bumpversion --verbose patch
|
||||||
|
|
||||||
|
bump_minor:
|
||||||
|
bumpversion --verbose minor
|
||||||
|
|
||||||
|
start_next:
|
||||||
|
git push && git push --tags && git checkout develop && git rebase master && git push
|
@ -0,0 +1,8 @@
|
|||||||
|
# Modified from
|
||||||
|
* [python-pinyin](https://github.com/mozillazg/python-pinyin.git)
|
||||||
|
commit: 55e524aa1b7b8eec3d15c5306043c6cdd5938b03
|
||||||
|
licence: MIT
|
||||||
|
|
||||||
|
## Features
|
||||||
|
* only supports py3
|
||||||
|
* remove pyi
|
@ -0,0 +1 @@
|
|||||||
|
../CHANGELOG.rst
|
@ -0,0 +1,177 @@
|
|||||||
|
# Makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line.
|
||||||
|
SPHINXOPTS =
|
||||||
|
SPHINXBUILD = sphinx-build
|
||||||
|
PAPER =
|
||||||
|
BUILDDIR = _build
|
||||||
|
|
||||||
|
# User-friendly check for sphinx-build
|
||||||
|
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
||||||
|
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Internal variables.
|
||||||
|
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||||
|
PAPEROPT_letter = -D latex_paper_size=letter
|
||||||
|
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
# the i18n builder cannot share the environment and doctrees with the others
|
||||||
|
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
|
||||||
|
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "Please use \`make <target>' where <target> is one of"
|
||||||
|
@echo " html to make standalone HTML files"
|
||||||
|
@echo " dirhtml to make HTML files named index.html in directories"
|
||||||
|
@echo " singlehtml to make a single large HTML file"
|
||||||
|
@echo " pickle to make pickle files"
|
||||||
|
@echo " json to make JSON files"
|
||||||
|
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||||
|
@echo " qthelp to make HTML files and a qthelp project"
|
||||||
|
@echo " devhelp to make HTML files and a Devhelp project"
|
||||||
|
@echo " epub to make an epub"
|
||||||
|
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||||
|
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||||
|
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
||||||
|
@echo " text to make text files"
|
||||||
|
@echo " man to make manual pages"
|
||||||
|
@echo " texinfo to make Texinfo files"
|
||||||
|
@echo " info to make Texinfo files and run them through makeinfo"
|
||||||
|
@echo " gettext to make PO message catalogs"
|
||||||
|
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||||
|
@echo " xml to make Docutils-native XML files"
|
||||||
|
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
||||||
|
@echo " linkcheck to check all external links for integrity"
|
||||||
|
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf $(BUILDDIR)/*
|
||||||
|
|
||||||
|
html:
|
||||||
|
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||||
|
|
||||||
|
dirhtml:
|
||||||
|
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||||
|
|
||||||
|
singlehtml:
|
||||||
|
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||||
|
|
||||||
|
pickle:
|
||||||
|
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the pickle files."
|
||||||
|
|
||||||
|
json:
|
||||||
|
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the JSON files."
|
||||||
|
|
||||||
|
htmlhelp:
|
||||||
|
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||||
|
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||||
|
|
||||||
|
qthelp:
|
||||||
|
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||||
|
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||||
|
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BaiduPCS.qhcp"
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BaiduPCS.qhc"
|
||||||
|
|
||||||
|
devhelp:
|
||||||
|
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished."
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# mkdir -p $$HOME/.local/share/devhelp/BaiduPCS"
|
||||||
|
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BaiduPCS"
|
||||||
|
@echo "# devhelp"
|
||||||
|
|
||||||
|
epub:
|
||||||
|
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||||
|
|
||||||
|
latex:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||||
|
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||||
|
"(use \`make latexpdf' here to do that automatically)."
|
||||||
|
|
||||||
|
latexpdf:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo "Running LaTeX files through pdflatex..."
|
||||||
|
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||||
|
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||||
|
|
||||||
|
latexpdfja:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo "Running LaTeX files through platex and dvipdfmx..."
|
||||||
|
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
||||||
|
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||||
|
|
||||||
|
text:
|
||||||
|
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||||
|
|
||||||
|
man:
|
||||||
|
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||||
|
|
||||||
|
texinfo:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||||
|
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||||
|
"(use \`make info' here to do that automatically)."
|
||||||
|
|
||||||
|
info:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo "Running Texinfo files through makeinfo..."
|
||||||
|
make -C $(BUILDDIR)/texinfo info
|
||||||
|
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||||
|
|
||||||
|
gettext:
|
||||||
|
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||||
|
|
||||||
|
changes:
|
||||||
|
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||||
|
@echo
|
||||||
|
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||||
|
|
||||||
|
linkcheck:
|
||||||
|
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||||
|
@echo
|
||||||
|
@echo "Link check complete; look for any errors in the above output " \
|
||||||
|
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||||
|
|
||||||
|
doctest:
|
||||||
|
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||||
|
@echo "Testing of doctests in the sources finished, look at the " \
|
||||||
|
"results in $(BUILDDIR)/doctest/output.txt."
|
||||||
|
|
||||||
|
xml:
|
||||||
|
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
||||||
|
|
||||||
|
pseudoxml:
|
||||||
|
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
@ -0,0 +1,43 @@
|
|||||||
|
API
|
||||||
|
====
|
||||||
|
|
||||||
|
.. _style:
|
||||||
|
|
||||||
|
拼音风格
|
||||||
|
-----------
|
||||||
|
|
||||||
|
.. autoclass:: pypinyin.Style
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:member-order: bysource
|
||||||
|
|
||||||
|
|
||||||
|
.. _core_api:
|
||||||
|
|
||||||
|
核心 API
|
||||||
|
-------------
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.pinyin
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.lazy_pinyin
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.load_single_dict
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.load_phrases_dict
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.slug
|
||||||
|
|
||||||
|
|
||||||
|
.. _convert_style:
|
||||||
|
|
||||||
|
|
||||||
|
注册新的拼音风格
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.style.register
|
||||||
|
|
||||||
|
|
||||||
|
.. _seg:
|
||||||
|
|
||||||
|
|
||||||
|
.. _#27: https://github.com/mozillazg/python-pinyin/issues/27
|
@ -0,0 +1,309 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# pypinyin documentation build configuration file, created by
|
||||||
|
# sphinx-quickstart on Fri Sep 06 22:22:13 2013.
|
||||||
|
#
|
||||||
|
# This file is execfile()d with the current directory set to its containing dir.
|
||||||
|
#
|
||||||
|
# Note that not all possible configuration values are present in this
|
||||||
|
# autogenerated file.
|
||||||
|
#
|
||||||
|
# All configuration values have a default; values that are commented out
|
||||||
|
# serve to show the default.
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||||
|
#sys.path.insert(0, os.path.abspath('.'))
|
||||||
|
sys.path.insert(0, os.path.abspath('.'))
|
||||||
|
sys.path.insert(0, os.path.abspath('..'))
|
||||||
|
# sys.path.insert(0, os.path.abspath('../pypinyin'))
|
||||||
|
|
||||||
|
# -- General configuration -----------------------------------------------------
|
||||||
|
|
||||||
|
# If your documentation needs a minimal Sphinx version, state it here.
|
||||||
|
#needs_sphinx = '1.0'
|
||||||
|
|
||||||
|
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||||
|
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||||
|
extensions = [
|
||||||
|
'sphinx.ext.autodoc',
|
||||||
|
'sphinx.ext.viewcode',
|
||||||
|
'sphinx.ext.extlinks',
|
||||||
|
'sphinx.ext.todo',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
|
templates_path = ['_templates']
|
||||||
|
|
||||||
|
# The suffix of source filenames.
|
||||||
|
source_suffix = '.rst'
|
||||||
|
|
||||||
|
# The encoding of source files.
|
||||||
|
#source_encoding = 'utf-8-sig'
|
||||||
|
|
||||||
|
# The master toctree document.
|
||||||
|
master_doc = 'index'
|
||||||
|
|
||||||
|
import pypinyin
|
||||||
|
|
||||||
|
# General information about the project.
|
||||||
|
project = pypinyin.__title__
|
||||||
|
copyright = pypinyin.__copyright__
|
||||||
|
|
||||||
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
# |version| and |release|, also used in various other places throughout the
|
||||||
|
# built documents.
|
||||||
|
#
|
||||||
|
# The short X.Y version.
|
||||||
|
version = pypinyin.__version__
|
||||||
|
# The full version, including alpha/beta/rc tags.
|
||||||
|
release = pypinyin.__version__
|
||||||
|
|
||||||
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||||
|
# for a list of supported languages.
|
||||||
|
language = 'zh_CN'
|
||||||
|
|
||||||
|
# There are two options for replacing |today|: either, you set today to some
|
||||||
|
# non-false value, then it is used:
|
||||||
|
#today = ''
|
||||||
|
# Else, today_fmt is used as the format for a strftime call.
|
||||||
|
#today_fmt = '%B %d, %Y'
|
||||||
|
|
||||||
|
# List of patterns, relative to source directory, that match files and
|
||||||
|
# directories to ignore when looking for source files.
|
||||||
|
exclude_patterns = ['_build']
|
||||||
|
|
||||||
|
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||||
|
#default_role = None
|
||||||
|
|
||||||
|
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||||
|
#add_function_parentheses = True
|
||||||
|
|
||||||
|
# If true, the current module name will be prepended to all description
|
||||||
|
# unit titles (such as .. function::).
|
||||||
|
#add_module_names = True
|
||||||
|
|
||||||
|
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||||
|
# output. They are ignored by default.
|
||||||
|
#show_authors = False
|
||||||
|
|
||||||
|
# The name of the Pygments (syntax highlighting) style to use.
|
||||||
|
pygments_style = 'sphinx'
|
||||||
|
|
||||||
|
# A list of ignored prefixes for module index sorting.
|
||||||
|
#modindex_common_prefix = []
|
||||||
|
|
||||||
|
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||||
|
#keep_warnings = False
|
||||||
|
|
||||||
|
# -- Options for HTML output ---------------------------------------------------
|
||||||
|
|
||||||
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
|
# a list of builtin themes.
|
||||||
|
html_theme = 'nature'
|
||||||
|
|
||||||
|
# Theme options are theme-specific and customize the look and feel of a theme
|
||||||
|
# further. For a list of options available for each theme, see the
|
||||||
|
# documentation.
|
||||||
|
#html_theme_options = {}
|
||||||
|
|
||||||
|
# Add any paths that contain custom themes here, relative to this directory.
|
||||||
|
#html_theme_path = []
|
||||||
|
|
||||||
|
# The name for this set of Sphinx documents. If None, it defaults to
|
||||||
|
# "<project> v<release> documentation".
|
||||||
|
#html_title = None
|
||||||
|
|
||||||
|
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||||
|
#html_short_title = None
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top
|
||||||
|
# of the sidebar.
|
||||||
|
#html_logo = None
|
||||||
|
|
||||||
|
# The name of an image file (within the static path) to use as favicon of the
|
||||||
|
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||||
|
# pixels large.
|
||||||
|
#html_favicon = None
|
||||||
|
|
||||||
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||||
|
html_static_path = ['_static']
|
||||||
|
|
||||||
|
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||||
|
# using the given strftime format.
|
||||||
|
#html_last_updated_fmt = '%b %d, %Y'
|
||||||
|
|
||||||
|
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||||
|
# typographically correct entities.
|
||||||
|
#html_use_smartypants = True
|
||||||
|
|
||||||
|
# Custom sidebar templates, maps document names to template names.
|
||||||
|
#html_sidebars = {}
|
||||||
|
|
||||||
|
# Additional templates that should be rendered to pages, maps page names to
|
||||||
|
# template names.
|
||||||
|
#html_additional_pages = {}
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
#html_domain_indices = True
|
||||||
|
|
||||||
|
# If false, no index is generated.
|
||||||
|
#html_use_index = True
|
||||||
|
|
||||||
|
# If true, the index is split into individual pages for each letter.
|
||||||
|
#html_split_index = False
|
||||||
|
|
||||||
|
# If true, links to the reST sources are added to the pages.
|
||||||
|
#html_show_sourcelink = True
|
||||||
|
|
||||||
|
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||||
|
#html_show_sphinx = True
|
||||||
|
|
||||||
|
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||||
|
#html_show_copyright = True
|
||||||
|
|
||||||
|
# If true, an OpenSearch description file will be output, and all pages will
|
||||||
|
# contain a <link> tag referring to it. The value of this option must be the
|
||||||
|
# base URL from which the finished HTML is served.
|
||||||
|
#html_use_opensearch = ''
|
||||||
|
|
||||||
|
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||||
|
#html_file_suffix = None
|
||||||
|
|
||||||
|
# Output file base name for HTML help builder.
|
||||||
|
htmlhelp_basename = 'pypinyindoc'
|
||||||
|
|
||||||
|
# -- Options for LaTeX output --------------------------------------------------
|
||||||
|
|
||||||
|
latex_elements = {
|
||||||
|
# The paper size ('letterpaper' or 'a4paper').
|
||||||
|
#'papersize': 'letterpaper',
|
||||||
|
|
||||||
|
# The font size ('10pt', '11pt' or '12pt').
|
||||||
|
#'pointsize': '10pt',
|
||||||
|
|
||||||
|
# Additional stuff for the LaTeX preamble.
|
||||||
|
#'preamble': '',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Grouping the document tree into LaTeX files. List of tuples
|
||||||
|
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||||
|
latex_documents = [
|
||||||
|
('index', 'pypinyin.tex', 'pypinyin Documentation', 'mozillazg', 'manual'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top of
|
||||||
|
# the title page.
|
||||||
|
#latex_logo = None
|
||||||
|
|
||||||
|
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||||
|
# not chapters.
|
||||||
|
#latex_use_parts = False
|
||||||
|
|
||||||
|
# If true, show page references after internal links.
|
||||||
|
#latex_show_pagerefs = False
|
||||||
|
|
||||||
|
# If true, show URL addresses after external links.
|
||||||
|
#latex_show_urls = False
|
||||||
|
|
||||||
|
# Documents to append as an appendix to all manuals.
|
||||||
|
#latex_appendices = []
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
#latex_domain_indices = True
|
||||||
|
|
||||||
|
# -- Options for manual page output --------------------------------------------
|
||||||
|
|
||||||
|
# One entry per manual page. List of tuples
|
||||||
|
# (source start file, name, description, authors, manual section).
|
||||||
|
man_pages = [('index', 'pypinyin', 'pypinyin Documentation', ['mozillazg'], 1)]
|
||||||
|
|
||||||
|
# If true, show URL addresses after external links.
|
||||||
|
#man_show_urls = False
|
||||||
|
|
||||||
|
# -- Options for Texinfo output ------------------------------------------------
|
||||||
|
|
||||||
|
# Grouping the document tree into Texinfo files. List of tuples
|
||||||
|
# (source start file, target name, title, author,
|
||||||
|
# dir menu entry, description, category)
|
||||||
|
texinfo_documents = [
|
||||||
|
('index', 'pypinyin', 'pypinyin Documentation', 'mozillazg', 'pypinyin',
|
||||||
|
'One line description of project.', 'Miscellaneous'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Documents to append as an appendix to all manuals.
|
||||||
|
#texinfo_appendices = []
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
#texinfo_domain_indices = True
|
||||||
|
|
||||||
|
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||||
|
#texinfo_show_urls = 'footnote'
|
||||||
|
|
||||||
|
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||||
|
#texinfo_no_detailmenu = False
|
||||||
|
|
||||||
|
# -- Options for Epub output ---------------------------------------------------
|
||||||
|
|
||||||
|
# Bibliographic Dublin Core info.
|
||||||
|
epub_title = 'pypinyin'
|
||||||
|
epub_author = 'mozillazg'
|
||||||
|
epub_publisher = 'mozillazg'
|
||||||
|
epub_copyright = '2016 mozillazg'
|
||||||
|
|
||||||
|
# The language of the text. It defaults to the language option
|
||||||
|
# or en if the language is not set.
|
||||||
|
#epub_language = ''
|
||||||
|
|
||||||
|
# The scheme of the identifier. Typical schemes are ISBN or URL.
|
||||||
|
#epub_scheme = ''
|
||||||
|
|
||||||
|
# The unique identifier of the text. This can be a ISBN number
|
||||||
|
# or the project homepage.
|
||||||
|
#epub_identifier = ''
|
||||||
|
|
||||||
|
# A unique identification for the text.
|
||||||
|
#epub_uid = ''
|
||||||
|
|
||||||
|
# A tuple containing the cover image and cover page html template filenames.
|
||||||
|
#epub_cover = ()
|
||||||
|
|
||||||
|
# A sequence of (type, uri, title) tuples for the guide element of content.opf.
|
||||||
|
#epub_guide = ()
|
||||||
|
|
||||||
|
# HTML files that should be inserted before the pages created by sphinx.
|
||||||
|
# The format is a list of tuples containing the path and title.
|
||||||
|
#epub_pre_files = []
|
||||||
|
|
||||||
|
# HTML files that should be inserted after the pages created by sphinx.
|
||||||
|
# The format is a list of tuples containing the path and title.
|
||||||
|
#epub_post_files = []
|
||||||
|
|
||||||
|
# A list of files that should not be packed into the epub file.
|
||||||
|
#epub_exclude_files = []
|
||||||
|
|
||||||
|
# The depth of the table of contents in toc.ncx.
|
||||||
|
#epub_tocdepth = 3
|
||||||
|
|
||||||
|
# Allow duplicate toc entries.
|
||||||
|
#epub_tocdup = True
|
||||||
|
|
||||||
|
# Fix unsupported image types using the PIL.
|
||||||
|
#epub_fix_images = False
|
||||||
|
|
||||||
|
# Scale large images.
|
||||||
|
#epub_max_image_width = 0
|
||||||
|
|
||||||
|
# If 'no', URL addresses will not be shown.
|
||||||
|
#epub_show_urls = 'inline'
|
||||||
|
|
||||||
|
# If false, no index is generated.
|
||||||
|
#epub_use_index = True
|
||||||
|
|
||||||
|
# Example configuration for intersphinx: refer to the Python standard library.
# Written in the named ``{name: (target, inventory)}`` form; the bare
# ``{url: None}`` form used previously is the legacy layout. ``None`` as the
# inventory means "fetch objects.inv from the target URL".
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
|
@ -0,0 +1,38 @@
|
|||||||
|
.. _contrib:
|
||||||
|
|
||||||
|
contrib
|
||||||
|
========
|
||||||
|
|
||||||
|
.. _tone_convert:
|
||||||
|
|
||||||
|
拼音转换
|
||||||
|
--------
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.to_normal
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.to_tone
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.to_tone2
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.to_tone3
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone3
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_normal
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone3
|
||||||
|
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_normal
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone
|
||||||
|
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone2
|
||||||
|
|
||||||
|
|
||||||
|
V2UMixin
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. autoclass:: pypinyin.contrib.uv.V2UMixin
|
||||||
|
|
||||||
|
|
||||||
|
NeutralToneWith5Mixin
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
.. autoclass:: pypinyin.contrib.neutral_tone.NeutralToneWith5Mixin
|
@ -0,0 +1,20 @@
|
|||||||
|
安装
|
||||||
|
======
|
||||||
|
|
||||||
|
可以使用 pip 进行安装:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ pip install pypinyin
|
||||||
|
|
||||||
|
easy_install 安装:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ easy_install pypinyin
|
||||||
|
|
||||||
|
源码安装:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ python setup.py install
|
@ -0,0 +1,242 @@
|
|||||||
|
@ECHO OFF
|
||||||
|
|
||||||
|
REM Command file for Sphinx documentation
|
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" (
|
||||||
|
set SPHINXBUILD=sphinx-build
|
||||||
|
)
|
||||||
|
set BUILDDIR=_build
|
||||||
|
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
|
||||||
|
set I18NSPHINXOPTS=%SPHINXOPTS% .
|
||||||
|
if NOT "%PAPER%" == "" (
|
||||||
|
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
|
||||||
|
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "" goto help
|
||||||
|
|
||||||
|
if "%1" == "help" (
|
||||||
|
:help
|
||||||
|
echo.Please use `make ^<target^>` where ^<target^> is one of
|
||||||
|
echo. html to make standalone HTML files
|
||||||
|
echo. dirhtml to make HTML files named index.html in directories
|
||||||
|
echo. singlehtml to make a single large HTML file
|
||||||
|
echo. pickle to make pickle files
|
||||||
|
echo. json to make JSON files
|
||||||
|
echo. htmlhelp to make HTML files and a HTML help project
|
||||||
|
echo. qthelp to make HTML files and a qthelp project
|
||||||
|
echo. devhelp to make HTML files and a Devhelp project
|
||||||
|
echo. epub to make an epub
|
||||||
|
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
|
||||||
|
echo. text to make text files
|
||||||
|
echo. man to make manual pages
|
||||||
|
echo. texinfo to make Texinfo files
|
||||||
|
echo. gettext to make PO message catalogs
|
||||||
|
echo. changes to make an overview over all changed/added/deprecated items
|
||||||
|
echo. xml to make Docutils-native XML files
|
||||||
|
echo. pseudoxml to make pseudoxml-XML files for display purposes
|
||||||
|
echo. linkcheck to check all external links for integrity
|
||||||
|
echo. doctest to run all doctests embedded in the documentation if enabled
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "clean" (
|
||||||
|
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
|
||||||
|
del /q /s %BUILDDIR%\*
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
%SPHINXBUILD% 2> nul
|
||||||
|
if errorlevel 9009 (
|
||||||
|
echo.
|
||||||
|
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||||
|
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||||
|
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||||
|
echo.may add the Sphinx directory to PATH.
|
||||||
|
echo.
|
||||||
|
echo.If you don't have Sphinx installed, grab it from
|
||||||
|
echo.http://sphinx-doc.org/
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "html" (
|
||||||
|
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "dirhtml" (
|
||||||
|
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "singlehtml" (
|
||||||
|
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "pickle" (
|
||||||
|
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can process the pickle files.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "json" (
|
||||||
|
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can process the JSON files.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "htmlhelp" (
|
||||||
|
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can run HTML Help Workshop with the ^
|
||||||
|
.hhp project file in %BUILDDIR%/htmlhelp.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
REM Build the Qt help project and print the follow-up commands.
REM NOTE(review): the file names assume the Sphinx project name is
REM "pypinyin" (the qthelp base name follows the project name) - confirm.
if "%1" == "qthelp" (
    %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
    echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pypinyin.qhcp
    echo.To view the help file:
    echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pypinyin.qhc
    goto end
)
|
||||||
|
|
||||||
|
if "%1" == "devhelp" (
|
||||||
|
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "epub" (
|
||||||
|
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The epub file is in %BUILDDIR%/epub.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "latex" (
|
||||||
|
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "latexpdf" (
|
||||||
|
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||||
|
cd %BUILDDIR%/latex
|
||||||
|
make all-pdf
|
||||||
|
cd %BUILDDIR%/..
|
||||||
|
echo.
|
||||||
|
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "latexpdfja" (
|
||||||
|
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||||
|
cd %BUILDDIR%/latex
|
||||||
|
make all-pdf-ja
|
||||||
|
cd %BUILDDIR%/..
|
||||||
|
echo.
|
||||||
|
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "text" (
|
||||||
|
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The text files are in %BUILDDIR%/text.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "man" (
|
||||||
|
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The manual pages are in %BUILDDIR%/man.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "texinfo" (
|
||||||
|
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "gettext" (
|
||||||
|
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "changes" (
|
||||||
|
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.The overview file is in %BUILDDIR%/changes.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "linkcheck" (
|
||||||
|
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Link check complete; look for any errors in the above output ^
|
||||||
|
or in %BUILDDIR%/linkcheck/output.txt.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "doctest" (
|
||||||
|
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Testing of doctests in the sources finished, look at the ^
|
||||||
|
results in %BUILDDIR%/doctest/output.txt.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "xml" (
|
||||||
|
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The XML files are in %BUILDDIR%/xml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "pseudoxml" (
|
||||||
|
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
:end
|
@ -0,0 +1,10 @@
|
|||||||
|
Related Projects
|
||||||
|
===================
|
||||||
|
|
||||||
|
* `hotoo/pinyin`__: 汉字拼音转换工具 Node.js/JavaScript 版。
|
||||||
|
* `mozillazg/go-pinyin`__: 汉字拼音转换工具 Go 版。
|
||||||
|
* `mozillazg/rust-pinyin`__: 汉字拼音转换工具 Rust 版。
|
||||||
|
|
||||||
|
__ https://github.com/hotoo/pinyin
|
||||||
|
__ https://github.com/mozillazg/go-pinyin
|
||||||
|
__ https://github.com/mozillazg/rust-pinyin
|
@ -0,0 +1,60 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def remove_dup_items(lst):
    """Return a new list holding the items of ``lst`` without repeats.

    The first occurrence of each item is kept and the original order is
    preserved. Items are compared with ``in`` against a list, so they only
    need to support equality (unhashable items such as lists work).
    """
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
|
||||||
|
|
||||||
|
|
||||||
|
def parse(fp):
    """Build a phrase -> pinyin mapping from phrase-pinyin text lines.

    Each data line looks like ``中国: zhōng guó`` and may carry a trailing
    ``# comment``. Blank lines and lines starting with ``#`` are skipped.

    :param fp: iterable of text lines, e.g. an open file object.
    :return: dict mapping each phrase to a list with one inner list per
        syllable, e.g. ``{'中国': [['zhōng'], ['guó']]}``. When a phrase
        appears more than once, the readings are merged per syllable
        position with duplicates removed.
    :raises ValueError: if a data line does not split into exactly two
        parts on ``:``.
    :raises IndexError: if a repeated phrase has fewer syllables than its
        first occurrence.
    """
    phrases_dict = {}
    # Read from the ``fp`` argument. The previous code iterated over the
    # global ``in_fp``, which only exists when the file is run as a script.
    for line in fp:
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        # 中国: zhōng guó  # optional trailing comment
        data = line.split('#')[0]
        hanzi, pinyin = data.strip().split(':')
        hanzi = hanzi.strip()
        # [['zhōng'], ['guó']]
        pinyin_list = [[s] for s in pinyin.split()]

        if hanzi not in phrases_dict:
            phrases_dict[hanzi] = pinyin_list
            continue

        # Phrase seen before: merge the new readings syllable by syllable.
        for index, value in enumerate(phrases_dict[hanzi]):
            value.extend(pinyin_list[index])
            phrases_dict[hanzi][index] = remove_dup_items(value)

    return phrases_dict
|
||||||
|
|
||||||
|
|
||||||
|
def main(in_fp, out_fp):
    """Write the phrases parsed from ``in_fp`` to ``out_fp`` as Python source.

    The output defines a single ``phrases_dict`` literal with one entry per
    phrase, ordered by phrase.
    """
    header = "# Warning: Auto-generated file, don't edit.\nphrases_dict = {\n"
    out_fp.write(header)

    phrases = parse(in_fp)
    # Keys are unique, so sorting the keys matches sorting the items by key.
    for hanzi in sorted(phrases):
        # 中国: [[zhōng], [guó]]
        entry = " '{hanzi}': {pinyin_list},\n".format(
            hanzi=hanzi.strip(), pinyin_list=phrases[hanzi])
        out_fp.write(entry)

    out_fp.write('}\n')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Both INPUT and OUTPUT are required. The old ``== 1`` check let a
    # single-argument call through and crashed with IndexError on argv[2].
    if len(sys.argv) < 3:
        print('python gen_phrases_dict.py INPUT OUTPUT')
        sys.exit(1)

    in_f = sys.argv[1]
    out_f = sys.argv[2]

    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
|
@ -0,0 +1,37 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def main(in_fp, out_fp):
    """Convert ``U+XXXX: pinyin`` lines from ``in_fp`` into Python source.

    The output written to ``out_fp`` defines a single ``pinyin_dict``
    literal. Blank lines and lines starting with ``#`` are skipped, and any
    trailing ``# comment`` on a data line is dropped.
    """
    out_fp.write("# Warning: Auto-generated file, don't edit.\npinyin_dict = {\n")

    for raw in in_fp.readlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith('#'):
            continue

        # U+4E2D: zhōng,zhòng  # 中   ->   U+4E2D: zhōng,zhòng
        entry = stripped.split('#')[0].strip()
        # ->   0x4E2D: 'zhōng,zhòng
        entry = entry.replace('U+', '0x').replace(': ', ": '")
        # ->   0x4E2D: 'zhōng,zhòng',\n
        out_fp.write(" {new_line}',\n".format(new_line=entry))

    out_fp.write('}\n')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Both INPUT and OUTPUT are required. The old ``== 1`` check let a
    # single-argument call through and crashed with IndexError on argv[2].
    if len(sys.argv) < 3:
        print('python gen_pinyin_dict.py INPUT OUTPUT')
        sys.exit(1)

    in_f = sys.argv[1]
    out_f = sys.argv[2]

    with open(in_f) as in_fp, open(out_f, 'w') as out_fp:
        main(in_fp, out_fp)
|
@ -0,0 +1,11 @@
|
|||||||
|
[bumpversion]
|
||||||
|
commit = True
|
||||||
|
tag = True
|
||||||
|
current_version = 0.10.5
|
||||||
|
|
||||||
|
[bumpversion:file:merge.py]
|
||||||
|
|
||||||
|
[bumpversion:file:pinyin.txt]
|
||||||
|
|
||||||
|
[bumpversion:file:large_pinyin.txt]
|
||||||
|
|
@ -0,0 +1,92 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*,cover
|
||||||
|
.hypothesis/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# IPython Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# celery beat schedule file
|
||||||
|
celerybeat-schedule
|
||||||
|
|
||||||
|
# dotenv
|
||||||
|
.env
|
||||||
|
|
||||||
|
# virtualenv
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
new.txt
|
||||||
|
cc-cedict.zip
|
||||||
|
cedict_ts.u8
|
@ -0,0 +1,6 @@
|
|||||||
|
language: python
|
||||||
|
python:
|
||||||
|
- 3.6
|
||||||
|
|
||||||
|
script:
|
||||||
|
- make merge
|
@ -0,0 +1,213 @@
|
|||||||
|
# ChangeLog
|
||||||
|
|
||||||
|
## [0.10.5] (2020-11-22)
|
||||||
|
|
||||||
|
* 增加 `还君明珠` 的拼音。
|
||||||
|
|
||||||
|
|
||||||
|
## [0.10.4] (2020-10-08)
|
||||||
|
|
||||||
|
* 纠正一些词语的拼音。
|
||||||
|
|
||||||
|
|
||||||
|
## [0.10.3] (2020-07-05)
|
||||||
|
|
||||||
|
* 增加 `还珠` 的拼音。
|
||||||
|
|
||||||
|
|
||||||
|
## [0.10.2] (2019-10-26)
|
||||||
|
|
||||||
|
* 纠正一些词语的拼音。
|
||||||
|
|
||||||
|
|
||||||
|
## [0.10.1] (2019-07-06)
|
||||||
|
|
||||||
|
* 修正部分拼音数据。
|
||||||
|
|
||||||
|
|
||||||
|
## [0.10.0] (2019-05-10)
|
||||||
|
|
||||||
|
* 新增 `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据。Thanks [@hanabi1224]
|
||||||
|
* 纠正一些词语的拼音
|
||||||
|
|
||||||
|
|
||||||
|
## [0.9.2] (2019-04-06)
|
||||||
|
|
||||||
|
* 修复部分词语的拼音声调标错了位置的问题
|
||||||
|
|
||||||
|
|
||||||
|
## [0.9.1] (2019-03-31)
|
||||||
|
|
||||||
|
* 纠正一批词语的拼音:
|
||||||
|
* `鸟事`
|
||||||
|
* `虮虱相吊`
|
||||||
|
* `别鹤离鸾`
|
||||||
|
* `年华垂暮`
|
||||||
|
* `本枝百世`
|
||||||
|
* `操戈同室`
|
||||||
|
* 部分词语中 `丢` 的拼音
|
||||||
|
|
||||||
|
|
||||||
|
## [0.9.0] (2019-02-23)
|
||||||
|
|
||||||
|
* 新增 `腌臢: ā zā`
|
||||||
|
* `朝阳` 增加 `cháo yáng` 这个音
|
||||||
|
* 新增 `土地`、`领地`、`基地`
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.5] (2018-12-26)
|
||||||
|
|
||||||
|
* 纠正 `油炸`、`洗发` 的拼音
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.4] (2018-09-16)
|
||||||
|
|
||||||
|
* 纠正 `步履蹒跚` 的拼音
|
||||||
|
* 纠正部分词语中 `长` 的拼音
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.3] (2018-08-04)
|
||||||
|
|
||||||
|
* 纠正部分 `查`、`大` 的读音 (via [ee1ded4])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.2] (2018-07-28)
|
||||||
|
|
||||||
|
* 纠正 `有一只` 的读音 (via [330b348])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.1] (2018-07-28)
|
||||||
|
|
||||||
|
* 纠正几个 `一` 的读音 (via [6e3b9eb])
|
||||||
|
* 修复部分拼音包含 `xh` 的问题 (via [ae12df98])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.8.0] (2018-07-08)
|
||||||
|
|
||||||
|
* 纠正 `称雨道晴` 的拼音 (via [67412ab])
|
||||||
|
* 纠正部分词语中 `干` 的拼音 (via [38474cb])
|
||||||
|
* 增加 `时长` 的拼音 (via [c40b965])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.7.3] (2018-06-10)
|
||||||
|
|
||||||
|
* 纠正 `一语中的`, `一语中人` 的拼音 (via [3b62ed3])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.7.2] (2018-06-10)
|
||||||
|
|
||||||
|
* 纠正部分拼音数据 (via [af5d783])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.7.1] (2018-06-04)
|
||||||
|
|
||||||
|
* 纠正 `负债累累` `经纶济世` 的拼音 (via [#16])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.7.0] (2018-05-27)
|
||||||
|
|
||||||
|
* 新增 zdic_cibs.txt 和 zdic_cybs.txt (via [#13])
|
||||||
|
* `zdic_cibs.txt`: [汉典网](http://www.zdic.net) 汉语词典拼音数据
|
||||||
|
* `zdic_cybs.txt`: [汉典网](http://www.zdic.net) 成语词典拼音数据
|
||||||
|
* 增加基于 zdic_cibs.txt 和 zdic_cybs.txt 的 large_pinyin.txt (via [#13])
|
||||||
|
* 纠正部分读音(via [#10],[#11], [#15])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.6.0] (2018-03-11)
|
||||||
|
|
||||||
|
* Revert [#3](https://github.com/mozillazg/phrase-pinyin-data/pull/3) 增加的拼音数据(错误有点多)
|
||||||
|
|
||||||
|
|
||||||
|
## [0.5.1] (2017-10-25)
|
||||||
|
|
||||||
|
* 修正一批缺少 ā 和 dī 不对的词语(via [#7][#7])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.5.0] (2017-07-09)
|
||||||
|
|
||||||
|
* 增加 `还贷` 的拼音(Thanks [@zhuangh](https://github.com/zhuangh))
|
||||||
|
|
||||||
|
|
||||||
|
## [0.4.1] (2017-04-10)
|
||||||
|
|
||||||
|
* 纠正 `朝阳`, `昂昂自若` 的拼音(via [e6d6d27][e6d6d27], [6e7ea16][6e7ea16])
|
||||||
|
|
||||||
|
|
||||||
|
## [0.4.0] (2017-03-22)
|
||||||
|
|
||||||
|
* 新增2万多个词组拼音数据(via [fc50fcd][fc50fcd], 感谢 [@onsunsl][@onsunsl] 分享他/她收集的43400个拼音数据: [#3][#3] ).
|
||||||
|
|
||||||
|
|
||||||
|
## [0.3.1] (2017-03-13)
|
||||||
|
|
||||||
|
* 纠正 `斯事体大` 的拼音
|
||||||
|
|
||||||
|
|
||||||
|
## [0.3.0] (2017-03-12)
|
||||||
|
|
||||||
|
* 增加 overwrite.txt 用于新增/纠正拼音数据
|
||||||
|
* 纠正 `便宜`, `所长`, `打开天窗说亮话` 的拼音数据
|
||||||
|
* 增加 `朝阳区`
|
||||||
|
|
||||||
|
|
||||||
|
## [0.2.0] (2017-03-04)
|
||||||
|
|
||||||
|
* 添加一批拼音(via [04de9f7][04de9f7])。
|
||||||
|
|
||||||
|
|
||||||
|
## 0.1.0 (2017-03-04)
|
||||||
|
|
||||||
|
* Initial Release
|
||||||
|
|
||||||
|
|
||||||
|
[0.10.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.3...v0.10.4
|
||||||
|
[0.10.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.2...v0.10.3
|
||||||
|
[0.10.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.1...v0.10.2
|
||||||
|
[0.10.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.0...v0.10.1
|
||||||
|
[0.10.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.2...v0.10.0
|
||||||
|
[0.9.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.1...v0.9.2
|
||||||
|
[0.9.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.9.0...v0.9.1
|
||||||
|
[0.9.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.5...v0.9.0
|
||||||
|
[0.8.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.4...v0.8.5
|
||||||
|
[0.8.4]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.3...v0.8.4
|
||||||
|
[0.8.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.2...v0.8.3
|
||||||
|
[0.8.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.1...v0.8.2
|
||||||
|
[0.8.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.8.0...v0.8.1
|
||||||
|
[0.8.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.3...v0.8.0
|
||||||
|
[0.7.3]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.2...v0.7.3
|
||||||
|
[0.7.2]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.1...v0.7.2
|
||||||
|
[0.7.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.7.0...v0.7.1
|
||||||
|
[0.7.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.6.0...v0.7.0
|
||||||
|
[0.6.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.1...v0.6.0
|
||||||
|
[0.5.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.5.0...v0.5.1
|
||||||
|
[0.5.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.1...v0.5.0
|
||||||
|
[0.4.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.4.0...v0.4.1
|
||||||
|
[0.4.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.1...v0.4.0
|
||||||
|
[0.3.1]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.3.0...v0.3.1
|
||||||
|
[0.3.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.2.0...v0.3.0
|
||||||
|
[0.2.0]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.1.0...v0.2.0
|
||||||
|
|
||||||
|
|
||||||
|
[04de9f7]: https://github.com/mozillazg/phrase-pinyin-data/commit/04de9f7f520e2f2188cb4c468c30d6fb811a20ba
|
||||||
|
[fc50fcd]: https://github.com/mozillazg/phrase-pinyin-data/commit/fc50fcd7faa94205096d582fc7a1b31265943a85
|
||||||
|
[@onsunsl]: https://github.com/onsunsl
|
||||||
|
[#3]: https://github.com/mozillazg/phrase-pinyin-data/pull/3
|
||||||
|
[e6d6d27]: https://github.com/mozillazg/phrase-pinyin-data/commit/e6d6d270900fdca32ccbe9a414ea4642e537e522
|
||||||
|
[6e7ea16]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e7ea167dee0c812514f0bf9701ff5c103a566af
|
||||||
|
[#7]: https://github.com/mozillazg/phrase-pinyin-data/pull/7
|
||||||
|
[#10]: https://github.com/mozillazg/phrase-pinyin-data/pull/10
|
||||||
|
[#11]: https://github.com/mozillazg/phrase-pinyin-data/pull/11
|
||||||
|
[#13]: https://github.com/mozillazg/phrase-pinyin-data/pull/13
|
||||||
|
[#15]: https://github.com/mozillazg/phrase-pinyin-data/pull/15
|
||||||
|
[#16]: https://github.com/mozillazg/phrase-pinyin-data/pull/16
|
||||||
|
[af5d783]: https://github.com/mozillazg/phrase-pinyin-data/commit/af5d7831b0e84e4a5306e304b3b2da3268e35f17
|
||||||
|
[3b62ed3]: https://github.com/mozillazg/phrase-pinyin-data/commit/3b62ed303f129868c7ccee4f2d5e44dcea7d30d4
|
||||||
|
[67412ab]: https://github.com/mozillazg/phrase-pinyin-data/commit/67412abbf8570ac80a41dc012f228c0864823a62
|
||||||
|
[38474cb]: https://github.com/mozillazg/phrase-pinyin-data/commit/38474cb91dedd27b3d51b39811704f3d045837b1
|
||||||
|
[c40b965]: https://github.com/mozillazg/phrase-pinyin-data/commit/c40b9653ea2ab066d1c0606e9e07dd4225ff2485
|
||||||
|
[6e3b9eb]: https://github.com/mozillazg/phrase-pinyin-data/commit/6e3b9eb805ed3e3a5955c179e752ec5e1293216f
|
||||||
|
[ae12df98]: https://github.com/mozillazg/phrase-pinyin-data/commit/ae12df98438a508249bdf591334b6415bb5ccf8d
|
||||||
|
[330b348]: https://github.com/mozillazg/phrase-pinyin-data/commit/330b3481ba350de07b580991a5a8b7a83aaefde9
|
||||||
|
[ee1ded4]: https://github.com/mozillazg/phrase-pinyin-data/commit/ee1ded4938624ac4ce3dc7991ab370e09dbd745c
|
||||||
|
[@hanabi1224]: https://github.com/hanabi1224
|
||||||
|
[0.10.5]: https://github.com/mozillazg/phrase-pinyin-data/compare/v0.10.4...v0.10.5
|
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2017 mozillazg
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
@ -0,0 +1,54 @@
|
|||||||
|
# phrase-pinyin-data [![Build Status](https://travis-ci.org/mozillazg/phrase-pinyin-data.svg?branch=master)](https://travis-ci.org/mozillazg/phrase-pinyin-data)
|
||||||
|
|
||||||
|
词语拼音数据。
|
||||||
|
|
||||||
|
|
||||||
|
## 数据介绍
|
||||||
|
|
||||||
|
拼音数据的格式:
|
||||||
|
|
||||||
|
```
|
||||||
|
{phrase}: {pinyin}
|
||||||
|
```
|
||||||
|
|
||||||
|
* 以 `#` 开头的行是注释
|
||||||
|
* 行尾的 `#` 也是注释
|
||||||
|
* `{phrase}` 汉字词语
|
||||||
|
* `{pinyin}` 词语的拼音,使用空格分隔每个汉字的拼音
|
||||||
|
* 一行一个词语的读音,有多个音的词语会出现在多行
|
||||||
|
* 示例:
|
||||||
|
|
||||||
|
```
|
||||||
|
# 注释
|
||||||
|
中国: zhōng guó
|
||||||
|
北京: běi jīng # 注释
|
||||||
|
```
|
||||||
|
|
||||||
|
文件说明:
|
||||||
|
|
||||||
|
* `overwrite.txt`: 手工纠正的拼音数据
|
||||||
|
* `pinyin.txt`: `pinyin.txt + overwrite.txt` 后的拼音数据
|
||||||
|
* `zdic_cibs.txt`: [汉典网](http://www.zdic.net/) 汉语词典拼音数据
|
||||||
|
* `zdic_cybs.txt`: [汉典网](http://www.zdic.net/) 成语词典拼音数据
|
||||||
|
* `cc_cedict.txt`: [cc-cedict.org](https://cc-cedict.org/) 拼音数据
|
||||||
|
* `large_pinyin.txt`: `zdic_cibs.txt + zdic_cybs.txt + cc_cedict.txt + pinyin.txt + overwrite.txt` 后的拼音数据
|
||||||
|
|
||||||
|
## 修改数据
|
||||||
|
|
||||||
|
* 修改 `pinyin.txt` 或 `overwrite.txt` 都可以了
|
||||||
|
* 执行 `make merge` 命令可以按照合并规则生成最新的 `pinyin.txt`
|
||||||
|
|
||||||
|
|
||||||
|
## 参考资料
|
||||||
|
|
||||||
|
* 初始数据基于 [phrases-dict.js](https://github.com/hotoo/pinyin/blob/05f74496c34ccb32db1a0fd0b358a798a22a51e5/data/phrases-dict.js) 和 [phrases_dict.py](https://github.com/mozillazg/python-pinyin/blob/366de0363ff1fb9a718ce668448bea59de09a4bf/pypinyin/phrases_dict.py)
|
||||||
|
* [汉典 zdic.net](http://www.zdic.net/)
|
||||||
|
* [字海网,叶典网](http://zisea.com/)
|
||||||
|
* [国学大师_国学网](http://www.guoxuedashi.com/)
|
||||||
|
* [CC-CEDICT download - MDBG English to Chinese dictionary](http://www.mdbg.net/chindict/chindict.php?page=cc-cedict)
|
||||||
|
* [漢語大詞典](http://www.ivantsoi.com/hydcd/search.html)
|
||||||
|
|
||||||
|
|
||||||
|
## 相关项目
|
||||||
|
|
||||||
|
* [mozillazg/pinyin-data](https://github.com/mozillazg/pinyin-data): 汉字拼音数据
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
import shutil
|
||||||
|
import codecs
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
ROOT = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Download the current CC-CEDICT export and unpack it next to this script.
    DOWNLOAD_URL = 'https://cc-cedict.org/editor/editor_export_cedict.php?c=zip'

    zip_file_path = os.path.join(ROOT, 'cc-cedict.zip')

    # The context manager releases the connection even if the copy fails.
    # The timeout applies per socket operation, not to the whole download.
    with requests.get(DOWNLOAD_URL, stream=True, timeout=60) as response:
        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        # ``response.raw`` skips gzip/deflate decoding unless asked to do it.
        response.raw.decode_content = True
        with open(zip_file_path, 'wb') as f:
            shutil.copyfileobj(response.raw, f)

    # ZipFile opens the path itself and is closed when the block exits.
    with zipfile.ZipFile(zip_file_path) as z:
        z.extractall(ROOT)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,58 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import sys
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
|
||||||
|
def parse(lines):
    """Iterate over the ``{phrase}: {pinyin}`` entries found in *lines*.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.  Everything after the first ``:`` (including any trailing
    ``# comment``) is returned untouched apart from surrounding whitespace.

    :yield: hanzi, others
    """
    stripped_lines = (raw.strip() for raw in lines)
    for entry in stripped_lines:
        if entry == '' or entry[0] == '#':
            continue

        key, value = entry.split(':', 1)
        yield key.strip(), value.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def merge(pinyin_d_list):
    """Flatten several pinyin dicts into one; later dicts override earlier ones.

    :rtype: dict
    """
    combined = {}
    for layer in pinyin_d_list:
        # dict.update lets each later layer replace keys set by earlier ones.
        combined.update(layer)
    return combined
|
||||||
|
|
||||||
|
|
||||||
|
def sort(pinyin_d):
    """Return the ``(hanzi, pinyin)`` pairs of *pinyin_d* ordered by hanzi.

    :rtype: list
    """
    pairs = list(pinyin_d.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
|
||||||
|
|
||||||
|
|
||||||
|
def output(pinyin_s):
    """Print the header lines followed by one ``hanzi: pinyin`` line per pair.

    A key may carry a ``_<suffix>`` (e.g. ``朝阳_2``) so that one phrase can
    have several readings; only the part before the first ``_`` is printed.
    """
    print('# version: 0.10.5')
    print('# source: https://github.com/mozillazg/phrase-pinyin-data')
    for key, pinyin in pinyin_s:
        hanzi, _, _ = key.partition('_')
        print('{hanzi}: {pinyin}'.format(hanzi=hanzi, pinyin=pinyin))
|
||||||
|
|
||||||
|
|
||||||
|
def main(files):
    """Parse every file in *files*, merge them in order and print the result.

    Inside a single file the first reading seen for a key is kept; across
    files the merge step lets later files override earlier ones.
    """
    per_file = []
    for path in files:
        entries = {}
        with codecs.open(path, 'r', 'utf-8-sig') as fp:
            for hanzi, pinyin in parse(fp):
                if hanzi not in entries:
                    entries[hanzi] = pinyin
        per_file.append(entries)

    output(sort(merge(per_file)))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Every command-line argument is the path of a pinyin data file.
    input_files = sys.argv[1:]
    main(input_files)
|
@ -0,0 +1,7 @@
|
|||||||
|
# 新增或纠正的拼音数据
|
||||||
|
# 升级版本的时候会合并回 pinyin.txt
|
||||||
|
# 示例
|
||||||
|
斯事体大: sī shì tǐ dà
|
||||||
|
朝阳: zhāo yáng
|
||||||
|
朝阳_2: cháo yáng
|
||||||
|
还君明珠: huán jūn míng zhū
|
@ -0,0 +1,74 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
import codecs
|
||||||
|
from pypinyin.phonetic_symbol import phonetic_symbol
|
||||||
|
from pypinyin.pinyin_dict import pinyin_dict
|
||||||
|
from pypinyin.style.tone import ToneConverter
|
||||||
|
|
||||||
|
ROOT = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
# Reverse lookup table: numbered-tone spelling (as produced by
# ``ToneConverter.to_tone3``, lower-cased) -> the tone-mark spelling found in
# pypinyin's single-character dictionary.
tone_converter = ToneConverter()
tone3_2_tone_dict = {}
for k, v in pinyin_dict.items():
    # Each dictionary value is a comma-separated list of readings.
    parts = v.split(',')
    for part in parts:
        part = part.strip()
        if not part:
            continue
        tone3 = tone_converter.to_tone3(part).strip().lower()
        if not tone3:
            continue
        tone3_2_tone_dict[tone3] = part
|
||||||
|
|
||||||
|
|
||||||
|
def tone3_to_tone1(tone3):
    """Convert one CC-CEDICT syllable (numbered tone) to tone-mark pinyin.

    :param tone3: a syllable such as ``zhong1``, ``lu:4``, ``de5`` or ``r5``
    :return: the same syllable written with tone marks
    :raises KeyError: the syllable is not in ``tone3_2_tone_dict``
    """
    tone3 = tone3.strip().lower()
    # Erhua: a bare "r5" stands for the suffix "er".
    if tone3 == 'r5':
        return 'er'
    # Neutral tone: no mark is needed, so just drop the "5".
    if '5' in tone3:
        new = tone3.replace('5', '')
        if new:
            # This path bypasses the lookup table, so the "u:" spelling of
            # ü must be converted here or it leaks into the output.
            return new.replace('u:', 'ü')
    # Example: 律 (lu:4).  The lookup is done with "v" in place of "u:";
    # presumably that is how the table keys spell ü -- confirm against
    # pypinyin's to_tone3 output.
    if 'u:' in tone3:
        tone3 = tone3.replace('u:', 'v')

    return tone3_2_tone_dict[tone3]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Convert cedict_ts.u8 into "{phrase}: {pinyin}" lines in cc_cedict.txt.
    # The pattern picks up: <traditional> <simplified> [<numbered pinyin>]
    LINE_PARTS_RE = re.compile(
        r'(?P<zht>\w+)\s+(?P<zhs>\w+)\s+\[(?P<py>.+?)\]')
    LETTER_DIGIT_RE = re.compile(r'[a-zA-Z0-9]')
    cnt = 0
    with codecs.open(os.path.join(ROOT, 'cc_cedict.txt'), 'w', 'utf-8-sig') as fpw:
        with codecs.open(os.path.join(ROOT, 'cedict_ts.u8'), 'r', 'utf-8-sig') as fpr:
            for line in fpr:
                line_stripped = line.strip()
                # Check the *stripped* text: a raw line still holds its
                # newline, so a blank line used to reach the [0] index and
                # raise IndexError.
                if not line_stripped or line_stripped[0] in '#%':
                    continue
                parts = LINE_PARTS_RE.match(line_stripped)
                if not parts:
                    continue
                zhs = parts.group('zhs')
                py = parts.group('py').split()
                try:
                    tone1 = [tone3_to_tone1(i) for i in py]
                except Exception as e:
                    # Best effort: report the syllable that could not be
                    # converted and move on to the next entry.
                    print(e)
                    continue
                # Skip entries whose headword contains Latin letters/digits.
                if LETTER_DIGIT_RE.search(zhs):
                    continue
                # Single characters are not phrases.
                if len(zhs) < 2:
                    continue
                fpw.write(f'{zhs}: {" ".join(tone1)}\n')
                cnt += 1
                if cnt % 10000 == 0:
                    print(f'{cnt} lines processed...')
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,2 @@
|
|||||||
|
requests
|
||||||
|
pypinyin
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,9 @@
|
|||||||
|
[bumpversion]
|
||||||
|
commit = True
|
||||||
|
tag = True
|
||||||
|
current_version = 0.10.2
|
||||||
|
|
||||||
|
[bumpversion:file:merge_unihan.py]
|
||||||
|
|
||||||
|
[bumpversion:file:pinyin.txt]
|
||||||
|
|
@ -0,0 +1,31 @@
|
|||||||
|
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
||||||
|
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||||
|
|
||||||
|
name: Python application
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ ]
|
||||||
|
pull_request:
|
||||||
|
branches: [ ]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Set up Python 3.9
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: 3.9
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install flake8 pytest
|
||||||
|
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||||
|
|
||||||
|
- name: try merge_unihan
|
||||||
|
run: |
|
||||||
|
make merge_unihan
|
@ -0,0 +1,62 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*,cover
|
||||||
|
.hypothesis/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
#Ipython Notebook
|
||||||
|
.ipynb_checkpoints
|
@ -0,0 +1,6 @@
|
|||||||
|
language: python
|
||||||
|
python:
|
||||||
|
- "3.5"
|
||||||
|
|
||||||
|
script:
|
||||||
|
- make merge_unihan
|
@ -0,0 +1,21 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2016 mozillazg
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,67 @@
|
|||||||
|
U+389C: kāng # 㢜
|
||||||
|
U+60B7: lì # 悷
|
||||||
|
U+417F: huá # 䅿
|
||||||
|
U+46BE: rén # 䚾
|
||||||
|
U+4B78: fù # 䭸
|
||||||
|
U+4B7B: fēn # 䭻
|
||||||
|
U+4CC9: dōng # 䳉
|
||||||
|
U+4D7B: huì # 䵻
|
||||||
|
U+57D4: pǔ # 埔
|
||||||
|
U+5A47: cǎi # 婇
|
||||||
|
U+5F6F: piāo # 彯
|
||||||
|
U+5F77: páng # 彷
|
||||||
|
U+60B7: lì # 悷
|
||||||
|
U+65FD: tūn # 旽
|
||||||
|
U+6A0B: tōng # 樋
|
||||||
|
U+6ADA: lǘ # 櫚
|
||||||
|
U+6E5E: zhēn # 湞
|
||||||
|
U+73D6: guāng # 珖
|
||||||
|
U+77A1: guī # 瞡
|
||||||
|
U+7BC9: zhù # 築
|
||||||
|
U+815C: méi # 腜
|
||||||
|
U+816C: róu # 腬
|
||||||
|
U+8192: ōu # 膒
|
||||||
|
U+8491: yīn # 蒑
|
||||||
|
U+8A09: fàn # 訉
|
||||||
|
U+90D8: lǚ # 郘
|
||||||
|
U+9D24: zhōng # 鴤
|
||||||
|
U+2031A: nòng # 𠌚
|
||||||
|
U+2141D: fú # 𡐝
|
||||||
|
U+21594: nuó # 𡖔
|
||||||
|
U+2199D: xiāo # 𡦝
|
||||||
|
U+21B0D: mí # 𡬍
|
||||||
|
U+21B10: yí # 𡬐
|
||||||
|
U+21B15: lóng # 𡬕
|
||||||
|
U+2243F: rǎng # 𢐿
|
||||||
|
U+2273D: kuí # 𢜽
|
||||||
|
U+22741: hōng # 𢝁
|
||||||
|
U+22892: sū # 𢢒
|
||||||
|
U+22A10: jí # 𢨐
|
||||||
|
U+245ED: xià # 𤗭
|
||||||
|
U+24704: huái # 𤜄
|
||||||
|
U+247AE: zhài # 𤞮
|
||||||
|
U+24856: yán # 𤡖
|
||||||
|
U+248B5: lài # 𤢵
|
||||||
|
U+249EB: jīn # 𤧫
|
||||||
|
U+2546B: kān # 𥑫
|
||||||
|
U+2588D: hù # 𥢍
|
||||||
|
U+2588F: diàn # 𥢏
|
||||||
|
U+25C1F: yuán # 𥰟
|
||||||
|
U+272D5: kùn # 𧋕
|
||||||
|
U+2757A: shuāng # 𧕺
|
||||||
|
U+275C8: nú # 𧗈
|
||||||
|
U+27956: lí # 𧥖
|
||||||
|
U+280A2: jí # 𨂢
|
||||||
|
U+2824B: tuō # 𨉋
|
||||||
|
U+284A8: hài # 𨒨
|
||||||
|
U+28ABF: liú # 𨪿
|
||||||
|
U+28DED: chán # 𨷭
|
||||||
|
U+28E30: jú # 𨸰
|
||||||
|
U+293CF: wéi # 𩏏
|
||||||
|
U+295F5: zhēng # 𩗵
|
||||||
|
U+29B5D: wǒ # 𩭝
|
||||||
|
U+2A048: zhuāng # 𪁈
|
||||||
|
U+2A2A2: shí # 𪊢
|
||||||
|
U+8B9D: zhán # 讝
|
||||||
|
U+3D14: jí # 㴔
|
||||||
|
U+8B26: qǐng # 謦
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,32 @@
|
|||||||
|
U+5302: yún # 匂 yún 为日本汉字读音; xiōng 为现代汉语读音;
|
||||||
|
U+4E3C: dǎn # 丼 dǎn 为日本汉字读音; jǐng 为现代汉语读音;
|
||||||
|
U+8FBB: shí # 辻
|
||||||
|
U+8FBC: rù # 込
|
||||||
|
U+51E7: jīn # 凧
|
||||||
|
U+6763: shān # 杣
|
||||||
|
U+67A0: zá # 枠
|
||||||
|
U+7551: tián # 畑
|
||||||
|
U+6803: lì # 栃
|
||||||
|
U+6802: méi # 栂
|
||||||
|
U+5CE0: kǎ # 峠
|
||||||
|
U+4FE3: yǔ # 俣
|
||||||
|
U+7C7E: rèn # 籾
|
||||||
|
U+7560: tián # 畠
|
||||||
|
U+96EB: xià # 雫
|
||||||
|
U+7B39: shì # 笹
|
||||||
|
U+5840: píng # 塀
|
||||||
|
U+6919: chāng # 椙
|
||||||
|
U+7872: yù # 硲
|
||||||
|
U+86EF: lǎo # 蛯
|
||||||
|
U+55B0: cān # 喰
|
||||||
|
U+643E: zhà # 搾
|
||||||
|
U+698A: shén # 榊
|
||||||
|
U+50CD: dòng # 働
|
||||||
|
U+7CC0: huā # 糀
|
||||||
|
U+9786: bǐng # 鞆
|
||||||
|
U+69C7: zhēn # 槇
|
||||||
|
U+6A2B: jiān # 樫
|
||||||
|
U+9D2B: tián # 鴫
|
||||||
|
U+567A: xīn # 噺
|
||||||
|
U+7C17: liáng # 簗
|
||||||
|
U+9EBF: mó # 麿
|
@ -0,0 +1,123 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
def code_to_hanzi(code):
    """Return the character for a ``U+XXXX`` code point string."""
    hex_text = code.replace('U+', '0x')
    code_point = int(hex_text, 16)
    return chr(code_point)
|
||||||
|
|
||||||
|
|
||||||
|
def sort_pinyin_dict(pinyin_dict):
    """Return an OrderedDict of *pinyin_dict* sorted by numeric code point."""
    def code_point(item):
        return int(item[0].replace('U+', '0x'), 16)

    ordered = collections.OrderedDict()
    for code, pinyins in sorted(pinyin_dict.items(), key=code_point):
        ordered[code] = pinyins
    return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def remove_dup_items(lst):
    """Return a new list with duplicates dropped, keeping first occurrences."""
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
|
||||||
|
|
||||||
|
|
||||||
|
def parse_pinyins(fp):
    """Parse ``U+XXXX: pinyin[,pinyin...]  # comment`` lines into a dict.

    Blank lines and lines starting with ``#`` are ignored, as is everything
    after the first ``#`` on a data line.  Readings may be separated by
    commas, whitespace or both; empty readings are dropped, so ``a, b`` no
    longer yields an empty-string entry and a line without readings maps to
    an empty list.

    :param fp: iterable of text lines
    :return: dict mapping the code string to its list of readings
    :raises ValueError: a data line does not contain exactly one ``:``
    """
    pinyin_map = {}
    for line in fp:
        line = line.strip()
        if line.startswith('#') or not line:
            continue
        code, pinyin = line.split('#')[0].split(':')
        # Treat commas as whitespace so one split handles every separator
        # style and never produces an empty item.
        pinyin_map[code.strip()] = pinyin.replace(',', ' ').split()
    return pinyin_map
|
||||||
|
|
||||||
|
|
||||||
|
def merge(raw_pinyin_map, adjust_pinyin_map, overwrite_pinyin_map):
    """Build the final readings for every code present in *raw_pinyin_map*.

    Precedence per code: an overwrite entry replaces everything; otherwise
    the adjust readings are placed in front of the raw ones; otherwise the
    raw readings are used as they are.  Duplicates are removed afterwards.
    """
    merged = {}
    for code in raw_pinyin_map:
        if code in overwrite_pinyin_map:
            readings = overwrite_pinyin_map[code]
        elif code in adjust_pinyin_map:
            readings = adjust_pinyin_map[code] + raw_pinyin_map[code]
        else:
            readings = raw_pinyin_map[code]
        merged[code] = remove_dup_items(readings)

    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def save_data(pinyin_map, writer):
    """Write one ``code: p1,p2 # hanzi`` line per entry to *writer*."""
    template = '{code}: {pinyin} # {hanzi}\n'
    for code in pinyin_map:
        hanzi = code_to_hanzi(code)
        joined = ','.join(pinyin_map[code])
        writer.write(template.format(code=code, pinyin=joined, hanzi=hanzi))
|
||||||
|
|
||||||
|
|
||||||
|
def extend_pinyins(old_map, new_map, only_no_exists=False):
    """Add the readings of *new_map* to *old_map* in place.

    :param old_map: dict of code -> list of readings; modified in place
    :param new_map: dict of code -> list of readings; never modified
    :param only_no_exists: when true, only codes missing from *old_map* are
        added and existing codes are left untouched; when false, readings
        are appended to whatever *old_map* already holds for the code
    """
    for code, pinyins in new_map.items():
        if only_no_exists:  # only add the code when it is not present yet
            if code not in old_map:
                # Store a copy: sharing the list would let later extends on
                # old_map silently grow new_map's entry as well.
                old_map[code] = list(pinyins)
        else:
            old_map.setdefault(code, []).extend(pinyins)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Build pinyin.txt from the individual source tables.  The order of the
    # steps matters: it decides the order of readings for each character.
    # Every data file is UTF-8, so the encoding is given explicitly instead
    # of depending on the platform's locale.
    raw_pinyin_map = {}
    with open('kHanyuPinyin.txt', encoding='utf-8') as fp:
        khanyupinyin = parse_pinyins(fp)
        raw_pinyin_map.update(khanyupinyin)
    with open('kXHC1983.txt', encoding='utf-8') as fp:
        kxhc1983 = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, kxhc1983)
    with open('nonCJKUI.txt', encoding='utf-8') as fp:
        noncjkui = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, noncjkui)
    with open('kMandarin_8105.txt', encoding='utf-8') as fp:
        adjust_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin_overwrite.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kMandarin.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kTGHZ2013.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('kHanyuPinlu.txt', encoding='utf-8') as fp:
        khanyupinyinlu = parse_pinyins(fp)
        # NOTE(review): `_map` is still the kTGHZ2013 table here, so the
        # kHanyuPinlu readings are parsed but never merged; only their codes
        # are checked by the assertion further down.  Passing
        # `khanyupinyinlu` would change the generated pinyin.txt -- confirm
        # which behaviour is intended before touching this call.
        extend_pinyins(adjust_pinyin_map, _map)
        extend_pinyins(raw_pinyin_map, adjust_pinyin_map)
    with open('GBK_PUA.txt', encoding='utf-8') as fp:
        pua_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, pua_pinyin_map)
    with open('kanji.txt', encoding='utf-8') as fp:
        _map = parse_pinyins(fp)
        # Kanji readings only fill codes that have no reading yet.
        extend_pinyins(raw_pinyin_map, _map, only_no_exists=True)

    with open('overwrite.txt', encoding='utf-8') as fp:
        overwrite_pinyin_map = parse_pinyins(fp)
        extend_pinyins(raw_pinyin_map, overwrite_pinyin_map)

    new_pinyin_map = merge(raw_pinyin_map, adjust_pinyin_map,
                           overwrite_pinyin_map)
    new_pinyin_map = sort_pinyin_dict(new_pinyin_map)

    # Sanity checks: no code point from any source may be lost by the merge.
    assert len(new_pinyin_map) == len(raw_pinyin_map)
    code_set = set(new_pinyin_map.keys())
    assert set(khanyupinyin.keys()) - code_set == set()
    assert set(khanyupinyinlu.keys()) - code_set == set()
    assert set(kxhc1983.keys()) - code_set == set()
    assert set(adjust_pinyin_map.keys()) - code_set == set()
    assert set(overwrite_pinyin_map.keys()) - code_set == set()
    assert set(pua_pinyin_map.keys()) - code_set == set()
    with open('pinyin.txt', 'w', encoding='utf-8') as fp:
        fp.write('# version: 0.10.2\n')
        fp.write('# source: https://github.com/mozillazg/pinyin-data\n')
        save_data(new_pinyin_map, fp)
|
@ -0,0 +1,63 @@
|
|||||||
|
# 手工纠正错误的拼音数据
|
||||||
|
# 井号开头的行将会被忽略,可以用作注释
|
||||||
|
# 数据格式:{code point}: {pinyins} # {hanzi}
|
||||||
|
# 示例:
|
||||||
|
# U+4E2D: zhōng,zhòng # 中
|
||||||
|
U+5353: zhuó,zhuō # 卓
|
||||||
|
U+5565: shá,shà # 啥
|
||||||
|
U+5666: yuě,huì # 噦
|
||||||
|
U+59B3: nǐ,nǎi # 妳
|
||||||
|
U+8BB8: xǔ,hǔ # 许
|
||||||
|
U+94AD: tǒu,dǒu # 钭
|
||||||
|
U+9E00: chǔ,zhú,chù # 鸀
|
||||||
|
U+E815: yè #
|
||||||
|
U+E816: zuǒ,yǒu #
|
||||||
|
U+E81B: zhòu,zhū #
|
||||||
|
U+E81D: jié,jiē #
|
||||||
|
U+E824: zhòu #
|
||||||
|
U+E826: shǒu #
|
||||||
|
U+E82B: fēng #
|
||||||
|
U+E82C: gòng #
|
||||||
|
U+E82E: huì,kuì #
|
||||||
|
U+E830: jiān #
|
||||||
|
U+E831: ēn #
|
||||||
|
U+E832: xiǎo #
|
||||||
|
U+E834: lóu,lǘ #
|
||||||
|
U+E835: cǎn,shān,cēn #
|
||||||
|
U+E836: zhú #
|
||||||
|
U+E838: wǎng #
|
||||||
|
U+E83A: yáng,xiáng #
|
||||||
|
U+E83D: bà,bēi #
|
||||||
|
U+E83F: zhuān,zhuán,chuǎn,chún #
|
||||||
|
U+E842: kuì,huì #
|
||||||
|
U+E843: juǎn #
|
||||||
|
U+E846: qíng #
|
||||||
|
U+E84A: yé,yá #
|
||||||
|
U+E850: chuài #
|
||||||
|
U+E854: zhuó #
|
||||||
|
U+E864: luán #
|
||||||
|
U+241FE: yíng # 𤇾
|
||||||
|
U+275C8: nú # 𧗈
|
||||||
|
U+47C1: xiāo,chāo # 䟁
|
||||||
|
U+9EBF: mí # 麿
|
||||||
|
U+7C17: zhù # 簗
|
||||||
|
U+8279: cǎo # 艹
|
||||||
|
U+88CF: lǐ # 裏
|
||||||
|
U+88E1: lǐ # 裡
|
||||||
|
U+5206: fēn,fèn,fén # 分
|
||||||
|
U+208E1: fèng # 𠣡
|
||||||
|
U+2589F: hù # 𥢟
|
||||||
|
U+258F9: ràn # 𥣹
|
||||||
|
U+287B3: qú # 𨞳
|
||||||
|
U+2A008: yuān # 𪀈
|
||||||
|
U+9EFE: mǐn,miǎn,měng # 黾
|
||||||
|
U+55A3: xǔ # 喣
|
||||||
|
U+529A: zhú # 劚
|
||||||
|
U+532E: kuì,guì # 匮
|
||||||
|
U+9400: kuì,guì # 鐀
|
||||||
|
U+87AB: shì,zhē # 螫
|
||||||
|
U+5C82: qǐ,kǎi # 岂
|
||||||
|
U+534E: huá,huà,huā # 华
|
||||||
|
U+5455: ǒu,ōu,òu # 呕
|
||||||
|
U+4ECE: cóng,zòng # 从
|
||||||
|
U+513F: ér,er,rén # 儿
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,48 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""生成初始的 kMandarin_8105.txt"""
|
||||||
|
from merge_unihan import parse_pinyins, code_to_hanzi
|
||||||
|
|
||||||
|
|
||||||
|
def parse_china_x():
    """Yield the first whitespace-separated field of each data line.

    Reads ``tools/china-8105-06062014.txt`` relative to the current working
    directory; blank lines and lines starting with ``#`` are skipped.
    """
    with open('tools/china-8105-06062014.txt') as fp:
        for raw in fp:
            entry = raw.strip()
            if not entry or entry.startswith('#'):
                continue
            yield entry.split()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_zdic():
    """Return the code -> readings map parsed from ``zdic.txt``.

    The file is opened as UTF-8 explicitly: the readings carry tone-marked
    letters, which a non-UTF-8 locale default would fail to decode.
    """
    with open('zdic.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_kmandain():
    """Return the code -> readings map parsed from ``pinyin.txt``.

    The file is opened as UTF-8 explicitly: the readings carry tone-marked
    letters, which a non-UTF-8 locale default would fail to decode.
    """
    with open('pinyin.txt', encoding='utf-8') as fp:
        return parse_pinyins(fp)
|
||||||
|
|
||||||
|
|
||||||
|
def diff(kmandarin, zdic, commons):
    """Yield one output line per code in *commons*.

    The first reading from *kmandarin* is preferred; when *zdic* disagrees,
    its first reading is appended after ``->``.  Codes known only to *zdic*
    use its first reading, and codes unknown to both are emitted as a
    commented-out placeholder line.
    """
    for key in commons:
        hanzi = code_to_hanzi(key)

        if key not in kmandarin:
            if key in zdic:
                yield '{0}: {1} # {2}'.format(key, zdic[key][0], hanzi)
            else:
                yield '# {0}: {1} # {2}'.format(key, '<-', hanzi)
            continue

        value = kmandarin[key][0]
        if key in zdic and value != zdic[key][0]:
            yield '{0}: {1} # {2} -> {3}'.format(
                key, value, hanzi, zdic[key][0]
            )
        else:
            yield '{0}: {1} # {2}'.format(key, value, hanzi)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Load both reading tables, then print one line per code in the list.
    zdic = parse_zdic()
    kmandarin = parse_kmandain()
    commons = parse_china_x()
    for x in diff(kmandarin, zdic, commons):
        print(x)
|
@ -0,0 +1,2 @@
|
|||||||
|
pyquery==1.2.13
|
||||||
|
requests==2.20.0
|
@ -0,0 +1 @@
|
|||||||
|
Unihan*
|
@ -0,0 +1,20 @@
|
|||||||
|
.PHONY: help
|
||||||
|
help:
|
||||||
|
@echo "parse parse Unihan database "
|
||||||
|
@echo "update update Unihan database"
|
||||||
|
@echo "diff diff between Unihan data and parsed data"
|
||||||
|
|
||||||
|
.PHONY:parse
|
||||||
|
parse:
|
||||||
|
@python parse_pinyin.py
|
||||||
|
|
||||||
|
.PHONY:update
|
||||||
|
update:
|
||||||
|
-rm Unihan*
|
||||||
|
wget ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip -O Unihan.zip
|
||||||
|
unzip Unihan.zip
|
||||||
|
python parse_pinyin.py
|
||||||
|
|
||||||
|
.PHONY:diff
|
||||||
|
diff:
|
||||||
|
@bash diff.sh
|
@ -0,0 +1,9 @@
|
|||||||
|
# Unihan Database
|
||||||
|
|
||||||
|
http://www.unicode.org/charts/unihan.html
|
||||||
|
|
||||||
|
Update Unihan database:
|
||||||
|
|
||||||
|
```
|
||||||
|
make update
|
||||||
|
```
|
@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
IFS=$'\n\t'
|
||||||
|
|
||||||
|
# Print, for each Unihan field, the number of lines in the parsed
# "<kind>.txt" file next to the number of non-comment lines in
# Unihan_Readings.txt that mention the field.
function main() {
    printf '%-14s %-8s %-8s\n' '' 'parsed' 'Unihan'
    for kind in 'kHanyuPinyin' 'kMandarin' 'kHanyuPinlu' 'kXHC1983'
    do
        # awk rather than `grep -c`: grep exits 1 when the count is 0, which
        # aborts the script under `set -e -o pipefail`.  awk still fails
        # (and stops the script) when the input file is missing.
        unihanCount=$(awk -v k="$kind" '!/^#/ && index($0, k) { n++ } END { print n + 0 }' Unihan_Readings.txt)
        parsedCount=$(awk 'END { print NR }' "$kind".txt)
        printf '%-14s %-8s %-8s\n' "$kind" "$parsedCount" "$unihanCount"
    done
}
|
||||||
|
main
|
@ -0,0 +1 @@
|
|||||||
|
../kHanyuPinlu.txt
|
@ -0,0 +1 @@
|
|||||||
|
../kHanyuPinyin.txt
|
@ -0,0 +1 @@
|
|||||||
|
../kMandarin.txt
|
@ -0,0 +1 @@
|
|||||||
|
../kTGHZ2013.txt
|
@ -0,0 +1 @@
|
|||||||
|
../kXHC1983.txt
|
@ -0,0 +1,102 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import functools
|
||||||
|
import operator
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def re_match_pinyin_line(kind):
    """Build the regex that matches one Unihan reading line for ``kind``.

    A matching line consists of three TAB-separated fields: ``U+<code>``,
    the field name and the raw reading text. The code point is exposed as
    group ``code`` and the reading text as group ``pinyin``.

    :param kind: Unihan field name, e.g. ``'kMandarin'``.
    :return: compiled pattern anchored to the whole line.
    """
    # Escape the field name so it is always matched literally, even if it
    # ever contains a regex metacharacter.
    return re.compile(
        r'^U\+(?P<code>[0-9A-Z]+)\t{}\t(?P<pinyin>.+)$'.format(
            re.escape(kind))
    )
|
||||||
|
|
||||||
|
|
||||||
|
# A single reading: any run of characters other than digits, dots and commas.
PINYIN = r'[^\d\.,]+'

# All patterns are compiled in verbose mode, so the line breaks and
# indentation inside them are not part of the match. The empty ``()()``
# groups make ``findall`` return tuples for every field, like the
# kHanyuPinyin pattern does.

# kHanyuPinyin: "<location>[,<location>...]:<pinyin>[,<pinyin>...]"
re_khanyupinyin = re.compile(
    r'''
    (?:\d{5}\.\d{2}0,)*\d{5}\.\d{2}0:
    ((?:%(pinyin)s,)*)
    (%(pinyin)s)
    ''' % {'pinyin': PINYIN},
    re.VERBOSE,
)

# kMandarin: a bare reading.
re_kmandarin = re.compile(
    r'''
    ()()
    (%(pinyin)s)
    ''' % {'pinyin': PINYIN},
    re.VERBOSE,
)

# kXHC1983: "<location>[*][,<location>[*]...]:<pinyin>"
re_kxhc1983 = re.compile(
    r'''
    ()()[0-9]{4}\.[0-9]{3}\*?
    (?:,[0-9]{4}\.[0-9]{3}\*?)*:
    (%(pinyin)s)
    ''' % {'pinyin': PINYIN},
    re.VERBOSE,
)

# kHanyuPinlu: "<pinyin>(<number>)"
re_khanyupinlu = re.compile(
    r'''
    ()()(%(pinyin)s)\([0-9]+\)
    ''' % {'pinyin': PINYIN},
    re.VERBOSE,
)

# kTGHZ2013: "<location>[,<location>...]:<pinyin>"
re_ktghz2013 = re.compile(
    r'''
    ()()[0-9]{3}\.[0-9]{3}
    (?:,[0-9]{3}\.[0-9]{3})*:
    (%(pinyin)s)
    ''' % {'pinyin': PINYIN},
    re.VERBOSE,
)

# Unihan field name -> pattern that extracts the readings from its value.
re_kinds_map = {
    'kHanyuPinyin': re_khanyupinyin,
    'kMandarin': re_kmandarin,
    'kXHC1983': re_kxhc1983,
    'kHanyuPinlu': re_khanyupinlu,
    'kTGHZ2013': re_ktghz2013,
}
|
||||||
|
|
||||||
|
|
||||||
|
def remove_dup_items(lst):
    """Return a new list without duplicates, keeping first occurrences.

    Membership is tested with ``in`` against the result list, so items only
    need to support equality; they do not have to be hashable.

    :param lst: iterable of items.
    :return: list of the distinct items in their original order.
    """
    unique = []
    for candidate in lst:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
|
||||||
|
|
||||||
|
|
||||||
|
def parse(lines, kind='kHanyuPinyin', ignore_prefix='#'):
    """Extract the readings of one Unihan field from ``lines``.

    This is a generator: it yields one ``(code, pinyin)`` pair per matching
    line, where ``code`` is the hexadecimal code point (without ``U+``) and
    ``pinyin`` is the comma-joined list of distinct readings in order of
    first appearance.

    :param lines: iterable of lines from ``Unihan_Readings.txt``.
    :param kind: Unihan field name; must be a key of ``re_kinds_map``.
    :param ignore_prefix: lines starting with this prefix are skipped.
    :raises KeyError: if ``kind`` is not a known field.
    """
    re_line = re_match_pinyin_line(kind)
    re_pinyin = re_kinds_map[kind]
    for line in lines:
        line = line.strip()
        if line.startswith(ignore_prefix):
            continue
        match = re_line.match(line)
        if match is None:
            continue

        code = match.group('code')
        raw_pinyin = match.group('pinyin')
        # findall() returns one tuple of groups per entry and a single group
        # may itself hold several comma-separated readings, e.g.
        # [(' xī,', 'lǔ '), (' lǔ,', 'xī')] or [('shú,dú,', 'tù')]
        groups = [
            group.split(',')
            for values in re_pinyin.findall(raw_pinyin)
            for group in values
        ]
        # The explicit initial value keeps reduce() from raising TypeError
        # when the value contained no recognisable reading at all.
        pinyins = functools.reduce(operator.add, groups, [])
        pinyins = remove_dup_items(
            [x.strip() for x in pinyins if x.strip()]
        )
        if not pinyins:
            # Nothing usable on this line; do not emit an empty entry.
            continue
        yield code, ','.join(pinyins)
|
||||||
|
|
||||||
|
|
||||||
|
def save_data(pinyins, writer):
    """Write parsed readings to ``writer``, one code point per line.

    Each line is the code point, its readings and, as a trailing comment,
    the character itself.

    :param pinyins: iterable of ``(code, pinyin)`` pairs where ``code`` is a
        hexadecimal code point without the ``U+`` prefix.
    :param writer: object with a ``write(str)`` method.
    :raises ValueError: if ``code`` is not valid hexadecimal or lies outside
        the Unicode range.
    """
    for code, pinyin in pinyins:
        # Decode the code point directly instead of exec()-ing generated
        # source text built from the input.
        hanzi = chr(int(code, 16))
        writer.write(
            'U+{code}: {pinyin} # {hanzi}\n'.format(
                code=code, pinyin=pinyin, hanzi=hanzi
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Regenerate one "<kind>.txt" file per supported reading field.
    kinds = ('kHanyuPinyin', 'kMandarin',
             'kHanyuPinlu', 'kXHC1983', 'kTGHZ2013')
    # The Unihan data and the generated files contain non-ASCII text, so the
    # encoding is stated explicitly instead of depending on the locale.
    with open('Unihan_Readings.txt', encoding='utf-8') as fp:
        # Read once; every kind is parsed from the same lines.
        lines = fp.readlines()
    for kind in kinds:
        with open('{}.txt'.format(kind), 'w', encoding='utf-8') as writer:
            save_data(parse(lines, kind=kind), writer)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,52 @@
|
|||||||
|
"""汉字拼音转换工具."""
|
||||||
|
from pypinyin.constants import BOPOMOFO
|
||||||
|
from pypinyin.constants import BOPOMOFO_FIRST
|
||||||
|
from pypinyin.constants import CYRILLIC
|
||||||
|
from pypinyin.constants import CYRILLIC_FIRST
|
||||||
|
from pypinyin.constants import FINALS
|
||||||
|
from pypinyin.constants import FINALS_TONE
|
||||||
|
from pypinyin.constants import FINALS_TONE2
|
||||||
|
from pypinyin.constants import FINALS_TONE3
|
||||||
|
from pypinyin.constants import FIRST_LETTER
|
||||||
|
from pypinyin.constants import INITIALS
|
||||||
|
from pypinyin.constants import NORMAL
|
||||||
|
from pypinyin.constants import Style
|
||||||
|
from pypinyin.constants import STYLE_BOPOMOFO
|
||||||
|
from pypinyin.constants import STYLE_BOPOMOFO_FIRST
|
||||||
|
from pypinyin.constants import STYLE_CYRILLIC
|
||||||
|
from pypinyin.constants import STYLE_CYRILLIC_FIRST
|
||||||
|
from pypinyin.constants import STYLE_FINALS
|
||||||
|
from pypinyin.constants import STYLE_FINALS_TONE
|
||||||
|
from pypinyin.constants import STYLE_FINALS_TONE2
|
||||||
|
from pypinyin.constants import STYLE_FINALS_TONE3
|
||||||
|
from pypinyin.constants import STYLE_FIRST_LETTER
|
||||||
|
from pypinyin.constants import STYLE_INITIALS
|
||||||
|
from pypinyin.constants import STYLE_NORMAL
|
||||||
|
from pypinyin.constants import STYLE_TONE
|
||||||
|
from pypinyin.constants import STYLE_TONE2
|
||||||
|
from pypinyin.constants import STYLE_TONE3
|
||||||
|
from pypinyin.constants import TONE
|
||||||
|
from pypinyin.constants import TONE2
|
||||||
|
from pypinyin.constants import TONE3
|
||||||
|
from pypinyin.core import lazy_pinyin
|
||||||
|
from pypinyin.core import load_phrases_dict
|
||||||
|
from pypinyin.core import load_single_dict
|
||||||
|
from pypinyin.core import pinyin
|
||||||
|
from pypinyin.core import slug
|
||||||
|
|
||||||
|
# Names exported by ``from pypinyin import *``.
__all__ = [
    # functions
    'pinyin', 'lazy_pinyin', 'slug', 'load_single_dict', 'load_phrases_dict',
    # style enum, followed by each style under both of its names
    'Style',
    'STYLE_NORMAL', 'NORMAL',
    'STYLE_TONE', 'TONE',
    'STYLE_TONE2', 'TONE2',
    'STYLE_TONE3', 'TONE3',
    'STYLE_INITIALS', 'INITIALS',
    'STYLE_FINALS', 'FINALS',
    'STYLE_FINALS_TONE', 'FINALS_TONE',
    'STYLE_FINALS_TONE2', 'FINALS_TONE2',
    'STYLE_FINALS_TONE3', 'FINALS_TONE3',
    'STYLE_FIRST_LETTER', 'FIRST_LETTER',
    'STYLE_BOPOMOFO', 'BOPOMOFO',
    'STYLE_BOPOMOFO_FIRST', 'BOPOMOFO_FIRST',
    'STYLE_CYRILLIC', 'CYRILLIC',
    'STYLE_CYRILLIC_FIRST', 'CYRILLIC_FIRST'
]

# Package metadata.
__title__ = 'pypinyin'
__version__ = '0.41.0'
__license__ = 'MIT'
__author__ = 'Hui Zhang'
__copyright__ = 'Copyright (c) 2021 Hui Zhang'
|
@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from pypinyin.runner import main
|
||||||
|
|
||||||
|
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
|
@ -0,0 +1,68 @@
|
|||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
from typing import Optional
|
||||||
|
from typing import Text
|
||||||
|
from typing import Tuple
|
||||||
|
|
||||||
|
from pypinyin import Style
|
||||||
|
from pypinyin.contrib._tone_rule import right_mark_index
|
||||||
|
|
||||||
|
_re_number = re.compile(r'\d')
|
||||||
|
|
||||||
|
|
||||||
|
class NeutralToneWith5Mixin():
    """Mark the neutral tone with ``5`` in the styles that write tones as digits.

    Usage::

        from pypinyin import lazy_pinyin, Style
        from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
        from pypinyin.converter import DefaultConverter
        from pypinyin.core import Pinyin

        # The neutral tone is not marked in the original result
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le']

        class MyConverter(NeutralToneWith5Mixin, DefaultConverter):
            pass

        my_pinyin = Pinyin(MyConverter())
        pinyin = my_pinyin.pinyin
        lazy_pinyin = my_pinyin.lazy_pinyin

        # The new result marks the neutral tone with ``5``
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le5']
        print(pinyin('好了', style=Style.TONE2))
        # Output: [['ha3o'], ['le5']]
    """

    # Styles whose output carries the tone as a digit.
    NUMBER_TONE = (Style.TONE2, Style.TONE3, Style.FINALS_TONE2,
                   Style.FINALS_TONE3)  # type: Tuple[Style, ...]
    # Subset of NUMBER_TONE where the digit goes at the end of the syllable.
    NUMBER_AT_END = (
        Style.TONE3, Style.FINALS_TONE3)  # type: Tuple[Style, ...]

    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: Style,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Add a ``5`` to results that carry no tone digit.

        The parent's ``post_convert_style`` runs first; when it returns a
        value, that value is processed instead of ``converted_pinyin``.
        For styles outside ``NUMBER_TONE`` the parent's result is returned
        unchanged.

        :return: the pinyin with the neutral tone marked, or the parent's
            result for styles this mixin does not handle.
        """
        pre_data = super().post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)

        if style not in self.NUMBER_TONE:
            return pre_data

        if pre_data is not None:
            converted_pinyin = pre_data

        # Already has a tone digit: nothing to add.
        if _re_number.search(converted_pinyin):
            return converted_pinyin

        if style in self.NUMBER_AT_END:
            return '{}5'.format(converted_pinyin)

        # Find the letter that should carry the tone mark.
        mark_index = right_mark_index(converted_pinyin)
        if mark_index is None:
            # No letter to mark was found; append the digit at the end, the
            # same fallback tone_convert.tone3_to_tone2 uses.
            mark_index = len(converted_pinyin) - 1
        before = converted_pinyin[:mark_index + 1]
        after = converted_pinyin[mark_index + 1:]

        return '{}5{}'.format(before, after)
|
@ -0,0 +1,341 @@
|
|||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
from typing import Text
|
||||||
|
|
||||||
|
from pypinyin.contrib._tone_rule import right_mark_index
|
||||||
|
from pypinyin.style._constants import RE_TONE3
|
||||||
|
from pypinyin.style.tone import converter
|
||||||
|
from pypinyin.utils import _replace_tone2_style_dict_to_default
|
||||||
|
|
||||||
|
_re_number = re.compile(r'\d')
|
||||||
|
|
||||||
|
|
||||||
|
def _v_to_u(pinyin: Text, replace: bool=False) -> Text:
|
||||||
|
"""replace v to u
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pinyin (Text): pinyin
|
||||||
|
replace (bool, optional): True, v to u; False, v as it is. Defaults to False.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Text: new pinyin
|
||||||
|
"""
|
||||||
|
if not replace:
|
||||||
|
return pinyin
|
||||||
|
return pinyin.replace('v', 'ü')
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_v_u(origin_py: Text, new_py: Text, v_to_u: bool) -> Text:
|
||||||
|
""" fix v u
|
||||||
|
|
||||||
|
Args:
|
||||||
|
origin_py (Text): origin pinyin
|
||||||
|
new_py (Text): new pinyin
|
||||||
|
v_to_u (bool): True, replace v to u; False, v as it is.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Text:
|
||||||
|
"""
|
||||||
|
if not v_to_u:
|
||||||
|
if 'ü' in new_py and 'ü' not in origin_py:
|
||||||
|
return new_py.replace('ü', 'v')
|
||||||
|
|
||||||
|
return _v_to_u(new_py, replace=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_number_from_pinyin(pinyin: Text) -> Optional[Text]:
    """Return the first tone digit found in ``pinyin``.

    Args:
        pinyin (Text): pinyin that may contain a tone digit.

    Returns:
        Optional[Text]: the first digit as a one-character string (it is not
        converted to ``int``), or None when ``pinyin`` contains no digit.
    """
    match = _re_number.search(pinyin)
    return match.group() if match else None
|
||||||
|
|
||||||
|
|
||||||
|
def _improve_tone3(tone3: Text, neutral_tone_with_5: bool=False) -> Text:
    """Append ``5`` to a pinyin without a tone digit, if requested.

    Args:
        tone3 (Text): pinyin to process.
        neutral_tone_with_5 (bool, optional): when true, a pinyin without
            any digit gets ``5`` appended. Defaults to False.

    Returns:
        Text: ``tone3``, with a trailing ``5`` when one was added.
    """
    has_no_tone_number = _get_number_from_pinyin(tone3) is None
    if neutral_tone_with_5 and has_no_tone_number:
        return '{}5'.format(tone3)
    return tone3
|
||||||
|
|
||||||
|
|
||||||
|
def tone_to_tone3(tone: Text,
                  v_to_u: bool=False,
                  neutral_tone_with_5: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE3` style.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_tone3
        >>> tone_to_tone3('zhōng')
        'zhong1'
        >>> tone_to_tone3('shang', neutral_tone_with_5=True)
        'shang5'
        >>> tone_to_tone3('lüè', v_to_u=True)
        'lüe4'
    """
    with_number = _improve_tone3(
        converter.to_tone3(tone), neutral_tone_with_5=neutral_tone_with_5)
    return _v_to_u(with_number, v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def tone_to_tone2(tone: Text,
                  v_to_u: bool=False,
                  neutral_tone_with_5: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE2` style.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_tone2
        >>> tone_to_tone2('zhōng')
        'zho1ng'
        >>> tone_to_tone2('shang', neutral_tone_with_5=True)
        'sha5ng'
        >>> tone_to_tone2('lüè', v_to_u=True)
        'lüe4'
    """
    # Go through TONE3 first, then move the digit next to the marked letter.
    as_tone3 = tone_to_tone3(
        tone, v_to_u=v_to_u, neutral_tone_with_5=neutral_tone_with_5)
    return _v_to_u(tone3_to_tone2(as_tone3), v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def tone_to_normal(tone: Text, v_to_u: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE` style pinyin to the
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param tone: pinyin in :py:attr:`~pypinyin.Style.TONE` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone_to_normal
        >>> tone_to_normal('zhōng')
        'zhong'
        >>> tone_to_normal('lüè', v_to_u=True)
        'lüe'
    """
    numbered = tone_to_tone2(tone, v_to_u=v_to_u)
    # Dropping every digit leaves the toneless spelling.
    return _v_to_u(_re_number.sub('', numbered), v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def tone2_to_normal(tone2: Text, v_to_u: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE2` style pinyin to the
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in Style.NORMAL style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_normal
        >>> tone2_to_normal('zho1ng')
        'zhong'
        >>> tone2_to_normal('lüe4', v_to_u=True)
        'lüe'
    """
    without_digits = _re_number.sub('', tone2)
    return _v_to_u(without_digits, v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def tone2_to_tone(tone2: Text) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE2` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE` style.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :return: pinyin in Style.TONE style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_tone
        >>> tone2_to_tone('zho1ng')
        'zhōng'
    """
    with_marks = _replace_tone2_style_dict_to_default(tone2)
    return with_marks
|
||||||
|
|
||||||
|
|
||||||
|
def tone2_to_tone3(tone2: Text) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE2` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE3` style.

    :param tone2: pinyin in :py:attr:`~pypinyin.Style.TONE2` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone2_to_tone3
        >>> tone2_to_tone3('zho1ng')
        'zhong1'
    """
    # Swap the second and third groups matched by RE_TONE3.
    return RE_TONE3.sub(r'\1\3\2', tone2)
|
||||||
|
|
||||||
|
|
||||||
|
def tone3_to_normal(tone3: Text, v_to_u: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_normal
        >>> tone3_to_normal('zhong1')
        'zhong'
        >>> tone3_to_normal('lüe4', v_to_u=True)
        'lüe'
    """
    without_digits = _re_number.sub('', tone3)
    return _v_to_u(without_digits, v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def tone3_to_tone(tone3: Text) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE` style.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_tone
        >>> tone3_to_tone('zhong1')
        'zhōng'
    """
    # TONE3 -> TONE2 -> TONE
    return tone2_to_tone(tone3_to_tone2(tone3))
|
||||||
|
|
||||||
|
|
||||||
|
def tone3_to_tone2(tone3: Text) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE2` style.

    :param tone3: pinyin in :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import tone3_to_tone2
        >>> tone3_to_tone2('zhong1')
        'zho1ng'
    """
    plain = tone3_to_normal(tone3)
    # The digit goes right after the letter that carries the tone mark;
    # when no such letter is reported it goes at the very end.
    split_at = right_mark_index(plain)
    if split_at is None:
        split_at = len(plain) - 1
    split_at += 1

    number = _get_number_from_pinyin(tone3)
    if number is None:
        # No tone digit: return the input as it is.
        return tone3

    return '{}{}{}'.format(plain[:split_at], number, plain[split_at:])
|
||||||
|
|
||||||
|
|
||||||
|
def to_normal(pinyin: Text, v_to_u: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE`,
    :py:attr:`~pypinyin.Style.TONE2` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.NORMAL` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE`,
                   :py:attr:`~pypinyin.Style.TONE2` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``.
                   True, v to u; False, v as it is.
    :return: pinyin in :py:attr:`~pypinyin.Style.NORMAL` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_normal
        >>> to_normal('zhōng')
        'zhong'
        >>> to_normal('zho1ng')
        'zhong'
        >>> to_normal('zhong1')
        'zhong'
        >>> to_normal('lüè', v_to_u=True)
        'lüe'
    """
    # Convert with ``ü`` throughout; _fix_v_u then settles v versus ü.
    numbered = tone_to_tone2(pinyin, v_to_u=True)
    return _fix_v_u(pinyin, tone2_to_normal(numbered), v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def to_tone(pinyin: Text) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE2` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE2` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone
        >>> to_tone('zho1ng')
        'zhōng'
        >>> to_tone('zhong1')
        'zhōng'
    """
    # Without a digit there is nothing to convert.
    if _re_number.search(pinyin) is None:
        return pinyin

    return tone2_to_tone(tone_to_tone2(pinyin))
|
||||||
|
|
||||||
|
|
||||||
|
def to_tone2(pinyin: Text, v_to_u: bool=False,
             neutral_tone_with_5: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE` or
    :py:attr:`~pypinyin.Style.TONE3` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE2` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE` or
                   :py:attr:`~pypinyin.Style.TONE3` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE2` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone2
        >>> to_tone2('zhōng')
        'zho1ng'
        >>> to_tone2('zhong1')
        'zho1ng'
        >>> to_tone2('shang', neutral_tone_with_5=True)
        'sha5ng'
        >>> to_tone2('lüè', v_to_u=True)
        'lüe4'
    """
    # Convert with ``ü`` throughout; _fix_v_u then settles v versus ü.
    as_tone3 = tone_to_tone3(
        pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5)
    return _fix_v_u(pinyin, tone3_to_tone2(as_tone3), v_to_u)
|
||||||
|
|
||||||
|
|
||||||
|
def to_tone3(pinyin: Text, v_to_u: bool=False,
             neutral_tone_with_5: bool=False) -> Text:
    """Convert a :py:attr:`~pypinyin.Style.TONE` or
    :py:attr:`~pypinyin.Style.TONE2` style pinyin to the
    :py:attr:`~pypinyin.Style.TONE3` style.

    :param pinyin: pinyin in :py:attr:`~pypinyin.Style.TONE` or
                   :py:attr:`~pypinyin.Style.TONE2` style
    :param v_to_u: whether to use ``ü`` in place of ``v``
    :param neutral_tone_with_5: whether to mark the neutral tone with ``5``
    :return: pinyin in :py:attr:`~pypinyin.Style.TONE3` style

    Usage::

        >>> from pypinyin.contrib.tone_convert import to_tone3
        >>> to_tone3('zhōng')
        'zhong1'
        >>> to_tone3('zho1ng')
        'zhong1'
        >>> to_tone3('shang', neutral_tone_with_5=True)
        'shang5'
        >>> to_tone3('lüè', v_to_u=True)
        'lüe4'
    """
    # Convert with ``ü`` throughout; _fix_v_u then settles v versus ü.
    s = tone_to_tone2(
        pinyin, v_to_u=True, neutral_tone_with_5=neutral_tone_with_5)
    s = tone2_to_tone3(s)
    return _fix_v_u(pinyin, s, v_to_u)
|
@ -0,0 +1,44 @@
|
|||||||
|
from typing import Any
|
||||||
|
from typing import Optional
|
||||||
|
from typing import Text
|
||||||
|
|
||||||
|
from pypinyin.constants import Style
|
||||||
|
|
||||||
|
|
||||||
|
class V2UMixin():
    """Use ``ü`` in place of ``v`` in the converted result.

    Usage::

        from pypinyin import lazy_pinyin, Style
        from pypinyin.contrib.uv import V2UMixin
        from pypinyin.converter import DefaultConverter
        from pypinyin.core import Pinyin

        # The original result uses ``v`` to represent ``ü``
        print(lazy_pinyin('战略'))
        # Output: ['zhan', 'lve']

        class MyConverter(V2UMixin, DefaultConverter):
            pass

        my_pinyin = Pinyin(MyConverter())
        pinyin = my_pinyin.pinyin
        lazy_pinyin = my_pinyin.lazy_pinyin

        # The new result uses ``ü`` in place of ``v``
        print(lazy_pinyin('战略'))
        # Output: ['zhan', 'lüe']
        print(pinyin('战略', style=Style.NORMAL))
        # Output: [['zhan'], ['lüe']]
    """

    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: Style,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Replace every ``v`` with ``ü`` after the parent's post-processing.

        The parent's ``post_convert_style`` runs first; when it returns a
        value, that value is used instead of ``converted_pinyin``. The
        replacement is applied whatever ``style`` is.
        """
        pre_data = super().post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)

        result = converted_pinyin if pre_data is None else pre_data
        return result.replace('v', 'ü')
|
@ -0,0 +1,41 @@
|
|||||||
|
# Letters carrying a tone mark, mapped to the plain letter plus tone digit.
phonetic_symbol = {
    "ā": "a1",
    "á": "a2",
    "ǎ": "a3",
    "à": "a4",
    "ē": "e1",
    "é": "e2",
    "ě": "e3",
    "è": "e4",
    "ō": "o1",
    "ó": "o2",
    "ǒ": "o3",
    "ò": "o4",
    "ī": "i1",
    "í": "i2",
    "ǐ": "i3",
    "ì": "i4",
    "ū": "u1",
    "ú": "u2",
    "ǔ": "u3",
    "ù": "u4",

    # üe
    "ü": "v",
    "ǖ": "v1",
    "ǘ": "v2",
    "ǚ": "v3",
    "ǜ": "v4",
    "ń": "n2",
    "ň": "n3",
    "ǹ": "n4",
    "m̄": "m1",  # len('m̄') == 2
    "ḿ": "m2",
    "m̀": "m4",  # len("m̀") == 2
    "ê̄": "ê1",  # len('ê̄') == 2
    "ế": "ê2",
    "ê̌": "ê3",  # len('ê̌') == 2
    "ề": "ê4",
}

# Inverse mapping: plain letter plus tone digit -> letter with tone mark.
phonetic_symbol_reverse = {
    number_form: marked for marked, number_form in phonetic_symbol.items()
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue