You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/third_party/python-pinyin/pypinyin/style/_utils.py

80 lines
2.4 KiB

from pypinyin.standard import convert_finals
from pypinyin.style._constants import _INITIALS
from pypinyin.style._constants import _INITIALS_NOT_STRICT
from pypinyin.style._constants import PHONETIC_SYMBOL_DICT
from pypinyin.style._constants import PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE
from pypinyin.style._constants import RE_NUMBER
from pypinyin.style._constants import RE_PHONETIC_SYMBOL
from typing import Text
def get_initials(pinyin: Text, strict: bool) -> Text:
"""获取单个拼音中的声母.
:param pinyin: 单个拼音
:type pinyin: unicode
:param strict: 是否严格遵照《汉语拼音方案》来处理声母和韵母
:return: 声母
:rtype: unicode
"""
if strict:
_initials = _INITIALS
else:
_initials = _INITIALS_NOT_STRICT
for i in _initials:
if pinyin.startswith(i):
return i
return ''
def get_finals(pinyin: Text, strict: bool) -> Text:
"""获取单个拼音中的韵母.
:param pinyin: 单个拼音
:type pinyin: unicode
:param strict: 是否严格遵照《汉语拼音方案》来处理声母和韵母
:return: 韵母
:rtype: unicode
"""
if strict:
pinyin = convert_finals(pinyin)
initials = get_initials(pinyin, strict=strict) or ''
# 没有声母,整个都是韵母
if not initials:
return pinyin
# 按声母分割,剩下的就是韵母
return ''.join(pinyin.split(initials, 1))
def has_finals(pinyin: Text) -> bool:
"""判断是否有韵母"""
# 鼻音: 'm̄', 'ḿ', 'm̀', 'ń', 'ň', 'ǹ ' 没有韵母
for symbol in ['', 'ḿ', '', 'ń', 'ň', 'ǹ', 'ê̄', 'ế', 'ê̌', '']:
if symbol in pinyin:
return False
return True
def replace_symbol_to_number(pinyin: Text) -> Text:
"""把声调替换为数字"""
def _replace(match):
symbol = match.group(0) # 带声调的字符
# 返回使用数字标识声调的字符
return PHONETIC_SYMBOL_DICT[symbol]
# 替换拼音中的带声调字符
value = RE_PHONETIC_SYMBOL.sub(_replace, pinyin)
for symbol, to in PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE.items():
value = value.replace(symbol, to)
return value
def replace_symbol_to_no_symbol(pinyin: Text) -> Text:
"""把带声调字符替换为没有声调的字符"""
value = replace_symbol_to_number(pinyin)
return RE_NUMBER.sub('', value)