You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

33 lines
1.0 KiB

import re
from typing import Dict
from typing import List
from typing import Text
from pypinyin import phonetic_symbol
# Table of initials.
_INITIALS = (
    'b,p,m,f,d,t,n,l,g,k,h,j,q,x,zh,ch,sh,r,z,c,s'
).split(',')  # type: List[Text]
# Table of initials that additionally treats 'y' and 'w' as initials.
# Built from a copy so that _INITIALS itself is left untouched.
_INITIALS_NOT_STRICT = list(_INITIALS)  # type: List[Text]
_INITIALS_NOT_STRICT.extend(('y', 'w'))
# Mapping between tone-marked characters and their digit-based tone
# notation. This is a shallow copy of the table provided by
# pypinyin.phonetic_symbol.
PHONETIC_SYMBOL_DICT = phonetic_symbol.phonetic_symbol.copy(
)  # type: Dict[Text, Text]
# Entries of PHONETIC_SYMBOL_DICT whose key is longer than one character.
# NOTE(review): the assignment target and opening of this expression were
# missing, leaving a dangling generator expression (a syntax error). The
# name below follows upstream pypinyin -- confirm against the callers.
PHONETIC_SYMBOL_DICT_KEY_LENGTH_NOT_ONE = {
    k: v
    for k, v in PHONETIC_SYMBOL_DICT.items()
    if len(k) > 1
}  # type: Dict[Text, Text]
# Regular expression matching any single tone-marked character: a character
# class built from the one-character keys of PHONETIC_SYMBOL_DICT.
RE_PHONETIC_SYMBOL = re.compile(
    r'[{0}]'.format(
        re.escape(
            ''.join(
                symbol
                for symbol in PHONETIC_SYMBOL_DICT
                if len(symbol) == 1
            )
        )
    )
)
# Regular expression for a character whose tone is marked with a digit:
# group 1 is one of the letters a, e, o, i, u, v, n, m and group 2 is the
# tone digit (1-5), which must be the last character of the string.
RE_TONE2 = re.compile(
    r'([aeoiuvnm])([1-5])$'
)
# Regular expression for the tone mark of the final in TONE2 notation:
# group 1 is the lowercase letters before the tone digit, group 2 is the
# tone digit (1-5) and group 3 is any lowercase letters after it. The whole
# string must match.
RE_TONE3 = re.compile(
    r'^([a-z]+)([1-5])([a-z]*)$'
)
# Regular expression matching a single digit.
RE_NUMBER = re.compile(
    r'\d'
)