You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
69 lines
2.3 KiB
69 lines
2.3 KiB
4 years ago
|
import re
|
||
|
from typing import Any
|
||
|
from typing import Optional
|
||
|
from typing import Text
|
||
|
from typing import Tuple
|
||
|
|
||
|
from pypinyin import Style
|
||
|
from pypinyin.contrib._tone_rule import right_mark_index
|
||
|
|
||
|
# Matches a single decimal digit; used below to detect pinyin that already
# carries a numeric tone.
_re_number = re.compile(r'\d')
|
||
|
|
||
|
|
||
|
class NeutralToneWith5Mixin():
    """Mark the neutral tone with ``5`` in digit-based tone styles.

    For the pinyin styles that represent tones with digits, a converted
    syllable that carries no tone digit gets a ``5`` added to it.

    Usage::

        from pypinyin import lazy_pinyin, Style
        from pypinyin.contrib.neutral_tone import NeutralToneWith5Mixin
        from pypinyin.converter import DefaultConverter
        from pypinyin.core import Pinyin

        # The original result does not mark the neutral tone.
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le']

        class MyConverter(NeutralToneWith5Mixin, DefaultConverter):
            pass

        my_pinyin = Pinyin(MyConverter())
        pinyin = my_pinyin.pinyin
        lazy_pinyin = my_pinyin.lazy_pinyin

        # The new result marks the neutral tone with ``5``.
        print(lazy_pinyin('好了', style=Style.TONE2))
        # Output: ['ha3o', 'le5']
        print(pinyin('好了', style=Style.TONE2))
        # Output: [['ha3o'], ['le5']]

    """

    # Styles this mixin post-processes; any other style is passed through.
    # The annotation is variadic because these tuples hold several styles.
    NUMBER_TONE = (Style.TONE2, Style.TONE3, Style.FINALS_TONE2,
                   Style.FINALS_TONE3)  # type: Tuple[Style, ...]
    # Subset of NUMBER_TONE for which the ``5`` is appended at the very end.
    NUMBER_AT_END = (
        Style.TONE3, Style.FINALS_TONE3)  # type: Tuple[Style, ...]

    def post_convert_style(self,
                           han: Text,
                           orig_pinyin: Text,
                           converted_pinyin: Text,
                           style: Style,
                           strict: bool,
                           **kwargs: Any) -> Optional[Text]:
        """Add a ``5`` to converted pinyin that has no tone digit.

        The parent class's ``post_convert_style`` runs first with the same
        arguments. If it returns a value other than ``None``, that value
        replaces ``converted_pinyin`` for the rest of the processing.

        :param han: forwarded unchanged to the parent hook
                    (presumably the Chinese text being converted).
        :param orig_pinyin: forwarded unchanged to the parent hook
                            (presumably the pinyin before style conversion).
        :param converted_pinyin: pinyin after style conversion.
        :param style: the style being applied.
        :param strict: forwarded unchanged to the parent hook.
        :param kwargs: extra keyword arguments forwarded to the parent hook.
        :return: the parent's result when ``style`` is not in
                 ``NUMBER_TONE``; the pinyin unchanged when it already
                 contains a digit; otherwise the pinyin with ``5`` added.
        """
        pre_data = super().post_convert_style(
            han, orig_pinyin, converted_pinyin, style, strict, **kwargs)

        # Styles without numeric tones are none of this mixin's business.
        if style not in self.NUMBER_TONE:
            return pre_data

        if pre_data is not None:
            converted_pinyin = pre_data

        # Already has a tone digit: nothing to add.
        if _re_number.search(converted_pinyin):
            return converted_pinyin

        if style in self.NUMBER_AT_END:
            return '{}5'.format(converted_pinyin)

        # Find the letter the tone mark belongs on and put the ``5`` right
        # after it.
        # NOTE(review): assumes right_mark_index always returns an int for
        # the input given here -- confirm against its implementation.
        split_at = right_mark_index(converted_pinyin) + 1
        head = converted_pinyin[:split_at]
        tail = converted_pinyin[split_at:]

        return '{}5{}'.format(head, tail)
|