【Hackathon 8th No.7】Python版本适配 5 (#3972)

* Update setup.py

* fit with pypinyin

* Apply suggestions from code review

* Apply suggestions from code review

* Update tone_sandhi.py

* Apply suggestions from code review
pull/3980/head
张春乔 8 months ago committed by GitHub
parent 69985c2869
commit cb0ba54d6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -243,8 +243,10 @@ class ToneSandhi():
if skip_next: if skip_next:
skip_next = False skip_next = False
continue continue
if i - 1 >= 0 and word == "" and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v": if i - 1 >= 0 and word == "" and i + 1 < len(seg) and seg[i - 1][
new_seg[-1] = (new_seg[-1][0] + "" + seg[i + 1][0], new_seg[-1][1]) 0] == seg[i + 1][0] and seg[i - 1][1] == "v":
new_seg[-1] = (new_seg[-1][0] + "" + seg[i + 1][0],
new_seg[-1][1])
skip_next = True skip_next = True
else: else:
new_seg.append((word, pos)) new_seg.append((word, pos))
@ -262,11 +264,16 @@ class ToneSandhi():
def _merge_continuous_three_tones( def _merge_continuous_three_tones(
self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
sub_finals_list = [ sub_finals_list = []
lazy_pinyin( for (word, pos) in seg:
orig_finals = lazy_pinyin(
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
for (word, pos) in seg # In pypinyin versions after 0.44.0, '嗯' needs to be set to "n2", because the initial and the final cannot both be empty at the same time
] en_index = [index for index, c in enumerate(word) if c == "嗯"]
for i in en_index:
orig_finals[i] = "n2"
sub_finals_list.append(orig_finals)
assert len(sub_finals_list) == len(seg) assert len(sub_finals_list) == len(seg)
merge_last = [False] * len(seg) merge_last = [False] * len(seg)
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):
@ -292,11 +299,15 @@ class ToneSandhi():
def _merge_continuous_three_tones_2( def _merge_continuous_three_tones_2(
self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
sub_finals_list = [ sub_finals_list = []
lazy_pinyin( for (word, pos) in seg:
orig_finals = lazy_pinyin(
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
for (word, pos) in seg # In pypinyin versions after 0.44.0, '嗯' needs to be set to "n2", because the initial and the final cannot both be empty at the same time
] en_index = [index for index, c in enumerate(word) if c == "嗯"]
for i in en_index:
orig_finals[i] = "n2"
sub_finals_list.append(orig_finals)
assert len(sub_finals_list) == len(seg) assert len(sub_finals_list) == len(seg)
merge_last = [False] * len(seg) merge_last = [False] * len(seg)
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):

@ -173,6 +173,11 @@ class Frontend():
word, neutral_tone_with_five=True, style=Style.INITIALS) word, neutral_tone_with_five=True, style=Style.INITIALS)
orig_finals = lazy_pinyin( orig_finals = lazy_pinyin(
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
# In pypinyin versions after 0.44.0, '嗯' needs to be set to "n2", because the initial and the final cannot both be empty at the same time
en_index = [index for index, c in enumerate(word) if c == "嗯"]
for i in en_index:
orig_finals[i] = "n2"
for c, v in zip(orig_initials, orig_finals): for c, v in zip(orig_initials, orig_finals):
if re.match(r'i\d', v): if re.match(r'i\d', v):
if c in ['z', 'c', 's']: if c in ['z', 'c', 's']:

@ -107,7 +107,7 @@ base = [
"praatio>=6.0.0", "praatio>=6.0.0",
"prettytable", "prettytable",
"pydantic", "pydantic",
"pypinyin<=0.44.0", "pypinyin",
"pypinyin-dict", "pypinyin-dict",
"python-dateutil", "python-dateutil",
"pyworld>=0.2.12", "pyworld>=0.2.12",

Loading…
Cancel
Save