You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

572 lines
25 KiB

E2E/Streaming Transformer/Conformer ASR (#578) * add cmvn and label smoothing loss layer * add layer for transformer * add glu and conformer conv * add torch compatiable hack, mask funcs * not hack size since it exists * add test; attention * add attention, common utils, hack paddle * add audio utils * conformer batch padding mask bug fix #223 * fix typo, python infer fix rnn mem opt name error and batchnorm1d, will be available at 2.0.2 * fix ci * fix ci * add encoder * refactor egs * add decoder * refactor ctc, add ctc align, refactor ckpt, add warmup lr scheduler, cmvn utils * refactor docs * add fix * fix readme * fix bugs, refactor collator, add pad_sequence, fix ckpt bugs * fix docstring * refactor data feed order * add u2 model * refactor cmvn, test * add utils * add u2 config * fix bugs * fix bugs * fix autograd maybe has problem when using inplace operation * refactor data, build vocab; add format data * fix text featurizer * refactor build vocab * add fbank, refactor feature of speech * refactor audio feat * refactor data preprare * refactor data * model init from config * add u2 bins * flake8 * can train * fix bugs, add coverage, add scripts * test can run * fix data * speed perturb with sox * add spec aug * fix for train * fix train logitc * fix logger * log valid loss, time dataset process * using np for speed perturb, remove some debug log of grad clip * fix logger * fix build vocab * fix logger name * using module logger as default * fix * fix install * reorder imports * fix board logger * fix logger * kaldi fbank and mfcc * fix cmvn and print prarams * fix add_eos_sos and cmvn * fix cmvn compute * fix logger and cmvn * fix subsampling, label smoothing loss, remove useless * add notebook test * fix log * fix tb logger * multi gpu valid * fix log * fix log * fix config * fix compute cmvn, need paddle 2.1 * add cmvn notebook * fix layer tools * fix compute cmvn * add rtf * fix decoding * fix layer tools * fix log, add avg script * more avg and test 
info * fix dataset pickle problem; using 2.1 paddle; num_workers can > 0; ckpt save in exp dir;fix; * add vimrc * refactor tiny script, add transformer and stream conf * spm demo; librisppech scripts and confs * fix log * add librispeech scripts * refactor data pipe; fix conf; fix u2 default params * fix bugs * refactor aishell scripts * fix test * fix cmvn * fix s0 scripts * fix ds2 scripts and bugs * fix dev & test dataset filter * fix dataset filter * filter dev * fix ckpt path * filter test, since librispeech will cause OOM, but all test wer will be worse, since mismatch train with test * add comment * add syllable doc * fix ds2 configs * add doc * add pypinyin tools * fix decoder using blank_id=0 * mmseg with pybind11 * format code
4 years ago
#!/usr/bin/env python3
from itertools import chain

import pytest
from pypinyin import (lazy_pinyin, pinyin, NORMAL, TONE, TONE2, TONE3, INITIALS,
                      FIRST_LETTER, FINALS, FINALS_TONE, FINALS_TONE2,
                      FINALS_TONE3)
# test data from # noqa
# Initials table: each row is [hans, kwargs, expected initial].
# NOTE(review): in this copy of the file every input character was the empty
# string and the list was never closed. The characters below are restored from
# the initials table of the Hanyu Pinyin Scheme (b 玻, p 坡, m 摸, ...), which
# lists the initials in exactly this order -- confirm against the upstream
# pypinyin test suite.
data_for_initials = [
    ['玻', dict(style=INITIALS), ['b']],
    ['坡', dict(style=INITIALS), ['p']],
    ['摸', dict(style=INITIALS), ['m']],
    ['佛', dict(style=INITIALS), ['f']],
    ['得', dict(style=INITIALS), ['d']],
    ['特', dict(style=INITIALS), ['t']],
    ['讷', dict(style=INITIALS), ['n']],
    ['勒', dict(style=INITIALS), ['l']],
    ['哥', dict(style=INITIALS), ['g']],
    ['科', dict(style=INITIALS), ['k']],
    ['喝', dict(style=INITIALS), ['h']],
    ['基', dict(style=INITIALS), ['j']],
    ['欺', dict(style=INITIALS), ['q']],
    ['希', dict(style=INITIALS), ['x']],
    ['知', dict(style=INITIALS), ['zh']],
    ['蚩', dict(style=INITIALS), ['ch']],
    ['诗', dict(style=INITIALS), ['sh']],
    ['日', dict(style=INITIALS), ['r']],
    ['资', dict(style=INITIALS), ['z']],
    ['雌', dict(style=INITIALS), ['c']],
    ['思', dict(style=INITIALS), ['s']],
]
@pytest.mark.parametrize('hans, kwargs, result', data_for_initials)
def test_initials(hans, kwargs, result):
    """Check one row of the initials table against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result`` directly; the nested list returned
    by ``pinyin`` must equal ``result`` once flattened by one level.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    flattened = list(chain.from_iterable(pinyin(hans, **kwargs)))
    assert flattened == result
# Finals table: each row is [hans, kwargs, expected final], all using
# style=FINALS.
# NOTE(review): in this copy every hans entry is the empty string and the list
# has no closing bracket before the decorator that follows. The input
# characters appear to have been lost; restore them from the upstream test
# data before relying on these cases.
data_for_finals = [
['', dict(style=FINALS), ['i']],
['', dict(style=FINALS), ['u']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS), ['a']],
['', dict(style=FINALS), ['ia']],
['', dict(style=FINALS), ['ua']],
['', dict(style=FINALS), ['o']],
['', dict(style=FINALS), ['uo']],
['', dict(style=FINALS), ['e']],
['', dict(style=FINALS), ['ie']],
['', dict(style=FINALS), ['ve']],
['', dict(style=FINALS), ['ai']],
['', dict(style=FINALS), ['uai']],
# ['欸', dict(style=FINALS), ['ei']],
['', dict(style=FINALS), ['ei']],
['', dict(style=FINALS), ['uei']],
['', dict(style=FINALS), ['ao']],
['', dict(style=FINALS), ['iao']],
['', dict(style=FINALS), ['ou']],
['', dict(style=FINALS), ['iou']],
['', dict(style=FINALS), ['an']],
['', dict(style=FINALS), ['ian']],
['', dict(style=FINALS), ['uan']],
['', dict(style=FINALS), ['van']],
['', dict(style=FINALS), ['en']],
['', dict(style=FINALS), ['in']],
['', dict(style=FINALS), ['uen']],
['', dict(style=FINALS), ['vn']],
['', dict(style=FINALS), ['ang']],
['', dict(style=FINALS), ['iang']],
['', dict(style=FINALS), ['uang']],
['', dict(style=FINALS), ['eng']],
['', dict(style=FINALS), ['ing']],
['', dict(style=FINALS), ['ueng']],
['', dict(style=FINALS), ['ong']],
['', dict(style=FINALS), ['iong']],
['', dict(style=FINALS), ['er']],
@pytest.mark.parametrize('hans, kwargs, result', data_for_finals)
def test_finals(hans, kwargs, result):
    """Check one row of the finals table against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
# Zero initial: syllables that have no initial consonant.
# Each row is [hans, kwargs, expected]; rows come in groups that run the same
# input through several styles, with and without strict=False.
# NOTE(review): in this copy every hans entry is the empty string and the list
# has no closing bracket before the decorator that follows. The input
# characters appear to have been lost; restore them from the upstream test
# data before relying on these cases.
data_for_zero_consonant = [
# Finals of the i-row with no preceding initial are written yi (衣), ya (呀),
# ye (耶), yao (腰), you (忧), yan (烟), yin (因), yang (央), ying (英),
# yong (雍).
['', dict(style=NORMAL), ['yi']],
['', dict(style=FINALS), ['i']],
['', dict(style=FINALS, strict=False), ['i']],
['', dict(style=NORMAL), ['ya']],
['', dict(style=FINALS), ['ia']],
['', dict(style=FINALS, strict=False), ['a']],
['', dict(style=NORMAL), ['ye']],
['', dict(style=FINALS), ['ie']],
['', dict(style=FINALS, strict=False), ['e']],
['', dict(style=NORMAL), ['yao']],
['', dict(style=FINALS), ['iao']],
['', dict(style=FINALS, strict=False), ['ao']],
['', dict(style=NORMAL), ['you']],
['', dict(style=FINALS), ['iou']],
['', dict(style=FINALS, strict=False), ['ou']],
['', dict(style=NORMAL), ['yan']],
['', dict(style=FINALS), ['ian']],
['', dict(style=FINALS, strict=False), ['an']],
['', dict(style=NORMAL), ['yin']],
['', dict(style=FINALS), ['in']],
['', dict(style=FINALS, strict=False), ['in']],
['', dict(style=NORMAL), ['yang']],
['', dict(style=FINALS), ['iang']],
['', dict(style=FINALS, strict=False), ['ang']],
['', dict(style=NORMAL), ['ying']],
['', dict(style=FINALS), ['ing']],
['', dict(style=FINALS, strict=False), ['ing']],
['', dict(style=NORMAL), ['yong']],
['', dict(style=FINALS), ['iong']],
['', dict(style=FINALS, strict=False), ['ong']],
['', dict(style=NORMAL), ['yi']],
['', dict(style=NORMAL, strict=False), ['yi']],
# NOTE(review): the expected value of the next two TONE rows is empty in this
# copy; the TONE2 rows of the same group ('yi2') suggest 'yí' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['yi2']],
['', dict(style=TONE2, strict=False), ['yi2']],
['', dict(style=TONE3), ['yi2']],
['', dict(style=TONE3, strict=False), ['yi2']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['i']],
['', dict(style=FINALS, strict=False), ['i']],
['', dict(style=FINALS_TONE), ['í']],
['', dict(style=FINALS_TONE, strict=False), ['í']],
['', dict(style=FINALS_TONE2), ['i2']],
['', dict(style=FINALS_TONE2, strict=False), ['i2']],
['', dict(style=FINALS_TONE3), ['i2']],
['', dict(style=FINALS_TONE3, strict=False), ['i2']],
['', dict(style=NORMAL), ['yan']],
['', dict(style=NORMAL, strict=False), ['yan']],
['', dict(style=TONE), ['yán']],
['', dict(style=TONE, strict=False), ['yán']],
['', dict(style=TONE2), ['ya2n']],
['', dict(style=TONE2, strict=False), ['ya2n']],
['', dict(style=TONE3), ['yan2']],
['', dict(style=TONE3, strict=False), ['yan2']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['ian']],
['', dict(style=FINALS, strict=False), ['an']],
['', dict(style=FINALS_TONE), ['ián']],
['', dict(style=FINALS_TONE, strict=False), ['án']],
['', dict(style=FINALS_TONE2), ['ia2n']],
['', dict(style=FINALS_TONE2, strict=False), ['a2n']],
['', dict(style=FINALS_TONE3), ['ian2']],
['', dict(style=FINALS_TONE3, strict=False), ['an2']],
# Finals of the u-row with no preceding initial are written wu (乌), wa (蛙),
# wo (窝), wai (歪), wei (威), wan (弯), wen (温), wang (汪), weng (翁).
['', dict(style=NORMAL), ['wu']],
['', dict(style=FINALS), ['u']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=NORMAL), ['wa']],
['', dict(style=FINALS), ['ua']],
['', dict(style=FINALS, strict=False), ['a']],
['', dict(style=NORMAL), ['wo']],
['', dict(style=FINALS), ['uo']],
['', dict(style=FINALS, strict=False), ['o']],
['', dict(style=NORMAL), ['wai']],
['', dict(style=FINALS), ['uai']],
['', dict(style=FINALS, strict=False), ['ai']],
['', dict(style=NORMAL), ['wei']],
['', dict(style=FINALS), ['uei']],
['', dict(style=FINALS, strict=False), ['ei']],
['', dict(style=NORMAL), ['wan']],
['', dict(style=FINALS), ['uan']],
['', dict(style=FINALS, strict=False), ['an']],
['', dict(style=NORMAL), ['wen']],
['', dict(style=FINALS), ['uen']],
['', dict(style=FINALS, strict=False), ['en']],
['', dict(style=NORMAL), ['wang']],
['', dict(style=FINALS), ['uang']],
['', dict(style=FINALS, strict=False), ['ang']],
['', dict(style=NORMAL), ['weng']],
['', dict(style=FINALS), ['ueng']],
['', dict(style=FINALS, strict=False), ['eng']],
['', dict(style=NORMAL), ['wu']],
['', dict(style=NORMAL, strict=False), ['wu']],
# NOTE(review): the expected value of the next two TONE rows is empty in this
# copy; the TONE2 rows of the same group ('wu3') suggest 'wǔ' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['wu3']],
['', dict(style=TONE2, strict=False), ['wu3']],
['', dict(style=TONE3), ['wu3']],
['', dict(style=TONE3, strict=False), ['wu3']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['w']],
['', dict(style=FIRST_LETTER), ['w']],
['', dict(style=FIRST_LETTER, strict=False), ['w']],
['', dict(style=FINALS), ['u']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=FINALS_TONE), ['ǔ']],
['', dict(style=FINALS_TONE, strict=False), ['ǔ']],
['', dict(style=FINALS_TONE2), ['u3']],
['', dict(style=FINALS_TONE2, strict=False), ['u3']],
['', dict(style=FINALS_TONE3), ['u3']],
['', dict(style=FINALS_TONE3, strict=False), ['u3']],
['', dict(style=NORMAL), ['wang']],
['', dict(style=NORMAL, strict=False), ['wang']],
['', dict(style=TONE), ['wàng']],
['', dict(style=TONE, strict=False), ['wàng']],
['', dict(style=TONE2), ['wa4ng']],
['', dict(style=TONE2, strict=False), ['wa4ng']],
['', dict(style=TONE3), ['wang4']],
['', dict(style=TONE3, strict=False), ['wang4']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['w']],
['', dict(style=FIRST_LETTER), ['w']],
['', dict(style=FIRST_LETTER, strict=False), ['w']],
['', dict(style=FINALS), ['uang']],
['', dict(style=FINALS, strict=False), ['ang']],
['', dict(style=FINALS_TONE), ['uàng']],
['', dict(style=FINALS_TONE, strict=False), ['àng']],
['', dict(style=FINALS_TONE2), ['ua4ng']],
['', dict(style=FINALS_TONE2, strict=False), ['a4ng']],
['', dict(style=FINALS_TONE3), ['uang4']],
['', dict(style=FINALS_TONE3, strict=False), ['ang4']],
# Finals of the ü-row with no preceding initial are written yu (迂), yue (约),
# yuan (冤).
['', dict(style=NORMAL), ['yu']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=NORMAL), ['yue']],
['', dict(style=FINALS), ['ve']],
['', dict(style=FINALS, strict=False), ['ue']],
['', dict(style=NORMAL), ['yuan']],
['', dict(style=FINALS), ['van']],
['', dict(style=FINALS, strict=False), ['uan']],
['', dict(style=NORMAL), ['yu']],
['', dict(style=NORMAL, strict=False), ['yu']],
# NOTE(review): the expected value of the next two TONE rows is empty in this
# copy; the TONE2 rows of the same group ('yu2') suggest 'yú' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['yu2']],
['', dict(style=TONE2, strict=False), ['yu2']],
['', dict(style=TONE3), ['yu2']],
['', dict(style=TONE3, strict=False), ['yu2']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=FINALS_TONE), ['ǘ']],
['', dict(style=FINALS_TONE, strict=False), ['ú']],
['', dict(style=FINALS_TONE2), ['v2']],
['', dict(style=FINALS_TONE2, strict=False), ['u2']],
['', dict(style=FINALS_TONE3), ['v2']],
['', dict(style=FINALS_TONE3, strict=False), ['u2']],
['', dict(style=NORMAL), ['yue']],
['', dict(style=NORMAL, strict=False), ['yue']],
['', dict(style=TONE), ['yuē']],
['', dict(style=TONE, strict=False), ['yuē']],
['', dict(style=TONE2), ['yue1']],
['', dict(style=TONE2, strict=False), ['yue1']],
['', dict(style=TONE3), ['yue1']],
['', dict(style=TONE3, strict=False), ['yue1']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['ve']],
['', dict(style=FINALS, strict=False), ['ue']],
['', dict(style=FINALS_TONE), ['üē']],
# NOTE(review): the expected value of the next row is empty in this copy; the
# strict row above ('üē') and the FINALS_TONE2 row below ('ue1') suggest
# 'uē' -- confirm.
['', dict(style=FINALS_TONE, strict=False), ['']],
['', dict(style=FINALS_TONE2), ['ve1']],
['', dict(style=FINALS_TONE2, strict=False), ['ue1']],
['', dict(style=FINALS_TONE3), ['ve1']],
['', dict(style=FINALS_TONE3, strict=False), ['ue1']],
['', dict(style=NORMAL), ['yuan']],
['', dict(style=NORMAL, strict=False), ['yuan']],
['', dict(style=TONE), ['yuán']],
['', dict(style=TONE, strict=False), ['yuán']],
['', dict(style=TONE2), ['yua2n']],
['', dict(style=TONE2, strict=False), ['yua2n']],
['', dict(style=TONE3), ['yuan2']],
['', dict(style=TONE3, strict=False), ['yuan2']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['van']],
['', dict(style=FINALS, strict=False), ['uan']],
['', dict(style=FINALS_TONE), ['üán']],
['', dict(style=FINALS_TONE, strict=False), ['uán']],
['', dict(style=FINALS_TONE2), ['va2n']],
['', dict(style=FINALS_TONE2, strict=False), ['ua2n']],
['', dict(style=FINALS_TONE3), ['van2']],
['', dict(style=FINALS_TONE3, strict=False), ['uan2']],
# yun must not be affected by the un -> uen rule.
['', dict(style=NORMAL), ['yun']],
['', dict(style=NORMAL, strict=False), ['yun']],
['', dict(style=TONE), ['yūn']],
['', dict(style=TONE, strict=False), ['yūn']],
['', dict(style=TONE2), ['yu1n']],
['', dict(style=TONE2, strict=False), ['yu1n']],
['', dict(style=TONE3), ['yun1']],
['', dict(style=TONE3, strict=False), ['yun1']],
['', dict(style=INITIALS), ['']],
['', dict(style=INITIALS, strict=False), ['y']],
['', dict(style=FIRST_LETTER), ['y']],
['', dict(style=FIRST_LETTER, strict=False), ['y']],
['', dict(style=FINALS), ['vn']],
['', dict(style=FINALS, strict=False), ['un']],
['', dict(style=FINALS_TONE), ['ǖn']],
['', dict(style=FINALS_TONE, strict=False), ['ūn']],
['', dict(style=FINALS_TONE2), ['v1n']],
['', dict(style=FINALS_TONE2, strict=False), ['u1n']],
['', dict(style=FINALS_TONE3), ['vn1']],
['', dict(style=FINALS_TONE3, strict=False), ['un1']],
@pytest.mark.parametrize('hans, kwargs, result', data_for_zero_consonant)
def test_zero_consonant(hans, kwargs, result):
    """Check one zero-initial row against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
# Cases for the u / ü (written 'v') distinction; each row is
# [hans, kwargs, expected].
# NOTE(review): in this copy every hans entry is the empty string and the list
# has no closing bracket before the decorator that follows. The input
# characters appear to have been lost; restore them from the upstream test
# data before relying on these cases.
data_for_uv = [
# Finals of the ü-row combined with the initials j, q, x are written ju (居),
# qu (区), xu (虚), with the two dots over ü omitted; combined with the
# initials n, l they are still written nü (女), lü (吕).
['', dict(style=NORMAL), ['ju']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=NORMAL), ['qu']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=NORMAL), ['xu']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=NORMAL), ['nv']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['v']],
['', dict(style=NORMAL), ['lv']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['v']],
['', dict(style=NORMAL), ['ju']],
['', dict(style=NORMAL, strict=False), ['ju']],
# NOTE(review): the expected value of the next two TONE rows is empty in this
# copy; the TONE2 rows of the same group ('ju4') suggest 'jù' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['ju4']],
['', dict(style=TONE2, strict=False), ['ju4']],
['', dict(style=TONE3), ['ju4']],
['', dict(style=TONE3, strict=False), ['ju4']],
['', dict(style=INITIALS), ['j']],
['', dict(style=INITIALS, strict=False), ['j']],
['', dict(style=FIRST_LETTER), ['j']],
['', dict(style=FIRST_LETTER, strict=False), ['j']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=FINALS_TONE), ['ǜ']],
['', dict(style=FINALS_TONE, strict=False), ['ù']],
['', dict(style=FINALS_TONE2), ['v4']],
['', dict(style=FINALS_TONE2, strict=False), ['u4']],
['', dict(style=FINALS_TONE3), ['v4']],
['', dict(style=FINALS_TONE3, strict=False), ['u4']],
['', dict(style=NORMAL), ['qu']],
['', dict(style=NORMAL, strict=False), ['qu']],
# NOTE(review): expected value empty in this copy; the TONE2 rows ('qu3')
# suggest 'qǔ' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['qu3']],
['', dict(style=TONE2, strict=False), ['qu3']],
['', dict(style=TONE3), ['qu3']],
['', dict(style=TONE3, strict=False), ['qu3']],
['', dict(style=INITIALS), ['q']],
['', dict(style=INITIALS, strict=False), ['q']],
['', dict(style=FIRST_LETTER), ['q']],
['', dict(style=FIRST_LETTER, strict=False), ['q']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=FINALS_TONE), ['ǚ']],
['', dict(style=FINALS_TONE, strict=False), ['ǔ']],
['', dict(style=FINALS_TONE2), ['v3']],
['', dict(style=FINALS_TONE2, strict=False), ['u3']],
['', dict(style=FINALS_TONE3), ['v3']],
['', dict(style=FINALS_TONE3, strict=False), ['u3']],
['', dict(style=NORMAL), ['xu']],
['', dict(style=NORMAL, strict=False), ['xu']],
# NOTE(review): expected value empty in this copy; the TONE2 rows ('xu2')
# suggest 'xú' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['xu2']],
['', dict(style=TONE2, strict=False), ['xu2']],
['', dict(style=TONE3), ['xu2']],
['', dict(style=TONE3, strict=False), ['xu2']],
['', dict(style=INITIALS), ['x']],
['', dict(style=INITIALS, strict=False), ['x']],
['', dict(style=FIRST_LETTER), ['x']],
['', dict(style=FIRST_LETTER, strict=False), ['x']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['u']],
['', dict(style=FINALS_TONE), ['ǘ']],
['', dict(style=FINALS_TONE, strict=False), ['ú']],
['', dict(style=FINALS_TONE2), ['v2']],
['', dict(style=FINALS_TONE2, strict=False), ['u2']],
['', dict(style=FINALS_TONE3), ['v2']],
['', dict(style=FINALS_TONE3, strict=False), ['u2']],
['', dict(style=NORMAL), ['nv']],
['', dict(style=NORMAL, strict=False), ['nv']],
# NOTE(review): expected value empty in this copy; the TONE2 rows ('nv3')
# suggest 'nǚ' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['nv3']],
['', dict(style=TONE2, strict=False), ['nv3']],
['', dict(style=TONE3), ['nv3']],
['', dict(style=TONE3, strict=False), ['nv3']],
['', dict(style=INITIALS), ['n']],
['', dict(style=INITIALS, strict=False), ['n']],
['', dict(style=FIRST_LETTER), ['n']],
['', dict(style=FIRST_LETTER, strict=False), ['n']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['v']],
['', dict(style=FINALS_TONE), ['ǚ']],
['', dict(style=FINALS_TONE, strict=False), ['ǚ']],
['', dict(style=FINALS_TONE2), ['v3']],
['', dict(style=FINALS_TONE2, strict=False), ['v3']],
['', dict(style=FINALS_TONE3), ['v3']],
['', dict(style=FINALS_TONE3, strict=False), ['v3']],
['', dict(style=NORMAL), ['lv']],
['', dict(style=NORMAL, strict=False), ['lv']],
# NOTE(review): expected value empty in this copy; the TONE2 rows ('lv3')
# suggest 'lǚ' -- confirm.
['', dict(style=TONE), ['']],
['', dict(style=TONE, strict=False), ['']],
['', dict(style=TONE2), ['lv3']],
['', dict(style=TONE2, strict=False), ['lv3']],
['', dict(style=TONE3), ['lv3']],
['', dict(style=TONE3, strict=False), ['lv3']],
['', dict(style=INITIALS), ['l']],
['', dict(style=INITIALS, strict=False), ['l']],
['', dict(style=FIRST_LETTER), ['l']],
['', dict(style=FIRST_LETTER, strict=False), ['l']],
['', dict(style=FINALS), ['v']],
['', dict(style=FINALS, strict=False), ['v']],
['', dict(style=FINALS_TONE), ['ǚ']],
['', dict(style=FINALS_TONE, strict=False), ['ǚ']],
['', dict(style=FINALS_TONE2), ['v3']],
['', dict(style=FINALS_TONE2, strict=False), ['v3']],
['', dict(style=FINALS_TONE3), ['v3']],
['', dict(style=FINALS_TONE3, strict=False), ['v3']],
@pytest.mark.parametrize('hans, kwargs, result', data_for_uv)
def test_uv(hans, kwargs, result):
    """Check one u/ü row against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
# Cases for the iou -> iu contraction; each row is [hans, kwargs, expected].
# NOTE(review): in this copy of the file the input characters were empty
# strings, one expected value was empty, and the list was never closed. The
# inputs are restored from the characters named in the rule comment below
# (牛, 归, 论); the empty FINALS_TONE strict=False expectation is restored to
# 'iú', following the strict 'ioú' row and the 'iu2' rows of the same group.
# Confirm against the upstream pypinyin test suite.
data_for_iou = [
    # When iou, uei, uen follow an initial they are written iu, ui, un,
    # e.g. niu (牛), gui (归), lun (论).
    ['牛', dict(style=NORMAL), ['niu']],
    ['牛', dict(style=FINALS), ['iou']],
    ['牛', dict(style=FINALS, strict=False), ['iu']],
    ['归', dict(style=NORMAL), ['gui']],
    ['归', dict(style=FINALS), ['uei']],
    ['归', dict(style=FINALS, strict=False), ['ui']],
    ['论', dict(style=NORMAL), ['lun']],
    ['论', dict(style=FINALS), ['uen']],
    ['论', dict(style=FINALS, strict=False), ['un']],
    ['牛', dict(style=NORMAL), ['niu']],
    ['牛', dict(style=NORMAL, strict=False), ['niu']],
    ['牛', dict(style=TONE), ['niú']],
    ['牛', dict(style=TONE, strict=False), ['niú']],
    ['牛', dict(style=TONE2), ['niu2']],
    ['牛', dict(style=TONE2, strict=False), ['niu2']],
    ['牛', dict(style=TONE3), ['niu2']],
    ['牛', dict(style=TONE3, strict=False), ['niu2']],
    ['牛', dict(style=INITIALS), ['n']],
    ['牛', dict(style=INITIALS, strict=False), ['n']],
    ['牛', dict(style=FIRST_LETTER), ['n']],
    ['牛', dict(style=FIRST_LETTER, strict=False), ['n']],
    ['牛', dict(style=FINALS), ['iou']],
    ['牛', dict(style=FINALS, strict=False), ['iu']],
    ['牛', dict(style=FINALS_TONE), ['ioú']],
    ['牛', dict(style=FINALS_TONE, strict=False), ['iú']],
    ['牛', dict(style=FINALS_TONE2), ['iou2']],
    ['牛', dict(style=FINALS_TONE2, strict=False), ['iu2']],
    ['牛', dict(style=FINALS_TONE3), ['iou2']],
    ['牛', dict(style=FINALS_TONE3, strict=False), ['iu2']],
]
@pytest.mark.parametrize('hans, kwargs, result', data_for_iou)
def test_iou(hans, kwargs, result):
    """Check one iou/iu row against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
# Cases for the uei -> ui contraction; each row is [hans, kwargs, expected].
# NOTE(review): in this copy of the file the input character was an empty
# string, one expected value was empty, and the list was never closed. The
# rows expect a third-tone "gui" (guǐ / gui3), so 鬼 is used as the input --
# presumably the original character; confirm against the upstream pypinyin
# test suite. The empty FINALS_TONE strict=False expectation is restored to
# 'uǐ', following the strict 'ueǐ' row and the 'ui3' rows.
data_for_uei = [
    # When iou, uei, uen follow an initial they are written iu, ui, un,
    # e.g. niu (牛), gui (归), lun (论).
    ['鬼', dict(style=NORMAL), ['gui']],
    ['鬼', dict(style=NORMAL, strict=False), ['gui']],
    ['鬼', dict(style=TONE), ['guǐ']],
    ['鬼', dict(style=TONE, strict=False), ['guǐ']],
    ['鬼', dict(style=TONE2), ['gui3']],
    ['鬼', dict(style=TONE2, strict=False), ['gui3']],
    ['鬼', dict(style=TONE3), ['gui3']],
    ['鬼', dict(style=TONE3, strict=False), ['gui3']],
    ['鬼', dict(style=INITIALS), ['g']],
    ['鬼', dict(style=INITIALS, strict=False), ['g']],
    ['鬼', dict(style=FIRST_LETTER), ['g']],
    ['鬼', dict(style=FIRST_LETTER, strict=False), ['g']],
    ['鬼', dict(style=FINALS), ['uei']],
    ['鬼', dict(style=FINALS, strict=False), ['ui']],
    ['鬼', dict(style=FINALS_TONE), ['ueǐ']],
    ['鬼', dict(style=FINALS_TONE, strict=False), ['uǐ']],
    ['鬼', dict(style=FINALS_TONE2), ['uei3']],
    ['鬼', dict(style=FINALS_TONE2, strict=False), ['ui3']],
    ['鬼', dict(style=FINALS_TONE3), ['uei3']],
    ['鬼', dict(style=FINALS_TONE3, strict=False), ['ui3']],
]
@pytest.mark.parametrize('hans, kwargs, result', data_for_uei)
def test_uei(hans, kwargs, result):
    """Check one uei/ui row against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
# Cases for the uen -> un contraction; each row is [hans, kwargs, expected].
# NOTE(review): in this copy of the file the input character was an empty
# string and the list was never closed. The input is restored to 论, the
# character the rule comment below gives for "lun" and whose fourth tone
# matches the expected 'lùn' / 'lun4' -- confirm against the upstream
# pypinyin test suite.
data_for_uen = [
    # When iou, uei, uen follow an initial they are written iu, ui, un,
    # e.g. niu (牛), gui (归), lun (论).
    ['论', dict(style=NORMAL), ['lun']],
    ['论', dict(style=TONE), ['lùn']],
    ['论', dict(style=TONE, strict=False), ['lùn']],
    ['论', dict(style=TONE2), ['lu4n']],
    ['论', dict(style=TONE2, strict=False), ['lu4n']],
    ['论', dict(style=TONE3), ['lun4']],
    ['论', dict(style=TONE3, strict=False), ['lun4']],
    ['论', dict(style=INITIALS), ['l']],
    ['论', dict(style=INITIALS, strict=False), ['l']],
    ['论', dict(style=FIRST_LETTER), ['l']],
    ['论', dict(style=FIRST_LETTER, strict=False), ['l']],
    ['论', dict(style=FINALS), ['uen']],
    ['论', dict(style=FINALS, strict=False), ['un']],
    ['论', dict(style=FINALS_TONE), ['ùen']],
    ['论', dict(style=FINALS_TONE, strict=False), ['ùn']],
    ['论', dict(style=FINALS_TONE2), ['u4en']],
    ['论', dict(style=FINALS_TONE2, strict=False), ['u4n']],
    ['论', dict(style=FINALS_TONE3), ['uen4']],
    ['论', dict(style=FINALS_TONE3, strict=False), ['un4']],
]
@pytest.mark.parametrize('hans, kwargs, result', data_for_uen)
def test_uen(hans, kwargs, result):
    """Check one uen/un row against both pypinyin entry points.

    ``lazy_pinyin`` must return ``result``; ``pinyin`` must return ``result``
    wrapped in a one-element outer list.
    """
    lazy_output = lazy_pinyin(hans, **kwargs)
    assert lazy_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
if __name__ == '__main__':
import pytest