PaddleSpeech/third_party/python-pinyin/pypinyin/contrib/_tone_rule.py

from typing import Optional
from typing import Text


def right_mark_index(pinyin_no_tone: Text) -> Optional[int]:
    """Return the index of the letter that should carry the tone mark.

    The placement follows the traditional mnemonic for Pinyin tone marks:

    * If there is an ``a``, mark it; otherwise look for ``o``, then ``e``.
    * When ``i`` and ``u`` appear side by side (``iu`` / ``ui``), the mark
      goes on whichever letter comes second.
    * Otherwise mark the single vowel, tried in the order ``i``, ``u``,
      ``v``, ``ü``.
    * As a last resort, fall back to ``n``, ``m`` or ``ê``.

    References:
        http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
        https://www.zhihu.com/question/23655297
        https://github.com/mozillazg/python-pinyin/issues/160
        http://www.pinyin.info/rules/where.html

    :param pinyin_no_tone: a pinyin string without any tone mark.
    :return: the zero-based position of the letter to mark, or ``None``
        when none of the candidate letters occurs in the string.
    """
    # Each rule is (candidates in priority order, offset added to the
    # position of the match).  Rules are evaluated top to bottom and the
    # first candidate found wins.
    placement_rules = (
        # "a" always wins; without "a" look for "o", then "e".
        (('a', 'o', 'e'), 0),
        # Adjacent i/u: the second letter of the pair takes the mark,
        # hence the offset of one past the start of the pair.
        (('iu', 'ui'), 1),
        # A lone remaining vowel.
        (('i', 'u', 'v', 'ü'), 0),
        # Vowel-less syllables and ê.
        (('n', 'm', 'ê'), 0),
    )

    for candidates, offset in placement_rules:
        for candidate in candidates:
            position = pinyin_no_tone.find(candidate)
            if position != -1:
                return position + offset

    # No letter in the string can carry a tone mark.
    return None
-												E2E/Streaming Transformer/Conformer ASR (#578)

* add cmvn and label smoothing loss layer

* add layer for transformer

* add glu and conformer conv

* add torch compatible hack, mask funcs

* not hack size since it exists

* add test; attention

* add attention, common utils, hack paddle

* add audio utils

* conformer batch padding mask bug fix #223

* fix typo, python infer fix rnn mem opt name error and batchnorm1d, will be available at 2.0.2

* fix ci

* fix ci

* add encoder

* refactor egs

* add decoder

* refactor ctc, add ctc align, refactor ckpt, add warmup lr scheduler, cmvn utils

* refactor docs

* add fix

* fix readme

* fix bugs, refactor collator, add pad_sequence, fix ckpt bugs

* fix docstring

* refactor data feed order

* add u2 model

* refactor cmvn, test

* add utils

* add u2 config

* fix bugs

* fix bugs

* fix autograd maybe has problem when using inplace operation

* refactor data, build vocab; add format data

* fix text featurizer

* refactor build vocab

* add fbank, refactor feature of speech

* refactor audio feat

* refactor data prepare

* refactor data

* model init from config

* add u2 bins

* flake8

* can train

* fix bugs, add coverage, add scripts

* test can run

* fix data

* speed perturb with sox

* add spec aug

* fix for train

* fix train logic

* fix logger

* log valid loss, time dataset process

* using np for speed perturb, remove some debug log of grad clip

* fix logger

* fix build vocab

* fix logger name

* using module logger as default

* fix

* fix install

* reorder imports

* fix board logger

* fix logger

* kaldi fbank and mfcc

* fix cmvn and print params

* fix add_eos_sos and cmvn

* fix cmvn compute

* fix logger and cmvn

* fix subsampling, label smoothing loss, remove useless

* add notebook test

* fix log

* fix tb logger

* multi gpu valid

* fix log

* fix log

* fix config

* fix compute cmvn, need paddle 2.1

* add cmvn notebook

* fix layer tools

* fix compute cmvn

* add rtf

* fix decoding

* fix layer tools

* fix log, add avg script

* more avg and test info

* fix dataset pickle problem; using 2.1 paddle; num_workers can > 0; ckpt save in exp dir;fix setup.sh;

* add vimrc

* refactor tiny script, add transformer and stream conf

* spm demo; librispeech scripts and confs

* fix log

* add librispeech scripts

* refactor data pipe; fix conf; fix u2 default params

* fix bugs

* refactor aishell scripts

* fix test

* fix cmvn

* fix s0 scripts

* fix ds2 scripts and bugs

* fix dev & test dataset filter

* fix dataset filter

* filter dev

* fix ckpt path

* filter test, since librispeech will cause OOM, but all test wer will be worse, since mismatch train with test

* add comment

* add syllable doc

* fix ds2 configs

* add doc

* add pypinyin tools

* fix decoder using blank_id=0

* mmseg with pybind11

* format code
											
										
										
											4 years ago
+								from typing import Optional
 								from typing import Text
 								def right_mark_index(pinyin_no_tone: Text) -> Optional[int]:
 								    """
 								    标调位置
 								        有 ɑ 不放过，
 								    　　没 ɑ 找 o、e；
 								    　　ɑ、o、e、i、u、ü
 								    　　标调就按这顺序；
 								    　　i、u 若是连在一起，
 								    　　谁在后面就标谁。
 								    有ɑ不放过（有ɑ一定要标在ɑ上）；
 								    无ɑ找oe（没有ɑ的时候标在o上,如果没有o则标在e上）;
 								    iu并列标在后（iu, ui的情况,标在后面的字母上,比如说iu应该标u,ui应该标i）；
 								    单个韵母不用说（只能标在单韵母上）
 								    http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
 								    https://www.zhihu.com/question/23655297
 								    https://github.com/mozillazg/python-pinyin/issues/160
 								    http://www.pinyin.info/rules/where.html
 								    """
 								    # 有 ɑ 不放过, 没 ɑ 找 o、e
 								    for c in ['a', 'o', 'e']:
 								        if c in pinyin_no_tone:
 								            return pinyin_no_tone.index(c)
 								    # i、u 若是连在一起，谁在后面就标谁
 								    for c in ['iu', 'ui']:
 								        if c in pinyin_no_tone:
 								            return pinyin_no_tone.index(c) + 1
 								    # ɑ、o、e、i、u、ü
 								    for c in ['i', 'u', 'v', 'ü']:
 								        if c in pinyin_no_tone:
 								            return pinyin_no_tone.index(c)
 								    # n, m, ê
 								    for c in ['n', 'm', 'ê']:
 								        if c in pinyin_no_tone:
 								            return pinyin_no_tone.index(c)