PaddleSpeech/third_party/python-pinyin/pypinyin/runner.py

#!/usr/bin/env python3

from typing import Union, Text, ByteString
import logging
import sys
from argparse import ArgumentParser

import pypinyin

# Maps every value accepted by the ``--style`` command-line option to a
# pypinyin style constant. Each style is registered under its upper-case
# constant name and, for most styles, under a lower-case alias as well.
# NOTE(review): the aliases look like renderings of the syllable "zhao" with
# tone 4 in the corresponding style (e.g. 'zhao4' for TONE3) -- confirm
# against the pypinyin style documentation.
# The insertion order is user-visible: get_parser() passes style_map.keys()
# to argparse as the ``choices`` of ``--style``.
style_map = {
    'NORMAL': pypinyin.Style.NORMAL,
    'zhao': pypinyin.Style.NORMAL,
    'TONE': pypinyin.Style.TONE,
    'zh4ao': pypinyin.Style.TONE,
    'TONE2': pypinyin.Style.TONE2,
    'zha4o': pypinyin.Style.TONE2,
    'TONE3': pypinyin.Style.TONE3,
    'zhao4': pypinyin.Style.TONE3,
    'INITIALS': pypinyin.Style.INITIALS,
    'zh': pypinyin.Style.INITIALS,
    'FIRST_LETTER': pypinyin.Style.FIRST_LETTER,
    'z': pypinyin.Style.FIRST_LETTER,
    'FINALS': pypinyin.Style.FINALS,
    'ao': pypinyin.Style.FINALS,
    'FINALS_TONE': pypinyin.Style.FINALS_TONE,
    '4ao': pypinyin.Style.FINALS_TONE,
    'FINALS_TONE2': pypinyin.Style.FINALS_TONE2,
    'a4o': pypinyin.Style.FINALS_TONE2,
    'FINALS_TONE3': pypinyin.Style.FINALS_TONE3,
    'ao4': pypinyin.Style.FINALS_TONE3,
    # The remaining styles have no lower-case alias.
    'BOPOMOFO': pypinyin.Style.BOPOMOFO,
    'BOPOMOFO_FIRST': pypinyin.Style.BOPOMOFO_FIRST,
    'CYRILLIC': pypinyin.Style.CYRILLIC,
    'CYRILLIC_FIRST': pypinyin.Style.CYRILLIC_FIRST,
}

# Maps the function names accepted by ``--func`` to the pypinyin callables.
# NOTE(review): nothing in the visible part of this file reads func_map;
# main() resolves the function with getattr(pypinyin, ...) instead -- check
# for external users before removing it.
func_map = {
    'pinyin': pypinyin.pinyin,
    'slug': pypinyin.slug,
}

# Style key used when ``--style`` is not given; in style_map it is the alias
# of pypinyin.Style.TONE.
default_style = 'zh4ao'


class NullWriter():
    """Stream sink that discards everything written to it.

    Works like ``/dev/null`` on Linux/Unix. It is meant to be assigned to
    ``sys.stdout`` / ``sys.stderr`` to silence unwanted output, so it offers
    the two methods that writers to those streams normally call: ``write``
    and ``flush``.
    """

    def write(self, string: Union[str, bytes, bytearray, memoryview]) -> None:
        """Accept ``string`` and drop it without storing or forwarding it."""

    def flush(self) -> None:
        """Do nothing; no data is ever buffered.

        Present because code that writes to ``sys.stdout`` / ``sys.stderr``
        (for example ``print(..., flush=True)``) calls ``flush()`` on the
        stream; without this method such a call would raise
        ``AttributeError`` while a ``NullWriter`` is installed.
        """


def get_parser() -> ArgumentParser:
    """Build the argument parser for the command-line interface.

    Options are registered in this order: ``-V/--version``, ``-f/--func``,
    ``-s/--style``, ``-p/--separator``, ``-e/--errors``, ``-m/--heteronym``,
    followed by the positional ``hans`` argument.

    Returns:
        The configured ``ArgumentParser``.
    """
    version_text = '{0} {1}'.format(pypinyin.__title__, pypinyin.__version__)
    style_help = 'pinyin style (default: "{0}")'.format(default_style)
    errors_help = ('how to handle none-pinyin string'
                   ' (default: "default")')

    # (flags, keyword settings) pairs, in registration order.
    argument_table = (
        (('-V', '--version'),
         dict(action='version', version=version_text)),
        # Name of the function to run.
        (('-f', '--func'),
         dict(help='function name (default: "pinyin")',
              choices=['pinyin', 'slug'],
              default='pinyin')),
        # Pinyin style.
        (('-s', '--style'),
         dict(help=style_help,
              choices=style_map.keys(),
              default=default_style)),
        (('-p', '--separator'),
         dict(help='slug separator (default: "-")', default='-')),
        (('-e', '--errors'),
         dict(help=errors_help,
              choices=['default', 'ignore', 'replace'],
              default='default')),
        # Emit heteronyms.
        (('-m', '--heteronym'),
         dict(help='enable heteronym', action='store_true')),
        # The Chinese text to convert.
        (('hans',),
         dict(help='chinese string')),
    )

    cli = ArgumentParser(description='convert chinese to pinyin.')
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)
    return cli


def main():
    """Command-line entry point: convert Chinese text to pinyin and print it.

    Arguments come from ``sys.argv[1:]``. When standard input is not a
    terminal, its content is read, stripped and, if non-empty, appended to
    the argument list, so piped data can supply the ``hans`` positional.

    The selected pypinyin function is called with stdout/stderr temporarily
    replaced by a ``NullWriter``. The result is then printed to stdout:

    * an empty result prints an empty line;
    * a list/tuple whose first element is itself a list/tuple is printed
      with each element joined by ``,`` and the elements joined by a space;
    * anything else is printed as is.
    """
    # Suppress every log record of severity CRITICAL and below, i.e. all
    # standard levels, so logging cannot pollute the command output.
    logging.disable(logging.CRITICAL)

    # Read hans from stdin when data is piped in. sys.stdin can be None
    # (no console attached); there is nothing to read in that case.
    stdin = sys.stdin
    if stdin is not None and not stdin.isatty():
        pipe_data = stdin.read().strip()
    else:
        pipe_data = ''

    args = sys.argv[1:]
    if pipe_data:
        args.append(pipe_data)

    # Parse command-line options and arguments.
    options = get_parser().parse_args(args)

    func = getattr(pypinyin, options.func)
    style = style_map[options.style]

    # Keyword arguments per function; only slug takes a separator. Keyed on
    # the validated option value rather than on func.__name__, which would
    # break if the library ever wrapped or renamed the callable.
    kwargs = {'heteronym': options.heteronym, 'errors': options.errors}
    if options.func == 'slug':
        kwargs['separator'] = options.separator

    # Swap stdout/stderr for a sink during the conversion so that nothing
    # printed by the library pollutes the command's output (the original
    # author notes this is mainly to silence print statements in jieba).
    saved_stdout, saved_stderr = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = NullWriter()
    try:
        result = func(options.hans, style=style, **kwargs)
    finally:
        # Always put back the streams that were installed before, even when
        # the conversion raises; otherwise the traceback itself would be
        # written to the sink and the failure would go unnoticed.
        sys.stdout, sys.stderr = saved_stdout, saved_stderr

    if not result:
        print('')
    elif (isinstance(result, (list, tuple))
          and isinstance(result[0], (list, tuple))):
        print(' '.join([','.join(s) for s in result]))
    else:
        print(result)


if __name__ == '__main__':
    main()
E2E/Streaming Transformer/Conformer ASR (#578) * add cmvn and label smoothing loss layer * add layer for transformer * add glu and conformer conv * add torch compatiable hack, mask funcs * not hack size since it exists * add test; attention * add attention, common utils, hack paddle * add audio utils * conformer batch padding mask bug fix #223 * fix typo, python infer fix rnn mem opt name error and batchnorm1d, will be available at 2.0.2 * fix ci * fix ci * add encoder * refactor egs * add decoder * refactor ctc, add ctc align, refactor ckpt, add warmup lr scheduler, cmvn utils * refactor docs * add fix * fix readme * fix bugs, refactor collator, add pad_sequence, fix ckpt bugs * fix docstring * refactor data feed order * add u2 model * refactor cmvn, test * add utils * add u2 config * fix bugs * fix bugs * fix autograd maybe has problem when using inplace operation * refactor data, build vocab; add format data * fix text featurizer * refactor build vocab * add fbank, refactor feature of speech * refactor audio feat * refactor data preprare * refactor data * model init from config * add u2 bins * flake8 * can train * fix bugs, add coverage, add scripts * test can run * fix data * speed perturb with sox * add spec aug * fix for train * fix train logitc * fix logger * log valid loss, time dataset process * using np for speed perturb, remove some debug log of grad clip * fix logger * fix build vocab * fix logger name * using module logger as default * fix * fix install * reorder imports * fix board logger * fix logger * kaldi fbank and mfcc * fix cmvn and print prarams * fix add_eos_sos and cmvn * fix cmvn compute * fix logger and cmvn * fix subsampling, label smoothing loss, remove useless * add notebook test * fix log * fix tb logger * multi gpu valid * fix log * fix log * fix config * fix compute cmvn, need paddle 2.1 * add cmvn notebook * fix layer tools * fix compute cmvn * add rtf * fix decoding * fix layer tools * fix log, add avg script * more avg and test 
info * fix dataset pickle problem; using 2.1 paddle; num_workers can > 0; ckpt save in exp dir;fix setup.sh; * add vimrc * refactor tiny script, add transformer and stream conf * spm demo; librisppech scripts and confs * fix log * add librispeech scripts * refactor data pipe; fix conf; fix u2 default params * fix bugs * refactor aishell scripts * fix test * fix cmvn * fix s0 scripts * fix ds2 scripts and bugs * fix dev & test dataset filter * fix dataset filter * filter dev * fix ckpt path * filter test, since librispeech will cause OOM, but all test wer will be worse, since mismatch train with test * add comment * add syllable doc * fix ds2 configs * add doc * add pypinyin tools * fix decoder using blank_id=0 * mmseg with pybind11 * format code 3 years ago			`#!/usr/bin/env python3`

			`from typing import Union, Text, ByteString`
			`import logging`
			`import sys`
			`from argparse import ArgumentParser`

			`import pypinyin`

			`style_map = {`
			`'NORMAL': pypinyin.Style.NORMAL,`
			`'zhao': pypinyin.Style.NORMAL,`
			`'TONE': pypinyin.Style.TONE,`
			`'zh4ao': pypinyin.Style.TONE,`
			`'TONE2': pypinyin.Style.TONE2,`
			`'zha4o': pypinyin.Style.TONE2,`
			`'TONE3': pypinyin.Style.TONE3,`
			`'zhao4': pypinyin.Style.TONE3,`
			`'INITIALS': pypinyin.Style.INITIALS,`
			`'zh': pypinyin.Style.INITIALS,`
			`'FIRST_LETTER': pypinyin.Style.FIRST_LETTER,`
			`'z': pypinyin.Style.FIRST_LETTER,`
			`'FINALS': pypinyin.Style.FINALS,`
			`'ao': pypinyin.Style.FINALS,`
			`'FINALS_TONE': pypinyin.Style.FINALS_TONE,`
			`'4ao': pypinyin.Style.FINALS_TONE,`
			`'FINALS_TONE2': pypinyin.Style.FINALS_TONE2,`
			`'a4o': pypinyin.Style.FINALS_TONE2,`
			`'FINALS_TONE3': pypinyin.Style.FINALS_TONE3,`
			`'ao4': pypinyin.Style.FINALS_TONE3,`
			`'BOPOMOFO': pypinyin.Style.BOPOMOFO,`
			`'BOPOMOFO_FIRST': pypinyin.Style.BOPOMOFO_FIRST,`
			`'CYRILLIC': pypinyin.Style.CYRILLIC,`
			`'CYRILLIC_FIRST': pypinyin.Style.CYRILLIC_FIRST,`
			`}`

			`func_map = {`
			`'pinyin': pypinyin.pinyin,`
			`'slug': pypinyin.slug,`
			`}`

			`default_style = 'zh4ao'`


			`class NullWriter():`
			`"""数据流黑洞，类似 linux/unix 下 /dev/null 的效果。"""`

			`def write(self, string: Union[Text, ByteString]) -> None:`
			`pass`


			`def get_parser() -> ArgumentParser:`
			`parser = ArgumentParser(description='convert chinese to pinyin.')`
			`parser.add_argument(`
			`'-V',`
			`'--version',`
			`action='version',`
			`version='{0} {1}'.format(pypinyin.__title__, pypinyin.__version__))`
			`# 要执行的函数名称`
			`parser.add_argument(`
			`'-f',`
			`'--func',`
			`help='function name (default: "pinyin")',`
			`choices=['pinyin', 'slug'],`
			`default='pinyin')`
			`# 拼音风格`
			`parser.add_argument(`
			`'-s',`
			`'--style',`
			`help='pinyin style (default: "{0}")'.format(default_style),`
			`choices=style_map.keys(),`
			`default=default_style)`
			`parser.add_argument(`
			`'-p', '--separator', help='slug separator (default: "-")', default='-')`
			`parser.add_argument(`
			`'-e',`
			`'--errors',`
			`help=('how to handle none-pinyin string'`
			`' (default: "default")'),`
			`choices=['default', 'ignore', 'replace'],`
			`default='default')`
			`# 输出多音字`
			`parser.add_argument(`
			`'-m', '--heteronym', help='enable heteronym', action='store_true')`
			`# 要查询的汉字`
			`parser.add_argument('hans', help='chinese string')`
			`return parser`


			`def main():`
			`# 禁用除 CRITICAL 外的日志消息`
			`logging.disable(logging.CRITICAL)`

			`# read hans from stdin`
			`if not sys.stdin.isatty():`
			`pipe_data = sys.stdin.read().strip()`
			`else:`
			`pipe_data = ''`

			`args = sys.argv[1:]`
			`if pipe_data:`
			`args.append(pipe_data)`

			`# 获取命令行选项和参数`
			`parser = get_parser()`
			`options = parser.parse_args(args)`

			`hans = options.hans`
			`func = getattr(pypinyin, options.func)`
			`style = style_map[options.style]`
			`heteronym = options.heteronym`
			`separator = options.separator`
			`errors = options.errors`

			`func_kwargs = {`
			`'pinyin': {`
			`'heteronym': heteronym,`
			`'errors': errors`
			`},`
			`'slug': {`
			`'heteronym': heteronym,`
			`'separator': separator,`
			`'errors': errors`
			`},`
			`}`

			`kwargs = func_kwargs[func.__name__]`

			`# 重设标准输出流和标准错误流`
			`# 不输出任何字符，防止污染命令行命令的输出结果`
			`# 其实主要是为了干掉 jieba 内的 print 语句 ;)`
			`sys.stdout = sys.stderr = NullWriter()`
			`result = func(hans, style=style, **kwargs)`
			`# 恢复默认`
			`sys.stdout = sys.__stdout__`
			`sys.stderr = sys.__stderr__`

			`if not result:`
			`print('')`
			`elif result and isinstance(result, (list, tuple)):`
			`if isinstance(result[0], (list, tuple)):`
			`print(' '.join([','.join(s) for s in result]))`
			`else:`
			`print(result)`
			`else:`
			`print(result)`


			`if __name__ == '__main__':`
			`main()`