From 5d4f3fbd7b2b81abebe87641dd25f1e2ebb1e53d Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 21 May 2021 09:55:52 +0000
Subject: [PATCH] format

---
 README.md                               |  2 +-
 deepspeech/__init__.py                  |  1 -
 deepspeech/frontend/normalizer.py       |  3 ++-
 deepspeech/modules/mask.py              |  1 +
 doc/src/alignment.md                    |  1 -
 doc/src/asr_text_backend.md             |  2 +-
 doc/src/benchmark.md                    |  1 -
 doc/src/chinese_syllable.md             |  2 +-
 doc/src/dataset.md                      |  2 +-
 doc/src/decoding.md                     |  1 -
 doc/src/feature_list.md                 |  2 +-
 doc/src/ngram_lm.md                     |  2 +-
 doc/src/praat_textgrid.md               | 15 ++++++------
 doc/src/tools.md                        |  1 -
 doc/src/tts_text_front_end.md           |  6 ++---
 doc/src/vad.md                          |  1 -
 examples/aishell/s1/conf/conformer.yaml |  4 ++--
 examples/cc-cedict/local/parser.py      | 31 +++++++++++++++----------
 requirements.txt                        |  4 ++--
 third_party/phkit/README.md             |  2 +-
 20 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/README.md b/README.md
index a2de1783a..424dc485e 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 
 ## Features
 
- See [feature list](doc/src/feature_list.md) for more information. 
+ See [feature list](doc/src/feature_list.md) for more information.
 
 ## Setup
 
diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py
index ac9ccdc77..c942de0cf 100644
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -421,7 +421,6 @@ logger.warn(
 )
 F.ctc_loss = ctc_loss
 
-
 ########### hack paddle.nn #############
 if not hasattr(paddle.nn, 'Module'):
     logger.warn("register user Module to paddle.nn, remove this when fixed!")
diff --git a/deepspeech/frontend/normalizer.py b/deepspeech/frontend/normalizer.py
index 6b224080b..287b51e58 100644
--- a/deepspeech/frontend/normalizer.py
+++ b/deepspeech/frontend/normalizer.py
@@ -179,7 +179,8 @@ class FeatureNormalizer(object):
 
             wav_number += batch_size
             if wav_number % 1000 == 0:
-                logger.info(f'process {wav_number} wavs,{all_number} frames.')
+                logger.info(
+                    f'process {wav_number} wavs, {all_number} frames.')
 
         self.cmvn_info = {
             'mean_stat': list(all_mean_stat.tolist()),
diff --git a/deepspeech/modules/mask.py b/deepspeech/modules/mask.py
index c506f127b..74d4e30a6 100644
--- a/deepspeech/modules/mask.py
+++ b/deepspeech/modules/mask.py
@@ -23,6 +23,7 @@ __all__ = [
     "mask_finished_preds"
 ]
 
+
 def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
     """Make mask tensor containing indices of padded part.
     See description of make_non_pad_mask.
diff --git a/doc/src/alignment.md b/doc/src/alignment.md
index fa63894f2..9d3231c89 100644
--- a/doc/src/alignment.md
+++ b/doc/src/alignment.md
@@ -18,4 +18,3 @@
 
 * [ctc alignment](https://mp.weixin.qq.com/s/4aGehNN7PpIvCh03qTT5oA)
 * [Timestamps and N-Best](https://mp.weixin.qq.com/s?__biz=MzU2NjUwMTgxOQ==&mid=2247483956&idx=1&sn=80ce595238d84155d50f08c0d52267d3&chksm=fcaacae0cbdd43f62b1da60c8e8671a9e0bb2aeee94f58751839b03a1c45b9a3889b96705080&scene=21#wechat_redirect)
-
diff --git a/doc/src/asr_text_backend.md b/doc/src/asr_text_backend.md
index 879e56f8a..c3c9896c7 100644
--- a/doc/src/asr_text_backend.md
+++ b/doc/src/asr_text_backend.md
@@ -98,4 +98,4 @@
 
 ## Text Filter
 
-* Sensitive words (porn/violence, politics-related, illegal/prohibited, etc.)
\ No newline at end of file
+* Sensitive words (porn/violence, politics-related, illegal/prohibited, etc.)
diff --git a/doc/src/benchmark.md b/doc/src/benchmark.md
index f3af25552..9c1c86fd7 100644
--- a/doc/src/benchmark.md
+++ b/doc/src/benchmark.md
@@ -14,4 +14,3 @@ We compare the training time with 1, 2, 4, 8 Tesla V100 GPUs (with a subset of L
 | 8   | 6.95 X |
 
 `utils/profile.sh` provides such a demo profiling tool; you can change it as needed.
-
diff --git a/doc/src/chinese_syllable.md b/doc/src/chinese_syllable.md
index b7fd93223..3ada44f4e 100644
--- a/doc/src/chinese_syllable.md
+++ b/doc/src/chinese_syllable.md
@@ -67,4 +67,4 @@
 * https://github.com/KuangDD/phkit
 * https://github.com/mozillazg/python-pinyin
 * https://github.com/Kyubyong/g2pC
-* https://github.com/kakaobrain/g2pM
\ No newline at end of file
+* https://github.com/kakaobrain/g2pM
diff --git a/doc/src/dataset.md b/doc/src/dataset.md
index d70d0e0d2..aaa805510 100644
--- a/doc/src/dataset.md
+++ b/doc/src/dataset.md
@@ -18,4 +18,4 @@
 
 ### ASR Noise
 
-* [asr-noises](https://github.com/speechio/asr-noises)
\ No newline at end of file
+* [asr-noises](https://github.com/speechio/asr-noises)
diff --git a/doc/src/decoding.md b/doc/src/decoding.md
index ade06c4cb..347a4098b 100644
--- a/doc/src/decoding.md
+++ b/doc/src/decoding.md
@@ -3,4 +3,3 @@
 
 ## Reference
 * [Timestamps and N-Best](https://mp.weixin.qq.com/s?__biz=MzU2NjUwMTgxOQ==&mid=2247483956&idx=1&sn=80ce595238d84155d50f08c0d52267d3&chksm=fcaacae0cbdd43f62b1da60c8e8671a9e0bb2aeee94f58751839b03a1c45b9a3889b96705080&scene=21#wechat_redirect)
-
diff --git a/doc/src/feature_list.md b/doc/src/feature_list.md
index 57641d5ea..573669fa2 100644
--- a/doc/src/feature_list.md
+++ b/doc/src/feature_list.md
@@ -58,4 +58,4 @@
 ### Grapheme To Phoneme
 
 * syllable
-* phoneme
\ No newline at end of file
+* phoneme
diff --git a/doc/src/ngram_lm.md b/doc/src/ngram_lm.md
index 07aa5411c..119a3b21c 100644
--- a/doc/src/ngram_lm.md
+++ b/doc/src/ngram_lm.md
@@ -83,4 +83,4 @@ Please notice that the released language models only contain Chinese simplified
   ```
   build/bin/build_binary ./result/people2014corpus_words.arps ./result/people2014corpus_words.klm
-  ```
\ No newline at end of file
+  ```
diff --git a/doc/src/praat_textgrid.md b/doc/src/praat_textgrid.md
index c25c760ae..06c4f8791 100644
--- a/doc/src/praat_textgrid.md
+++ b/doc/src/praat_textgrid.md
@@ -76,7 +76,7 @@ pip3 install textgrid
    tg.read('file.TextGrid')  # 'file.TextGrid' is the file name
    ```
 
-   The tg.tiers attribute: 
+   The tg.tiers attribute:
    prints every item in the file; the output of print(tg.tiers) looks like this:
 
    ```text
   [IntervalTier(
       phones, [
           Interval(1361.8925, 1362.0125, sil),
@@ -86,7 +86,7 @@ pip3 install textgrid
           Interval(1361.89250, 1362.01250, R),
           Interval(1362.01250, 1362.13250, AY1),
           Interval(1362.13250, 1362.16250, T),
-          
+
           ...
       ]
   )
@@ -113,7 +113,7 @@ pip3 install textgrid
    An Interval can be understood as a duration.
    ```
 
-   
+
 2. Objects in the textgrid library
 
    **IntervalTier** object:
    Methods
@@ -148,7 +148,7 @@ pip3 install textgrid
       strict --> returns a bool indicating whether the file is in strict TextGrid format
    ```
 
-  ​ 
+  ​
    **PointTier** object:
    Methods
@@ -174,7 +174,7 @@ pip3 install textgrid
       name: returns the name
    ```
 
-   
+
    **Point** object:
    Supports comparison; supports addition and subtraction
@@ -185,7 +185,7 @@ pip3 install textgrid
       time:
    ```
 
-  ​ 
+  ​
    **Interval** object:
    Supports comparison; supports addition and subtraction
@@ -250,10 +250,9 @@ pip3 install textgrid
       grids: --> returns the list of grids that were read
    ```
 
-   
+
 ## Reference
 
 * https://zh.wikipedia.org/wiki/Praat%E8%AF%AD%E9%9F%B3%E5%AD%A6%E8%BD%AF%E4%BB%B6
 * https://blog.csdn.net/duxin_csdn/article/details/88966295
-
diff --git a/doc/src/tools.md b/doc/src/tools.md
index 4ec09f6a2..5fcca9239 100644
--- a/doc/src/tools.md
+++ b/doc/src/tools.md
@@ -1,4 +1,3 @@
 # Useful Tools
 
 * [Regex visualization and common regular expressions](https://wangwl.net/static/projects/visualRegex/#)
-
diff --git a/doc/src/tts_text_front_end.md b/doc/src/tts_text_front_end.md
index fe0f7e247..b13ab615c 100644
--- a/doc/src/tts_text_front_end.md
+++ b/doc/src/tts_text_front_end.md
@@ -23,7 +23,7 @@ Therefore, procedures like stemming and lemmatization are not useful for Chinese
 
 ### Tokenization
 
-**Tokenizing breaks up text data into shorter pre-set strings**, which help build context and meaning for the machine learning model. 
+**Tokenizing breaks up text data into shorter pre-set strings**, which help build context and meaning for the machine learning model.
 
 These “tags” label the part of speech. There are 24 part-of-speech tags and 4 proper-name category labels in the `jieba` package’s existing dictionary.
 
@@ -31,7 +31,7 @@
 
 ### Stop Words
 
-In NLP, **stop words are “meaningless” words** that make the data too noisy or ambiguous. 
+In NLP, **stop words are “meaningless” words** that make the data too noisy or ambiguous.
 
 Instead of manually removing them, you could import the `stopwordsiso` package for a full list of Chinese stop words. More information can be found [here](https://pypi.org/project/stopwordsiso/). And with this, we can easily create code to filter out any stop words in large text data.
@@ -209,4 +209,4 @@ TN: rule-based methods
 ## Reference
 * [Text Front End](https://slyne.github.io/%E5%85%AC%E5%BC%80%E8%AF%BE/2020/10/03/TTS1/)
 * [Chinese Natural Language (Pre)processing: An Introduction](https://towardsdatascience.com/chinese-natural-language-pre-processing-an-introduction-995d16c2705f)
-* [Beginner’s Guide to Sentiment Analysis for Simplified Chinese using SnowNLP](https://towardsdatascience.com/beginners-guide-to-sentiment-analysis-for-simplified-chinese-using-snownlp-ce88a8407efb)
\ No newline at end of file
+* [Beginner’s Guide to Sentiment Analysis for Simplified Chinese using SnowNLP](https://towardsdatascience.com/beginners-guide-to-sentiment-analysis-for-simplified-chinese-using-snownlp-ce88a8407efb)
diff --git a/doc/src/vad.md b/doc/src/vad.md
index 56fe95879..e73e9cf7a 100644
--- a/doc/src/vad.md
+++ b/doc/src/vad.md
@@ -29,4 +29,3 @@
 * [Endpoint Detection](https://mp.weixin.qq.com/s?__biz=MzU2NjUwMTgxOQ==&mid=2247484024&idx=1&sn=12da2ee76347de4a18856274ba6ba61f&chksm=fcaacaaccbdd43ba6b3e996bbf1e2ac6d5f1b449dfd80fcaccfbbe0a240fa1668b931dbf4bd5&scene=21#wechat_redirect)
 * Kaldi: *https://github.com/kaldi-asr/kaldi/blob/6260b27d146e466c7e1e5c60858e8da9fd9c78ae/src/online2/online-endpoint.h#L132-L150*
 * End-to-End Automatic Speech Recognition Integrated with CTC-Based Voice Activity Detection: *https://arxiv.org/pdf/2002.00551.pdf*
-
diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml
index 10c3a2822..36d56723b 100644
--- a/examples/aishell/s1/conf/conformer.yaml
+++ b/examples/aishell/s1/conf/conformer.yaml
@@ -24,7 +24,7 @@ data:
   n_fft: None
   stride_ms: 10.0
   window_ms: 25.0
-  use_dB_normalization: False
+  use_dB_normalization: True
   target_dB: -20
   random_seed: 0
   keep_transcription_text: False
@@ -76,7 +76,7 @@ model:
 training:
   n_epoch: 240
   accum_grad: 2
-  global_grad_clip: 5.0
+  global_grad_clip: 3.0
   optim: adam
   optim_conf:
     lr: 0.002
diff --git a/examples/cc-cedict/local/parser.py b/examples/cc-cedict/local/parser.py
index d6acb834f..e1e10b3d0 100644
--- a/examples/cc-cedict/local/parser.py
+++ b/examples/cc-cedict/local/parser.py
@@ -1,18 +1,24 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 # https://github.com/rubber-duck-dragon/rubber-duck-dragon.github.io/blob/master/cc-cedict_parser/parser.py
-
 #A parser for CC-CEDICT. Converts the Chinese-English dictionary into a list of Python dictionaries with "traditional", "simplified", "pinyin", and "english" keys.
-
 #Make sure that the cedict_ts.u8 file is in the same folder as this file, and that the name matches the file name on line 13.
-
 #Before starting, open the CEDICT text file and delete the copyright information at the top. Otherwise the program will try to parse it and you will get an error message.
-
 #Characters that are commonly used as surnames have two entries in CC-CEDICT. This program will remove the surname entry if there is another entry for the character. If you want to include the surnames, simply delete lines 59 and 60.
-
 #This code was written by Franki Allegra in February 2020.
-
-
-import sys
 import json
+import sys
 
 # usage: bin ccedict dump.json
 
@@ -50,9 +56,10 @@ with open(sys.argv[1], 'rt') as file:
             list_of_dicts.append(parsed)
 
     def remove_surnames():
-        for x in range(len(list_of_dicts)-1, -1, -1):
+        for x in range(len(list_of_dicts) - 1, -1, -1):
             if "surname " in list_of_dicts[x]['english']:
-                if list_of_dicts[x]['traditional'] == list_of_dicts[x+1]['traditional']:
+                if list_of_dicts[x]['traditional'] == list_of_dicts[x + 1][
+                        'traditional']:
                     list_of_dicts.pop(x)
 
     def main():
@@ -60,13 +67,12 @@ with open(sys.argv[1], 'rt') as file:
         #make each line into a dictionary
         print("Parsing dictionary . . .")
         for line in dict_lines:
-          parse_line(line)
+            parse_line(line)
 
         #remove entries for surnames from the data (optional):
         print("Removing Surnames . . .")
         remove_surnames()
-
         print("Saving to database (this may take a few minutes) . . .")
         with open(sys.argv[2], 'wt') as fout:
             for one_dict in list_of_dicts:
@@ -74,5 +80,6 @@ with open(sys.argv[1], 'rt') as file:
                 fout.write(json_str + "\n")
         print('Done!')
 
+
 list_of_dicts = []
 parsed_dict = main()
diff --git a/requirements.txt b/requirements.txt
index a6facb6cb..57a951bbd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 coverage
 pre-commit
+pybind11
 resampy==0.2.2
 scipy==1.2.1
 sentencepiece
@@ -7,7 +8,6 @@ snakeviz
 SoundFile==0.9.0.post1
 sox
 tensorboardX
+textgrid
 typeguard
 yacs
-pybind11
-textgrid
diff --git a/third_party/phkit/README.md b/third_party/phkit/README.md
index e8f0745ce..002425bad 100644
--- a/third_party/phkit/README.md
+++ b/third_party/phkit/README.md
@@ -39,7 +39,7 @@ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
 
 Punctuation: ! ? . , ; : " # ( )
 
-Note: !=!!|?=??|.=.。|,=,,、|;=;;|:=::|"="“|#=#   |(=(([[{{【<《|)=))]]}}】>》 
+Note: !=!!|?=??|.=.。|,=,,、|;=;;|:=::|"="“|#=#   |(=(([[{{【<《|)=))]]}}】>》
 
 Reserved: w y 0 6 7 8 9
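
Some usage notes for the code and docs touched above. For `make_pad_mask` in deepspeech/modules/mask.py: it turns a batch of utterance lengths into a boolean mask over time steps. A minimal sketch of the intended behavior (a sketch only; it assumes the repo is importable as `deepspeech` and that the function follows the usual make_pad_mask convention of `True` at padded positions):

```python
import paddle
from deepspeech.modules.mask import make_pad_mask

lengths = paddle.to_tensor([5, 3, 2])  # valid frames per utterance
mask = make_pad_mask(lengths)          # shape [batch, max_len]
print(mask)
# Expected pattern: row i is True past lengths[i]:
# [[False, False, False, False, False],
#  [False, False, False, True,  True ],
#  [False, False, True,  True,  True ]]
```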
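For the `textgrid` objects documented in doc/src/praat_textgrid.md: a minimal reading sketch, assuming the `textgrid` package from `pip3 install textgrid`, a hypothetical Praat-generated `file.TextGrid`, and that package's `minTime`/`maxTime`/`mark` attribute names:

```python
import textgrid

tg = textgrid.TextGrid()
tg.read('file.TextGrid')   # hypothetical alignment file produced by Praat

for tier in tg.tiers:      # IntervalTier here; PointTier points use .time/.mark
    print(tier.name)       # e.g. 'phones' or 'words'
    for interval in tier:  # an Interval is (start time, end time, label)
        print(interval.minTime, interval.maxTime, interval.mark)
```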
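For the tokenization and stop-word steps described in doc/src/tts_text_front_end.md: a small sketch assuming the `jieba` and `stopwordsiso` packages (the sample sentence is arbitrary):

```python
import jieba
import jieba.posseg as pseg          # POS tagging with jieba's tag set
from stopwordsiso import stopwords

text = "我们今天学习语音识别的文本前端"  # arbitrary sample sentence
print(pseg.lcut(text))               # [(word, part-of-speech tag), ...]

zh_stops = stopwords("zh")           # set of Chinese stop words
print([w for w in jieba.lcut(text) if w not in zh_stops])
```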
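And for examples/cc-cedict/local/parser.py: each CC-CEDICT entry has the shape `TRADITIONAL SIMPLIFIED [PINYIN] /gloss/.../`. A self-contained re-implementation of just that parsing step (not the patched code itself):

```python
def parse_entry(line):
    """Parse one CC-CEDICT line: TRADITIONAL SIMPLIFIED [PINYIN] /gloss/.../"""
    if not line.strip() or line.startswith('#'):
        return None  # skip blank lines and the comment header
    traditional, simplified, rest = line.split(' ', 2)
    pinyin = rest[rest.find('[') + 1:rest.find(']')]
    english = rest[rest.find('/') + 1:rest.rfind('/')]
    return {'traditional': traditional, 'simplified': simplified,
            'pinyin': pinyin, 'english': english}

print(parse_entry('中國 中国 [Zhong1 guo2] /China/'))
# -> {'traditional': '中國', 'simplified': '中国',
#     'pinyin': 'Zhong1 guo2', 'english': 'China'}
```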