diff --git a/docs/source/dependencies.md b/docs/source/dependencies.md index 5f641f35e..905730e47 100644 --- a/docs/source/dependencies.md +++ b/docs/source/dependencies.md @@ -13,7 +13,7 @@ bc flac jq vim tig tree pkg-config libsndfile1 libflac-dev libvorbis-dev libboos ``` build-essential cmake libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev liblzma-dev gcc-5 g++-5 ``` - + ### The dependencies of sox: ``` @@ -25,7 +25,7 @@ libvorbis-dev libmp3lame-dev libmad-ocaml-dev ``` kenlm -sox +sox mfa openblas kaldi diff --git a/paddlespeech/s2t/exps/u2/model.py b/paddlespeech/s2t/exps/u2/model.py index b0310de15..cf02d7d3b 100644 --- a/paddlespeech/s2t/exps/u2/model.py +++ b/paddlespeech/s2t/exps/u2/model.py @@ -554,10 +554,11 @@ class U2Tester(U2Trainer): @paddle.no_grad() def align(self): - ctc_utils.ctc_align(self.config, - self.model, self.align_loader, self.config.decoding.batch_size, - self.align_loader.collate_fn.stride_ms, - self.align_loader.collate_fn.vocab_list, self.args.result_file) + ctc_utils.ctc_align(self.config, self.model, self.align_loader, + self.config.decoding.batch_size, + self.align_loader.collate_fn.stride_ms, + self.align_loader.collate_fn.vocab_list, + self.args.result_file) def load_inferspec(self): """infer model and input spec. diff --git a/paddlespeech/s2t/exps/u2_kaldi/model.py b/paddlespeech/s2t/exps/u2_kaldi/model.py index 60685fb70..0d8508c20 100644 --- a/paddlespeech/s2t/exps/u2_kaldi/model.py +++ b/paddlespeech/s2t/exps/u2_kaldi/model.py @@ -527,10 +527,11 @@ class U2Tester(U2Trainer): @paddle.no_grad() def align(self): - ctc_utils.ctc_align(self.config, - self.model, self.align_loader, self.config.decoding.batch_size, - self.align_loader.collate_fn.stride_ms, - self.align_loader.collate_fn.vocab_list, self.args.result_file) + ctc_utils.ctc_align(self.config, self.model, self.align_loader, + self.config.decoding.batch_size, + self.align_loader.collate_fn.stride_ms, + self.align_loader.collate_fn.vocab_list, + self.args.result_file) def load_inferspec(self): """infer model and input spec. diff --git a/paddlespeech/s2t/exps/u2_st/model.py b/paddlespeech/s2t/exps/u2_st/model.py index 9141b3613..91390afe5 100644 --- a/paddlespeech/s2t/exps/u2_st/model.py +++ b/paddlespeech/s2t/exps/u2_st/model.py @@ -543,10 +543,10 @@ class U2STTester(U2STTrainer): @paddle.no_grad() def align(self): - ctc_utils.ctc_align(self.config, - self.model, self.align_loader, self.config.decoding.batch_size, - self.config.collator.stride_ms, - self.vocab_list, self.args.result_file) + ctc_utils.ctc_align(self.config, self.model, self.align_loader, + self.config.decoding.batch_size, + self.config.collator.stride_ms, self.vocab_list, + self.args.result_file) def load_inferspec(self): """infer model and input spec. diff --git a/paddlespeech/s2t/utils/ctc_utils.py b/paddlespeech/s2t/utils/ctc_utils.py index f5822e5dd..886b72033 100644 --- a/paddlespeech/s2t/utils/ctc_utils.py +++ b/paddlespeech/s2t/utils/ctc_utils.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # Modified from wenet(https://github.com/wenet-e2e/wenet) -from typing import List from pathlib import Path +from typing import List + import numpy as np import paddle diff --git a/paddlespeech/t2s/exps/tacotron2/ljspeech.py b/paddlespeech/t2s/exps/tacotron2/ljspeech.py index 4facde405..08db2a646 100644 --- a/paddlespeech/t2s/exps/tacotron2/ljspeech.py +++ b/paddlespeech/t2s/exps/tacotron2/ljspeech.py @@ -67,19 +67,16 @@ class LJSpeechCollector(object): # Sort by text_len in descending order texts = [ - i - for i, _ in sorted( + i for i, _ in sorted( zip(texts, text_lens), key=lambda x: x[1], reverse=True) ] mels = [ - i - for i, _ in sorted( + i for i, _ in sorted( zip(mels, text_lens), key=lambda x: x[1], reverse=True) ] mel_lens = [ - i - for i, _ in sorted( + i for i, _ in sorted( zip(mel_lens, text_lens), key=lambda x: x[1], reverse=True) ]