fix format

4 years ago · 56480e1033
parent 7ec0ed4aaf
commit 56480e1033
14 changed files with 46 additions and 31 deletions
--- a/examples/dataset/ted_en_zh/ted_en_zh.py
+++ b/examples/dataset/ted_en_zh/ted_en_zh.py
@@ -73,7 +73,6 @@ def create_manifest(data_dir, manifest_path_prefix):
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                translation_str = " ".join(translation.split())
                trancription_str = " ".join(trancription.split())
                json_lines.append(
--- a/examples/dataset/thchs30/thchs30.py
+++ b/examples/dataset/thchs30/thchs30.py
@@ -124,7 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
                    json.dumps(
                        {
                            'utt': audio_id,
-                            'utt2spk', spk,
+                            'utt2spk': spk,
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': word_text,  # charactor
--- a/examples/dataset/timit/timit_kaldi_standard_split.py
+++ b/examples/dataset/timit/timit_kaldi_standard_split.py
@@ -22,9 +22,9 @@ import argparse
 import codecs
 import json
 import os
 from pathlib import Path
 import soundfile
 from pathlib import Path
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
--- a/examples/wenetspeech/asr1/local/extract_meta.py
+++ b/examples/wenetspeech/asr1/local/extract_meta.py
@@ -1,6 +1,18 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Copyright 2021  Xiaomi Corporation (Author: Yongqing Wang)
 #                 Mobvoi Inc(Author: Di Wu, Binbin Zhang)
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -12,11 +24,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import sys
 import os
 import argparse
 import json
 import os
 import sys
 def get_args():
@@ -85,13 +96,13 @@ def meta_analysis(input_json, output_dir):
                            else:
                                utt2text.write(f'{sid}\t{text}\n')
                                segments.write(
-                                    f'{sid}\t{aid}\t{start_time}\t{end_time}\n'
+                                    f'{sid}\t{aid}\t{start_time}\t{end_time}\n')
                                )
                                utt2dur.write(f'{sid}\t{dur}\n')
                                segment_sub_names = " ".join(segment_subsets)
                                utt2subsets.write(
                                    f'{sid}\t{segment_sub_names}\n')
 def main():
    args = get_args()
--- a/examples/wenetspeech/asr1/local/process_opus.py
+++ b/examples/wenetspeech/asr1/local/process_opus.py
@@ -1,5 +1,17 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Copyright 2021  NPU, ASLP Group (Author: Qijie Shao)
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,14 +23,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # process_opus.py: segmentation and downsampling of opus audio
 # usage: python3 process_opus.py wav.scp segments output_wav.scp
 import os
 import sys
 from pydub import AudioSegment
 import sys
 import os
 def read_file(wav_scp, segments):
--- a/paddlespeech/s2t/exps/u2/model.py
+++ b/paddlespeech/s2t/exps/u2/model.py
@@ -24,15 +24,10 @@ import jsonlines
 import numpy as np
 import paddle
 from paddle import distributed as dist
 from paddle.io import DataLoader
 from yacs.config import CfgNode
 from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
 from paddlespeech.s2t.io.collator import SpeechCollator
 from paddlespeech.s2t.io.dataloader import BatchDataLoader
 from paddlespeech.s2t.io.dataset import ManifestDataset
 from paddlespeech.s2t.io.sampler import SortagradBatchSampler
 from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
 from paddlespeech.s2t.models.u2 import U2Model
 from paddlespeech.s2t.training.optimizer import OptimizerFactory
 from paddlespeech.s2t.training.reporter import ObsScope
@@ -215,7 +210,7 @@ class U2Trainer(Trainer):
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += f" {k.split(',')[1]}" if len(
-                                k.split(',')) == 2 else f""
+                                k.split(',')) == 2 else ""
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1
--- a/paddlespeech/s2t/frontend/featurizer/text_featurizer.py
+++ b/paddlespeech/s2t/frontend/featurizer/text_featurizer.py
@@ -57,7 +57,7 @@ class TextFeaturizer():
                vocab_filepath, maskctc)
            self.vocab_size = len(self.vocab_list)
        else:
-            logger.warning(f"TextFeaturizer: not have vocab file.")
+            logger.warning("TextFeaturizer: not have vocab file.")
        if unit_type == 'spm':
            spm_model = spm_model_prefix + '.model'