fix the pre-commit

pull/1020/head
huangyuxin 3 years ago
parent 4537e900ef
commit 8aebfeac81

@ -82,7 +82,7 @@ def create_manifest(data_dir, manifest_path_prefix):
# skip audio files that have no transcription
if audio_id not in transcript_dict:
continue
utt2spk = Path(audio_path).parent.name
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)

@ -73,7 +73,6 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate)
translation_str = " ".join(translation.split())
trancription_str = " ".join(trancription.split())
json_lines.append(
@ -82,7 +81,7 @@ def create_manifest(data_dir, manifest_path_prefix):
'utt': utt,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': [translation_str, trancription_str],
'text': [translation_str, trancription_str],
},
ensure_ascii=False))

@ -124,7 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps(
{
'utt': audio_id,
'utt2spk', spk,
'utt2spk': spk,
'feat': audio_path,
'feat_shape': (duration, ), # second
'text': word_text, # character

@ -22,9 +22,9 @@ import argparse
import codecs
import json
import os
from pathlib import Path
import soundfile
from pathlib import Path
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(

@ -24,4 +24,4 @@
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention | 6.805267604192098, | 0.049795 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | ctc_greedy_search | 6.805267604192098, | 0.054892 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | ctc_prefix_beam_search | 6.805267604192098, | 0.054531 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention_rescoring | 6.805267604192098, | 0.042244 |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug | test-clean | attention_rescoring | 6.805267604192098, | 0.042244 |

@ -4,4 +4,4 @@ asr model with phone unit
* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature

@ -55,4 +55,4 @@ As shown in the following table, we provide 3 training subsets, namely `S`, `M`
|-----------------|-------|--------------|-----------------------------------------------------------------------------------------|
| DEV | 20 | Internet | Specially designed for some speech tools which require cross-validation set in training |
| TEST\_NET | 23 | Internet | Match test |
| TEST\_MEETING | 15 | Real meeting | Mismatch test which is a far-field, conversational, spontaneous, and meeting dataset |
| TEST\_MEETING | 15 | Real meeting | Mismatch test which is a far-field, conversational, spontaneous, and meeting dataset |

@ -21,4 +21,4 @@ Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wen
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention | - | 0.048456 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_greedy_search | - | 0.052534 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_prefix_beam_search | - | 0.052915 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |

@ -1,6 +1,18 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang)
# Mobvoi Inc(Author: Di Wu, Binbin Zhang)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@ -12,11 +24,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import argparse
import json
import os
import sys
def get_args():
@ -85,13 +96,13 @@ def meta_analysis(input_json, output_dir):
else:
utt2text.write(f'{sid}\t{text}\n')
segments.write(
f'{sid}\t{aid}\t{start_time}\t{end_time}\n'
)
f'{sid}\t{aid}\t{start_time}\t{end_time}\n')
utt2dur.write(f'{sid}\t{dur}\n')
segment_sub_names = " ".join(segment_subsets)
utt2subsets.write(
f'{sid}\t{segment_sub_names}\n')
def main():
args = get_args()
@ -99,4 +110,4 @@ def main():
if __name__ == '__main__':
main()
main()

@ -1,5 +1,17 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 NPU, ASLP Group (Author: Qijie Shao)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@ -11,14 +23,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# process_opus.py: segmentation and downsampling of opus audio
# usage: python3 process_opus.py wav.scp segments output_wav.scp
import os
import sys
from pydub import AudioSegment
import sys
import os
def read_file(wav_scp, segments):
@ -86,4 +96,4 @@ def main():
if __name__ == '__main__':
main()
main()

@ -409,7 +409,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
@paddle.no_grad()
def test(self):
logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
from paddlespeech.s2t.utils.log import Autolog
self.autolog = Autolog(
batch_size=self.config.decoding.batch_size,
@ -438,7 +438,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
msg += "Final error rate [%s] (%d/%d) = %f" % (
error_rate_type, num_ins, num_ins, errors_sum / len_refs)
logger.info(msg)
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
self.autolog.report()
def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
@ -512,7 +512,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
x_len_list = np.split(x_len_batch, batch_size, axis=0)
for x, x_len in zip(x_list, x_len_list):
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
self.autolog.times.start()
x_len = x_len[0]
assert (chunk_size <= x_len)
@ -547,7 +547,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
probs_chunk_list = []
probs_chunk_lens_list = []
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
# record the model preprocessing time
self.autolog.times.stamp()
@ -606,7 +606,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
[output_probs, output_probs_padding], axis=1)
output_probs_list.append(output_probs)
output_lens_list.append(output_lens)
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
# record the model inference time
self.autolog.times.stamp()
# record the post processing time
@ -641,12 +641,12 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
audio_len_handle.reshape(x_len.shape)
audio_len_handle.copy_from_cpu(x_len)
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
self.autolog.times.start()
# record the prefix processing time
self.autolog.times.stamp()
self.predictor.run()
if self.args.enable_auto_log == True:
if self.args.enable_auto_log is True:
# record the model inference time
self.autolog.times.stamp()
# record the post processing time

@ -24,15 +24,10 @@ import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from yacs.config import CfgNode
from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataloader import BatchDataLoader
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.io.sampler import SortagradBatchSampler
from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
@ -215,7 +210,7 @@ class U2Trainer(Trainer):
msg += f"{v:>.8f}" if isinstance(v,
float) else f"{v}"
msg += f" {k.split(',')[1]}" if len(
k.split(',')) == 2 else f""
k.split(',')) == 2 else ""
msg += ","
msg = msg[:-1] # remove the last ","
if (batch_index + 1

Loading…
Cancel
Save