pull/1019/head
Hui Zhang 3 years ago
parent 7ec0ed4aaf
commit 56480e1033

@ -73,7 +73,6 @@ def create_manifest(data_dir, manifest_path_prefix):
audio_data, samplerate = soundfile.read(audio_path) audio_data, samplerate = soundfile.read(audio_path)
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
translation_str = " ".join(translation.split()) translation_str = " ".join(translation.split())
trancription_str = " ".join(trancription.split()) trancription_str = " ".join(trancription.split())
json_lines.append( json_lines.append(

@ -124,7 +124,7 @@ def create_manifest(data_dir, manifest_path_prefix):
json.dumps( json.dumps(
{ {
'utt': audio_id, 'utt': audio_id,
'utt2spk', spk, 'utt2spk': spk,
'feat': audio_path, 'feat': audio_path,
'feat_shape': (duration, ), # second 'feat_shape': (duration, ), # second
'text': word_text, # charactor 'text': word_text, # charactor

@ -22,9 +22,9 @@ import argparse
import codecs import codecs
import json import json
import os import os
from pathlib import Path
import soundfile import soundfile
from pathlib import Path
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(

@ -1,6 +1,18 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang) # Copyright 2021 Xiaomi Corporation (Author: Yongqing Wang)
# Mobvoi Inc(Author: Di Wu, Binbin Zhang) # Mobvoi Inc(Author: Di Wu, Binbin Zhang)
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
@ -12,11 +24,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import sys
import os
import argparse import argparse
import json import json
import os
import sys
def get_args(): def get_args():
@ -85,13 +96,13 @@ def meta_analysis(input_json, output_dir):
else: else:
utt2text.write(f'{sid}\t{text}\n') utt2text.write(f'{sid}\t{text}\n')
segments.write( segments.write(
f'{sid}\t{aid}\t{start_time}\t{end_time}\n' f'{sid}\t{aid}\t{start_time}\t{end_time}\n')
)
utt2dur.write(f'{sid}\t{dur}\n') utt2dur.write(f'{sid}\t{dur}\n')
segment_sub_names = " ".join(segment_subsets) segment_sub_names = " ".join(segment_subsets)
utt2subsets.write( utt2subsets.write(
f'{sid}\t{segment_sub_names}\n') f'{sid}\t{segment_sub_names}\n')
def main(): def main():
args = get_args() args = get_args()

@ -1,5 +1,17 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 NPU, ASLP Group (Author: Qijie Shao) # Copyright 2021 NPU, ASLP Group (Author: Qijie Shao)
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
@ -11,14 +23,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# process_opus.py: segmentation and downsampling of opus audio # process_opus.py: segmentation and downsampling of opus audio
# usage: python3 process_opus.py wav.scp segments output_wav.scp # usage: python3 process_opus.py wav.scp segments output_wav.scp
import os
import sys
from pydub import AudioSegment from pydub import AudioSegment
import sys
import os
def read_file(wav_scp, segments): def read_file(wav_scp, segments):

@ -24,15 +24,10 @@ import jsonlines
import numpy as np import numpy as np
import paddle import paddle
from paddle import distributed as dist from paddle import distributed as dist
from paddle.io import DataLoader
from yacs.config import CfgNode from yacs.config import CfgNode
from paddlespeech.s2t.frontend.featurizer import TextFeaturizer from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataloader import BatchDataLoader from paddlespeech.s2t.io.dataloader import BatchDataLoader
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.io.sampler import SortagradBatchSampler
from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
from paddlespeech.s2t.models.u2 import U2Model from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.optimizer import OptimizerFactory from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope from paddlespeech.s2t.training.reporter import ObsScope
@ -215,7 +210,7 @@ class U2Trainer(Trainer):
msg += f"{v:>.8f}" if isinstance(v, msg += f"{v:>.8f}" if isinstance(v,
float) else f"{v}" float) else f"{v}"
msg += f" {k.split(',')[1]}" if len( msg += f" {k.split(',')[1]}" if len(
k.split(',')) == 2 else f"" k.split(',')) == 2 else ""
msg += "," msg += ","
msg = msg[:-1] # remove the last "," msg = msg[:-1] # remove the last ","
if (batch_index + 1 if (batch_index + 1

@ -57,7 +57,7 @@ class TextFeaturizer():
vocab_filepath, maskctc) vocab_filepath, maskctc)
self.vocab_size = len(self.vocab_list) self.vocab_size = len(self.vocab_list)
else: else:
logger.warning(f"TextFeaturizer: not have vocab file.") logger.warning("TextFeaturizer: not have vocab file.")
if unit_type == 'spm': if unit_type == 'spm':
spm_model = spm_model_prefix + '.model' spm_model = spm_model_prefix + '.model'

Loading…
Cancel
Save