You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/examples/other/tts_finetune/tts3/local/label_process.py

64 lines
2.4 KiB

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from pathlib import Path
from typing import List
from typing import Union
def change_baker_label(baker_label_file: Union[str, Path],
out_label_file: Union[str, Path]):
"""change baker label file to regular label file
Args:
baker_label_file (Union[str, Path]): Original baker label file
out_label_file (Union[str, Path]): regular label file
"""
with open(baker_label_file) as f:
lines = f.readlines()
with open(out_label_file, "w") as fw:
for i in range(0, len(lines), 2):
utt_id = lines[i].split()[0]
transcription = lines[i + 1].strip()
fw.write(utt_id + "|" + transcription + "\n")
def get_single_label(label_file: Union[str, Path],
oov_files: List[Union[str, Path]],
input_dir: Union[str, Path]):
"""Divide the label file into individual files according to label_file
Args:
label_file (str or Path): label file, format: utt_id|phones id
input_dir (Path): input dir including audios
"""
input_dir = Path(input_dir).expanduser()
new_dir = input_dir / "newdir"
new_dir.mkdir(parents=True, exist_ok=True)
with open(label_file, "r") as f:
for line in f.readlines():
utt_id = line.split("|")[0]
if utt_id not in oov_files:
transcription = line.split("|")[1].strip()
wav_file = str(input_dir) + "/" + utt_id + ".wav"
new_wav_file = str(new_dir) + "/" + utt_id + ".wav"
os.system("cp %s %s" % (wav_file, new_wav_file))
single_file = str(new_dir) + "/" + utt_id + ".txt"
with open(single_file, "w") as fw:
fw.write(transcription)
return new_dir