[TTS]Cantonese TTS MFA pipeline (#2899)

Co-authored-by: TianYuan <white-sky@qq.com>
pull/2918/head
HuangLiangJie 1 year ago committed by GitHub
parent 047092de8e
commit c75906462e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -38,6 +38,7 @@ sphinx-markdown-tables
sphinx_rtd_theme
textgrid
timer
ToJyutping
typeguard
webrtcvad
websockets

@ -7,3 +7,10 @@ Run the following script to get started, for more detail, please see `run.sh`.
# Rhythm tags for MFA
To get rhythm tags with durations from the MFA tool, add the `--rhy-with-duration` flag to the first two commands in `run.sh`.
Note that only the CSMSC dataset is supported so far, and that `#` is replaced with `sp` in the rhythm tags for MFA.
# MFA for Cantonese language
First, download the [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) datasets and place them under `~/datasets/`.
Then,
```bash
./run_canton.sh
```

@ -0,0 +1,80 @@
import argparse
import os
import re
import shutil
import ToJyutping
def check(str):
    """Tell whether the given text contains any ASCII letter.

    Args:
        str: Text to inspect.

    Returns:
        True if at least one character in A-Z or a-z occurs, else False.
    """
    ascii_letter = re.compile(r'[A-Za-z]', re.S)
    return bool(ascii_letter.search(str))
# Syllable-initial consonants used to split a romanised Cantonese syllable
# into "initial + remainder" when building the MFA lexicon.
consonants = [
    'p', 'b', 't', 'd', 'ts', 'dz', 'k', 'g', 'kw', 'gw', 'f', 'h', 'l', 'm',
    'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j'
]

# Longest initials first: otherwise 't', 'd', 'k' and 'g' always match before
# 'ts', 'dz', 'kw' and 'gw', and the two-letter initials are never used.
_consonants_longest_first = sorted(consonants, key=len, reverse=True)


def get_lines(canton: str) -> str:
    """Build one lexicon line for a romanised Cantonese syllable.

    The syllable is split at its longest matching initial from
    ``consonants``; the rest of the syllable (including any tone digit)
    becomes the second phone.

    Args:
        canton: A single syllable token, e.g. ``"gwong2"``.

    Returns:
        ``"<syllable> <initial> <remainder>"`` when an initial matches,
        otherwise ``"<syllable> <syllable>"`` (the whole syllable is used
        as its only phone).
    """
    for consonant in _consonants_longest_first:
        if canton.startswith(consonant):
            initial = canton[:len(consonant)]
            remainder = canton[len(consonant):]
            return ' '.join((canton, initial, remainder))
    # No known initial: treat the whole syllable as a single phone.
    return canton + ' ' + canton
if __name__ == "__main__":
    # Script entry point: for every input corpus, copy the usable wavs into
    # per-speaker folders, write a matching .lab transcript for each wav,
    # and finally write a lexicon covering every syllable token seen.
    parser = argparse.ArgumentParser(
        description="Generate lexicon for Cantonese pinyin to phoneme for MFA")
    parser.add_argument(
        "--output_lexicon", type=str, help="Path to save lexicon.")
    parser.add_argument(
        "--output_wavlabs",
        type=str,
        help="Path of wavs and labs for MFA training.")
    parser.add_argument(
        "--inputs", type=str, nargs="+", help="Path to the cantonese datasets.")
    args = parser.parse_args()

    # makedirs(exist_ok=True) so a directory left behind by an interrupted
    # run (or a missing parent directory) does not abort the script.
    os.makedirs(args.output_wavlabs, exist_ok=True)

    all_canton = set()
    for input_ in args.inputs:
        # The two corpora name their utterance-info file differently.
        utt = "UTTRANSINFO.txt" if "Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence" in input_ else "UTTERANCEINFO.txt"
        input_utttxt = os.path.join(input_, utt)
        # NOTE(review): assumes the corpus info files are UTF-8 - confirm.
        # An explicit encoding keeps the result independent of the locale.
        with open(input_utttxt, 'r', encoding='utf-8') as f:
            # The first line is skipped (presumably a column header).
            utterance_info = f.readlines()[1:]
        for utterance_line in utterance_info:
            # Blank lines (e.g. a trailing newline) carry no utterance and
            # would otherwise break the 5-field unpacking below.
            if not utterance_line.strip():
                continue
            _, wav_name, spk, _, text = utterance_line.split('\t')
            text = text.strip().replace(' ', '')
            # check the characters and drop the short text.
            if check(text) or len(text) <= 2:
                continue
            source_path = os.path.join(input_, 'WAV', spk, wav_name)
            out_spk_path = os.path.join(args.output_wavlabs, spk)
            os.makedirs(out_spk_path, exist_ok=True)
            target_path = os.path.join(out_spk_path, wav_name)
            shutil.copy(source_path, target_path)
            # splitext keeps any extra dots in the file stem, so the .lab
            # name always matches the wav name.
            lab_name = os.path.splitext(wav_name)[0] + '.lab'
            lab_target_path = os.path.join(out_spk_path, lab_name)
            canton_text = ToJyutping.get_jyutping_text(text)
            with open(lab_target_path, 'w', encoding='utf-8') as f:
                f.write(canton_text)
            # split() without a separator drops empty tokens, which would
            # otherwise become malformed (blank) lexicon entries.
            all_canton.update(canton_text.split())

    # Sorted so the lexicon file is identical from run to run.
    with open(args.output_lexicon, 'w', encoding='utf-8') as f:
        for canton in sorted(all_canton):
            f.write(get_lines(canton) + '\n')

@ -0,0 +1,34 @@
#!/bin/bash
# Cantonese MFA pipeline:
#   1. build the lexicon and the wav/lab training folder from the corpora
#      under ~/datasets/,
#   2. download and unpack Montreal Forced Aligner v1.0.1 into local/,
#   3. train an acoustic model and align the data.
# Each step is skipped when its output already exists.

EXP_DIR=exp
mkdir -p "$EXP_DIR"
LEXICON_NAME='canton'

if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
    echo "generating lexicon and training data..."
    python local/generate_canton_lexicon_wavlabs.py \
        --output_lexicon "$EXP_DIR/$LEXICON_NAME.lexicon" \
        --output_wavlabs "$EXP_DIR/${LEXICON_NAME}_wavlabs" \
        --inputs ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle
    echo "lexicon and training data done"
fi

MFA_DOWNLOAD_DIR=local/

if [ ! -f "$MFA_DOWNLOAD_DIR/montreal-forced-aligner_linux.tar.gz" ]; then
    echo "downloading mfa..."
    (cd "$MFA_DOWNLOAD_DIR" && wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz)
    echo "download mfa done!"
fi

if [ ! -d "$MFA_DOWNLOAD_DIR/montreal-forced-aligner" ]; then
    echo "extracting mfa..."
    (cd "$MFA_DOWNLOAD_DIR" && tar xvf "montreal-forced-aligner_linux.tar.gz")
    echo "extraction done!"
fi

# Prepend the MFA binaries instead of replacing PATH, so the rest of the
# system's commands stay reachable for this script and for MFA itself.
export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin:$PATH"

if [ ! -d "$EXP_DIR/canton_alignment" ]; then
    echo "Start MFA training..."
    mfa_train_and_align "$EXP_DIR/${LEXICON_NAME}_wavlabs" "$EXP_DIR/$LEXICON_NAME.lexicon" "$EXP_DIR/canton_alignment" -o "$EXP_DIR/canton_model" --clean --verbose --temp_directory "$EXP_DIR/.mfa_train_and_align"
    echo "training done!"
    echo "results: $EXP_DIR/canton_alignment"
    echo "model: $EXP_DIR/canton_model"
fi

@ -58,6 +58,7 @@ base = [
"sacrebleu",
"textgrid",
"timer",
"ToJyutping",
"typeguard",
"webrtcvad",
"yacs~=0.1.8",
@ -294,7 +295,8 @@ setup_info = dict(
},
# Package info
packages=find_packages(include=['paddlespeech*'], exclude=['utils', 'third_party']),
packages=find_packages(
include=['paddlespeech*'], exclude=['utils', 'third_party']),
zip_safe=True,
classifiers=[
'Development Status :: 5 - Production/Stable',

Loading…
Cancel
Save