Cantonese FastSpeech2 e2e infer, test=tts

3 years ago · 8ba2e6136d
parent 2e825a4cec
commit 8ba2e6136d
3 changed files with 5 additions and 7 deletions
--- a/examples/canton/tts3/local/synthesize_e2e.sh
+++ b/examples/canton/tts3/local/synthesize_e2e.sh
@@ -43,7 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
-        --lang=zh \
+        --lang=canton \
        --text=${BIN_DIR}/../sentences_canton.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
--- a/paddlespeech/t2s/exps/syn_utils.py
+++ b/paddlespeech/t2s/exps/syn_utils.py
@@ -112,14 +112,12 @@ def get_sentences(text_file: Optional[os.PathLike], lang: str='zh'):
            if line.strip() != "":
                items = re.split(r"\s+", line.strip(), 1)
                utt_id = items[0]
-                if lang == 'zh':
+                if lang == 'zh' or lang == 'canton':
                    sentence = "".join(items[1:])
                elif lang == 'en':
                    sentence = " ".join(items[1:])
                elif lang == 'mix':
                    sentence = " ".join(items[1:])
-                elif lang == 'canton':
-                    sentence = " ".join(items[1:])
            sentences.append((utt_id, sentence))
    return sentences

@@ -269,7 +267,7 @@ def get_frontend(lang: str='zh',
            phone_vocab_path=phones_dict,
            tone_vocab_path=tones_dict,
            use_rhy=use_rhy)
-    if lang == 'canton':
+    elif lang == 'canton':
        frontend = CantonFrontend(phone_vocab_path=phones_dict)
    elif lang == 'en':
        frontend = English(phone_vocab_path=phones_dict)
@@ -307,7 +305,7 @@ def run_frontend(frontend: object,
        if get_tone_ids:
            tone_ids = input_ids["tone_ids"]
            outs.update({'tone_ids': tone_ids})
-    if lang == 'canton':
+    elif lang == 'canton':
        input_ids = frontend.get_input_ids(
            text, merge_sentences=merge_sentences, to_tensor=to_tensor)
        phone_ids = input_ids["phone_ids"]
--- a/paddlespeech/t2s/frontend/canton_frontend.py
+++ b/paddlespeech/t2s/frontend/canton_frontend.py
@@ -27,7 +27,7 @@ INITIALS = [
 INITIALS += ['sp', 'spl', 'spn', 'sil']


-def get_lines(cantons):
+def get_lines(cantons: List[str]):
    phones = []
    for canton in cantons:
        for consonant in INITIALS: