From c15e5eecc540a4da2590d85e21ee7335c82fe93d Mon Sep 17 00:00:00 2001
From: WongLaw <mailoflawrence@gmail.com>
Date: Thu, 23 Feb 2023 02:12:00 +0000
Subject: [PATCH] Cantonese phonetic fix: extend INITIALS, switch to spk_id=10, test=tts

---
 examples/canton/tts3/local/synthesize_e2e.sh       |  4 ++--
 .../mfa/local/generate_canton_lexicon_wavlabs.py   | 14 ++++++++------
 paddlespeech/t2s/exps/sentences_canton.txt         |  3 ++-
 paddlespeech/t2s/frontend/canton_frontend.py       |  6 ++++--
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/examples/canton/tts3/local/synthesize_e2e.sh b/examples/canton/tts3/local/synthesize_e2e.sh
index 509129e3d..8cf7eb22b 100755
--- a/examples/canton/tts3/local/synthesize_e2e.sh
+++ b/examples/canton/tts3/local/synthesize_e2e.sh
@@ -25,7 +25,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
         --speaker_dict=dump/speaker_id_map.txt \
-        --spk_id=0 \
+        --spk_id=10 \
         --inference_dir=${train_output_path}/inference
 fi
 
@@ -48,6 +48,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         --output_dir=${train_output_path}/test_e2e \
         --phones_dict=dump/phone_id_map.txt \
         --speaker_dict=dump/speaker_id_map.txt \
-        --spk_id=0 \
+        --spk_id=10 \
         --inference_dir=${train_output_path}/inference
     fi
diff --git a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
index 36bb74467..6b412e917 100644
--- a/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
+++ b/examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
@@ -15,16 +15,18 @@ def check(str):
         return False
 
 
-consonants = [
-    'p', 'b', 't', 'd', 'ts', 'dz', 'k', 'g', 'kw', 'gw', 'f', 'h', 'l', 'm',
-    'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j'
+INITIALS = [  # tried in order by get_lines; first matching prefix wins
+    'aa', 'aai', 'aak', 'aap', 'aat', 'aau', 'ai', 'au', 'ap', 'at', 'ak', 'a',
+    'p', 'b', 'e', 'ts', 't', 'dz', 'd', 'kw', 'k', 'gw', 'g', 'f', 'h', 'l',
+    'm', 'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j', 'ong', 'on', 'ou', 'oi', 'ok',
+    'o', 'uk', 'ung'  # NOTE(review): 'aa' comes before 'aai'..'aau' - intended?
 ]
 
 
 def get_lines(canton):
-    for consonant in consonants:
-        if canton.startswith(consonant):
-            c, v = canton[:len(consonant)], canton[len(consonant):]
+    for init in INITIALS:  # first prefix match wins; list order matters
+        if canton.startswith(init):
+            c, v = canton[:len(init)], canton[len(init):]  # prefix, remainder
             return canton + ' ' + c + ' ' + v
     return canton + ' ' + canton
 
diff --git a/paddlespeech/t2s/exps/sentences_canton.txt b/paddlespeech/t2s/exps/sentences_canton.txt
index 5ab5f7f36..5eb3a780e 100644
--- a/paddlespeech/t2s/exps/sentences_canton.txt
+++ b/paddlespeech/t2s/exps/sentences_canton.txt
@@ -17,4 +17,5 @@
 017 佢晨早啪奶茶，同场追加奶绿，又狂怼西米露，喫啫啫猪脚煲
 018 喂！三点几嚟，饮茶先啦，做咁多都冇用嘅，老细唔锡你嘅嚟
 019 嗱嗱声即刻走去搵嘢做，人必须知道自己嘅用途
-020 人人都揸住枝苏格兰场非工业用国际线路自动溶雪16哇佬风油軚垂直升降镭射彩色洗衣干衣气垫毛笔一枝
\ No newline at end of file
+020 人人都揸住枝苏格兰场非工业用国际线路自动溶雪16哇佬风油軚垂直升降镭射彩色洗衣干衣气垫毛笔一枝
+021 各个国家有各个国家嘅国歌
\ No newline at end of file
diff --git a/paddlespeech/t2s/frontend/canton_frontend.py b/paddlespeech/t2s/frontend/canton_frontend.py
index f81526839..350e55935 100644
--- a/paddlespeech/t2s/frontend/canton_frontend.py
+++ b/paddlespeech/t2s/frontend/canton_frontend.py
@@ -21,8 +21,10 @@ import ToJyutping
 from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer
 
 INITIALS = [
-    'p', 'b', 't', 'd', 'ts', 'dz', 'k', 'g', 'kw', 'gw', 'f', 'h', 'l', 'm',
-    'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j'
+    'aa', 'aai', 'aak', 'aap', 'aat', 'aau', 'ai', 'au', 'ap', 'at', 'ak', 'a',
+    'p', 'b', 'e', 'ts', 't', 'dz', 'd', 'kw', 'k', 'gw', 'g', 'f', 'h', 'l',
+    'm', 'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j', 'ong', 'on', 'ou', 'oi', 'ok',
+    'o', 'uk', 'ung'
 ]
 INITIALS += ['sp', 'spl', 'spn', 'sil']