From dad1cbbcd6cfc8d2530de48cdff3b325b6d2de8c Mon Sep 17 00:00:00 2001
From: TianYuan
Date: Fri, 26 Nov 2021 09:12:29 +0000
Subject: [PATCH 1/2] update text frontend

---
 demos/style_fs2/style_syn.py                  |  4 ++-
 examples/ljspeech/voc1/README.md              |  2 +-
 .../t2s/exps/fastspeech2/inference.py         |  4 ++-
 .../fastspeech2/multi_spk_synthesize_e2e.py   |  4 ++-
 .../t2s/exps/fastspeech2/synthesize_e2e.py    |  4 ++-
 .../exps/fastspeech2/synthesize_e2e_melgan.py |  4 ++-
 .../t2s/exps/speedyspeech/inference.py        |  4 ++-
 .../t2s/exps/speedyspeech/synthesize_e2e.py   |  4 ++-
 paddlespeech/t2s/frontend/zh_frontend.py      |  7 ++++-
 .../frontend/zh_normalization/chronology.py   | 26 +++++++++++++++++++
 .../frontend/zh_normalization/phonecode.py    |  7 +++--
 .../zh_normalization/text_normlization.py     | 10 +++++++
 .../t2s/models/fastspeech2/fastspeech2.py     |  2 +-
 13 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/demos/style_fs2/style_syn.py b/demos/style_fs2/style_syn.py
index 5b8ce3513..9bd615790 100644
--- a/demos/style_fs2/style_syn.py
+++ b/demos/style_fs2/style_syn.py
@@ -34,7 +34,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
     sentences = []
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/examples/ljspeech/voc1/README.md b/examples/ljspeech/voc1/README.md
index 13cc6ed7e..3830156f9 100644
--- a/examples/ljspeech/voc1/README.md
+++ b/examples/ljspeech/voc1/README.md
@@ -137,4 +137,4 @@ pwg_ljspeech_ckpt_0.5
 └── pwg_stats.npy                  # statistics used to normalize spectrogram when training parallel wavegan
 ```
 ## Acknowledgement
-We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.
\ No newline at end of file
+We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.
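The parsing change above is repeated in every synthesis entry script touched by this patch set: the old "utt_id, sentence = line.strip().split()" raised a ValueError as soon as the sentence itself contained whitespace, while the new code keeps the first field as the utterance id and rejoins the rest. A minimal standalone sketch of the new logic (not part of the patch; the sample line is made up):

# sketch of the patched metadata parsing
def parse_line(line):
    items = line.strip().split()
    utt_id = items[0]
    # patch 1 rejoins the remaining fields with a Chinese comma;
    # patch 2 below later switches this to "" for pure-Chinese text
    sentence = ",".join(items[1:])
    return utt_id, sentence

print(parse_line("001 你好 欢迎使用语音合成"))
# ('001', '你好,欢迎使用语音合成')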
diff --git a/paddlespeech/t2s/exps/fastspeech2/inference.py b/paddlespeech/t2s/exps/fastspeech2/inference.py
index 07e9ed7ee..8ea64b993 100644
--- a/paddlespeech/t2s/exps/fastspeech2/inference.py
+++ b/paddlespeech/t2s/exps/fastspeech2/inference.py
@@ -82,7 +82,9 @@ def main():
 
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     for utt_id, sentence in sentences:
diff --git a/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
index 1839415e9..a2f8ada69 100644
--- a/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
@@ -37,7 +37,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
     sentences = []
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
index ff9a41eab..aac2c054e 100644
--- a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
@@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
     sentences = []
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
index f0ff5655d..527e5d410 100644
--- a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
+++ b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
@@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, melgan_config):
     sentences = []
    with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/speedyspeech/inference.py b/paddlespeech/t2s/exps/speedyspeech/inference.py
index 617848c58..75f937dec 100644
--- a/paddlespeech/t2s/exps/speedyspeech/inference.py
+++ b/paddlespeech/t2s/exps/speedyspeech/inference.py
@@ -87,7 +87,9 @@ def main():
 
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     for utt_id, sentence in sentences:
diff --git a/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py b/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py
index 0e64088dc..b04189405 100644
--- a/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py
@@ -40,7 +40,9 @@ def evaluate(args, speedyspeech_config, pwg_config):
     sentences = []
     with open(args.text, 'rt') as f:
         for line in f:
-            utt_id, sentence = line.strip().split()
+            items = line.strip().split()
+            utt_id = items[0]
+            sentence = ",".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
index d49c09378..5b69477da 100644
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -149,9 +149,14 @@ class Frontend():
         if word not in self.must_erhua and (word in self.not_erhua or
                                             pos in {"a", "j", "nr"}):
             return initials, finals
+        # "……" 等情况直接返回
+        if len(finals) != len(word):
+            return initials, finals
+
+        assert len(finals) == len(word)
+
         new_initials = []
         new_finals = []
-        assert len(finals) == len(word)
         for i, phn in enumerate(finals):
             if i == len(finals) - 1 and word[i] == "儿" and phn in {
                     "er2", "er5"
diff --git a/paddlespeech/t2s/frontend/zh_normalization/chronology.py b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
index b8d711564..8801baa0d 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/chronology.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
@@ -32,6 +32,15 @@ RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                      r':([0-5][0-9])'
                      r'(:([0-5][0-9]))?')
+# 时间范围,如8:30-12:30
+RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
+                           r':([0-5][0-9])'
+                           r'(:([0-5][0-9]))?'
+                           r'(~|-)'
+                           r'([0-1]?[0-9]|2[0-3])'
+                           r':([0-5][0-9])'
+                           r'(:([0-5][0-9]))?')
+
 
 
 def replace_time(match) -> str:
     """
@@ -42,15 +51,32 @@ def replace_time(match) -> str:
     ----------
     str
     """
+
+    is_range = len(match.groups()) > 5
+
     hour = match.group(1)
     minute = match.group(2)
     second = match.group(4)
 
+    if is_range:
+        hour_2 = match.group(6)
+        minute_2 = match.group(7)
+        second_2 = match.group(9)
+
     result = f"{num2str(hour)}点"
     if minute.lstrip('0'):
         result += f"{_time_num2str(minute)}分"
     if second and second.lstrip('0'):
         result += f"{_time_num2str(second)}秒"
+
+    if is_range:
+        result += "至"
+        result += f"{num2str(hour_2)}点"
+        if minute_2.lstrip('0'):
+            result += f"{_time_num2str(minute_2)}分"
+        if second_2 and second_2.lstrip('0'):
+            result += f"{_time_num2str(second_2)}秒"
+
     return result
 
 
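A standalone sketch of how the RE_TIME_RANGE pattern and the extended replace_time cooperate (not part of the patch). The regex is copied from the hunk above; the repo's num2str/_time_num2str helpers are replaced by a naive digit-by-digit reader and seconds are skipped, so the printed reading is only indicative (the real helpers read 30 as 三十 and 12 as 十二):

import re

# same pattern as the RE_TIME_RANGE added above
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')

DIGITS = dict(zip("0123456789", "零一二三四五六七八九"))

def toy_num(s):
    # naive digit-by-digit reading, enough to show which groups are used
    return ''.join(DIGITS[c] for c in (s.lstrip('0') or '0'))

def toy_replace_time(match):
    # the range pattern has 9 capture groups, so len(match.groups()) > 5 is how
    # replace_time tells a RE_TIME_RANGE match apart from a plain RE_TIME match
    is_range = len(match.groups()) > 5
    result = toy_num(match.group(1)) + "点"
    if match.group(2).lstrip('0'):
        result += toy_num(match.group(2)) + "分"
    if is_range:
        result += "至" + toy_num(match.group(6)) + "点"
        if match.group(7).lstrip('0'):
            result += toy_num(match.group(7)) + "分"
    return result

print(RE_TIME_RANGE.sub(toy_replace_time, "营业时间8:30-12:30"))
# 营业时间八点三零分至一二点三零分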
diff --git a/paddlespeech/t2s/frontend/zh_normalization/phonecode.py b/paddlespeech/t2s/frontend/zh_normalization/phonecode.py
index be159c239..b7b69b41b 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/phonecode.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/phonecode.py
@@ -26,16 +26,19 @@ RE_MOBILE_PHONE = re.compile(
 RE_TELEPHONE = re.compile(
     r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")
 
+# 全国统一的号码400开头
+RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"400(-)?\d{3}(-)?\d{4}")
+
 
 def phone2str(phone_string: str, mobile=True) -> str:
     if mobile:
         sp_parts = phone_string.strip('+').split()
-        result = ''.join(
+        result = ','.join(
             [verbalize_digit(part, alt_one=True) for part in sp_parts])
         return result
     else:
         sil_parts = phone_string.split('-')
-        result = ''.join(
+        result = ','.join(
             [verbalize_digit(part, alt_one=True) for part in sil_parts])
         return result
diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
index e25e99019..c3885fb9b 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
@@ -18,6 +18,7 @@ from .char_convert import tranditional_to_simplified
 from .chronology import RE_DATE
 from .chronology import RE_DATE2
 from .chronology import RE_TIME
+from .chronology import RE_TIME_RANGE
 from .chronology import replace_date
 from .chronology import replace_date2
 from .chronology import replace_time
@@ -40,6 +41,7 @@ from .num import replace_percentage
 from .num import replace_positive_quantifier
 from .num import replace_range
 from .phonecode import RE_MOBILE_PHONE
+from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
 from .phonecode import RE_TELEPHONE
 from .phonecode import replace_mobile
 from .phonecode import replace_phone
@@ -76,12 +78,19 @@ class TextNormalizer():
 
         # number related NSW verbalization
         sentence = RE_DATE.sub(replace_date, sentence)
         sentence = RE_DATE2.sub(replace_date2, sentence)
+
+        # range first
+        sentence = RE_TIME_RANGE.sub(replace_time, sentence)
         sentence = RE_TIME.sub(replace_time, sentence)
+
         sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
         sentence = RE_FRAC.sub(replace_frac, sentence)
         sentence = RE_PERCENTAGE.sub(replace_percentage, sentence)
         sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence)
+
+        sentence = RE_TELEPHONE.sub(replace_phone, sentence)
+        sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
+
         sentence = RE_RANGE.sub(replace_range, sentence)
         sentence = RE_INTEGER.sub(replace_negative_num, sentence)
         sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
@@ -94,5 +103,6 @@ class TextNormalizer():
 
     def normalize(self, text: str) -> List[str]:
         sentences = self._split(text)
+
         sentences = [self.normalize_sentence(sent) for sent in sentences]
         return sentences
diff --git a/paddlespeech/t2s/models/fastspeech2/fastspeech2.py b/paddlespeech/t2s/models/fastspeech2/fastspeech2.py
index aa42a83de..cdec03abc 100644
--- a/paddlespeech/t2s/models/fastspeech2/fastspeech2.py
+++ b/paddlespeech/t2s/models/fastspeech2/fastspeech2.py
@@ -307,7 +307,7 @@ class FastSpeech2(nn.Layer):
             num_embeddings=idim,
             embedding_dim=adim,
             padding_idx=self.padding_idx)
-        
+
         if encoder_type == "transformer":
             print("encoder_type is transformer")
             self.encoder = TransformerEncoder(
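Taken together, the text_normlization.py changes in patch 1 mean a sentence with a 400 hotline and an opening-hours range now goes through RE_NATIONAL_UNIFORM_NUMBER and RE_TIME_RANGE before the generic time and number rules. A hedged usage sketch: the import path and the normalize() signature come from the hunks above, but the result shown in the comment is only an expectation, not a captured output, and the input sentence is made up:

from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer

tn = TextNormalizer()
# the range is rewritten before RE_TIME runs, so "-" becomes "至" instead of the
# two times being handled separately; the 400 number is read digit by digit with
# "," pauses inserted by the patched phone2str
print(tn.normalize("服务热线400-666-8800,营业时间8:30-12:30。"))
# expected to be close to:
# ['服务热线四零零,六六六,八八零零,营业时间八点三十分至十二点三十分。']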
From a861e56e91b42b65eaab2781ba615efd4f95ecc3 Mon Sep 17 00:00:00 2001
From: TianYuan
Date: Fri, 26 Nov 2021 11:04:29 +0000
Subject: [PATCH 2/2] rm space for pure Chinese

---
 demos/style_fs2/style_syn.py                                    | 2 +-
 paddlespeech/t2s/exps/fastspeech2/inference.py                  | 2 +-
 paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py   | 2 +-
 paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py             | 2 +-
 paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py      | 2 +-
 paddlespeech/t2s/exps/speedyspeech/inference.py                 | 2 +-
 paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py            | 2 +-
 paddlespeech/t2s/frontend/zh_frontend.py                        | 2 ++
 paddlespeech/t2s/frontend/zh_normalization/text_normlization.py | 2 ++
 9 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/demos/style_fs2/style_syn.py b/demos/style_fs2/style_syn.py
index 9bd615790..0ed87e7cb 100644
--- a/demos/style_fs2/style_syn.py
+++ b/demos/style_fs2/style_syn.py
@@ -36,7 +36,7 @@ def evaluate(args, fastspeech2_config, pwg_config):
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/fastspeech2/inference.py b/paddlespeech/t2s/exps/fastspeech2/inference.py
index 8ea64b993..1d6ea667a 100644
--- a/paddlespeech/t2s/exps/fastspeech2/inference.py
+++ b/paddlespeech/t2s/exps/fastspeech2/inference.py
@@ -84,7 +84,7 @@ def main():
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     for utt_id, sentence in sentences:
diff --git a/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
index a2f8ada69..9dc3ab4b6 100644
--- a/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
@@ -39,7 +39,7 @@ def evaluate(args, fastspeech2_config, pwg_config):
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
index aac2c054e..47c8a5e7a 100644
--- a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e.py
@@ -42,7 +42,7 @@ def evaluate(args, fastspeech2_config, pwg_config):
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
index 527e5d410..4d5d1ac41 100644
--- a/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
+++ b/paddlespeech/t2s/exps/fastspeech2/synthesize_e2e_melgan.py
@@ -42,7 +42,7 @@ def evaluate(args, fastspeech2_config, melgan_config):
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     with open(args.phones_dict, "r") as f:
diff --git a/paddlespeech/t2s/exps/speedyspeech/inference.py b/paddlespeech/t2s/exps/speedyspeech/inference.py
index 75f937dec..0ed2e0bf1 100644
--- a/paddlespeech/t2s/exps/speedyspeech/inference.py
+++ b/paddlespeech/t2s/exps/speedyspeech/inference.py
@@ -89,7 +89,7 @@ def main():
         for line in f:
             items = line.strip().split()
             utt_id = items[0]
-            sentence = ",".join(items[1:])
+            sentence = "".join(items[1:])
             sentences.append((utt_id, sentence))
 
     for utt_id, sentence in sentences:
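The second patch revisits the join introduced in patch 1: for pure-Chinese metadata the whitespace between fields is not meaningful, and the inserted "," would otherwise surface as an unwanted pause in the synthesized audio. A standalone before/after sketch with a made-up line:

line = "002 好雨知时节 当春乃发生"
items = line.strip().split()

sentence_patch1 = ",".join(items[1:])   # '好雨知时节,当春乃发生' -> an extra pause at the space
sentence_patch2 = "".join(items[1:])    # '好雨知时节当春乃发生'  -> the space is simply dropped
print(sentence_patch1)
print(sentence_patch2)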
"".join(items[1:]) sentences.append((utt_id, sentence)) with open(args.phones_dict, "r") as f: diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index 5b69477da..84852b9ce 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -129,6 +129,8 @@ class Frontend(): # we discriminate i, ii and iii if c and c not in self.punc: phones.append(c) + if c and c in self.punc: + phones.append('sp') if v and v not in self.punc: phones.append(v) # add sp between sentence (replace the last punc with sp) diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py index c3885fb9b..c68caeeb7 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py +++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py @@ -64,6 +64,8 @@ class TextNormalizer(): List[str] Sentences. """ + # Only for pure Chinese here + text = text.replace(" ", "") text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) text = text.strip() sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]