update text frontend

pull/1040/head
TianYuan 3 years ago committed by root
parent b6ade97b32
commit dad1cbbcd6

@ -34,7 +34,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
sentences = []
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
with open(args.phones_dict, "r") as f:

@ -137,4 +137,4 @@ pwg_ljspeech_ckpt_0.5
└── pwg_stats.npy # statistics used to normalize spectrogram when training parallel wavegan
```
## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.

@ -82,7 +82,9 @@ def main():
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
for utt_id, sentence in sentences:

@ -37,7 +37,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
sentences = []
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
with open(args.phones_dict, "r") as f:

@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, pwg_config):
sentences = []
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
with open(args.phones_dict, "r") as f:

@ -40,7 +40,9 @@ def evaluate(args, fastspeech2_config, melgan_config):
sentences = []
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
with open(args.phones_dict, "r") as f:

@ -87,7 +87,9 @@ def main():
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
for utt_id, sentence in sentences:

@ -40,7 +40,9 @@ def evaluate(args, speedyspeech_config, pwg_config):
sentences = []
with open(args.text, 'rt') as f:
for line in f:
utt_id, sentence = line.strip().split()
items = line.strip().split()
utt_id = items[0]
sentence = ",".join(items[1:])
sentences.append((utt_id, sentence))
with open(args.phones_dict, "r") as f:

@ -149,9 +149,14 @@ class Frontend():
if word not in self.must_erhua and (word in self.not_erhua or
pos in {"a", "j", "nr"}):
return initials, finals
# Return as-is for inputs such as "……", where the number of finals does not match the word length
if len(finals) != len(word):
return initials, finals
assert len(finals) == len(word)
new_initials = []
new_finals = []
assert len(finals) == len(word)
for i, phn in enumerate(finals):
if i == len(finals) - 1 and word[i] == "" and phn in {
"er2", "er5"

@ -32,6 +32,15 @@ RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
r':([0-5][0-9])'
r'(:([0-5][0-9]))?')
# Time range, e.g. 8:30-12:30
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
r':([0-5][0-9])'
r'(:([0-5][0-9]))?'
r'(~|-)'
r'([0-1]?[0-9]|2[0-3])'
r':([0-5][0-9])'
r'(:([0-5][0-9]))?')
def replace_time(match) -> str:
"""
@ -42,15 +51,32 @@ def replace_time(match) -> str:
----------
str
"""
is_range = len(match.groups()) > 5
hour = match.group(1)
minute = match.group(2)
second = match.group(4)
if is_range:
hour_2 = match.group(6)
minute_2 = match.group(7)
second_2 = match.group(9)
result = f"{num2str(hour)}"
if minute.lstrip('0'):
result += f"{_time_num2str(minute)}"
if second and second.lstrip('0'):
result += f"{_time_num2str(second)}"
if is_range:
result += ""
result += f"{num2str(hour_2)}"
if minute_2.lstrip('0'):
result += f"{_time_num2str(minute_2)}"
if second_2 and second_2.lstrip('0'):
result += f"{_time_num2str(second_2)}"
return result

@ -26,16 +26,19 @@ RE_MOBILE_PHONE = re.compile(
RE_TELEPHONE = re.compile(
r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{7,8})(?!\d)")
# Nationwide unified service numbers, which start with 400
RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")
def phone2str(phone_string: str, mobile=True) -> str:
if mobile:
sp_parts = phone_string.strip('+').split()
result = ''.join(
result = ''.join(
[verbalize_digit(part, alt_one=True) for part in sp_parts])
return result
else:
sil_parts = phone_string.split('-')
result = ''.join(
result = ''.join(
[verbalize_digit(part, alt_one=True) for part in sil_parts])
return result

@ -18,6 +18,7 @@ from .char_convert import tranditional_to_simplified
from .chronology import RE_DATE
from .chronology import RE_DATE2
from .chronology import RE_TIME
from .chronology import RE_TIME_RANGE
from .chronology import replace_date
from .chronology import replace_date2
from .chronology import replace_time
@ -40,6 +41,7 @@ from .num import replace_percentage
from .num import replace_positive_quantifier
from .num import replace_range
from .phonecode import RE_MOBILE_PHONE
from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
from .phonecode import RE_TELEPHONE
from .phonecode import replace_mobile
from .phonecode import replace_phone
@ -76,12 +78,19 @@ class TextNormalizer():
# number related NSW verbalization
sentence = RE_DATE.sub(replace_date, sentence)
sentence = RE_DATE2.sub(replace_date2, sentence)
# range first
sentence = RE_TIME_RANGE.sub(replace_time, sentence)
sentence = RE_TIME.sub(replace_time, sentence)
sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
sentence = RE_FRAC.sub(replace_frac, sentence)
sentence = RE_PERCENTAGE.sub(replace_percentage, sentence)
sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence)
sentence = RE_TELEPHONE.sub(replace_phone, sentence)
sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)
sentence = RE_RANGE.sub(replace_range, sentence)
sentence = RE_INTEGER.sub(replace_negative_num, sentence)
sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
@ -94,5 +103,6 @@ class TextNormalizer():
def normalize(self, text: str) -> List[str]:
sentences = self._split(text)
sentences = [self.normalize_sentence(sent) for sent in sentences]
return sentences

@ -307,7 +307,7 @@ class FastSpeech2(nn.Layer):
num_embeddings=idim,
embedding_dim=adim,
padding_idx=self.padding_idx)
if encoder_type == "transformer":
print("encoder_type is transformer")
self.encoder = TransformerEncoder(

Loading…
Cancel
Save