From 586d6a11143de8890a52e0fa6efc4f57d585dfaa Mon Sep 17 00:00:00 2001 From: liangym Date: Tue, 21 Mar 2023 03:08:28 +0000 Subject: [PATCH] add pinyin_to_phone --- examples/opencpop/svs1/README.md | 6 +- examples/opencpop/svs1/README_cn.md | 4 +- .../opencpop/svs1/local/pinyin_to_phone.txt | 418 ++++++++++++++++++ .../opencpop/svs1/local/synthesize_e2e.sh | 4 +- examples/opencpop/svs1/run.sh | 8 +- paddlespeech/t2s/frontend/sing_frontend.py | 22 +- 6 files changed, 432 insertions(+), 30 deletions(-) create mode 100644 examples/opencpop/svs1/local/pinyin_to_phone.txt diff --git a/examples/opencpop/svs1/README.md b/examples/opencpop/svs1/README.md index 90f513949..1600d0c76 100644 --- a/examples/opencpop/svs1/README.md +++ b/examples/opencpop/svs1/README.md @@ -157,8 +157,8 @@ optional arguments: The min and max values of the mel spectrum, using on diffusion of diffsinger. ``` -`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file. -You need to download the pinyin-to-phone mapping file in advance: `wget https://paddlespeech.bj.bcebos.com/t2s/svs/opencpop/pinyin_to_phone.txt` +`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveforms from a text file. +`local/pinyin_to_phone.txt` is taken from the README of the Opencpop dataset; each line maps one pinyin syllable to its Opencpop phoneme sequence (e.g. `ba|b a`). 
```bash CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} @@ -273,4 +273,4 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \ --speech_stretchs=diffsinger_opencpop_ckpt_1.4.0/speech_stretchs.npy -``` \ No newline at end of file +``` diff --git a/examples/opencpop/svs1/README_cn.md b/examples/opencpop/svs1/README_cn.md index eda086019..1435b42ec 100644 --- a/examples/opencpop/svs1/README_cn.md +++ b/examples/opencpop/svs1/README_cn.md @@ -163,7 +163,7 @@ optional arguments: ``` `./local/synthesize_e2e.sh` 调用 `${BIN_DIR}/../synthesize_e2e.py`,即可从文本文件中合成波形。 -需要提前下载拼音映射音素的文件:`wget https://paddlespeech.bj.bcebos.com/t2s/svs/opencpop/pinyin_to_phone.txt` +`local/pinyin_to_phone.txt`来源于opencpop数据集中的README,表示opencpop中拼音到音素的映射。 ```bash CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} @@ -277,4 +277,4 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \ --speech_stretchs=diffsinger_opencpop_ckpt_1.4.0/speech_stretchs.npy -``` \ No newline at end of file +``` diff --git a/examples/opencpop/svs1/local/pinyin_to_phone.txt b/examples/opencpop/svs1/local/pinyin_to_phone.txt new file mode 100644 index 000000000..34ed079d7 --- /dev/null +++ b/examples/opencpop/svs1/local/pinyin_to_phone.txt @@ -0,0 +1,418 @@ +a|a +ai|ai +an|an +ang|ang +ao|ao +ba|b a +bai|b ai +ban|b an +bang|b ang +bao|b ao +bei|b ei +ben|b en +beng|b eng +bi|b i +bian|b ian +biao|b iao +bie|b ie +bin|b in +bing|b ing +bo|b o +bu|b u +ca|c a +cai|c ai +can|c an +cang|c ang +cao|c ao +ce|c e +cei|c ei +cen|c en +ceng|c eng +cha|ch a +chai|ch ai +chan|ch an +chang|ch ang +chao|ch ao +che|ch e +chen|ch en +cheng|ch eng +chi|ch i +chong|ch ong +chou|ch ou +chu|ch u +chua|ch ua +chuai|ch uai +chuan|ch uan +chuang|ch uang +chui|ch ui +chun|ch un +chuo|ch uo +ci|c i +cong|c ong +cou|c ou +cu|c u 
+cuan|c uan +cui|c ui +cun|c un +cuo|c uo +da|d a +dai|d ai +dan|d an +dang|d ang +dao|d ao +de|d e +dei|d ei +den|d en +deng|d eng +di|d i +dia|d ia +dian|d ian +diao|d iao +die|d ie +ding|d ing +diu|d iu +dong|d ong +dou|d ou +du|d u +duan|d uan +dui|d ui +dun|d un +duo|d uo +e|e +ei|ei +en|en +eng|eng +er|er +fa|f a +fan|f an +fang|f ang +fei|f ei +fen|f en +feng|f eng +fo|f o +fou|f ou +fu|f u +ga|g a +gai|g ai +gan|g an +gang|g ang +gao|g ao +ge|g e +gei|g ei +gen|g en +geng|g eng +gong|g ong +gou|g ou +gu|g u +gua|g ua +guai|g uai +guan|g uan +guang|g uang +gui|g ui +gun|g un +guo|g uo +ha|h a +hai|h ai +han|h an +hang|h ang +hao|h ao +he|h e +hei|h ei +hen|h en +heng|h eng +hm|h m +hng|h ng +hong|h ong +hou|h ou +hu|h u +hua|h ua +huai|h uai +huan|h uan +huang|h uang +hui|h ui +hun|h un +huo|h uo +ji|j i +jia|j ia +jian|j ian +jiang|j iang +jiao|j iao +jie|j ie +jin|j in +jing|j ing +jiong|j iong +jiu|j iu +ju|j v +juan|j van +jue|j ve +jun|j vn +ka|k a +kai|k ai +kan|k an +kang|k ang +kao|k ao +ke|k e +kei|k ei +ken|k en +keng|k eng +kong|k ong +kou|k ou +ku|k u +kua|k ua +kuai|k uai +kuan|k uan +kuang|k uang +kui|k ui +kun|k un +kuo|k uo +la|l a +lai|l ai +lan|l an +lang|l ang +lao|l ao +le|l e +lei|l ei +leng|l eng +li|l i +lia|l ia +lian|l ian +liang|l iang +liao|l iao +lie|l ie +lin|l in +ling|l ing +liu|l iu +lo|l o +long|l ong +lou|l ou +lu|l u +luan|l uan +lun|l un +luo|l uo +lv|l v +lve|l ve +m|m +ma|m a +mai|m ai +man|m an +mang|m ang +mao|m ao +me|m e +mei|m ei +men|m en +meng|m eng +mi|m i +mian|m ian +miao|m iao +mie|m ie +min|m in +ming|m ing +miu|m iu +mo|m o +mou|m ou +mu|m u +n|n +na|n a +nai|n ai +nan|n an +nang|n ang +nao|n ao +ne|n e +nei|n ei +nen|n en +neng|n eng +ng|n g +ni|n i +nian|n ian +niang|n iang +niao|n iao +nie|n ie +nin|n in +ning|n ing +niu|n iu +nong|n ong +nou|n ou +nu|n u +nuan|n uan +nun|n un +nuo|n uo +nv|n v +nve|n ve +o|o +ou|ou +pa|p a +pai|p ai +pan|p an +pang|p ang +pao|p ao +pei|p ei +pen|p en +peng|p eng +pi|p i 
+pian|p ian +piao|p iao +pie|p ie +pin|p in +ping|p ing +po|p o +pou|p ou +pu|p u +qi|q i +qia|q ia +qian|q ian +qiang|q iang +qiao|q iao +qie|q ie +qin|q in +qing|q ing +qiong|q iong +qiu|q iu +qu|q v +quan|q van +que|q ve +qun|q vn +ran|r an +rang|r ang +rao|r ao +re|r e +ren|r en +reng|r eng +ri|r i +rong|r ong +rou|r ou +ru|r u +rua|r ua +ruan|r uan +rui|r ui +run|r un +ruo|r uo +sa|s a +sai|s ai +san|s an +sang|s ang +sao|s ao +se|s e +sen|s en +seng|s eng +sha|sh a +shai|sh ai +shan|sh an +shang|sh ang +shao|sh ao +she|sh e +shei|sh ei +shen|sh en +sheng|sh eng +shi|sh i +shou|sh ou +shu|sh u +shua|sh ua +shuai|sh uai +shuan|sh uan +shuang|sh uang +shui|sh ui +shun|sh un +shuo|sh uo +si|s i +song|s ong +sou|s ou +su|s u +suan|s uan +sui|s ui +sun|s un +suo|s uo +ta|t a +tai|t ai +tan|t an +tang|t ang +tao|t ao +te|t e +tei|t ei +teng|t eng +ti|t i +tian|t ian +tiao|t iao +tie|t ie +ting|t ing +tong|t ong +tou|t ou +tu|t u +tuan|t uan +tui|t ui +tun|t un +tuo|t uo +wa|w a +wai|w ai +wan|w an +wang|w ang +wei|w ei +wen|w en +weng|w eng +wo|w o +wu|w u +xi|x i +xia|x ia +xian|x ian +xiang|x iang +xiao|x iao +xie|x ie +xin|x in +xing|x ing +xiong|x iong +xiu|x iu +xu|x v +xuan|x van +xue|x ve +xun|x vn +ya|y a +yan|y an +yang|y ang +yao|y ao +ye|y e +yi|y i +yin|y in +ying|y ing +yo|y o +yong|y ong +you|y ou +yu|y v +yuan|y van +yue|y ve +yun|y vn +za|z a +zai|z ai +zan|z an +zang|z ang +zao|z ao +ze|z e +zei|z ei +zen|z en +zeng|z eng +zha|zh a +zhai|zh ai +zhan|zh an +zhang|zh ang +zhao|zh ao +zhe|zh e +zhei|zh ei +zhen|zh en +zheng|zh eng +zhi|zh i +zhong|zh ong +zhou|zh ou +zhu|zh u +zhua|zh ua +zhuai|zh uai +zhuan|zh uan +zhuang|zh uang +zhui|zh ui +zhun|zh un +zhuo|zh uo +zi|z i +zong|z ong +zou|z ou +zu|z u +zuan|z uan +zui|z ui +zun|z un +zuo|z uo \ No newline at end of file diff --git a/examples/opencpop/svs1/local/synthesize_e2e.sh b/examples/opencpop/svs1/local/synthesize_e2e.sh index 179b1b061..b3dc29b11 100755 --- 
a/examples/opencpop/svs1/local/synthesize_e2e.sh +++ b/examples/opencpop/svs1/local/synthesize_e2e.sh @@ -25,7 +25,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speech_stretchs=dump/train/speech_stretchs.npy \ - --pinyin_phone=./pinyin_to_phone.txt + --pinyin_phone=local/pinyin_to_phone.txt fi # for more GAN Vocoders @@ -48,6 +48,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speech_stretchs=dump/train/speech_stretchs.npy \ - --pinyin_phone=./pinyin_to_phone.txt + --pinyin_phone=local/pinyin_to_phone.txt fi diff --git a/examples/opencpop/svs1/run.sh b/examples/opencpop/svs1/run.sh index d3dea7f5c..bfe5b6594 100755 --- a/examples/opencpop/svs1/run.sh +++ b/examples/opencpop/svs1/run.sh @@ -3,13 +3,13 @@ set -e source path.sh -gpus=4 -stage=3 -stop_stage=3 +gpus=0 +stage=0 +stop_stage=100 conf_path=conf/default.yaml train_output_path=exp/default -ckpt_name=snapshot_iter_160000.pdz +ckpt_name=snapshot_iter_320000.pdz # with the following command, you can choose the stage range you want to run # such as `./run.sh --stage 0 --stop-stage 0` diff --git a/paddlespeech/t2s/frontend/sing_frontend.py b/paddlespeech/t2s/frontend/sing_frontend.py index 115ebf0a1..c2aecf273 100644 --- a/paddlespeech/t2s/frontend/sing_frontend.py +++ b/paddlespeech/t2s/frontend/sing_frontend.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,23 +18,7 @@ from typing import List import librosa import numpy as np import paddle -import ToJyutping from pypinyin import lazy_pinyin -from pypinyin import pinyin -from pypinyin import Style - -from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer - -INITIALS = [ - 'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', - 'j', 'q', 'x', 'r', 'z', 'c', 's', 'y', 'w' -] -FINALS = [ - 'a', 'ai', 'an', 'ang', 'ao', 'e', 'ei', 'en', 'eng', 'er', 'i', 'ia', - 'ian', 'iang', 'iao', 'ie', 'in', 'ing', 'iong', 'iu', 'ng', 'o', 'ong', - 'ou', 'u', 'ua', 'uai', 'uan', 'uang', 'ui', 'un', 'uo', 'v', 'van', 've', - 'vn' -] class SingFrontend(): @@ -64,14 +48,14 @@ class SingFrontend(): self.vocab_phones[phn] = int(id) def get_phones(self, sentence: str) -> List[int]: - """_summary_ + """get phone list Args: sentence (str): sentence Returns: List[int]: phones list - + Example: sentence = "你好" phones = ['n i', 'h ao']