Merge branch 'develop' into patch-13

pull/3972/head
张春乔 8 months ago committed by GitHub
commit 058950dae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -32,7 +32,7 @@ def get_baker_data(root_dir):
alignment_fp, includeEmptyIntervals=True) alignment_fp, includeEmptyIntervals=True)
# only with baker's annotation # only with baker's annotation
utt_id = alignment.tierNameList[0].split(".")[0] utt_id = alignment.tierNameList[0].split(".")[0]
intervals = alignment.tierDict[alignment.tierNameList[0]].entryList intervals = alignment.getTier(alignment.tierNameList[0]).entries
phones = [] phones = []
for interval in intervals: for interval in intervals:
label = interval.label label = interval.label

@ -65,7 +65,7 @@ class TTSRequest(BaseModel):
speed: float = 1.0 speed: float = 1.0
volume: float = 1.0 volume: float = 1.0
sample_rate: int = 0 sample_rate: int = 0
save_path: str = None save_path: Optional[str] = None
#****************************************************************************************/ #****************************************************************************************/

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import List from typing import List
from typing import Optional
from pydantic import BaseModel from pydantic import BaseModel
@ -62,7 +63,7 @@ class TTSResult(BaseModel):
volume: float = 1.0 volume: float = 1.0
sample_rate: int sample_rate: int
duration: float duration: float
save_path: str = None save_path: Optional[str] = None
audio: str audio: str

@ -41,11 +41,11 @@ def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300):
ends = [] ends = []
words = [] words = []
for interval in alignment.tierDict['words'].entryList: for interval in alignment.getTier('words').entries:
word = interval.label word = interval.label
if word: if word:
words.append(word) words.append(word)
for interval in alignment.tierDict['phones'].entryList: for interval in alignment.getTier('phones').entries:
phone = interval.label phone = interval.label
phones.append(phone) phones.append(phone)
ends.append(interval.end) ends.append(interval.end)

@ -51,6 +51,26 @@ def determine_opencc_version():
return "opencc" # default return "opencc" # default
def determine_scipy_version():
    """Return the scipy requirement string for the running interpreter.

    Python 3.8 gets the bounded specifier ``scipy>=1.4.0, <=1.12.0``; every
    other interpreter version gets the unpinned ``scipy``.
    """
    major, minor = sys.version_info[:2]
    running_py38 = (major, minor) == (3, 8)
    # Only the 3.8 interpreter needs the version window; everything else
    # falls through to the default requirement.
    return "scipy>=1.4.0, <=1.12.0" if running_py38 else "scipy"
def determine_matplotlib_version():
    """Return the matplotlib requirement string for the running interpreter.

    Python 3.8 and 3.9 get the capped specifier ``matplotlib<=3.8.4``; every
    other interpreter version gets the unpinned ``matplotlib``.
    """
    capped_interpreters = ((3, 8), (3, 9))
    current = (sys.version_info.major, sys.version_info.minor)
    if current in capped_interpreters:
        return "matplotlib<=3.8.4"
    # Default: no upper bound on matplotlib.
    return "matplotlib"
base = [ base = [
"braceexpand", "braceexpand",
"editdistance", "editdistance",
@ -63,9 +83,9 @@ base = [
# paddleaudio align with librosa==0.8.1, which need numpy==1.23.x # paddleaudio align with librosa==0.8.1, which need numpy==1.23.x
"numpy==1.23.5", "numpy==1.23.5",
"librosa==0.8.1", "librosa==0.8.1",
"scipy>=1.4.0, <=1.12.0", determine_scipy_version(), # scipy or scipy>=1.4.0, <=1.12.0
"loguru", "loguru",
"matplotlib<=3.8.4", determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4
"nara_wpe", "nara_wpe",
"onnxruntime>=1.11.0", "onnxruntime>=1.11.0",
determine_opencc_version(), # opencc or opencc==1.1.6 determine_opencc_version(), # opencc or opencc==1.1.6
@ -76,9 +96,9 @@ base = [
"paddleslim>=2.3.4", "paddleslim>=2.3.4",
"ppdiffusers>=0.9.0", "ppdiffusers>=0.9.0",
"paddlespeech_feat", "paddlespeech_feat",
"praatio>=5.0.0, <=5.1.1", "praatio>=6.0.0",
"prettytable", "prettytable",
"pydantic>=1.10.14, <2.0", "pydantic",
"pypinyin", "pypinyin",
"pypinyin-dict", "pypinyin-dict",
"python-dateutil", "python-dateutil",
@ -92,7 +112,7 @@ base = [
"ToJyutping", "ToJyutping",
"typeguard", "typeguard",
"webrtcvad", "webrtcvad",
"yacs~=0.1.8", "yacs>=0.1.8",
"zhon", "zhon",
] ]

@ -14,16 +14,16 @@
import paddle import paddle
import torch import torch
from paddle.device.cuda import synchronize from paddle.device.cuda import synchronize
from parallel_wavegan import models as pwgan
from parallel_wavegan.layers import residual_block from parallel_wavegan.layers import residual_block
from parallel_wavegan.layers import upsample from parallel_wavegan.layers import upsample
from parallel_wavegan.models import parallel_wavegan as pwgan
from timer import timer from timer import timer
from paddlespeech.t2s.models.parallel_wavegan import ConvInUpsampleNet from paddlespeech.t2s.models.parallel_wavegan import ConvInUpsampleNet
from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import ResidualBlock
from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator
from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock
from paddlespeech.t2s.utils.layer_tools import summary from paddlespeech.t2s.utils.layer_tools import summary
paddle.set_device("gpu:0") paddle.set_device("gpu:0")
@ -79,8 +79,8 @@ def test_convin_upsample_net():
def test_residual_block(): def test_residual_block():
net = ResidualBlock(dilation=9) net = WaveNetResidualBlock(dilation=9)
net2 = residual_block.ResidualBlock(dilation=9) net2 = residual_block.WaveNetResidualBlock(dilation=9)
summary(net) summary(net)
summary(net2) summary(net2)
for k, v in net2.named_parameters(): for k, v in net2.named_parameters():

@ -26,7 +26,7 @@ def readtg(tg_path, sample_rate=24000, n_shift=300):
alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True) alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True)
phones = [] phones = []
ends = [] ends = []
for interval in alignment.tierDict["phones"].entryList: for interval in alignment.getTier("phones").entries:
phone = interval.label phone = interval.label
phones.append(phone) phones.append(phone)
ends.append(interval.end) ends.append(interval.end)

Loading…
Cancel
Save