Merge branch 'develop' into patch-13

pull/3972/head
张春乔 8 months ago committed by GitHub
commit 058950dae8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -32,7 +32,7 @@ def get_baker_data(root_dir):
alignment_fp, includeEmptyIntervals=True)
# only with baker's annotation
utt_id = alignment.tierNameList[0].split(".")[0]
intervals = alignment.tierDict[alignment.tierNameList[0]].entryList
intervals = alignment.getTier(alignment.tierNameList[0]).entries
phones = []
for interval in intervals:
label = interval.label

@ -65,7 +65,7 @@ class TTSRequest(BaseModel):
speed: float = 1.0
volume: float = 1.0
sample_rate: int = 0
save_path: str = None
save_path: Optional[str] = None
#****************************************************************************************/

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from typing import Optional
from pydantic import BaseModel
@ -62,7 +63,7 @@ class TTSResult(BaseModel):
volume: float = 1.0
sample_rate: int
duration: float
save_path: str = None
save_path: Optional[str] = None
audio: str

@ -41,11 +41,11 @@ def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300):
ends = []
words = []
for interval in alignment.tierDict['words'].entryList:
for interval in alignment.getTier('words').entries:
word = interval.label
if word:
words.append(word)
for interval in alignment.tierDict['phones'].entryList:
for interval in alignment.getTier('phones').entries:
phone = interval.label
phones.append(phone)
ends.append(interval.end)

@ -51,6 +51,26 @@ def determine_opencc_version():
return "opencc" # default
def determine_scipy_version():
    """Return the scipy requirement string for the running interpreter.

    Returns:
        str: ``"scipy>=1.4.0, <=1.12.0"`` when running on Python 3.8,
        otherwise the unpinned ``"scipy"``.
    """
    # Compare the (major, minor) pair directly instead of formatting it into
    # a string first; the outcome is the same as the "3.8" string check.
    if sys.version_info[:2] == (3, 8):
        # Python 3.8 needs scipy>=1.4.0, <=1.12.0
        return "scipy>=1.4.0, <=1.12.0"
    return "scipy"  # default
def determine_matplotlib_version():
    """Return the matplotlib requirement string for the running interpreter.

    Returns:
        str: ``"matplotlib<=3.8.4"`` when running on Python 3.8 or 3.9,
        otherwise the unpinned ``"matplotlib"``.
    """
    # Membership test on the (major, minor) pair replaces the chained
    # string comparisons against "3.8" and "3.9"; the outcome is the same.
    if sys.version_info[:2] in ((3, 8), (3, 9)):
        # Python 3.8/3.9 need matplotlib<=3.8.4
        return "matplotlib<=3.8.4"
    return "matplotlib"  # default
base = [
"braceexpand",
"editdistance",
@ -63,9 +83,9 @@ base = [
# paddleaudio align with librosa==0.8.1, which need numpy==1.23.x
"numpy==1.23.5",
"librosa==0.8.1",
"scipy>=1.4.0, <=1.12.0",
determine_scipy_version(), # scipy or scipy>=1.4.0, <=1.12.0
"loguru",
"matplotlib<=3.8.4",
determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4
"nara_wpe",
"onnxruntime>=1.11.0",
determine_opencc_version(), # opencc or opencc==1.1.6
@ -76,9 +96,9 @@ base = [
"paddleslim>=2.3.4",
"ppdiffusers>=0.9.0",
"paddlespeech_feat",
"praatio>=5.0.0, <=5.1.1",
"praatio>=6.0.0",
"prettytable",
"pydantic>=1.10.14, <2.0",
"pydantic",
"pypinyin",
"pypinyin-dict",
"python-dateutil",
@ -92,7 +112,7 @@ base = [
"ToJyutping",
"typeguard",
"webrtcvad",
"yacs~=0.1.8",
"yacs>=0.1.8",
"zhon",
]

@ -14,16 +14,16 @@
import paddle
import torch
from paddle.device.cuda import synchronize
from parallel_wavegan import models as pwgan
from parallel_wavegan.layers import residual_block
from parallel_wavegan.layers import upsample
from parallel_wavegan.models import parallel_wavegan as pwgan
from timer import timer
from paddlespeech.t2s.models.parallel_wavegan import ConvInUpsampleNet
from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import ResidualBlock
from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator
from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock
from paddlespeech.t2s.utils.layer_tools import summary
paddle.set_device("gpu:0")
@ -79,8 +79,8 @@ def test_convin_upsample_net():
def test_residual_block():
net = ResidualBlock(dilation=9)
net2 = residual_block.ResidualBlock(dilation=9)
net = WaveNetResidualBlock(dilation=9)
net2 = residual_block.WaveNetResidualBlock(dilation=9)
summary(net)
summary(net2)
for k, v in net2.named_parameters():

@ -26,7 +26,7 @@ def readtg(tg_path, sample_rate=24000, n_shift=300):
alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True)
phones = []
ends = []
for interval in alignment.tierDict["phones"].entryList:
for interval in alignment.getTier("phones").entries:
phone = interval.label
phones.append(phone)
ends.append(interval.end)

Loading…
Cancel
Save