diff --git a/examples/other/g2p/get_g2p_data.py b/examples/other/g2p/get_g2p_data.py index 8fa3e53cd..87e7b9fdc 100644 --- a/examples/other/g2p/get_g2p_data.py +++ b/examples/other/g2p/get_g2p_data.py @@ -32,7 +32,7 @@ def get_baker_data(root_dir): alignment_fp, includeEmptyIntervals=True) # only with baker's annotation utt_id = alignment.tierNameList[0].split(".")[0] - intervals = alignment.tierDict[alignment.tierNameList[0]].entryList + intervals = alignment.getTier(alignment.tierNameList[0]).entries phones = [] for interval in intervals: label = interval.label diff --git a/paddlespeech/server/restful/request.py b/paddlespeech/server/restful/request.py index b7a32481f..068694de3 100644 --- a/paddlespeech/server/restful/request.py +++ b/paddlespeech/server/restful/request.py @@ -65,7 +65,7 @@ class TTSRequest(BaseModel): speed: float = 1.0 volume: float = 1.0 sample_rate: int = 0 - save_path: str = None + save_path: Optional[str] = None #****************************************************************************************/ diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index 3d991de43..12b264c02 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
def determine_scipy_version(version_info=None):
    """Return the pip requirement string to use for scipy.

    Args:
        version_info: Optional ``(major, minor, ...)`` sequence describing the
            target interpreter. Defaults to ``sys.version_info`` of the
            running interpreter, which is what a plain ``setup.py`` run needs.

    Returns:
        ``"scipy>=1.4.0, <=1.12.0"`` on Python 3.8, otherwise the
        unconstrained ``"scipy"``.
    """
    if version_info is None:
        version_info = sys.version_info

    # Compare (major, minor) as a tuple instead of a formatted string.
    if tuple(version_info[:2]) == (3, 8):
        return "scipy>=1.4.0, <=1.12.0"  # Python 3.8 needs the capped range
    return "scipy"  # default


def determine_matplotlib_version(version_info=None):
    """Return the pip requirement string to use for matplotlib.

    Args:
        version_info: Optional ``(major, minor, ...)`` sequence describing the
            target interpreter. Defaults to ``sys.version_info`` of the
            running interpreter.

    Returns:
        ``"matplotlib<=3.8.4"`` on Python 3.8 and 3.9, otherwise the
        unconstrained ``"matplotlib"``.
    """
    if version_info is None:
        version_info = sys.version_info

    # Python 3.8 and 3.9 both need the capped matplotlib release.
    if tuple(version_info[:2]) in ((3, 8), (3, 9)):
        return "matplotlib<=3.8.4"
    return "matplotlib"  # default
- "matplotlib<=3.8.4", + determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4 "nara_wpe", "onnxruntime>=1.11.0", determine_opencc_version(), # opencc or opencc==1.1.6 @@ -76,9 +96,9 @@ base = [ "paddleslim>=2.3.4", "ppdiffusers>=0.9.0", "paddlespeech_feat", - "praatio>=5.0.0, <=5.1.1", + "praatio>=6.0.0", "prettytable", - "pydantic>=1.10.14, <2.0", + "pydantic", "pypinyin", "pypinyin-dict", "python-dateutil", @@ -92,7 +112,7 @@ base = [ "ToJyutping", "typeguard", "webrtcvad", - "yacs~=0.1.8", + "yacs>=0.1.8", "zhon", ] diff --git a/tests/unit/tts/test_pwg.py b/tests/unit/tts/test_pwg.py index 10c82c9fd..bcdb5aafc 100644 --- a/tests/unit/tts/test_pwg.py +++ b/tests/unit/tts/test_pwg.py @@ -14,16 +14,16 @@ import paddle import torch from paddle.device.cuda import synchronize +from parallel_wavegan import models as pwgan from parallel_wavegan.layers import residual_block from parallel_wavegan.layers import upsample -from parallel_wavegan.models import parallel_wavegan as pwgan from timer import timer from paddlespeech.t2s.models.parallel_wavegan import ConvInUpsampleNet from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator -from paddlespeech.t2s.models.parallel_wavegan import ResidualBlock from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator +from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock from paddlespeech.t2s.utils.layer_tools import summary paddle.set_device("gpu:0") @@ -79,8 +79,8 @@ def test_convin_upsample_net(): def test_residual_block(): - net = ResidualBlock(dilation=9) - net2 = residual_block.ResidualBlock(dilation=9) + net = WaveNetResidualBlock(dilation=9) + net2 = residual_block.WaveNetResidualBlock(dilation=9) summary(net) summary(net2) for k, v in net2.named_parameters(): diff --git a/utils/gen_duration_from_textgrid.py b/utils/gen_duration_from_textgrid.py index 9ee0c05cc..54427665a 100755 --- 
a/utils/gen_duration_from_textgrid.py +++ b/utils/gen_duration_from_textgrid.py @@ -26,7 +26,7 @@ def readtg(tg_path, sample_rate=24000, n_shift=300): alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True) phones = [] ends = [] - for interval in alignment.tierDict["phones"].entryList: + for interval in alignment.getTier("phones").entries: phone = interval.label phones.append(phone) ends.append(interval.end)