From a196c052cb7f7ca4efed4193229bcabd2e56d29c Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 17 Jan 2025 17:00:47 +0800 Subject: [PATCH 01/46] Fix (#3974) --- tests/unit/tts/test_pwg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/tts/test_pwg.py b/tests/unit/tts/test_pwg.py index 10c82c9fd..bcdb5aafc 100644 --- a/tests/unit/tts/test_pwg.py +++ b/tests/unit/tts/test_pwg.py @@ -14,16 +14,16 @@ import paddle import torch from paddle.device.cuda import synchronize +from parallel_wavegan import models as pwgan from parallel_wavegan.layers import residual_block from parallel_wavegan.layers import upsample -from parallel_wavegan.models import parallel_wavegan as pwgan from timer import timer from paddlespeech.t2s.models.parallel_wavegan import ConvInUpsampleNet from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator -from paddlespeech.t2s.models.parallel_wavegan import ResidualBlock from paddlespeech.t2s.models.parallel_wavegan import ResidualPWGDiscriminator +from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock from paddlespeech.t2s.utils.layer_tools import summary paddle.set_device("gpu:0") @@ -79,8 +79,8 @@ def test_convin_upsample_net(): def test_residual_block(): - net = ResidualBlock(dilation=9) - net2 = residual_block.ResidualBlock(dilation=9) + net = WaveNetResidualBlock(dilation=9) + net2 = residual_block.WaveNetResidualBlock(dilation=9) summary(net) summary(net2) for k, v in net2.named_parameters(): From 65dbf46cdbdc681af3d86497d468f4ccb8a89e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Fri, 17 Jan 2025 17:06:09 +0800 Subject: [PATCH 02/46] =?UTF-8?q?=E3=80=90Hackathon=208th=20No.7=E3=80=91P?= =?UTF-8?q?ython=E7=89=88=E6=9C=AC=E9=80=82=E9=85=8D=204=20(#3970)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update setup.py * auto scipy * auto matplotlib * Update setup.py * Apply suggestions from code review --- setup.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 59a3e7db1..fa53b5d7e 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,26 @@ def determine_opencc_version(): return "opencc" # default +def determine_scipy_version(): + # get python version + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + + # determine scipy version + if python_version == "3.8": + return "scipy>=1.4.0, <=1.12.0" # Python3.8 need scipy>=1.4.0, <=1.12.0 + return "scipy" # default + + +def determine_matplotlib_version(): + # get python version + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + + # determine matplotlib version + if python_version == "3.8" or python_version == "3.9": + return "matplotlib<=3.8.4" # Python3.8/9 need matplotlib<=3.8.4 + return "matplotlib" # default + + base = [ "braceexpand", "editdistance", @@ -63,9 +83,9 @@ base = [ # paddleaudio align with librosa==0.8.1, which need numpy==1.23.x "numpy==1.23.5", "librosa==0.8.1", - "scipy>=1.4.0, <=1.12.0", + determine_scipy_version(), # scipy or scipy>=1.4.0, <=1.12.0 "loguru", - "matplotlib<=3.8.4", + determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4 "nara_wpe", "onnxruntime>=1.11.0", determine_opencc_version(), # opencc or opencc==1.1.6 @@ -92,7 +112,7 @@ base = [ "ToJyutping", "typeguard", "webrtcvad", - "yacs~=0.1.8", + "yacs>=0.1.8", 
"zhon", ] From 85de840d073533795d5b2f83bedd62acb5b6dc4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 21 Jan 2025 11:21:30 +0800 Subject: [PATCH 03/46] =?UTF-8?q?=E3=80=90Hackathon=208th=20No.7=E3=80=91P?= =?UTF-8?q?ython=E7=89=88=E6=9C=AC=E9=80=82=E9=85=8D=203=20(#3969)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update setup.py * add optional * fit with praatio>=6.0.0 * Apply suggestions from code review * Apply suggestions from code review * Apply suggestions from code review --- examples/other/g2p/get_g2p_data.py | 2 +- paddlespeech/server/restful/request.py | 2 +- paddlespeech/server/restful/response.py | 3 ++- paddlespeech/t2s/exps/ernie_sat/align.py | 4 ++-- setup.py | 4 ++-- utils/gen_duration_from_textgrid.py | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/examples/other/g2p/get_g2p_data.py b/examples/other/g2p/get_g2p_data.py index 8fa3e53cd..87e7b9fdc 100644 --- a/examples/other/g2p/get_g2p_data.py +++ b/examples/other/g2p/get_g2p_data.py @@ -32,7 +32,7 @@ def get_baker_data(root_dir): alignment_fp, includeEmptyIntervals=True) # only with baker's annotation utt_id = alignment.tierNameList[0].split(".")[0] - intervals = alignment.tierDict[alignment.tierNameList[0]].entryList + intervals = alignment.getTier(alignment.tierNameList[0]).entries phones = [] for interval in intervals: label = interval.label diff --git a/paddlespeech/server/restful/request.py b/paddlespeech/server/restful/request.py index b7a32481f..068694de3 100644 --- a/paddlespeech/server/restful/request.py +++ b/paddlespeech/server/restful/request.py @@ -65,7 +65,7 @@ class TTSRequest(BaseModel): speed: float = 1.0 volume: float = 1.0 sample_rate: int = 0 - save_path: str = None + save_path: Optional[str] = None #****************************************************************************************/ diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index 3d991de43..12b264c02 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from typing import List +from typing import Optional from pydantic import BaseModel @@ -62,7 +63,7 @@ class TTSResult(BaseModel): volume: float = 1.0 sample_rate: int duration: float - save_path: str = None + save_path: Optional[str] = None audio: str diff --git a/paddlespeech/t2s/exps/ernie_sat/align.py b/paddlespeech/t2s/exps/ernie_sat/align.py index a802d0295..e7c8083a8 100755 --- a/paddlespeech/t2s/exps/ernie_sat/align.py +++ b/paddlespeech/t2s/exps/ernie_sat/align.py @@ -41,11 +41,11 @@ def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300): ends = [] words = [] - for interval in alignment.tierDict['words'].entryList: + for interval in alignment.getTier('words').entries: word = interval.label if word: words.append(word) - for interval in alignment.tierDict['phones'].entryList: + for interval in alignment.getTier('phones').entries: phone = interval.label phones.append(phone) ends.append(interval.end) diff --git a/setup.py b/setup.py index fa53b5d7e..184205926 100644 --- a/setup.py +++ b/setup.py @@ -96,9 +96,9 @@ base = [ "paddleslim>=2.3.4", "ppdiffusers>=0.9.0", "paddlespeech_feat", - "praatio>=5.0.0, <=5.1.1", + "praatio>=6.0.0", "prettytable", - "pydantic>=1.10.14, <2.0", + "pydantic", "pypinyin<=0.44.0", "pypinyin-dict", "python-dateutil", diff --git a/utils/gen_duration_from_textgrid.py b/utils/gen_duration_from_textgrid.py index 9ee0c05cc..54427665a 100755 --- a/utils/gen_duration_from_textgrid.py +++ b/utils/gen_duration_from_textgrid.py @@ -26,7 +26,7 @@ def readtg(tg_path, sample_rate=24000, n_shift=300): alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True) phones = [] ends = [] - for interval in alignment.tierDict["phones"].entryList: + for interval in alignment.getTier("phones").entries: phone = interval.label phones.append(phone) ends.append(interval.end) From 76cd9db6c579f3a0975c2f1900b80e5c86109af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 22 Jan 2025 16:53:53 +0800 Subject: [PATCH 04/46] def PythonDetermine in setup.py (#3975) * extract python version * Update setup.py --- setup.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 184205926..8e8265749 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,14 @@ VERSION = '0.0.0' COMMITID = 'none' +def determine_python_version(): + """ + Determine the current python version. The function return a string such as '3.7'. 
+ """ + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + return python_version + + def determine_opencc_version(): # get gcc version gcc_version = None @@ -53,7 +61,7 @@ def determine_opencc_version(): def determine_scipy_version(): # get python version - python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + python_version = determine_python_version() # determine scipy version if python_version == "3.8": @@ -63,7 +71,7 @@ def determine_scipy_version(): def determine_matplotlib_version(): # get python version - python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + python_version = determine_python_version() # determine matplotlib version if python_version == "3.8" or python_version == "3.9": From 69985c28698587c881eaf0c5a55519db4a570b13 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Thu, 23 Jan 2025 15:04:52 +0800 Subject: [PATCH 05/46] Fix readme (#3978) * Update README.md * Update README_cn.md --- README.md | 4 ++-- README_cn.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 00367d787..39cb1bc9d 100644 --- a/README.md +++ b/README.md @@ -228,12 +228,12 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision ## Installation -We strongly recommend our users to install PaddleSpeech in **Linux** with *python>=3.8* and *paddlepaddle<=2.5.1*. Some new versions of Paddle do not have support for adaptation in PaddleSpeech, so currently only versions 2.5.1 and earlier can be supported. +We strongly recommend our users to install PaddleSpeech in **Linux** with *python>=3.8*. ### **Dependency Introduction** + gcc >= 4.8.5 -+ paddlepaddle <= 2.5.1 ++ paddlepaddle + python >= 3.8 + OS support: Linux(recommend), Windows, Mac OSX diff --git a/README_cn.md b/README_cn.md index d70940dd2..a644e4c9f 100644 --- a/README_cn.md +++ b/README_cn.md @@ -238,11 +238,11 @@ ## 安装 -我们强烈建议用户在 **Linux** 环境下,*3.8* 以上版本的 *python* 上安装 PaddleSpeech。同时,有一些Paddle新版本的内容没有在做适配的支持,因此目前只能使用2.5.1及之前的版本。 +我们强烈建议用户在 **Linux** 环境下,*3.8* 以上版本的 *python* 上安装 PaddleSpeech。 ### 相关依赖 + gcc >= 4.8.5 -+ paddlepaddle <= 2.5.1 ++ paddlepaddle + python >= 3.8 + linux(推荐), mac, windows From cb0ba54d6ed592d8e00d4db5a32aa24a00b7477b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:11:14 +0800 Subject: [PATCH 06/46] =?UTF-8?q?=E3=80=90Hackathon=208th=20No.7=E3=80=91P?= =?UTF-8?q?ython=E7=89=88=E6=9C=AC=E9=80=82=E9=85=8D=205=20(#3972)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update setup.py * fit with pypinyin * Apply suggestions from code review * Apply suggestions from code review * Update tone_sandhi.py * Apply suggestions from code review --- paddlespeech/t2s/frontend/tone_sandhi.py | 31 ++++++++++++++++-------- paddlespeech/t2s/frontend/zh_frontend.py | 5 ++++ setup.py | 2 +- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/paddlespeech/t2s/frontend/tone_sandhi.py b/paddlespeech/t2s/frontend/tone_sandhi.py index 3558064cd..d8688115b 100644 --- a/paddlespeech/t2s/frontend/tone_sandhi.py +++ b/paddlespeech/t2s/frontend/tone_sandhi.py @@ -243,8 +243,10 @@ class ToneSandhi(): if skip_next: skip_next = False continue - if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v": - new_seg[-1] = (new_seg[-1][0] + "一" + seg[i + 1][0], new_seg[-1][1]) + if i - 1 >= 0 and word == "一" and i 
+ 1 < len(seg) and seg[i - 1][ + 0] == seg[i + 1][0] and seg[i - 1][1] == "v": + new_seg[-1] = (new_seg[-1][0] + "一" + seg[i + 1][0], + new_seg[-1][1]) skip_next = True else: new_seg.append((word, pos)) @@ -262,11 +264,16 @@ class ToneSandhi(): def _merge_continuous_three_tones( self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: new_seg = [] - sub_finals_list = [ - lazy_pinyin( + sub_finals_list = [] + for (word, pos) in seg: + orig_finals = lazy_pinyin( word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) - for (word, pos) in seg - ] + # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time + en_index = [index for index, c in enumerate(word) if c == "嗯"] + for i in en_index: + orig_finals[i] = "n2" + sub_finals_list.append(orig_finals) + assert len(sub_finals_list) == len(seg) merge_last = [False] * len(seg) for i, (word, pos) in enumerate(seg): @@ -292,11 +299,15 @@ class ToneSandhi(): def _merge_continuous_three_tones_2( self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: new_seg = [] - sub_finals_list = [ - lazy_pinyin( + sub_finals_list = [] + for (word, pos) in seg: + orig_finals = lazy_pinyin( word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) - for (word, pos) in seg - ] + # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time + en_index = [index for index, c in enumerate(word) if c == "嗯"] + for i in en_index: + orig_finals[i] = "n2" + sub_finals_list.append(orig_finals) assert len(sub_finals_list) == len(seg) merge_last = [False] * len(seg) for i, (word, pos) in enumerate(seg): diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index 1431bc6d8..95c75a7f0 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -173,6 +173,11 @@ class Frontend(): word, neutral_tone_with_five=True, style=Style.INITIALS) orig_finals = lazy_pinyin( word, neutral_tone_with_five=True, style=Style.FINALS_TONE3) + # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time + en_index = [index for index, c in enumerate(word) if c == "嗯"] + for i in en_index: + orig_finals[i] = "n2" + for c, v in zip(orig_initials, orig_finals): if re.match(r'i\d', v): if c in ['z', 'c', 's']: diff --git a/setup.py b/setup.py index 8e8265749..8c2a4c1b7 100644 --- a/setup.py +++ b/setup.py @@ -107,7 +107,7 @@ base = [ "praatio>=6.0.0", "prettytable", "pydantic", - "pypinyin<=0.44.0", + "pypinyin", "pypinyin-dict", "python-dateutil", "pyworld>=0.2.12", From 675863ba662a376b302b2934efca57e6e780e913 Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 23 Jan 2025 16:40:49 +0800 Subject: [PATCH 07/46] Fix (#3976) --- tests/unit/tts/test_snapshot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/tts/test_snapshot.py b/tests/unit/tts/test_snapshot.py index 6ceff3e5a..fb18c7d78 100644 --- a/tests/unit/tts/test_snapshot.py +++ b/tests/unit/tts/test_snapshot.py @@ -19,10 +19,11 @@ from paddle.optimizer import Adam from paddlespeech.t2s.training.extensions.snapshot import Snapshot from paddlespeech.t2s.training.trainer import Trainer -from paddlespeech.t2s.training.updater import StandardUpdater +# from paddlespeech.t2s.training.updater import StandardUpdater -def test_snapshot(): + +def _test_snapshot(): model = nn.Linear(3, 4) optimizer = Adam(parameters=model.parameters()) From 
59d641bc14bf5a8532b8643e0655af46f7a73173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:20:05 +0800 Subject: [PATCH 08/46] =?UTF-8?q?=E3=80=90Hackathon=208th=20No.7=E3=80=91A?= =?UTF-8?q?dd=20hints=20for=20installing=20with=20-e=20(#3979)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update README_cn.md * Update README.md --- README.md | 2 ++ README_cn.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 39cb1bc9d..6594a4b8f 100644 --- a/README.md +++ b/README.md @@ -265,6 +265,8 @@ git clone https://github.com/PaddlePaddle/PaddleSpeech.git cd PaddleSpeech pip install pytest-runner pip install . +# If you need to install in editable mode, you need to use --use-pep517. The command is as follows: +# pip install -e . --use-pep517 ``` For more installation problems, such as conda environment, librosa-dependent, gcc problems, kaldi installation, etc., you can refer to this [installation document](./docs/source/install.md). If you encounter problems during installation, you can leave a message on [#2150](https://github.com/PaddlePaddle/PaddleSpeech/issues/2150) and find related problems diff --git a/README_cn.md b/README_cn.md index a644e4c9f..5b95a2879 100644 --- a/README_cn.md +++ b/README_cn.md @@ -272,6 +272,8 @@ git clone https://github.com/PaddlePaddle/PaddleSpeech.git cd PaddleSpeech pip install pytest-runner pip install . +# 如果需要在可编辑模式下安装,需要使用 --use-pep517,命令如下 +# pip install -e . --use-pep517 ``` 更多关于安装问题,如 conda 环境,librosa 依赖的系统库,gcc 环境问题,kaldi 安装等,可以参考这篇[安装文档](docs/source/install_cn.md),如安装上遇到问题可以在 [#2150](https://github.com/PaddlePaddle/PaddleSpeech/issues/2150) 上留言以及查找相关问题 From bb77a7f7db286f62f520a1055bae1292809d51bc Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 27 Jan 2025 11:35:49 +0800 Subject: [PATCH 09/46] Fix (#3980) --- .github/ISSUE_TEMPLATE/bug-report-s2t.md | 2 +- .github/ISSUE_TEMPLATE/bug-report-tts.md | 2 +- audio/paddleaudio/backends/soundfile_backend.py | 2 +- audio/paddleaudio/compliance/kaldi.py | 6 +++--- audio/paddleaudio/datasets/dataset.py | 2 +- audio/paddleaudio/datasets/esc50.py | 2 +- audio/paddleaudio/datasets/gtzan.py | 2 +- audio/paddleaudio/datasets/tess.py | 2 +- audio/paddleaudio/datasets/urban_sound.py | 2 +- audio/paddleaudio/datasets/voxceleb.py | 4 ++-- audio/paddleaudio/features/layers.py | 2 +- audio/paddleaudio/functional/functional.py | 2 +- audio/paddleaudio/metric/eer.py | 4 ++-- audio/paddleaudio/sox_effects/sox_effects.py | 14 +++++++------- .../src/pybind/kaldi/feature_common_inl.h | 4 ++-- .../src/pybind/kaldi/kaldi_feature_wrapper.cc | 2 +- audio/paddleaudio/src/pybind/sox/effects.cpp | 10 +++++----- audio/paddleaudio/src/pybind/sox/effects_chain.cpp | 4 ++-- audio/paddleaudio/src/pybind/sox/utils.cpp | 4 ++-- audio/paddleaudio/src/pybind/sox/utils.h | 2 +- audio/paddleaudio/third_party/sox/CMakeLists.txt | 4 ++-- audio/paddleaudio/utils/download.py | 4 ++-- audio/paddleaudio/utils/log.py | 2 +- audio/paddleaudio/utils/sox_utils.py | 2 +- audio/paddleaudio/utils/tensor_utils.py | 8 ++++---- 25 files changed, 47 insertions(+), 47 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report-s2t.md b/.github/ISSUE_TEMPLATE/bug-report-s2t.md index 512cdbb01..e9732ad8c 100644 --- a/.github/ISSUE_TEMPLATE/bug-report-s2t.md +++ b/.github/ISSUE_TEMPLATE/bug-report-s2t.md @@ -33,7 +33,7 @@ If applicable, add screenshots to help explain your problem. 
- Python Version [e.g. 3.7] - PaddlePaddle Version [e.g. 2.0.0] - Model Version [e.g. 2.0.0] - - GPU/DRIVER Informationo [e.g. Tesla V100-SXM2-32GB/440.64.00] + - GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00] - CUDA/CUDNN Version [e.g. cuda-10.2] - MKL Version - TensorRT Version diff --git a/.github/ISSUE_TEMPLATE/bug-report-tts.md b/.github/ISSUE_TEMPLATE/bug-report-tts.md index e2322c239..b4c5dabdd 100644 --- a/.github/ISSUE_TEMPLATE/bug-report-tts.md +++ b/.github/ISSUE_TEMPLATE/bug-report-tts.md @@ -32,7 +32,7 @@ If applicable, add screenshots to help explain your problem. - Python Version [e.g. 3.7] - PaddlePaddle Version [e.g. 2.0.0] - Model Version [e.g. 2.0.0] - - GPU/DRIVER Informationo [e.g. Tesla V100-SXM2-32GB/440.64.00] + - GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00] - CUDA/CUDNN Version [e.g. cuda-10.2] - MKL Version - TensorRT Version diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/audio/paddleaudio/backends/soundfile_backend.py index 9195ea097..dcd2b4b1e 100644 --- a/audio/paddleaudio/backends/soundfile_backend.py +++ b/audio/paddleaudio/backends/soundfile_backend.py @@ -61,7 +61,7 @@ def resample(y: np.ndarray, if mode == 'kaiser_best': warnings.warn( f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \ - we recommend the mode kaiser_fast in large scale audio trainning') + we recommend the mode kaiser_fast in large scale audio training') if not isinstance(y, np.ndarray): raise ParameterError( diff --git a/audio/paddleaudio/compliance/kaldi.py b/audio/paddleaudio/compliance/kaldi.py index eb92ec1f2..a94ec4053 100644 --- a/audio/paddleaudio/compliance/kaldi.py +++ b/audio/paddleaudio/compliance/kaldi.py @@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey". @@ -443,7 +443,7 @@ def fbank(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. @@ -566,7 +566,7 @@ def mfcc(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. 
- snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a singal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. diff --git a/audio/paddleaudio/datasets/dataset.py b/audio/paddleaudio/datasets/dataset.py index f1dfc1ea3..170e91669 100644 --- a/audio/paddleaudio/datasets/dataset.py +++ b/audio/paddleaudio/datasets/dataset.py @@ -47,7 +47,7 @@ class AudioClassificationDataset(paddle.io.Dataset): files (:obj:`List[str]`): A list of absolute path of audio files. labels (:obj:`List[int]`): Labels of audio files. feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. + It identifies the feature type that user wants to extract of an audio file. """ super(AudioClassificationDataset, self).__init__() diff --git a/audio/paddleaudio/datasets/esc50.py b/audio/paddleaudio/datasets/esc50.py index e7477d40e..fd8c8503e 100644 --- a/audio/paddleaudio/datasets/esc50.py +++ b/audio/paddleaudio/datasets/esc50.py @@ -117,7 +117,7 @@ class ESC50(AudioClassificationDataset): split (:obj:`int`, `optional`, defaults to 1): It specify the fold of dev dataset. feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. + It identifies the feature type that user wants to extract of an audio file. """ files, labels = self._get_data(mode, split) super(ESC50, self).__init__( diff --git a/audio/paddleaudio/datasets/gtzan.py b/audio/paddleaudio/datasets/gtzan.py index cfea6f37e..a76e9208e 100644 --- a/audio/paddleaudio/datasets/gtzan.py +++ b/audio/paddleaudio/datasets/gtzan.py @@ -67,7 +67,7 @@ class GTZAN(AudioClassificationDataset): split (:obj:`int`, `optional`, defaults to 1): It specify the fold of dev dataset. feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. + It identifies the feature type that user wants to extract of an audio file. """ assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}' files, labels = self._get_data(mode, seed, n_folds, split) diff --git a/audio/paddleaudio/datasets/tess.py b/audio/paddleaudio/datasets/tess.py index 8faab9c39..e34eaea37 100644 --- a/audio/paddleaudio/datasets/tess.py +++ b/audio/paddleaudio/datasets/tess.py @@ -76,7 +76,7 @@ class TESS(AudioClassificationDataset): split (:obj:`int`, `optional`, defaults to 1): It specify the fold of dev dataset. feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. + It identifies the feature type that user wants to extract of an audio file. 
""" assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}' files, labels = self._get_data(mode, seed, n_folds, split) diff --git a/audio/paddleaudio/datasets/urban_sound.py b/audio/paddleaudio/datasets/urban_sound.py index d97c4d1dc..43d1b36c4 100644 --- a/audio/paddleaudio/datasets/urban_sound.py +++ b/audio/paddleaudio/datasets/urban_sound.py @@ -68,7 +68,7 @@ class UrbanSound8K(AudioClassificationDataset): split (:obj:`int`, `optional`, defaults to 1): It specify the fold of dev dataset. feat_type (:obj:`str`, `optional`, defaults to `raw`): - It identifies the feature type that user wants to extrace of an audio file. + It identifies the feature type that user wants to extract of an audio file. """ def _get_meta_info(self): diff --git a/audio/paddleaudio/datasets/voxceleb.py b/audio/paddleaudio/datasets/voxceleb.py index b7160b24c..1fafb5176 100644 --- a/audio/paddleaudio/datasets/voxceleb.py +++ b/audio/paddleaudio/datasets/voxceleb.py @@ -262,8 +262,8 @@ class VoxCeleb(Dataset): split_chunks: bool=True): print(f'Generating csv: {output_file}') header = ["id", "duration", "wav", "start", "stop", "spk_id"] - # Note: this may occurs c++ execption, but the program will execute fine - # so we can ignore the execption + # Note: this may occurs c++ exception, but the program will execute fine + # so we can ignore the exception with Pool(cpu_count()) as p: infos = list( tqdm( diff --git a/audio/paddleaudio/features/layers.py b/audio/paddleaudio/features/layers.py index 292363e64..801ae34ce 100644 --- a/audio/paddleaudio/features/layers.py +++ b/audio/paddleaudio/features/layers.py @@ -34,7 +34,7 @@ __all__ = [ class Spectrogram(nn.Layer): """Compute spectrogram of given signals, typically audio waveforms. - The spectorgram is defined as the complex norm of the short-time Fourier transformation. + The spectrogram is defined as the complex norm of the short-time Fourier transformation. Args: n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512. diff --git a/audio/paddleaudio/functional/functional.py b/audio/paddleaudio/functional/functional.py index 19c63a9ae..7c20f9013 100644 --- a/audio/paddleaudio/functional/functional.py +++ b/audio/paddleaudio/functional/functional.py @@ -247,7 +247,7 @@ def create_dct(n_mfcc: int, Args: n_mfcc (int): Number of mel frequency cepstral coefficients. n_mels (int): Number of mel filterbanks. - norm (Optional[str], optional): Normalizaiton type. Defaults to 'ortho'. + norm (Optional[str], optional): Normalization type. Defaults to 'ortho'. dtype (str, optional): The data type of the return matrix. Defaults to 'float32'. Returns: diff --git a/audio/paddleaudio/metric/eer.py b/audio/paddleaudio/metric/eer.py index a1166d3f9..a55695ac1 100644 --- a/audio/paddleaudio/metric/eer.py +++ b/audio/paddleaudio/metric/eer.py @@ -22,8 +22,8 @@ def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]: """Compute EER and return score threshold. 
Args: - labels (np.ndarray): the trial label, shape: [N], one-dimention, N refer to the samples num - scores (np.ndarray): the trial scores, shape: [N], one-dimention, N refer to the samples num + labels (np.ndarray): the trial label, shape: [N], one-dimension, N refer to the samples num + scores (np.ndarray): the trial scores, shape: [N], one-dimension, N refer to the samples num Returns: List[float]: eer and the specific threshold diff --git a/audio/paddleaudio/sox_effects/sox_effects.py b/audio/paddleaudio/sox_effects/sox_effects.py index cb7e1b0b9..aa282b572 100644 --- a/audio/paddleaudio/sox_effects/sox_effects.py +++ b/audio/paddleaudio/sox_effects/sox_effects.py @@ -121,8 +121,8 @@ def apply_effects_tensor( """ tensor_np = tensor.numpy() - ret = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, - effects, channels_first) + ret = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor( + tensor_np, sample_rate, effects, channels_first) if ret is not None: return (paddle.to_tensor(ret[0]), ret[1]) raise RuntimeError("Failed to apply sox effect") @@ -139,7 +139,7 @@ def apply_effects_file( Note: This function works in the way very similar to ``sox`` command, however there are slight - differences. For example, ``sox`` commnad adds certain effects automatically (such as + differences. For example, ``sox`` command adds certain effects automatically (such as ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate`` effect with desired sampling rate, because internally, ``speed`` effects only alter sampling @@ -228,14 +228,14 @@ def apply_effects_file( >>> pass """ if hasattr(path, "read"): - ret = paddleaudio._paddleaudio.apply_effects_fileobj(path, effects, normalize, - channels_first, format) + ret = paddleaudio._paddleaudio.apply_effects_fileobj( + path, effects, normalize, channels_first, format) if ret is None: raise RuntimeError("Failed to load audio from {}".format(path)) return (paddle.to_tensor(ret[0]), ret[1]) path = os.fspath(path) - ret = paddleaudio._paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, - channels_first, format) + ret = paddleaudio._paddleaudio.sox_effects_apply_effects_file( + path, effects, normalize, channels_first, format) if ret is not None: return (paddle.to_tensor(ret[0]), ret[1]) raise RuntimeError("Failed to load audio from {}".format(path)) diff --git a/audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h b/audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h index 985d586fe..3c62bb0d4 100644 --- a/audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h +++ b/audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h @@ -26,7 +26,7 @@ template bool StreamingFeatureTpl::ComputeFeature( const std::vector& wav, std::vector* feats) { - // append remaned waves + // append remained waves int wav_len = wav.size(); if (wav_len == 0) return false; int left_len = remained_wav_.size(); @@ -38,7 +38,7 @@ bool StreamingFeatureTpl::ComputeFeature( wav.data(), wav_len * sizeof(float)); - // cache remaned waves + // cache remained waves knf::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); int num_frames = knf::NumFrames(waves.size(), frame_opts); int frame_shift = frame_opts.WindowShift(); diff --git a/audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc b/audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc index 8b8ff18be..6fdf68af2 100644 --- 
a/audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc +++ b/audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc @@ -44,5 +44,5 @@ py::array_t KaldiFeatureWrapper::ComputeFbank( return result.reshape(shape); } -} // namesapce kaldi +} // namespace kaldi } // namespace paddleaudio diff --git a/audio/paddleaudio/src/pybind/sox/effects.cpp b/audio/paddleaudio/src/pybind/sox/effects.cpp index ea77527bb..5b8959f6c 100644 --- a/audio/paddleaudio/src/pybind/sox/effects.cpp +++ b/audio/paddleaudio/src/pybind/sox/effects.cpp @@ -12,9 +12,9 @@ using namespace paddleaudio::sox_utils; namespace paddleaudio::sox_effects { // Streaming decoding over file-like object is tricky because libsox operates on -// FILE pointer. The folloing is what `sox` and `play` commands do +// FILE pointer. The following is what `sox` and `play` commands do // - file input -> FILE pointer -// - URL input -> call wget in suprocess and pipe the data -> FILE pointer +// - URL input -> call wget in subprocess and pipe the data -> FILE pointer // - stdin -> FILE pointer // // We want to, instead, fetch byte strings chunk by chunk, consume them, and @@ -127,12 +127,12 @@ namespace { enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown }; SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized; -std::mutex SOX_RESOUCE_STATE_MUTEX; +std::mutex SOX_RESOURCE_STATE_MUTEX; } // namespace void initialize_sox_effects() { - const std::lock_guard lock(SOX_RESOUCE_STATE_MUTEX); + const std::lock_guard lock(SOX_RESOURCE_STATE_MUTEX); switch (SOX_RESOURCE_STATE) { case NotInitialized: @@ -150,7 +150,7 @@ void initialize_sox_effects() { }; void shutdown_sox_effects() { - const std::lock_guard lock(SOX_RESOUCE_STATE_MUTEX); + const std::lock_guard lock(SOX_RESOURCE_STATE_MUTEX); switch (SOX_RESOURCE_STATE) { case NotInitialized: diff --git a/audio/paddleaudio/src/pybind/sox/effects_chain.cpp b/audio/paddleaudio/src/pybind/sox/effects_chain.cpp index 0204fb309..54f54840f 100644 --- a/audio/paddleaudio/src/pybind/sox/effects_chain.cpp +++ b/audio/paddleaudio/src/pybind/sox/effects_chain.cpp @@ -14,7 +14,7 @@ namespace { /// helper classes for passing the location of input tensor and output buffer /// -/// drain/flow callback functions require plaing C style function signature and +/// drain/flow callback functions require plain C style function signature and /// the way to pass extra data is to attach data to sox_effect_t::priv pointer. /// The following structs will be assigned to sox_effect_t::priv pointer which /// gives sox_effect_t an access to input Tensor and output buffer object. @@ -50,7 +50,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) { *osamp -= *osamp % num_channels; // Slice the input Tensor - // refacor this module, chunk + // refactor this module, chunk auto i_frame = index / num_channels; auto num_frames = *osamp / num_channels; diff --git a/audio/paddleaudio/src/pybind/sox/utils.cpp b/audio/paddleaudio/src/pybind/sox/utils.cpp index bc32b7407..acdef8040 100644 --- a/audio/paddleaudio/src/pybind/sox/utils.cpp +++ b/audio/paddleaudio/src/pybind/sox/utils.cpp @@ -162,7 +162,7 @@ py::dtype get_dtype( } default: // default to float32 for the other formats, including - // 32-bit flaoting-point WAV, + // 32-bit floating-point WAV, // MP3, // FLAC, // VORBIS etc... 
@@ -177,7 +177,7 @@ py::array convert_to_tensor( const py::dtype dtype, const bool normalize, const bool channels_first) { - // todo refector later(SGoat) + // todo refactor later(SGoat) py::array t; uint64_t dummy = 0; SOX_SAMPLE_LOCALS; diff --git a/audio/paddleaudio/src/pybind/sox/utils.h b/audio/paddleaudio/src/pybind/sox/utils.h index 6fce66714..c98e8f9ed 100644 --- a/audio/paddleaudio/src/pybind/sox/utils.h +++ b/audio/paddleaudio/src/pybind/sox/utils.h @@ -76,7 +76,7 @@ py::dtype get_dtype( /// Tensor. /// @param dtype Target dtype. Determines the output dtype and value range in /// conjunction with normalization. -/// @param noramlize Perform normalization. Only effective when dtype is not +/// @param normalize Perform normalization. Only effective when dtype is not /// kFloat32. When effective, the output tensor is kFloat32 type and value range /// is [-1.0, 1.0] /// @param channels_first When True, output Tensor has shape of [num_channels, diff --git a/audio/paddleaudio/third_party/sox/CMakeLists.txt b/audio/paddleaudio/third_party/sox/CMakeLists.txt index 8a5bc55c7..91be289bd 100644 --- a/audio/paddleaudio/third_party/sox/CMakeLists.txt +++ b/audio/paddleaudio/third_party/sox/CMakeLists.txt @@ -8,9 +8,9 @@ set(patch_dir ${CMAKE_CURRENT_SOURCE_DIR}/../patches) set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc) # To pass custom environment variables to ExternalProject_Add command, -# we need to do `${CMAKE_COMMAND} -E env ${envs} `. +# we need to do `${CMAKE_COMMAND} -E env ${envs} `. # https://stackoverflow.com/a/62437353 -# We constrcut the custom environment variables here +# We construct the custom environment variables here set(envs "PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig" "LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}" diff --git a/audio/paddleaudio/utils/download.py b/audio/paddleaudio/utils/download.py index 07d5eea84..f47345dfc 100644 --- a/audio/paddleaudio/utils/download.py +++ b/audio/paddleaudio/utils/download.py @@ -41,14 +41,14 @@ def download_and_decompress(archives: List[Dict[str, str]], path: str, decompress: bool=True): """ - Download archieves and decompress to specific path. + Download archives and decompress to specific path. """ if not os.path.isdir(path): os.makedirs(path) for archive in archives: assert 'url' in archive and 'md5' in archive, \ - 'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archieve.keys())}' + 'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archive.keys())}' download.get_path_from_url( archive['url'], path, archive['md5'], decompress=decompress) diff --git a/audio/paddleaudio/utils/log.py b/audio/paddleaudio/utils/log.py index 5656b286a..ddc8fd669 100644 --- a/audio/paddleaudio/utils/log.py +++ b/audio/paddleaudio/utils/log.py @@ -58,7 +58,7 @@ log_config = { class Logger(object): ''' - Deafult logger in PaddleAudio + Default logger in PaddleAudio Args: name(str) : Logger name, default is 'PaddleAudio' ''' diff --git a/audio/paddleaudio/utils/sox_utils.py b/audio/paddleaudio/utils/sox_utils.py index 305bb68b0..7665238ef 100644 --- a/audio/paddleaudio/utils/sox_utils.py +++ b/audio/paddleaudio/utils/sox_utils.py @@ -55,7 +55,7 @@ def set_use_threads(use_threads: bool): Args: use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing. - To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support. 
+ To use multithread, the underlying ``libsox`` has to be compiled with OpenMP support. See Also: http://sox.sourceforge.net/sox.html diff --git a/audio/paddleaudio/utils/tensor_utils.py b/audio/paddleaudio/utils/tensor_utils.py index cfd490b9a..1448d48a3 100644 --- a/audio/paddleaudio/utils/tensor_utils.py +++ b/audio/paddleaudio/utils/tensor_utils.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Unility functions for Transformer.""" +"""Utility functions for Transformer.""" from typing import List from typing import Tuple @@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] max_size = paddle.shape(sequences[0]) - # (TODO Hui Zhang): slice not supprot `end==start` + # (TODO Hui Zhang): slice not support `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple( max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else () @@ -94,7 +94,7 @@ def pad_sequence(sequences: List[paddle.Tensor], length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor if batch_first: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # TODO (Hui Zhang): set_value op not support int16 # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] = tensor @@ -103,7 +103,7 @@ def pad_sequence(sequences: List[paddle.Tensor], else: out_tensor[i, length] = tensor else: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # out_tensor[:length, i, ...] 
= tensor if length != 0: out_tensor[:length, i] = tensor From c74a6be99882905e1f19b3b103904fec7853724c Mon Sep 17 00:00:00 2001 From: co63oc Date: Mon, 27 Jan 2025 11:40:11 +0800 Subject: [PATCH 10/46] Fix (#3981) --- audio/paddleaudio/utils/time.py | 2 +- audio/tests/backends/base.py | 2 +- audio/tests/backends/soundfile/base.py | 2 +- audio/tests/backends/soundfile/save_test.py | 4 +- audio/tests/common_utils/data_utils.py | 2 +- audio/tests/common_utils/sox_utils.py | 43 ++++++++++++------- audio/tests/features/base.py | 4 +- audio/tests/features/test_istft.py | 2 +- audio/tests/features/test_kaldi.py | 3 +- audio/tests/features/test_librosa.py | 5 +-- .../tests/features/test_log_melspectrogram.py | 2 +- audio/tests/features/test_spectrogram.py | 2 +- audio/tests/features/test_stft.py | 4 +- dataset/librispeech/librispeech.py | 2 +- dataset/ted_en_zh/ted_en_zh.py | 2 +- dataset/thchs30/thchs30.py | 4 +- dataset/timit/timit.py | 2 +- dataset/timit/timit_kaldi_standard_split.py | 2 +- dataset/voxceleb/voxceleb1.py | 2 +- dataset/voxceleb/voxceleb2.py | 4 +- demos/audio_content_search/README.md | 2 +- demos/audio_searching/README.md | 2 +- demos/audio_tagging/README.md | 2 +- demos/automatic_video_subtitiles/README.md | 2 +- demos/keyword_spotting/README.md | 2 +- demos/punctuation_restoration/README.md | 2 +- demos/speaker_verification/README.md | 2 +- demos/speech_recognition/README.md | 2 +- demos/speech_server/README.md | 2 +- demos/speech_ssl/README.md | 2 +- demos/speech_translation/README.md | 2 +- demos/streaming_asr_server/README.md | 2 +- demos/streaming_tts_server/README.md | 2 +- demos/text_to_speech/README.md | 2 +- demos/whisper/README.md | 2 +- 35 files changed, 67 insertions(+), 58 deletions(-) diff --git a/audio/paddleaudio/utils/time.py b/audio/paddleaudio/utils/time.py index 105208f91..4ea413282 100644 --- a/audio/paddleaudio/utils/time.py +++ b/audio/paddleaudio/utils/time.py @@ -21,7 +21,7 @@ __all__ = [ class Timer(object): - '''Calculate runing speed and estimated time of arrival(ETA)''' + '''Calculate running speed and estimated time of arrival(ETA)''' def __init__(self, total_step: int): self.total_step = total_step diff --git a/audio/tests/backends/base.py b/audio/tests/backends/base.py index a67191887..c2d53d209 100644 --- a/audio/tests/backends/base.py +++ b/audio/tests/backends/base.py @@ -30,5 +30,5 @@ class BackendTest(unittest.TestCase): urllib.request.urlretrieve(url, os.path.basename(url)) self.files.append(os.path.basename(url)) - def initParmas(self): + def initParams(self): raise NotImplementedError diff --git a/audio/tests/backends/soundfile/base.py b/audio/tests/backends/soundfile/base.py index a67191887..c2d53d209 100644 --- a/audio/tests/backends/soundfile/base.py +++ b/audio/tests/backends/soundfile/base.py @@ -30,5 +30,5 @@ class BackendTest(unittest.TestCase): urllib.request.urlretrieve(url, os.path.basename(url)) self.files.append(os.path.basename(url)) - def initParmas(self): + def initParams(self): raise NotImplementedError diff --git a/audio/tests/backends/soundfile/save_test.py b/audio/tests/backends/soundfile/save_test.py index 4f3df6e48..4b5facd08 100644 --- a/audio/tests/backends/soundfile/save_test.py +++ b/audio/tests/backends/soundfile/save_test.py @@ -103,7 +103,7 @@ class MockedSaveTest(unittest.TestCase): encoding=encoding, bits_per_sample=bits_per_sample, ) - # on +Py3.8 call_args.kwargs is more descreptive + # on +Py3.8 call_args.kwargs is more descriptive args = mocked_write.call_args[1] assert args["file"] == filepath assert 
args["samplerate"] == sample_rate @@ -191,7 +191,7 @@ class SaveTestBase(TempDirMixin, unittest.TestCase): def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels): """`soundfile_backend.save` can save non-wav format. - Due to precision missmatch, and the lack of alternative way to decode the + Due to precision mismatch, and the lack of alternative way to decode the resulting files without using soundfile, only meta data are validated. """ num_frames = sample_rate * 3 diff --git a/audio/tests/common_utils/data_utils.py b/audio/tests/common_utils/data_utils.py index b5618618c..16f575701 100644 --- a/audio/tests/common_utils/data_utils.py +++ b/audio/tests/common_utils/data_utils.py @@ -81,7 +81,7 @@ def convert_tensor_encoding( #dtype = getattr(paddle, dtype) #if dtype not in [paddle.float64, paddle.float32, paddle.int32, paddle.int16, paddle.uint8]: #raise NotImplementedError(f"dtype {dtype} is not supported.") -## According to the doc, folking rng on all CUDA devices is slow when there are many CUDA devices, +## According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices, ## so we only fork on CPU, generate values and move the data to the given device #with paddle.random.fork_rng([]): #paddle.random.manual_seed(seed) diff --git a/audio/tests/common_utils/sox_utils.py b/audio/tests/common_utils/sox_utils.py index 6ceae081e..4c0866ed9 100644 --- a/audio/tests/common_utils/sox_utils.py +++ b/audio/tests/common_utils/sox_utils.py @@ -24,20 +24,21 @@ def get_bit_depth(dtype): def gen_audio_file( - path, - sample_rate, - num_channels, - *, - encoding=None, - bit_depth=None, - compression=None, - attenuation=None, - duration=1, - comment_file=None, -): + path, + sample_rate, + num_channels, + *, + encoding=None, + bit_depth=None, + compression=None, + attenuation=None, + duration=1, + comment_file=None, ): """Generate synthetic audio file with `sox` command.""" if path.endswith(".wav"): - warnings.warn("Use get_wav_data and save_wav to generate wav file for accurate result.") + warnings.warn( + "Use get_wav_data and save_wav to generate wav file for accurate result." 
+ ) command = [ "sox", "-V3", # verbose @@ -81,7 +82,12 @@ def gen_audio_file( subprocess.run(command, check=True) -def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, compression=None): +def convert_audio_file(src_path, + dst_path, + *, + encoding=None, + bit_depth=None, + compression=None): """Convert audio file with `sox` command.""" command = ["sox", "-V3", "--no-dither", "-R", str(src_path)] if encoding is not None: @@ -95,7 +101,7 @@ def convert_audio_file(src_path, dst_path, *, encoding=None, bit_depth=None, com subprocess.run(command, check=True) -def _flattern(effects): +def _flatten(effects): if not effects: return effects if isinstance(effects[0], str): @@ -103,9 +109,14 @@ def _flattern(effects): return [item for sublist in effects for item in sublist] -def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None): +def run_sox_effect(input_file, + output_file, + effect, + *, + output_sample_rate=None, + output_bitdepth=None): """Run sox effects""" - effect = _flattern(effect) + effect = _flatten(effect) command = ["sox", "-V", "--no-dither", input_file] if output_bitdepth: command += ["--bits", str(output_bitdepth)] diff --git a/audio/tests/features/base.py b/audio/tests/features/base.py index 3bb1d1dde..4a44e04bb 100644 --- a/audio/tests/features/base.py +++ b/audio/tests/features/base.py @@ -24,7 +24,7 @@ wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' class FeatTest(unittest.TestCase): def setUp(self): - self.initParmas() + self.initParams() self.initWavInput() self.setUpDevice() @@ -44,5 +44,5 @@ class FeatTest(unittest.TestCase): if dim == 1: self.waveform = np.expand_dims(self.waveform, 0) - def initParmas(self): + def initParams(self): raise NotImplementedError diff --git a/audio/tests/features/test_istft.py b/audio/tests/features/test_istft.py index ea1ee5cb6..862a1d753 100644 --- a/audio/tests/features/test_istft.py +++ b/audio/tests/features/test_istft.py @@ -23,7 +23,7 @@ from paddlespeech.audio.transform.spectrogram import Stft class TestIstft(FeatTest): - def initParmas(self): + def initParams(self): self.n_fft = 512 self.hop_length = 128 self.window_str = 'hann' diff --git a/audio/tests/features/test_kaldi.py b/audio/tests/features/test_kaldi.py index 2bd5dc734..50e2571ca 100644 --- a/audio/tests/features/test_kaldi.py +++ b/audio/tests/features/test_kaldi.py @@ -18,12 +18,11 @@ import paddle import paddleaudio import torch import torchaudio - from base import FeatTest class TestKaldi(FeatTest): - def initParmas(self): + def initParams(self): self.window_size = 1024 self.dtype = 'float32' diff --git a/audio/tests/features/test_librosa.py b/audio/tests/features/test_librosa.py index 8cda25b19..07b117cb0 100644 --- a/audio/tests/features/test_librosa.py +++ b/audio/tests/features/test_librosa.py @@ -17,13 +17,12 @@ import librosa import numpy as np import paddle import paddleaudio -from paddleaudio.functional.window import get_window - from base import FeatTest +from paddleaudio.functional.window import get_window class TestLibrosa(FeatTest): - def initParmas(self): + def initParams(self): self.n_fft = 512 self.hop_length = 128 self.n_mels = 40 diff --git a/audio/tests/features/test_log_melspectrogram.py b/audio/tests/features/test_log_melspectrogram.py index b2765d3be..6152d6ff2 100644 --- a/audio/tests/features/test_log_melspectrogram.py +++ b/audio/tests/features/test_log_melspectrogram.py @@ -22,7 +22,7 @@ from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram 
class TestLogMelSpectrogram(FeatTest): - def initParmas(self): + def initParams(self): self.n_fft = 512 self.hop_length = 128 self.n_mels = 40 diff --git a/audio/tests/features/test_spectrogram.py b/audio/tests/features/test_spectrogram.py index 6f4609632..c2dced2e7 100644 --- a/audio/tests/features/test_spectrogram.py +++ b/audio/tests/features/test_spectrogram.py @@ -22,7 +22,7 @@ from paddlespeech.audio.transform.spectrogram import Spectrogram class TestSpectrogram(FeatTest): - def initParmas(self): + def initParams(self): self.n_fft = 512 self.hop_length = 128 diff --git a/audio/tests/features/test_stft.py b/audio/tests/features/test_stft.py index 9511a2926..5bab170be 100644 --- a/audio/tests/features/test_stft.py +++ b/audio/tests/features/test_stft.py @@ -22,7 +22,7 @@ from paddlespeech.audio.transform.spectrogram import Stft class TestStft(FeatTest): - def initParmas(self): + def initParams(self): self.n_fft = 512 self.hop_length = 128 self.window_str = 'hann' @@ -30,7 +30,7 @@ class TestStft(FeatTest): def test_stft(self): ps_stft = Stft(self.n_fft, self.hop_length) ps_res = ps_stft( - self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frmaes) + self.waveform.T).squeeze(1).T # (n_fft//2 + 1, n_frames) x = paddle.to_tensor(self.waveform) window = get_window(self.window_str, self.n_fft, dtype=x.dtype) diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py index 2f5f9016c..ccf8d4b49 100644 --- a/dataset/librispeech/librispeech.py +++ b/dataset/librispeech/librispeech.py @@ -132,7 +132,7 @@ def create_manifest(data_dir, manifest_path): def prepare_dataset(url, md5sum, target_dir, manifest_path): - """Download, unpack and create summmary manifest file. + """Download, unpack and create summary manifest file. """ if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): # download diff --git a/dataset/ted_en_zh/ted_en_zh.py b/dataset/ted_en_zh/ted_en_zh.py index 2d1fc6710..66810c85e 100644 --- a/dataset/ted_en_zh/ted_en_zh.py +++ b/dataset/ted_en_zh/ted_en_zh.py @@ -13,7 +13,7 @@ # limitations under the License. """Prepare Ted-En-Zh speech translation dataset -Create manifest files from splited datased. +Create manifest files from splited dataset. dev set: tst2010, test set: tst2015 Manifest file is a json-format file with each line containing the meta data (i.e. 
audio filepath, transcript and audio duration) diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py index c5c3eb7a8..fc8338984 100644 --- a/dataset/thchs30/thchs30.py +++ b/dataset/thchs30/thchs30.py @@ -71,7 +71,7 @@ def read_trn(filepath): with open(filepath, 'r') as f: lines = f.read().strip().split('\n') assert len(lines) == 3, lines - # charactor text, remove withespace + # character text, remove whitespace texts.append(''.join(lines[0].split())) texts.extend(lines[1:]) return texts @@ -127,7 +127,7 @@ def create_manifest(data_dir, manifest_path_prefix): 'utt2spk': spk, 'feat': audio_path, 'feat_shape': (duration, ), # second - 'text': word_text, # charactor + 'text': word_text, # character 'syllable': syllable_text, 'phone': phone_text, }, diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py index f3889d176..2943ff548 100644 --- a/dataset/timit/timit.py +++ b/dataset/timit/timit.py @@ -123,7 +123,7 @@ def read_algin(filepath: str) -> str: filepath (str): [description] Returns: - str: token sepearte by + str: token separate by """ aligns = [] # (start, end, token) with open(filepath, 'r') as f: diff --git a/dataset/timit/timit_kaldi_standard_split.py b/dataset/timit/timit_kaldi_standard_split.py index 473fc856f..59ce2e64a 100644 --- a/dataset/timit/timit_kaldi_standard_split.py +++ b/dataset/timit/timit_kaldi_standard_split.py @@ -13,7 +13,7 @@ # limitations under the License. """Prepare TIMIT dataset (Standard split from Kaldi) -Create manifest files from splited datased. +Create manifest files from splited dataset. Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py index 8d4100678..49a2a6baa 100644 --- a/dataset/voxceleb/voxceleb1.py +++ b/dataset/voxceleb/voxceleb1.py @@ -167,7 +167,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path, # check the target zip file md5sum if not check_md5sum(target_name, target_md5sum): - raise RuntimeError("{} MD5 checkssum failed".format(target_name)) + raise RuntimeError("{} MD5 checksum failed".format(target_name)) else: print("Check {} md5sum successfully".format(target_name)) diff --git a/dataset/voxceleb/voxceleb2.py b/dataset/voxceleb/voxceleb2.py index 6df6d1f38..faa3b99bc 100644 --- a/dataset/voxceleb/voxceleb2.py +++ b/dataset/voxceleb/voxceleb2.py @@ -179,7 +179,7 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset): # check the target zip file md5sum if not check_md5sum(target_name, target_md5sum): - raise RuntimeError("{} MD5 checkssum failed".format(target_name)) + raise RuntimeError("{} MD5 checksum failed".format(target_name)) else: print("Check {} md5sum successfully".format(target_name)) @@ -187,7 +187,7 @@ def download_dataset(base_url, data_list, target_data, target_dir, dataset): # we need make the test directory unzip(target_name, os.path.join(target_dir, "test")) else: - # upzip dev zip pacakge and will create the dev directory + # unzip dev zip package and will create the dev directory unzip(target_name, target_dir) diff --git a/demos/audio_content_search/README.md b/demos/audio_content_search/README.md index f04ac447e..89b1c0d89 100644 --- a/demos/audio_content_search/README.md +++ b/demos/audio_content_search/README.md @@ -14,7 +14,7 @@ Now, the search word in demo is: ### 1. 
Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from meduim and hard to install paddlespeech. +You can choose one way from medium and hard to install paddlespeech. The dependency refers to the requirements.txt, and install the dependency as follows: diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md index 0fc901432..528fce9e8 100644 --- a/demos/audio_searching/README.md +++ b/demos/audio_searching/README.md @@ -19,7 +19,7 @@ Note:this demo uses the [CN-Celeb](http://openslr.org/82/) dataset of at least ### 1. Prepare PaddleSpeech Audio vector extraction requires PaddleSpeech training model, so please make sure that PaddleSpeech has been installed before running. Specific installation steps: See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare MySQL and Milvus services by docker-compose The audio similarity search system requires Milvus, MySQL services. We can start these containers with one click through [docker-compose.yaml](./docker-compose.yaml), so please make sure you have [installed Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) before running. then diff --git a/demos/audio_tagging/README.md b/demos/audio_tagging/README.md index fc4a334ea..b602c6022 100644 --- a/demos/audio_tagging/README.md +++ b/demos/audio_tagging/README.md @@ -11,7 +11,7 @@ This demo is an implementation to tag an audio file with 527 [AudioSet](https:// ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`). diff --git a/demos/automatic_video_subtitiles/README.md b/demos/automatic_video_subtitiles/README.md index b815425ec..89d8c73c9 100644 --- a/demos/automatic_video_subtitiles/README.md +++ b/demos/automatic_video_subtitiles/README.md @@ -10,7 +10,7 @@ This demo is an implementation to automatic video subtitles from a video file. I ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input Get a video file with the speech of the specific language: diff --git a/demos/keyword_spotting/README.md b/demos/keyword_spotting/README.md index 6544cf71e..b55c71124 100644 --- a/demos/keyword_spotting/README.md +++ b/demos/keyword_spotting/README.md @@ -10,7 +10,7 @@ This demo is an implementation to recognize keyword from a specific audio file. ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. 
diff --git a/demos/punctuation_restoration/README.md b/demos/punctuation_restoration/README.md index 458ab92f9..3544a2060 100644 --- a/demos/punctuation_restoration/README.md +++ b/demos/punctuation_restoration/README.md @@ -9,7 +9,7 @@ This demo is an implementation to restore punctuation from raw text. It can be d ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input The input of this demo should be a text of the specific language that can be passed via argument. diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index 55f9a7360..37c6bf3b9 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -11,7 +11,7 @@ This demo is an implementation to extract speaker embedding from a specific audi ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this cli demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md index ee2acd6fd..e406590d2 100644 --- a/demos/speech_recognition/README.md +++ b/demos/speech_recognition/README.md @@ -10,7 +10,7 @@ This demo is an implementation to recognize text from a specific audio file. It ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 116f1fd7b..08788a89e 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc It is recommended to use **paddlepaddle 2.4rc** or above. -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.** diff --git a/demos/speech_ssl/README.md b/demos/speech_ssl/README.md index ef9b2237d..8677ebc57 100644 --- a/demos/speech_ssl/README.md +++ b/demos/speech_ssl/README.md @@ -10,7 +10,7 @@ This demo is an implementation to recognize text or produce the acoustic represe ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. 
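The server demos touched below read their settings from a yaml file under `conf/` (which, as the speech_server README notes, you must supply yourself in an easy-mode install). A minimal sketch of loading such a file with `yacs`, which is already a PaddleSpeech dependency (the path is a placeholder for the demo's actual config file):

```python
from yacs.config import CfgNode

# Placeholder path: point this at the demo's conf yaml, e.g. conf/application.yaml.
config = CfgNode(new_allowed=True)  # new_allowed lets the yaml introduce new keys
config.merge_from_file("conf/application.yaml")
print(config.keys())
```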
diff --git a/demos/speech_translation/README.md b/demos/speech_translation/README.md index 00a9c7932..4866336c0 100644 --- a/demos/speech_translation/README.md +++ b/demos/speech_translation/README.md @@ -9,7 +9,7 @@ This demo is an implementation to recognize text from a specific audio file and ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 136863b96..423485466 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -18,7 +18,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc It is recommended to use **paddlepaddle 2.4rc** or above. -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to diff --git a/demos/streaming_tts_server/README.md b/demos/streaming_tts_server/README.md index ca5d6f1f8..ad87bebdc 100644 --- a/demos/streaming_tts_server/README.md +++ b/demos/streaming_tts_server/README.md @@ -15,7 +15,7 @@ see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/doc It is recommended to use **paddlepaddle 2.4rc** or above. -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. **If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.** diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md index d7bb8ca1c..b58777def 100644 --- a/demos/text_to_speech/README.md +++ b/demos/text_to_speech/README.md @@ -10,7 +10,7 @@ This demo is an implementation to generate audio from the given text. It can be ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). -You can choose one way from easy, meduim and hard to install paddlespeech. +You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input The input of this demo should be a text of the specific language that can be passed via argument. diff --git a/demos/whisper/README.md b/demos/whisper/README.md index 9b12554e6..6e1b8011f 100644 --- a/demos/whisper/README.md +++ b/demos/whisper/README.md @@ -9,7 +9,7 @@ Whisper model trained by OpenAI whisper https://github.com/openai/whisper ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). - You can choose one way from easy, meduim and hard to install paddlespeech. + You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input File The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. 
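The next patch adds download links for PIR static models and notes that running them requires `FLAGS_enable_pir_api=1` and paddlepaddle>=3.0.0b2. As a minimal usage sketch of that note (the flag is set through the environment before paddle is imported; the model/params file names are placeholders for whatever the downloaded `*_static_pir_*.zip` archive actually contains):

```python
import os

# Per the patch note: the PIR flag must be in effect before paddle initializes.
os.environ["FLAGS_enable_pir_api"] = "1"

from paddle.inference import Config, create_predictor

# Placeholder file names; substitute the model/params files unzipped from the
# fastspeech2_*_static_pir_*.zip archive.
config = Config("path/to/model_file", "path/to/params_file")
predictor = create_predictor(config)
```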
From 4e5181c949bab59bbbaa11b945165e06fc7689cd Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Mon, 27 Jan 2025 12:29:14 +0800 Subject: [PATCH 11/46] add some pir model (#3982) --- examples/vctk/tts3/README.md | 3 +++ examples/zh_en_tts/tts3/README.md | 3 +++ 2 files changed, 6 insertions(+) diff --git a/examples/vctk/tts3/README.md b/examples/vctk/tts3/README.md index 3a6f3e1b9..183a20f0a 100644 --- a/examples/vctk/tts3/README.md +++ b/examples/vctk/tts3/README.md @@ -221,6 +221,9 @@ Pretrained FastSpeech2 model with no silence in the edge of audios: The static model can be downloaded here: - [fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip) +The PIR static model can be downloaded here: + - [fastspeech2_vctk_static_pir_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_pir_1.1.0.zip) (Run PIR model need to set FLAGS_enable_pir_api=1, and PIR model only worked with paddlepaddle>=3.0.0b2) + The ONNX model can be downloaded here: - [fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md index 15de3f487..9c3cd4079 100644 --- a/examples/zh_en_tts/tts3/README.md +++ b/examples/zh_en_tts/tts3/README.md @@ -260,6 +260,9 @@ Pretrained FastSpeech2 model with no silence in the edge of audios: The static model can be downloaded here: - [fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip) +The PIR static model can be downloaded here: +- [fastspeech2_mix_static_pir_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_pir_0.2.0.zip) (Run PIR model need to set FLAGS_enable_pir_api=1, and PIR model only worked with paddlepaddle>=3.0.0b2) + The ONNX model can be downloaded here: - [fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) From f3a5df2049740ccdcac5ec88d329aa916931a87a Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 6 Feb 2025 11:10:04 +0800 Subject: [PATCH 12/46] Fix typos (#3984) * Fix * Fix --- audio/paddleaudio/backends/soundfile_backend.py | 16 ++++++++-------- audio/paddleaudio/compliance/librosa.py | 8 ++++---- audio/paddleaudio/src/pybind/sox/utils.cpp | 2 +- audio/tests/backends/soundfile/save_test.py | 2 +- dataset/chime3_background/chime3_background.py | 2 +- dataset/mini_librispeech/mini_librispeech.py | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/audio/paddleaudio/backends/soundfile_backend.py index dcd2b4b1e..7611fd297 100644 --- a/audio/paddleaudio/backends/soundfile_backend.py +++ b/audio/paddleaudio/backends/soundfile_backend.py @@ -183,7 +183,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: Args: y (np.ndarray): Input waveform array in 1D or 2D. sr (int): Sample rate. - file (os.PathLike): Path of auido file to save. + file (os.PathLike): Path of audio file to save. """ if not file.endswith('.wav'): raise ParameterError( @@ -216,10 +216,10 @@ def soundfile_load( duration: Optional[int]=None, dtype: str='float32', resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]: - """Load audio file from disk. 
This function loads audio from disk using using audio beackend. + """Load audio file from disk. This function loads audio from disk using using audio backend. Args: - file (os.PathLike): Path of auido file to load. + file (os.PathLike): Path of audio file to load. sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None. mono (bool, optional): Return waveform with mono channel. Defaults to True. merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'. @@ -250,14 +250,14 @@ def soundfile_load( if normal: y = normalize(y, norm_type, norm_mul_factor) elif dtype in ['int8', 'int16']: - # still need to do normalization, before depth convertion + # still need to do normalization, before depth conversion y = normalize(y, 'linear', 1.0) y = depth_convert(y, dtype) return y, r -#the code below token form: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py with modificaion. +#The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications. def _get_subtype_for_wav(dtype: paddle.dtype, @@ -382,7 +382,7 @@ def save( channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, otherwise `[time, channel]`. compression (float of None, optional): Not used. - It is here only for interface compatibility reson with "sox_io" backend. + It is here only for interface compatibility reason with "sox_io" backend. format (str or None, optional): Override the audio format. When ``filepath`` argument is path-like object, audio format is inferred from file extension. If the file extension is missing or @@ -394,8 +394,8 @@ def save( Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, ``"flac"`` and ``"sph"``. encoding (str or None, optional): Changes the encoding for supported formats. - This argument is effective only for supported formats, sush as - ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are; + This argument is effective only for supported formats, such as + ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are: - ``"PCM_S"`` (signed integer Linear PCM) - ``"PCM_U"`` (unsigned integer Linear PCM) diff --git a/audio/paddleaudio/compliance/librosa.py b/audio/paddleaudio/compliance/librosa.py index 168632d7c..c24d6d497 100644 --- a/audio/paddleaudio/compliance/librosa.py +++ b/audio/paddleaudio/compliance/librosa.py @@ -626,7 +626,7 @@ def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray: def _randint(high: int) -> int: """Generate one random integer in range [0 high) - This is a helper function for random data augmentaiton + This is a helper function for random data augmentation """ return int(np.random.randint(0, high=high)) @@ -659,7 +659,7 @@ def depth_augment(y: np.ndarray, def adaptive_spect_augment(spect: np.ndarray, tempo_axis: int=0, level: float=0.1) -> np.ndarray: - """Do adpative spectrogram augmentation. The level of the augmentation is gowern by the paramter level, ranging from 0 to 1, with 0 represents no augmentation. + """Do adaptive spectrogram augmentation. The level of the augmentation is govern by the parameter level, ranging from 0 to 1, with 0 represents no augmentation. Args: spect (np.ndarray): Input spectrogram. @@ -711,9 +711,9 @@ def spect_augment(spect: np.ndarray, spect (np.ndarray): Input spectrogram. tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0. max_time_mask (int, optional): Maximum number of time masking. Defaults to 3. 
- max_freq_mask (int, optional): Maximum number of frenquence masking. Defaults to 3. + max_freq_mask (int, optional): Maximum number of frequency masking. Defaults to 3. max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30. - max_freq_mask_width (int, optional): Maximum width of frenquence masking. Defaults to 20. + max_freq_mask_width (int, optional): Maximum width of frequency masking. Defaults to 20. Returns: np.ndarray: The augmented spectrogram. diff --git a/audio/paddleaudio/src/pybind/sox/utils.cpp b/audio/paddleaudio/src/pybind/sox/utils.cpp index acdef8040..1d38dff90 100644 --- a/audio/paddleaudio/src/pybind/sox/utils.cpp +++ b/audio/paddleaudio/src/pybind/sox/utils.cpp @@ -449,7 +449,7 @@ unsigned get_precision(const std::string filetype, py::dtype dtype) { return SOX_UNSPEC; if (filetype == "wav" || filetype == "amb") { switch (dtype.num()) { - case 1: // byte in numpy dype num + case 1: // byte in numpy dtype num return 8; case 3: // short, in numpy dtype num return 16; diff --git a/audio/tests/backends/soundfile/save_test.py b/audio/tests/backends/soundfile/save_test.py index 4b5facd08..0dce592c8 100644 --- a/audio/tests/backends/soundfile/save_test.py +++ b/audio/tests/backends/soundfile/save_test.py @@ -58,7 +58,7 @@ class MockedSaveTest(unittest.TestCase): encoding=encoding, bits_per_sample=bits_per_sample, ) - # on +Py3.8 call_args.kwargs is more descreptive + # on +Py3.8 call_args.kwargs is more descriptive args = mocked_write.call_args[1] assert args["file"] == filepath assert args["samplerate"] == sample_rate diff --git a/dataset/chime3_background/chime3_background.py b/dataset/chime3_background/chime3_background.py index 1f5439aab..4f081e6c3 100644 --- a/dataset/chime3_background/chime3_background.py +++ b/dataset/chime3_background/chime3_background.py @@ -58,7 +58,7 @@ def download(url, md5sum, target_dir, filename=None): if not (os.path.exists(filepath) and md5file(filepath) == md5sum): print("Downloading %s ..." % url) wget.download(url, target_dir) - print("\nMD5 Chesksum %s ..." % filepath) + print("\nMD5 Checksum %s ..." % filepath) if not md5file(filepath) == md5sum: raise RuntimeError("MD5 checksum failed.") else: diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py index 24bd98d8c..3a60ef22b 100644 --- a/dataset/mini_librispeech/mini_librispeech.py +++ b/dataset/mini_librispeech/mini_librispeech.py @@ -108,7 +108,7 @@ def create_manifest(data_dir, manifest_path): def prepare_dataset(url, md5sum, target_dir, manifest_path): - """Download, unpack and create summmary manifest file. + """Download, unpack and create summary manifest file. 
""" if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): # download From 0479cce8ffa31a9b7bb10310de99dbbdab3f46a1 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Sat, 8 Feb 2025 12:43:19 +0800 Subject: [PATCH 13/46] =?UTF-8?q?=E3=80=90audio=E3=80=91remove=20paddleaud?= =?UTF-8?q?io=20from=20paddlespeech=20(#3986)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove paddleaudio from paddlespeech * use scikit-learn instead sklearn * add pathos * remove utils * add kaldiio * remove useless print --- audio/paddleaudio/backends/common.py | 32 +- docs/source/cls/custom_dataset.md | 6 +- docs/tutorial/cls/cls_tutorial.ipynb | 16 +- examples/tess/cls0/local/train.py | 4 +- examples/voxceleb/sv0/local/data_prepare.py | 2 +- .../make_rirs_noise_csv_dataset_from_json.py | 2 +- .../local/make_vox_csv_dataset_from_json.py | 2 +- paddlespeech/audio/__init__.py | 4 + paddlespeech/audio/backends/__init__.py | 20 + paddlespeech/audio/backends/common.py | 53 ++ .../audio/backends/soundfile_backend.py | 677 +++++++++++++++ paddlespeech/audio/compliance/__init__.py | 15 + paddlespeech/audio/compliance/kaldi.py | 643 ++++++++++++++ paddlespeech/audio/compliance/librosa.py | 788 ++++++++++++++++++ paddlespeech/audio/datasets/__init__.py | 15 + paddlespeech/audio/datasets/dataset.py | 100 +++ paddlespeech/audio/datasets/esc50.py | 152 ++++ paddlespeech/audio/datasets/voxceleb.py | 356 ++++++++ paddlespeech/audio/functional/__init__.py | 20 + paddlespeech/audio/functional/functional.py | 266 ++++++ paddlespeech/audio/functional/window.py | 373 +++++++++ paddlespeech/audio/streamdata/autodecode.py | 4 +- paddlespeech/audio/streamdata/filters.py | 4 +- paddlespeech/audio/streamdata/soundfile.py | 677 +++++++++++++++ paddlespeech/audio/streamdata/tariterators.py | 6 +- paddlespeech/audio/transform/spectrogram.py | 3 +- paddlespeech/cli/cls/infer.py | 2 +- paddlespeech/cli/kws/infer.py | 4 +- paddlespeech/cli/vector/infer.py | 4 +- paddlespeech/cls/exps/panns/deploy/predict.py | 2 +- paddlespeech/cls/exps/panns/export_model.py | 2 +- paddlespeech/cls/exps/panns/predict.py | 5 +- paddlespeech/cls/exps/panns/train.py | 4 +- paddlespeech/cls/models/panns/panns.py | 2 +- paddlespeech/kws/exps/mdtc/train.py | 4 +- .../frontend/featurizer/audio_featurizer.py | 3 +- paddlespeech/s2t/modules/fbank.py | 2 +- .../engine/vector/python/vector_engine.py | 4 +- paddlespeech/server/util.py | 4 +- .../starganv2_vc/AuxiliaryASR/layers.py | 4 +- .../vector/exps/ecapa_tdnn/extract_emb.py | 4 +- paddlespeech/vector/exps/ecapa_tdnn/test.py | 19 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 2 +- paddlespeech/vector/io/dataset.py | 4 +- paddlespeech/vector/io/dataset_from_json.py | 7 +- setup.py | 4 +- .../unit/audiotools/core/test_audio_signal.py | 6 +- 47 files changed, 4254 insertions(+), 78 deletions(-) create mode 100644 paddlespeech/audio/backends/__init__.py create mode 100644 paddlespeech/audio/backends/common.py create mode 100644 paddlespeech/audio/backends/soundfile_backend.py create mode 100644 paddlespeech/audio/compliance/__init__.py create mode 100644 paddlespeech/audio/compliance/kaldi.py create mode 100644 paddlespeech/audio/compliance/librosa.py create mode 100644 paddlespeech/audio/datasets/__init__.py create mode 100644 paddlespeech/audio/datasets/dataset.py create mode 100644 paddlespeech/audio/datasets/esc50.py create mode 100644 paddlespeech/audio/datasets/voxceleb.py create mode 100644 paddlespeech/audio/functional/__init__.py create mode 
100644 paddlespeech/audio/functional/functional.py create mode 100644 paddlespeech/audio/functional/window.py create mode 100644 paddlespeech/audio/streamdata/soundfile.py diff --git a/audio/paddleaudio/backends/common.py b/audio/paddleaudio/backends/common.py index 9d3edf812..3065fe89f 100644 --- a/audio/paddleaudio/backends/common.py +++ b/audio/paddleaudio/backends/common.py @@ -1,4 +1,5 @@ -# Token form https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modification. +# Token from https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modification. + class AudioInfo: """return of info function. @@ -30,13 +31,12 @@ class AudioInfo: """ def __init__( - self, - sample_rate: int, - num_frames: int, - num_channels: int, - bits_per_sample: int, - encoding: str, - ): + self, + sample_rate: int, + num_frames: int, + num_channels: int, + bits_per_sample: int, + encoding: str, ): self.sample_rate = sample_rate self.num_frames = num_frames self.num_channels = num_channels @@ -44,12 +44,10 @@ class AudioInfo: self.encoding = encoding def __str__(self): - return ( - f"AudioMetaData(" - f"sample_rate={self.sample_rate}, " - f"num_frames={self.num_frames}, " - f"num_channels={self.num_channels}, " - f"bits_per_sample={self.bits_per_sample}, " - f"encoding={self.encoding}" - f")" - ) + return (f"AudioMetaData(" + f"sample_rate={self.sample_rate}, " + f"num_frames={self.num_frames}, " + f"num_channels={self.num_channels}, " + f"bits_per_sample={self.bits_per_sample}, " + f"encoding={self.encoding}" + f")") diff --git a/docs/source/cls/custom_dataset.md b/docs/source/cls/custom_dataset.md index 7482d5edf..26bd60b25 100644 --- a/docs/source/cls/custom_dataset.md +++ b/docs/source/cls/custom_dataset.md @@ -2,7 +2,7 @@ Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`. -A base class of classification dataset is `paddlespeech.audio.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. +A base class of classification dataset is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. Assuming you have some wave files that stored in your own directory. You should prepare a meta file with the information of filepaths and labels. For example the absolute path of it is `/PATH/TO/META_FILE.txt`: ``` @@ -14,7 +14,7 @@ Assuming you have some wave files that stored in your own directory. 
You should Here is an example to build your custom dataset in `custom_dataset.py`: ```python -from paddleaudio.datasets.dataset import AudioClassificationDataset +from paddlespeech.audio.datasets.dataset import AudioClassificationDataset class CustomDataset(AudioClassificationDataset): meta_file = '/PATH/TO/META_FILE.txt' @@ -48,7 +48,7 @@ class CustomDataset(AudioClassificationDataset): Then you can build dataset and data loader from `CustomDataset`: ```python import paddle -from paddleaudio.features import LogMelSpectrogram +from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram from custom_dataset import CustomDataset diff --git a/docs/tutorial/cls/cls_tutorial.ipynb b/docs/tutorial/cls/cls_tutorial.ipynb index 3cee64991..e37b086f7 100644 --- a/docs/tutorial/cls/cls_tutorial.ipynb +++ b/docs/tutorial/cls/cls_tutorial.ipynb @@ -52,8 +52,8 @@ "metadata": {}, "outputs": [], "source": [ - "# 环境准备:安装paddlespeech和paddleaudio\n", - "!pip install --upgrade pip && pip install paddlespeech paddleaudio -U" + "# 环境准备:安装paddlespeech\n", + "!pip install --upgrade pip && pip install paddlespeech -U" ] }, { @@ -100,7 +100,7 @@ "metadata": {}, "outputs": [], "source": [ - "from paddleaudio import load\n", + "from paddlespeech.audio.backends import load\n", "data, sr = load(file='./dog.wav', mono=True, dtype='float32') # 单通道,float32音频样本点\n", "print('wav shape: {}'.format(data.shape))\n", "print('sample rate: {}'.format(sr))\n", @@ -191,7 +191,7 @@ "
图片来源:https://ww2.mathworks.cn/help/audio/ref/mfcc.html
\n", "\n", "

\n", - "下面例子采用 `paddleaudio.features.LogMelSpectrogram` 演示如何提取示例音频的 LogFBank:" + "下面例子采用 `paddlespeech.audio.transform.spectrogram.LogMelSpectrogram` 演示如何提取示例音频的 LogFBank:" ] }, { @@ -200,7 +200,7 @@ "metadata": {}, "outputs": [], "source": [ - "from paddleaudio.features import LogMelSpectrogram\n", + "from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram\n", "\n", "f_min=50.0\n", "f_max=14000.0\n", @@ -337,7 +337,7 @@ "metadata": {}, "outputs": [], "source": [ - "from paddleaudio.datasets import ESC50\n", + "from paddlespeech.audio.datasets import ESC50\n", "\n", "train_ds = ESC50(mode='train', sample_rate=sr)\n", "dev_ds = ESC50(mode='dev', sample_rate=sr)" @@ -348,7 +348,7 @@ "metadata": {}, "source": [ "### 3.1.2 特征提取\n", - "通过下列代码,用 `paddleaudio.features.LogMelSpectrogram` 初始化一个音频特征提取器,在训练过程中实时提取音频的 LogFBank 特征,其中主要的参数如下: " + "通过下列代码,用 `paddlespeech.audio.transform.spectrogram.LogMelSpectrogram` 初始化一个音频特征提取器,在训练过程中实时提取音频的 LogFBank 特征,其中主要的参数如下: " ] }, { @@ -481,7 +481,7 @@ "metadata": {}, "outputs": [], "source": [ - "from paddleaudio.utils import logger\n", + "from paddlespeech.audio.utils import logger\n", "\n", "epochs = 20\n", "steps_per_epoch = len(train_loader)\n", diff --git a/examples/tess/cls0/local/train.py b/examples/tess/cls0/local/train.py index f023a37b7..ad4926d76 100644 --- a/examples/tess/cls0/local/train.py +++ b/examples/tess/cls0/local/train.py @@ -16,9 +16,9 @@ import os import paddle import yaml -from paddleaudio.utils import logger -from paddleaudio.utils import Timer +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils.time import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index b4486b6f0..e5a5dff7b 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle -from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode +from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 11908fe63..b65fa35b4 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm -from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode +from paddlespeech.audio.backends import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index ebeb598a4..6ef2064a0 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm -from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode +from paddlespeech.audio.backends import soundfile_load as load_audio from 
paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py index a7cf6caaf..0e120be29 100644 --- a/paddlespeech/audio/__init__.py +++ b/paddlespeech/audio/__init__.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from . import backends +from . import compliance +from . import datasets +from . import functional from . import streamdata from . import text from . import transform diff --git a/paddlespeech/audio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py new file mode 100644 index 000000000..7e4ee6506 --- /dev/null +++ b/paddlespeech/audio/backends/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .soundfile_backend import depth_convert +from .soundfile_backend import load +from .soundfile_backend import normalize +from .soundfile_backend import resample +from .soundfile_backend import soundfile_load +from .soundfile_backend import soundfile_save +from .soundfile_backend import to_mono diff --git a/paddlespeech/audio/backends/common.py b/paddlespeech/audio/backends/common.py new file mode 100644 index 000000000..3065fe89f --- /dev/null +++ b/paddlespeech/audio/backends/common.py @@ -0,0 +1,53 @@ +# Token from https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modification. + + +class AudioInfo: + """return of info function. + + This class is used by :ref:`"sox_io" backend` and + :ref:`"soundfile" backend with the new interface`. + + :ivar int sample_rate: Sample rate + :ivar int num_frames: The number of frames + :ivar int num_channels: The number of channels + :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats, + or when it cannot be accurately inferred. 
+ :ivar str encoding: Audio encoding + The values encoding can take are one of the following: + + * ``PCM_S``: Signed integer linear PCM + * ``PCM_U``: Unsigned integer linear PCM + * ``PCM_F``: Floating point linear PCM + * ``FLAC``: Flac, Free Lossless Audio Codec + * ``ULAW``: Mu-law + * ``ALAW``: A-law + * ``MP3`` : MP3, MPEG-1 Audio Layer III + * ``VORBIS``: OGG Vorbis + * ``AMR_WB``: Adaptive Multi-Rate + * ``AMR_NB``: Adaptive Multi-Rate Wideband + * ``OPUS``: Opus + * ``HTK``: Single channel 16-bit PCM + * ``UNKNOWN`` : None of above + """ + + def __init__( + self, + sample_rate: int, + num_frames: int, + num_channels: int, + bits_per_sample: int, + encoding: str, ): + self.sample_rate = sample_rate + self.num_frames = num_frames + self.num_channels = num_channels + self.bits_per_sample = bits_per_sample + self.encoding = encoding + + def __str__(self): + return (f"AudioMetaData(" + f"sample_rate={self.sample_rate}, " + f"num_frames={self.num_frames}, " + f"num_channels={self.num_channels}, " + f"bits_per_sample={self.bits_per_sample}, " + f"encoding={self.encoding}" + f")") diff --git a/paddlespeech/audio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py new file mode 100644 index 000000000..7611fd297 --- /dev/null +++ b/paddlespeech/audio/backends/soundfile_backend.py @@ -0,0 +1,677 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import warnings +from typing import Optional +from typing import Tuple + +import numpy as np +import paddle +import resampy +import soundfile +from scipy.io import wavfile + +from ..utils import depth_convert +from ..utils import ParameterError +from .common import AudioInfo + +__all__ = [ + 'resample', + 'to_mono', + 'normalize', + 'save', + 'soundfile_save', + 'load', + 'soundfile_load', + 'info', +] +NORMALMIZE_TYPES = ['linear', 'gaussian'] +MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] +RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast'] +EPS = 1e-8 + + +def resample(y: np.ndarray, + src_sr: int, + target_sr: int, + mode: str='kaiser_fast') -> np.ndarray: + """Audio resampling. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + src_sr (int): Source sample rate. + target_sr (int): Target sample rate. + mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'. + + Returns: + np.ndarray: `y` resampled to `target_sr` + """ + + if mode == 'kaiser_best': + warnings.warn( + f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. 
This function is pretty slow, \ + we recommend the mode kaiser_fast in large scale audio training') + + if not isinstance(y, np.ndarray): + raise ParameterError( + 'Only support numpy np.ndarray, but received y in {type(y)}') + + if mode not in RESAMPLE_MODES: + raise ParameterError(f'resample mode must in {RESAMPLE_MODES}') + + return resampy.resample(y, src_sr, target_sr, filter=mode) + + +def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray: + """Convert sterior audio to mono. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + merge_type (str, optional): Merge type to generate mono waveform. Defaults to 'average'. + + Returns: + np.ndarray: `y` with mono channel. + """ + + if merge_type not in MERGE_TYPES: + raise ParameterError( + f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}' + ) + if y.ndim > 2: + raise ParameterError( + f'Unsupported audio array, y.ndim > 2, the shape is {y.shape}') + if y.ndim == 1: # nothing to merge + return y + + if merge_type == 'ch0': + return y[0] + if merge_type == 'ch1': + return y[1] + if merge_type == 'random': + return y[np.random.randint(0, 2)] + + # need to do averaging according to dtype + + if y.dtype == 'float32': + y_out = (y[0] + y[1]) * 0.5 + elif y.dtype == 'int16': + y_out = y.astype('int32') + y_out = (y_out[0] + y_out[1]) // 2 + y_out = np.clip(y_out, np.iinfo(y.dtype).min, + np.iinfo(y.dtype).max).astype(y.dtype) + + elif y.dtype == 'int8': + y_out = y.astype('int16') + y_out = (y_out[0] + y_out[1]) // 2 + y_out = np.clip(y_out, np.iinfo(y.dtype).min, + np.iinfo(y.dtype).max).astype(y.dtype) + else: + raise ParameterError(f'Unsupported dtype: {y.dtype}') + return y_out + + +def soundfile_load_(file: os.PathLike, + offset: Optional[float]=None, + dtype: str='int16', + duration: Optional[int]=None) -> Tuple[np.ndarray, int]: + """Load audio using soundfile library. This function load audio file using libsndfile. + + Args: + file (os.PathLike): File of waveform. + offset (Optional[float], optional): Offset to the start of waveform. Defaults to None. + dtype (str, optional): Data type of waveform. Defaults to 'int16'. + duration (Optional[int], optional): Duration of waveform to read. Defaults to None. + + Returns: + Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. + """ + with soundfile.SoundFile(file) as sf_desc: + sr_native = sf_desc.samplerate + if offset: + sf_desc.seek(int(offset * sr_native)) + if duration is not None: + frame_duration = int(duration * sr_native) + else: + frame_duration = -1 + y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T + + return y, sf_desc.samplerate + + +def normalize(y: np.ndarray, norm_type: str='linear', + mul_factor: float=1.0) -> np.ndarray: + """Normalize an input audio with additional multiplier. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + norm_type (str, optional): Type of normalization. Defaults to 'linear'. + mul_factor (float, optional): Scaling factor. Defaults to 1.0. + + Returns: + np.ndarray: `y` after normalization. + """ + + if norm_type == 'linear': + amax = np.max(np.abs(y)) + factor = 1.0 / (amax + EPS) + y = y * factor * mul_factor + elif norm_type == 'gaussian': + amean = np.mean(y) + astd = np.std(y) + astd = max(astd, EPS) + y = mul_factor * (y - amean) / astd + else: + raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}') + + return y + + +def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: + """Save audio file to disk. 
This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + sr (int): Sample rate. + file (os.PathLike): Path of audio file to save. + """ + if not file.endswith('.wav'): + raise ParameterError( + f'only .wav file supported, but dst file name is: {file}') + + if sr <= 0: + raise ParameterError( + f'Sample rate should be larger than 0, received sr = {sr}') + + if y.dtype not in ['int16', 'int8']: + warnings.warn( + f'input data type is {y.dtype}, will convert data to int16 format before saving' + ) + y_out = depth_convert(y, 'int16') + else: + y_out = y + + wavfile.write(file, sr, y_out) + + +def soundfile_load( + file: os.PathLike, + sr: Optional[int]=None, + mono: bool=True, + merge_type: str='average', # ch0,ch1,random,average + normal: bool=True, + norm_type: str='linear', + norm_mul_factor: float=1.0, + offset: float=0.0, + duration: Optional[int]=None, + dtype: str='float32', + resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]: + """Load audio file from disk. This function loads audio from disk using using audio backend. + + Args: + file (os.PathLike): Path of audio file to load. + sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None. + mono (bool, optional): Return waveform with mono channel. Defaults to True. + merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'. + normal (bool, optional): Waveform normalization. Defaults to True. + norm_type (str, optional): Type of normalization. Defaults to 'linear'. + norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0. + offset (float, optional): Offset to the start of waveform. Defaults to 0.0. + duration (Optional[int], optional): Duration of waveform to read. Defaults to None. + dtype (str, optional): Data type of waveform. Defaults to 'float32'. + resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'. + + Returns: + Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. + """ + + y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration) + + if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): + raise ParameterError(f'audio file {file} looks empty') + + if mono: + y = to_mono(y, merge_type) + + if sr is not None and sr != r: + y = resample(y, r, sr, mode=resample_mode) + r = sr + + if normal: + y = normalize(y, norm_type, norm_mul_factor) + elif dtype in ['int8', 'int16']: + # still need to do normalization, before depth conversion + y = normalize(y, 'linear', 1.0) + + y = depth_convert(y, dtype) + return y, r + + +#The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications. 
+ + +def _get_subtype_for_wav(dtype: paddle.dtype, + encoding: str, + bits_per_sample: int): + if not encoding: + if not bits_per_sample: + subtype = { + paddle.uint8: "PCM_U8", + paddle.int16: "PCM_16", + paddle.int32: "PCM_32", + paddle.float32: "FLOAT", + paddle.float64: "DOUBLE", + }.get(dtype) + if not subtype: + raise ValueError(f"Unsupported dtype for wav: {dtype}") + return subtype + if bits_per_sample == 8: + return "PCM_U8" + return f"PCM_{bits_per_sample}" + if encoding == "PCM_S": + if not bits_per_sample: + return "PCM_32" + if bits_per_sample == 8: + raise ValueError("wav does not support 8-bit signed PCM encoding.") + return f"PCM_{bits_per_sample}" + if encoding == "PCM_U": + if bits_per_sample in (None, 8): + return "PCM_U8" + raise ValueError("wav only supports 8-bit unsigned PCM encoding.") + if encoding == "PCM_F": + if bits_per_sample in (None, 32): + return "FLOAT" + if bits_per_sample == 64: + return "DOUBLE" + raise ValueError("wav only supports 32/64-bit float PCM encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("wav only supports 8-bit mu-law encoding.") + if encoding == "ALAW": + if bits_per_sample in (None, 8): + return "ALAW" + raise ValueError("wav only supports 8-bit a-law encoding.") + raise ValueError(f"wav does not support {encoding}.") + + +def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): + if encoding in (None, "PCM_S"): + return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32" + if encoding in ("PCM_U", "PCM_F"): + raise ValueError(f"sph does not support {encoding} encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("sph only supports 8-bit for mu-law encoding.") + if encoding == "ALAW": + return "ALAW" + raise ValueError(f"sph does not support {encoding}.") + + +def _get_subtype(dtype: paddle.dtype, + format: str, + encoding: str, + bits_per_sample: int): + if format == "wav": + return _get_subtype_for_wav(dtype, encoding, bits_per_sample) + if format == "flac": + if encoding: + raise ValueError("flac does not support encoding.") + if not bits_per_sample: + return "PCM_16" + if bits_per_sample > 24: + raise ValueError("flac does not support bits_per_sample > 24.") + return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" + if format in ("ogg", "vorbis"): + if encoding or bits_per_sample: + raise ValueError( + "ogg/vorbis does not support encoding/bits_per_sample.") + return "VORBIS" + if format == "sph": + return _get_subtype_for_sphere(encoding, bits_per_sample) + if format in ("nis", "nist"): + return "PCM_16" + raise ValueError(f"Unsupported format: {format}") + + +def save( + filepath: str, + src: paddle.Tensor, + sample_rate: int, + channels_first: bool=True, + compression: Optional[float]=None, + format: Optional[str]=None, + encoding: Optional[str]=None, + bits_per_sample: Optional[int]=None, ): + """Save audio data to file. + + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * FLAC + * OGG/VORBIS + * SPHERE + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, + + Args: + filepath (str or pathlib.Path): Path to audio file. 
+ src (paddle.Tensor): Audio data to save. must be 2D tensor. + sample_rate (int): sampling rate + channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, + otherwise `[time, channel]`. + compression (float of None, optional): Not used. + It is here only for interface compatibility reason with "sox_io" backend. + format (str or None, optional): Override the audio format. + When ``filepath`` argument is path-like object, audio format is + inferred from file extension. If the file extension is missing or + different, you can specify the correct format with this argument. + + When ``filepath`` argument is file-like object, + this argument is required. + + Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, + ``"flac"`` and ``"sph"``. + encoding (str or None, optional): Changes the encoding for supported formats. + This argument is effective only for supported formats, such as + ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are: + + - ``"PCM_S"`` (signed integer Linear PCM) + - ``"PCM_U"`` (unsigned integer Linear PCM) + - ``"PCM_F"`` (floating point PCM) + - ``"ULAW"`` (mu-law) + - ``"ALAW"`` (a-law) + + bits_per_sample (int or None, optional): Changes the bit depth for the + supported formats. + When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``, + you can change the bit depth. + Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``. + + Supported formats/encodings/bit depth/compression are: + + ``"wav"`` + - 32-bit floating-point PCM + - 32-bit signed integer PCM + - 24-bit signed integer PCM + - 16-bit signed integer PCM + - 8-bit unsigned integer PCM + - 8-bit mu-law + - 8-bit a-law + + Note: + Default encoding/bit depth is determined by the dtype of + the input Tensor. + + ``"flac"`` + - 8-bit + - 16-bit (default) + - 24-bit + + ``"ogg"``, ``"vorbis"`` + - Doesn't accept changing configuration. + + ``"sph"`` + - 8-bit signed integer PCM + - 16-bit signed integer PCM + - 24-bit signed integer PCM + - 32-bit signed integer PCM (default) + - 8-bit mu-law + - 8-bit a-law + - 16-bit a-law + - 24-bit a-law + - 32-bit a-law + + """ + if src.ndim != 2: + raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") + if compression is not None: + warnings.warn( + '`save` function of "soundfile" backend does not support "compression" parameter. ' + "The argument is silently ignored.") + if hasattr(filepath, "write"): + if format is None: + raise RuntimeError( + "`format` is required when saving to file object.") + ext = format.lower() + else: + ext = str(filepath).split(".")[-1].lower() + + if bits_per_sample not in (None, 8, 16, 24, 32, 64): + raise ValueError("Invalid bits_per_sample.") + if bits_per_sample == 24: + warnings.warn( + "Saving audio with 24 bits per sample might warp samples near -1. 
" + "Using 16 bits per sample might be able to avoid this.") + subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) + + # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, + # so we extend the extensions manually here + if ext in ["nis", "nist", "sph"] and format is None: + format = "NIST" + + if channels_first: + src = src.t() + + soundfile.write( + file=filepath, + data=src, + samplerate=sample_rate, + subtype=subtype, + format=format) + + +_SUBTYPE2DTYPE = { + "PCM_S8": "int8", + "PCM_U8": "uint8", + "PCM_16": "int16", + "PCM_32": "int32", + "FLOAT": "float32", + "DOUBLE": "float64", +} + + +def load( + filepath: str, + frame_offset: int=0, + num_frames: int=-1, + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]: + """Load audio data from file. + + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * FLAC + * OGG/VORBIS + * SPHERE + + By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with + ``float32`` dtype and the shape of `[channel, time]`. + The samples are normalized to fit in the range of ``[-1.0, 1.0]``. + + When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit + signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), + by providing ``normalize=False``, this function can return integer Tensor, where the samples + are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor + for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. + + ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as + ``flac`` and ``mp3``. + For these formats, this function always returns ``float32`` Tensor with values normalized to + ``[-1.0, 1.0]``. + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend. + + Args: + filepath (path-like object or file-like object): + Source of audio data. + frame_offset (int, optional): + Number of frames to skip before start reading data. + num_frames (int, optional): + Maximum number of frames to read. ``-1`` reads all the remaining samples, + starting from ``frame_offset``. + This function may return the less number of frames if there is not enough + frames in the given file. + normalize (bool, optional): + When ``True``, this function always return ``float32``, and sample values are + normalized to ``[-1.0, 1.0]``. + If input file is integer WAV, giving ``False`` will change the resulting Tensor type to + integer type. + This argument has no effect for formats other than integer WAV type. + channels_first (bool, optional): + When True, the returned Tensor has dimension `[channel, time]`. + Otherwise, the returned Tensor's dimension is `[time, channel]`. + format (str or None, optional): + Not used. PySoundFile does not accept format hint. + + Returns: + (paddle.Tensor, int): Resulting Tensor and sample rate. + If the input file has integer wav format and normalization is off, then it has + integer type, else ``float32`` type. If ``channels_first=True``, it has + `[channel, time]` else `[time, channel]`. 
+ """ + with soundfile.SoundFile(filepath, "r") as file_: + if file_.format != "WAV" or normalize: + dtype = "float32" + elif file_.subtype not in _SUBTYPE2DTYPE: + raise ValueError(f"Unsupported subtype: {file_.subtype}") + else: + dtype = _SUBTYPE2DTYPE[file_.subtype] + + frames = file_._prepare_read(frame_offset, None, num_frames) + waveform = file_.read(frames, dtype, always_2d=True) + sample_rate = file_.samplerate + + waveform = paddle.to_tensor(waveform) + if channels_first: + waveform = paddle.transpose(waveform, perm=[1, 0]) + return waveform, sample_rate + + +# Mapping from soundfile subtype to number of bits per sample. +# This is mostly heuristical and the value is set to 0 when it is irrelevant +# (lossy formats) or when it can't be inferred. +# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: +# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, +# the default seems to be 8 bits but it can be compressed further to 4 bits. +# The dict is inspired from +# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 +_SUBTYPE_TO_BITS_PER_SAMPLE = { + "PCM_S8": 8, # Signed 8 bit data + "PCM_16": 16, # Signed 16 bit data + "PCM_24": 24, # Signed 24 bit data + "PCM_32": 32, # Signed 32 bit data + "PCM_U8": 8, # Unsigned 8 bit data (WAV and RAW only) + "FLOAT": 32, # 32 bit float data + "DOUBLE": 64, # 64 bit float data + "ULAW": 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + "ALAW": 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + "IMA_ADPCM": 0, # IMA ADPCM. + "MS_ADPCM": 0, # Microsoft ADPCM. + "GSM610": + 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) + "VOX_ADPCM": 0, # OKI / Dialogix ADPCM + "G721_32": 0, # 32kbs G721 ADPCM encoding. + "G723_24": 0, # 24kbs G723 ADPCM encoding. + "G723_40": 0, # 40kbs G723 ADPCM encoding. + "DWVW_12": 12, # 12 bit Delta Width Variable Word encoding. + "DWVW_16": 16, # 16 bit Delta Width Variable Word encoding. + "DWVW_24": 24, # 24 bit Delta Width Variable Word encoding. + "DWVW_N": 0, # N bit Delta Width Variable Word encoding. + "DPCM_8": 8, # 8 bit differential PCM (XI only) + "DPCM_16": 16, # 16 bit differential PCM (XI only) + "VORBIS": 0, # Xiph Vorbis encoding. (lossy) + "ALAC_16": 16, # Apple Lossless Audio Codec (16 bit). + "ALAC_20": 20, # Apple Lossless Audio Codec (20 bit). + "ALAC_24": 24, # Apple Lossless Audio Codec (24 bit). + "ALAC_32": 32, # Apple Lossless Audio Codec (32 bit). +} + + +def _get_bit_depth(subtype): + if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: + warnings.warn( + f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample " + "attribute will be set to 0. If you are seeing this warning, please " + "report by opening an issue on github (after checking for existing/closed ones). 
" + "You may otherwise ignore this warning.") + return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) + + +_SUBTYPE_TO_ENCODING = { + "PCM_S8": "PCM_S", + "PCM_16": "PCM_S", + "PCM_24": "PCM_S", + "PCM_32": "PCM_S", + "PCM_U8": "PCM_U", + "FLOAT": "PCM_F", + "DOUBLE": "PCM_F", + "ULAW": "ULAW", + "ALAW": "ALAW", + "VORBIS": "VORBIS", +} + + +def _get_encoding(format: str, subtype: str): + if format == "FLAC": + return "FLAC" + return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN") + + +def info(filepath: str, format: Optional[str]=None) -> AudioInfo: + """Get signal information of an audio file. + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, + + Args: + filepath (path-like object or file-like object): + Source of audio data. + format (str or None, optional): + Not used. PySoundFile does not accept format hint. + + Returns: + AudioInfo: meta data of the given audio. + + """ + sinfo = soundfile.info(filepath) + return AudioInfo( + sinfo.samplerate, + sinfo.frames, + sinfo.channels, + bits_per_sample=_get_bit_depth(sinfo.subtype), + encoding=_get_encoding(sinfo.format, sinfo.subtype), ) diff --git a/paddlespeech/audio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py new file mode 100644 index 000000000..c08f9ab11 --- /dev/null +++ b/paddlespeech/audio/compliance/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from . import kaldi +from . import librosa diff --git a/paddlespeech/audio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py new file mode 100644 index 000000000..a94ec4053 --- /dev/null +++ b/paddlespeech/audio/compliance/kaldi.py @@ -0,0 +1,643 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Modified from torchaudio(https://github.com/pytorch/audio) +import math +from typing import Tuple + +import paddle +from paddle import Tensor + +from ..functional import create_dct +from ..functional.window import get_window + +__all__ = [ + 'spectrogram', + 'fbank', + 'mfcc', +] + +# window types +HANNING = 'hann' +HAMMING = 'hamming' +POVEY = 'povey' +RECTANGULAR = 'rect' +BLACKMAN = 'blackman' + + +def _get_epsilon(dtype): + return paddle.to_tensor(1e-07, dtype=dtype) + + +def _next_power_of_2(x: int) -> int: + return 1 if x == 0 else 2**(x - 1).bit_length() + + +def _get_strided(waveform: Tensor, + window_size: int, + window_shift: int, + snip_edges: bool) -> Tensor: + assert waveform.dim() == 1 + num_samples = waveform.shape[0] + + if snip_edges: + if num_samples < window_size: + return paddle.empty((0, 0), dtype=waveform.dtype) + else: + m = 1 + (num_samples - window_size) // window_shift + else: + reversed_waveform = paddle.flip(waveform, [0]) + m = (num_samples + (window_shift // 2)) // window_shift + pad = window_size // 2 - window_shift // 2 + pad_right = reversed_waveform + if pad > 0: + pad_left = reversed_waveform[-pad:] + waveform = paddle.concat((pad_left, waveform, pad_right), axis=0) + else: + waveform = paddle.concat((waveform[-pad:], pad_right), axis=0) + + return paddle.signal.frame(waveform, window_size, window_shift)[:, :m].T + + +def _feature_window_function( + window_type: str, + window_size: int, + blackman_coeff: float, + dtype: int, ) -> Tensor: + if window_type == "hann": + return get_window('hann', window_size, fftbins=False, dtype=dtype) + elif window_type == "hamming": + return get_window('hamming', window_size, fftbins=False, dtype=dtype) + elif window_type == "povey": + return get_window( + 'hann', window_size, fftbins=False, dtype=dtype).pow(0.85) + elif window_type == "rect": + return paddle.ones([window_size], dtype=dtype) + elif window_type == "blackman": + a = 2 * math.pi / (window_size - 1) + window_function = paddle.arange(window_size, dtype=dtype) + return (blackman_coeff - 0.5 * paddle.cos(a * window_function) + + (0.5 - blackman_coeff) * paddle.cos(2 * a * window_function) + ).astype(dtype) + else: + raise Exception('Invalid window type ' + window_type) + + +def _get_log_energy(strided_input: Tensor, epsilon: Tensor, + energy_floor: float) -> Tensor: + log_energy = paddle.maximum(strided_input.pow(2).sum(1), epsilon).log() + if energy_floor == 0.0: + return log_energy + return paddle.maximum( + log_energy, + paddle.to_tensor(math.log(energy_floor), dtype=strided_input.dtype)) + + +def _get_waveform_and_window_properties( + waveform: Tensor, + channel: int, + sr: int, + frame_shift: float, + frame_length: float, + round_to_power_of_two: bool, + preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]: + channel = max(channel, 0) + assert channel < waveform.shape[0], ( + 'Invalid channel {} for size {}'.format(channel, waveform.shape[0])) + waveform = waveform[channel, :] # size (n) + window_shift = int( + sr * frame_shift * + 0.001) # pass frame_shift and frame_length in milliseconds + window_size = int(sr * frame_length * 0.001) + padded_window_size = _next_power_of_2( + window_size) if round_to_power_of_two else window_size + + assert 2 <= window_size <= len(waveform), ( + 'choose a window size {} that is [2, {}]'.format(window_size, + len(waveform))) + assert 0 < window_shift, '`window_shift` must be greater than 0' + assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' 
\ + ' use `round_to_power_of_two` or change `frame_length`' + assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]' + assert sr > 0, '`sr` must be greater than zero' + return waveform, window_shift, window_size, padded_window_size + + +def _get_window(waveform: Tensor, + padded_window_size: int, + window_size: int, + window_shift: int, + window_type: str, + blackman_coeff: float, + snip_edges: bool, + raw_energy: bool, + energy_floor: float, + dither: float, + remove_dc_offset: bool, + preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]: + dtype = waveform.dtype + epsilon = _get_epsilon(dtype) + + # (m, window_size) + strided_input = _get_strided(waveform, window_size, window_shift, + snip_edges) + + if dither != 0.0: + x = paddle.maximum(epsilon, + paddle.rand(strided_input.shape, dtype=dtype)) + rand_gauss = paddle.sqrt(-2 * x.log()) * paddle.cos(2 * math.pi * x) + strided_input = strided_input + rand_gauss * dither + + if remove_dc_offset: + row_means = paddle.mean(strided_input, axis=1).unsqueeze(1) # (m, 1) + strided_input = strided_input - row_means + + if raw_energy: + signal_log_energy = _get_log_energy(strided_input, epsilon, + energy_floor) # (m) + + if preemphasis_coefficient != 0.0: + offset_strided_input = paddle.nn.functional.pad( + strided_input.unsqueeze(0), (1, 0), + data_format='NCL', + mode='replicate').squeeze(0) # (m, window_size + 1) + strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, : + -1] + + window_function = _feature_window_function( + window_type, window_size, blackman_coeff, + dtype).unsqueeze(0) # (1, window_size) + strided_input = strided_input * window_function # (m, window_size) + + # (m, padded_window_size) + if padded_window_size != window_size: + padding_right = padded_window_size - window_size + strided_input = paddle.nn.functional.pad( + strided_input.unsqueeze(0), (0, padding_right), + data_format='NCL', + mode='constant', + value=0).squeeze(0) + + if not raw_energy: + signal_log_energy = _get_log_energy(strided_input, epsilon, + energy_floor) # size (m) + + return strided_input, signal_log_energy + + +def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor: + if subtract_mean: + col_means = paddle.mean(tensor, axis=0).unsqueeze(0) + tensor = tensor - col_means + return tensor + + +def spectrogram(waveform: Tensor, + blackman_coeff: float=0.42, + channel: int=-1, + dither: float=0.0, + energy_floor: float=1.0, + frame_length: float=25.0, + frame_shift: float=10.0, + preemphasis_coefficient: float=0.97, + raw_energy: bool=True, + remove_dc_offset: bool=True, + round_to_power_of_two: bool=True, + sr: int=16000, + snip_edges: bool=True, + subtract_mean: bool=False, + window_type: str="povey") -> Tensor: + """Compute and return a spectrogram from a waveform. The output is identical to Kaldi's. + + Args: + waveform (Tensor): A waveform tensor with shape `(C, T)`. + blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42. + channel (int, optional): Select the channel of waveform. Defaults to -1. + dither (float, optional): Dithering constant . Defaults to 0.0. + energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. + frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. + frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. + preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. 
+ raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True. + remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. + round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input + to FFT. Defaults to True. + sr (int, optional): Sample rate of input waveform. Defaults to 16000. + snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it + is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. + subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. + window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey". + + Returns: + Tensor: A spectrogram tensor with shape `(m, padded_window_size // 2 + 1)` where m is the number of frames + depends on frame_length and frame_shift. + """ + dtype = waveform.dtype + epsilon = _get_epsilon(dtype) + + waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( + waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two, + preemphasis_coefficient) + + strided_input, signal_log_energy = _get_window( + waveform, padded_window_size, window_size, window_shift, window_type, + blackman_coeff, snip_edges, raw_energy, energy_floor, dither, + remove_dc_offset, preemphasis_coefficient) + + # (m, padded_window_size // 2 + 1, 2) + fft = paddle.fft.rfft(strided_input) + + power_spectrum = paddle.maximum( + fft.abs().pow(2.), epsilon).log() # (m, padded_window_size // 2 + 1) + power_spectrum[:, 0] = signal_log_energy + + power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean) + return power_spectrum + + +def _inverse_mel_scale_scalar(mel_freq: float) -> float: + return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0) + + +def _inverse_mel_scale(mel_freq: Tensor) -> Tensor: + return 700.0 * ((mel_freq / 1127.0).exp() - 1.0) + + +def _mel_scale_scalar(freq: float) -> float: + return 1127.0 * math.log(1.0 + freq / 700.0) + + +def _mel_scale(freq: Tensor) -> Tensor: + return 1127.0 * (1.0 + freq / 700.0).log() + + +def _vtln_warp_freq(vtln_low_cutoff: float, + vtln_high_cutoff: float, + low_freq: float, + high_freq: float, + vtln_warp_factor: float, + freq: Tensor) -> Tensor: + assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq' + assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]' + l = vtln_low_cutoff * max(1.0, vtln_warp_factor) + h = vtln_high_cutoff * min(1.0, vtln_warp_factor) + scale = 1.0 / vtln_warp_factor + Fl = scale * l + Fh = scale * h + assert l > low_freq and h < high_freq + scale_left = (Fl - low_freq) / (l - low_freq) + scale_right = (high_freq - Fh) / (high_freq - h) + res = paddle.empty_like(freq) + + outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \ + | paddle.greater_than(freq, paddle.to_tensor(high_freq)) + before_l = paddle.less_than(freq, paddle.to_tensor(l)) + before_h = paddle.less_than(freq, paddle.to_tensor(h)) + after_h = paddle.greater_equal(freq, paddle.to_tensor(h)) + + res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq) + res[before_h] = scale * freq[before_h] + res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq) + res[outside_low_high_freq] = freq[outside_low_high_freq] + + return res + + +def _vtln_warp_mel_freq(vtln_low_cutoff: float, + 
vtln_high_cutoff: float, + low_freq, + high_freq: float, + vtln_warp_factor: float, + mel_freq: Tensor) -> Tensor: + return _mel_scale( + _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq, + vtln_warp_factor, _inverse_mel_scale(mel_freq))) + + +def _get_mel_banks(num_bins: int, + window_length_padded: int, + sample_freq: float, + low_freq: float, + high_freq: float, + vtln_low: float, + vtln_high: float, + vtln_warp_factor: float) -> Tuple[Tensor, Tensor]: + assert num_bins > 3, 'Must have at least 3 mel bins' + assert window_length_padded % 2 == 0 + num_fft_bins = window_length_padded / 2 + nyquist = 0.5 * sample_freq + + if high_freq <= 0.0: + high_freq += nyquist + + assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \ + ('Bad values in options: low-freq {} and high-freq {} vs. nyquist {}'.format(low_freq, high_freq, nyquist)) + + fft_bin_width = sample_freq / window_length_padded + mel_low_freq = _mel_scale_scalar(low_freq) + mel_high_freq = _mel_scale_scalar(high_freq) + + mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1) + + if vtln_high < 0.0: + vtln_high += nyquist + + assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and + (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \ + ('Bad values in options: vtln-low {} and vtln-high {}, versus ' + 'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq)) + + bin = paddle.arange(num_bins, dtype=paddle.float32).unsqueeze(1) + # left_mel = mel_low_freq + bin * mel_freq_delta # (num_bins, 1) + # center_mel = mel_low_freq + (bin + 1.0) * mel_freq_delta # (num_bins, 1) + # right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta # (num_bins, 1) + left_mel = mel_low_freq + bin * mel_freq_delta # (num_bins, 1) + center_mel = left_mel + mel_freq_delta + right_mel = center_mel + mel_freq_delta + + if vtln_warp_factor != 1.0: + left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, + vtln_warp_factor, left_mel) + center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, + high_freq, vtln_warp_factor, + center_mel) + right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, + high_freq, vtln_warp_factor, right_mel) + + center_freqs = _inverse_mel_scale(center_mel) # (num_bins) + # (1, num_fft_bins) + mel = _mel_scale(fft_bin_width * paddle.arange( + num_fft_bins, dtype=paddle.float32)).unsqueeze(0) + + # (num_bins, num_fft_bins) + up_slope = (mel - left_mel) / (center_mel - left_mel) + down_slope = (right_mel - mel) / (right_mel - center_mel) + + if vtln_warp_factor == 1.0: + bins = paddle.maximum( + paddle.zeros([1]), paddle.minimum(up_slope, down_slope)) + else: + bins = paddle.zeros_like(up_slope) + up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than( + mel, center_mel) + down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than( + mel, right_mel) + bins[up_idx] = up_slope[up_idx] + bins[down_idx] = down_slope[down_idx] + + return bins, center_freqs + + +def fbank(waveform: Tensor, + blackman_coeff: float=0.42, + channel: int=-1, + dither: float=0.0, + energy_floor: float=1.0, + frame_length: float=25.0, + frame_shift: float=10.0, + high_freq: float=0.0, + htk_compat: bool=False, + low_freq: float=20.0, + n_mels: int=23, + preemphasis_coefficient: float=0.97, + raw_energy: bool=True, + remove_dc_offset: bool=True, + round_to_power_of_two: bool=True, + sr: int=16000, + snip_edges: bool=True, + subtract_mean: bool=False, + use_energy: bool=False, + use_log_fbank: 
bool=True,
+          use_power: bool=True,
+          vtln_high: float=-500.0,
+          vtln_low: float=100.0,
+          vtln_warp: float=1.0,
+          window_type: str="povey") -> Tensor:
+    """Compute and return filter banks from a waveform. The output is identical to Kaldi's.
+
+    Args:
+        waveform (Tensor): A waveform tensor with shape `(C, T)`. `C` is in the range [0,1].
+        blackman_coeff (float, optional): Coefficient for Blackman window. Defaults to 0.42.
+        channel (int, optional): Select the channel of waveform. Defaults to -1.
+        dither (float, optional): Dithering constant. Defaults to 0.0.
+        energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0.
+        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
+        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
+        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
+        htk_compat (bool, optional): Put energy last (HTK convention) when it is set True. Defaults to False.
+        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
+        n_mels (int, optional): Number of output mel bins. Defaults to 23.
+        preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97.
+        raw_energy (bool, optional): Whether to compute energy before preemphasis and windowing. Defaults to True.
+        remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True.
+        round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
+            to FFT. Defaults to True.
+        sr (int, optional): Sample rate of input waveform. Defaults to 16000.
+        snip_edges (bool, optional): Drop samples at the end of the waveform that can't fit a complete frame when
+            set True. Otherwise, reflect-pad the end of the waveform. Defaults to True.
+        subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
+        use_energy (bool, optional): Add a dimension with the energy of the spectrogram to the output. Defaults to False.
+        use_log_fbank (bool, optional): Return log fbank when it is set True. Defaults to True.
+        use_power (bool, optional): Whether to use power instead of magnitude. Defaults to True.
+        vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0.
+        vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0.
+        vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0.
+        window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".
+
+    Returns:
+        Tensor: A filter banks tensor with shape `(m, n_mels)`.
+    """
+    dtype = waveform.dtype
+
+    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
+        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
+        preemphasis_coefficient)
+
+    strided_input, signal_log_energy = _get_window(
+        waveform, padded_window_size, window_size, window_shift, window_type,
+        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
+        remove_dc_offset, preemphasis_coefficient)
+
+    # (m, padded_window_size // 2 + 1)
+    spectrum = paddle.fft.rfft(strided_input).abs()
+    if use_power:
+        spectrum = spectrum.pow(2.)
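+
+    # At this point `spectrum` holds the magnitude (or power, when `use_power`
+    # is True) spectrogram with shape (m, padded_window_size // 2 + 1); the mel
+    # filter bank built below is applied to it with a single matrix multiply.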
+ + # (n_mels, padded_window_size // 2) + mel_energies, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq, + high_freq, vtln_low, vtln_high, vtln_warp) + # mel_energies = mel_energies.astype(dtype) + assert mel_energies.dtype == dtype + + # (n_mels, padded_window_size // 2 + 1) + mel_energies = paddle.nn.functional.pad( + mel_energies.unsqueeze(0), (0, 1), + data_format='NCL', + mode='constant', + value=0).squeeze(0) + + # (m, n_mels) + mel_energies = paddle.mm(spectrum, mel_energies.T) + if use_log_fbank: + mel_energies = paddle.maximum(mel_energies, _get_epsilon(dtype)).log() + + if use_energy: + signal_log_energy = signal_log_energy.unsqueeze(1) + if htk_compat: + mel_energies = paddle.concat( + (mel_energies, signal_log_energy), axis=1) + else: + mel_energies = paddle.concat( + (signal_log_energy, mel_energies), axis=1) + + # (m, n_mels + 1) + mel_energies = _subtract_column_mean(mel_energies, subtract_mean) + return mel_energies + + +def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor: + dct_matrix = create_dct(n_mels, n_mels, 'ortho') + dct_matrix[:, 0] = math.sqrt(1 / float(n_mels)) + dct_matrix = dct_matrix[:, :n_mfcc] # (n_mels, n_mfcc) + return dct_matrix + + +def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor: + i = paddle.arange(n_mfcc) + return 1.0 + 0.5 * cepstral_lifter * paddle.sin(math.pi * i / + cepstral_lifter) + + +def mfcc(waveform: Tensor, + blackman_coeff: float=0.42, + cepstral_lifter: float=22.0, + channel: int=-1, + dither: float=0.0, + energy_floor: float=1.0, + frame_length: float=25.0, + frame_shift: float=10.0, + high_freq: float=0.0, + htk_compat: bool=False, + low_freq: float=20.0, + n_mfcc: int=13, + n_mels: int=23, + preemphasis_coefficient: float=0.97, + raw_energy: bool=True, + remove_dc_offset: bool=True, + round_to_power_of_two: bool=True, + sr: int=16000, + snip_edges: bool=True, + subtract_mean: bool=False, + use_energy: bool=False, + vtln_high: float=-500.0, + vtln_low: float=100.0, + vtln_warp: float=1.0, + window_type: str="povey") -> Tensor: + """Compute and return mel frequency cepstral coefficients from a waveform. The output is + identical to Kaldi's. + + Args: + waveform (Tensor): A waveform tensor with shape `(C, T)`. + blackman_coeff (float, optional): Coefficient for Blackman window.. Defaults to 0.42. + cepstral_lifter (float, optional): Scaling of output mfccs. Defaults to 22.0. + channel (int, optional): Select the channel of waveform. Defaults to -1. + dither (float, optional): Dithering constant . Defaults to 0.0. + energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. + frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. + frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. + high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0. + htk_compat (bool, optional): Put energy to the last when it is set True. Defaults to False. + low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0. + n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 13. + n_mels (int, optional): Number of output mel bins. Defaults to 23. + preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. + raw_energy (bool, optional): Whether to compute before preemphasis and windowing. Defaults to True. + remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. 
+ round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input + to FFT. Defaults to True. + sr (int, optional): Sample rate of input waveform. Defaults to 16000. + snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it + is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. + subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. + use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. + vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0. + vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0. + vtln_warp (float, optional): Vtln warp factor. Defaults to 1.0. + window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. + + Returns: + Tensor: A mel frequency cepstral coefficients tensor with shape `(m, n_mfcc)`. + """ + assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % ( + n_mfcc, n_mels) + + dtype = waveform.dtype + + # (m, n_mels + use_energy) + feature = fbank( + waveform=waveform, + blackman_coeff=blackman_coeff, + channel=channel, + dither=dither, + energy_floor=energy_floor, + frame_length=frame_length, + frame_shift=frame_shift, + high_freq=high_freq, + htk_compat=htk_compat, + low_freq=low_freq, + n_mels=n_mels, + preemphasis_coefficient=preemphasis_coefficient, + raw_energy=raw_energy, + remove_dc_offset=remove_dc_offset, + round_to_power_of_two=round_to_power_of_two, + sr=sr, + snip_edges=snip_edges, + subtract_mean=False, + use_energy=use_energy, + use_log_fbank=True, + use_power=True, + vtln_high=vtln_high, + vtln_low=vtln_low, + vtln_warp=vtln_warp, + window_type=window_type) + + if use_energy: + # (m) + signal_log_energy = feature[:, n_mels if htk_compat else 0] + mel_offset = int(not htk_compat) + feature = feature[:, mel_offset:(n_mels + mel_offset)] + + # (n_mels, n_mfcc) + dct_matrix = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype) + + # (m, n_mfcc) + feature = feature.matmul(dct_matrix) + + if cepstral_lifter != 0.0: + # (1, n_mfcc) + lifter_coeffs = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0) + feature *= lifter_coeffs.astype(dtype=dtype) + + if use_energy: + feature[:, 0] = signal_log_energy + + if htk_compat: + energy = feature[:, 0].unsqueeze(1) # (m, 1) + feature = feature[:, 1:] # (m, n_mfcc - 1) + if not use_energy: + energy *= math.sqrt(2) + + feature = paddle.concat((feature, energy), axis=1) + + feature = _subtract_column_mean(feature, subtract_mean) + return feature diff --git a/paddlespeech/audio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py new file mode 100644 index 000000000..c671d4fb8 --- /dev/null +++ b/paddlespeech/audio/compliance/librosa.py @@ -0,0 +1,788 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Modified from librosa(https://github.com/librosa/librosa)
+import warnings
+from typing import List
+from typing import Optional
+from typing import Union
+
+import numpy as np
+import scipy
+from numpy.lib.stride_tricks import as_strided
+from scipy import signal
+
+from ..utils import depth_convert
+from ..utils import ParameterError
+
+__all__ = [
+    # dsp
+    'stft',
+    'mfcc',
+    'hz_to_mel',
+    'mel_to_hz',
+    'mel_frequencies',
+    'power_to_db',
+    'compute_fbank_matrix',
+    'melspectrogram',
+    'spectrogram',
+    'mu_encode',
+    'mu_decode',
+    # augmentation
+    'depth_augment',
+    'spect_augment',
+    'random_crop1d',
+    'random_crop2d',
+    'adaptive_spect_augment',
+]
+
+
+def _pad_center(data: np.ndarray, size: int, axis: int=-1,
+                **kwargs) -> np.ndarray:
+    """Pad an array to a target length along a target axis.
+
+    This differs from `np.pad` by centering the data prior to padding,
+    analogous to `str.center`
+    """
+
+    kwargs.setdefault("mode", "constant")
+    n = data.shape[axis]
+    lpad = int((size - n) // 2)
+    lengths = [(0, 0)] * data.ndim
+    lengths[axis] = (lpad, int(size - n - lpad))
+
+    if lpad < 0:
+        raise ParameterError(f"Target size ({size:d}) must be "
+                             f"at least input size ({n:d})")
+
+    return np.pad(data, lengths, **kwargs)
+
+
+def _split_frames(x: np.ndarray,
+                  frame_length: int,
+                  hop_length: int,
+                  axis: int=-1) -> np.ndarray:
+    """Slice a data array into (overlapping) frames.
+
+    This function is aligned with librosa.frame
+    """
+
+    if not isinstance(x, np.ndarray):
+        raise ParameterError(
+            f"Input must be of type numpy.ndarray, given type(x)={type(x)}")
+
+    if x.shape[axis] < frame_length:
+        raise ParameterError(f"Input is too short (n={x.shape[axis]:d})"
+                             f" for frame_length={frame_length:d}")
+
+    if hop_length < 1:
+        raise ParameterError(f"Invalid hop_length: {hop_length:d}")
+
+    if axis == -1 and not x.flags["F_CONTIGUOUS"]:
+        warnings.warn(f"librosa.util.frame called with axis={axis} "
+                      "on a non-contiguous input. This will result in a copy.")
+        x = np.asfortranarray(x)
+    elif axis == 0 and not x.flags["C_CONTIGUOUS"]:
+        warnings.warn(f"librosa.util.frame called with axis={axis} "
+                      "on a non-contiguous input. This will result in a copy.")
+        x = np.ascontiguousarray(x)
+
+    n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
+    strides = np.asarray(x.strides)
+
+    new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize
+
+    if axis == -1:
+        shape = list(x.shape)[:-1] + [frame_length, n_frames]
+        strides = list(strides) + [hop_length * new_stride]
+
+    elif axis == 0:
+        shape = [n_frames, frame_length] + list(x.shape)[1:]
+        strides = [hop_length * new_stride] + list(strides)
+
+    else:
+        raise ParameterError(f"Frame axis={axis} must be either 0 or -1")
+
+    return as_strided(x, shape=shape, strides=strides)
+
+
+def _check_audio(y, mono=True) -> bool:
+    """Determine whether a variable contains valid audio data.
+
+    The audio y must be a np.ndarray, with either one channel or two channels.
+    """
+    if not isinstance(y, np.ndarray):
+        raise ParameterError("Audio data must be of type numpy.ndarray")
+    if y.ndim > 2:
+        raise ParameterError(
+            f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}")
+
+    if mono and y.ndim == 2:
+        raise ParameterError(
+            f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}")
+
+    if (mono and len(y) == 0) or (not mono and y.shape[1] < 1):
+        raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}")
+
+    if not np.issubdtype(y.dtype, np.floating):
+        raise ParameterError("Audio data must be floating-point")
+
+    if not np.isfinite(y).all():
+        raise ParameterError("Audio buffer is not finite everywhere")
+
+    return True
+
+
+def hz_to_mel(frequencies: Union[float, List[float], np.ndarray],
+              htk: bool=False) -> np.ndarray:
+    """Convert Hz to Mels.
+
+    Args:
+        frequencies (Union[float, List[float], np.ndarray]): Frequencies in Hz.
+        htk (bool, optional): Use htk scaling. Defaults to False.
+
+    Returns:
+        np.ndarray: Frequency in mels.
+    """
+    freq = np.asanyarray(frequencies)
+
+    if htk:
+        return 2595.0 * np.log10(1.0 + freq / 700.0)
+
+    # Fill in the linear part
+    f_min = 0.0
+    f_sp = 200.0 / 3
+
+    mels = (freq - f_min) / f_sp
+
+    # Fill in the log-scale part
+
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if freq.ndim:
+        # If we have array data, vectorize
+        log_t = freq >= min_log_hz
+        mels[log_t] = min_log_mel + \
+            np.log(freq[log_t] / min_log_hz) / logstep
+    elif freq >= min_log_hz:
+        # If we have scalar data, check directly
+        mels = min_log_mel + np.log(freq / min_log_hz) / logstep
+
+    return mels
+
+
+def mel_to_hz(mels: Union[float, List[float], np.ndarray],
+              htk: bool=False) -> np.ndarray:
+    """Convert mel bin numbers to frequencies.
+
+    Args:
+        mels (Union[float, List[float], np.ndarray]): Frequency in mels.
+        htk (bool, optional): Use htk scaling. Defaults to False.
+
+    Returns:
+        np.ndarray: Frequencies in Hz.
+    """
+    mel_array = np.asanyarray(mels)
+
+    if htk:
+        return 700.0 * (10.0**(mel_array / 2595.0) - 1.0)
+
+    # Fill in the linear scale
+    f_min = 0.0
+    f_sp = 200.0 / 3
+    freqs = f_min + f_sp * mel_array
+
+    # And now the nonlinear scale
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if mel_array.ndim:
+        # If we have vector data, vectorize
+        log_t = mel_array >= min_log_mel
+        freqs[log_t] = min_log_hz * \
+            np.exp(logstep * (mel_array[log_t] - min_log_mel))
+    elif mel_array >= min_log_mel:
+        # If we have scalar data, check directly
+        freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel))
+
+    return freqs
+
+
+def mel_frequencies(n_mels: int=128,
+                    fmin: float=0.0,
+                    fmax: float=11025.0,
+                    htk: bool=False) -> np.ndarray:
+    """Compute mel frequencies.
+
+    Args:
+        n_mels (int, optional): Number of mel bins. Defaults to 128.
+        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
+        fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
+        htk (bool, optional): Use htk scaling. Defaults to False.
+
+    Returns:
+        np.ndarray: Vector of n_mels frequencies in Hz with shape `(n_mels,)`.
+ """ + # 'Center freqs' of mel bands - uniformly spaced between limits + min_mel = hz_to_mel(fmin, htk=htk) + max_mel = hz_to_mel(fmax, htk=htk) + + mels = np.linspace(min_mel, max_mel, n_mels) + + return mel_to_hz(mels, htk=htk) + + +def fft_frequencies(sr: int, n_fft: int) -> np.ndarray: + """Compute fourier frequencies. + + Args: + sr (int): Sample rate. + n_fft (int): FFT size. + + Returns: + np.ndarray: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`. + """ + return np.linspace(0, float(sr) / 2, int(1 + n_fft // 2), endpoint=True) + + +def compute_fbank_matrix(sr: int, + n_fft: int, + n_mels: int=128, + fmin: float=0.0, + fmax: Optional[float]=None, + htk: bool=False, + norm: str="slaney", + dtype: type=np.float32) -> np.ndarray: + """Compute fbank matrix. + + Args: + sr (int): Sample rate. + n_fft (int): FFT size. + n_mels (int, optional): Number of mel bins. Defaults to 128. + fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0. + fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None. + htk (bool, optional): Use htk scaling. Defaults to False. + norm (str, optional): Type of normalization. Defaults to "slaney". + dtype (type, optional): Data type. Defaults to np.float32. + + + Returns: + np.ndarray: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`. + """ + if norm != "slaney": + raise ParameterError('norm must set to slaney') + + if fmax is None: + fmax = float(sr) / 2 + + # Initialize the weights + n_mels = int(n_mels) + weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) + + # Center freqs of each FFT bin + fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft) + + # 'Center freqs' of mel bands - uniformly spaced between limits + mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk) + + fdiff = np.diff(mel_f) + ramps = np.subtract.outer(mel_f, fftfreqs) + + for i in range(n_mels): + # lower and upper slopes for all bins + lower = -ramps[i] / fdiff[i] + upper = ramps[i + 2] / fdiff[i + 1] + + # .. then intersect them with each other and zero + weights[i] = np.maximum(0, np.minimum(lower, upper)) + + if norm == "slaney": + # Slaney-style mel is scaled to be approx constant energy per channel + enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels]) + weights *= enorm[:, np.newaxis] + + # Only check weights if f_mel[0] is positive + if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)): + # This means we have an empty channel somewhere + warnings.warn("Empty filters detected in mel frequency basis. " + "Some channels will produce empty responses. " + "Try increasing your sampling rate (and fmax) or " + "reducing n_mels.") + + return weights + + +def stft(x: np.ndarray, + n_fft: int=2048, + hop_length: Optional[int]=None, + win_length: Optional[int]=None, + window: str="hann", + center: bool=True, + dtype: type=np.complex64, + pad_mode: str="reflect") -> np.ndarray: + """Short-time Fourier transform (STFT). + + Args: + x (np.ndarray): Input waveform in one dimension. + n_fft (int, optional): FFT size. Defaults to 2048. + hop_length (Optional[int], optional): Number of steps to advance between adjacent windows. Defaults to None. + win_length (Optional[int], optional): The size of window. Defaults to None. + window (str, optional): A string of window specification. Defaults to "hann". + center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. + dtype (type, optional): Data type of STFT results. Defaults to np.complex64. 
+ pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect". + + Returns: + np.ndarray: The complex STFT output with shape `(n_fft//2 + 1, num_frames)`. + """ + _check_audio(x) + + # By default, use the entire frame + if win_length is None: + win_length = n_fft + + # Set the default hop, if it's not already specified + if hop_length is None: + hop_length = int(win_length // 4) + + fft_window = signal.get_window(window, win_length, fftbins=True) + + # Pad the window out to n_fft size + fft_window = _pad_center(fft_window, n_fft) + + # Reshape so that the window can be broadcast + fft_window = fft_window.reshape((-1, 1)) + + # Pad the time series so that frames are centered + if center: + if n_fft > x.shape[-1]: + warnings.warn( + f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" + ) + x = np.pad(x, int(n_fft // 2), mode=pad_mode) + + elif n_fft > x.shape[-1]: + raise ParameterError( + f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" + ) + + # Window the time series. + x_frames = _split_frames(x, frame_length=n_fft, hop_length=hop_length) + # Pre-allocate the STFT matrix + stft_matrix = np.empty( + (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F") + fft = np.fft # use numpy fft as default + # Constrain STFT block sizes to 256 KB + MAX_MEM_BLOCK = 2**8 * 2**10 + # how many columns can we fit within MAX_MEM_BLOCK? + n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize) + n_columns = max(n_columns, 1) + + for bl_s in range(0, stft_matrix.shape[1], n_columns): + bl_t = min(bl_s + n_columns, stft_matrix.shape[1]) + stft_matrix[:, bl_s:bl_t] = fft.rfft( + fft_window * x_frames[:, bl_s:bl_t], axis=0) + + return stft_matrix + + +def power_to_db(spect: np.ndarray, + ref: float=1.0, + amin: float=1e-10, + top_db: Optional[float]=80.0) -> np.ndarray: + """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way. + + Args: + spect (np.ndarray): STFT power spectrogram of an input waveform. + ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. + amin (float, optional): Minimum threshold. Defaults to 1e-10. + top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to 80.0. + + Returns: + np.ndarray: Power spectrogram in db scale. + """ + spect = np.asarray(spect) + + if amin <= 0: + raise ParameterError("amin must be strictly positive") + + if np.issubdtype(spect.dtype, np.complexfloating): + warnings.warn( + "power_to_db was called on complex input so phase " + "information will be discarded. 
To suppress this warning, " + "call power_to_db(np.abs(D)**2) instead.") + magnitude = np.abs(spect) + else: + magnitude = spect + + if callable(ref): + # User supplied a function to calculate reference power + ref_value = ref(magnitude) + else: + ref_value = np.abs(ref) + + log_spec = 10.0 * np.log10(np.maximum(amin, magnitude)) + log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value)) + + if top_db is not None: + if top_db < 0: + raise ParameterError("top_db must be non-negative") + log_spec = np.maximum(log_spec, log_spec.max() - top_db) + + return log_spec + + +def mfcc(x: np.ndarray, + sr: int=16000, + spect: Optional[np.ndarray]=None, + n_mfcc: int=20, + dct_type: int=2, + norm: str="ortho", + lifter: int=0, + **kwargs) -> np.ndarray: + """Mel-frequency cepstral coefficients (MFCCs) + + Args: + x (np.ndarray): Input waveform in one dimension. + sr (int, optional): Sample rate. Defaults to 16000. + spect (Optional[np.ndarray], optional): Input log-power Mel spectrogram. Defaults to None. + n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 20. + dct_type (int, optional): Discrete cosine transform (DCT) type. Defaults to 2. + norm (str, optional): Type of normalization. Defaults to "ortho". + lifter (int, optional): Cepstral filtering. Defaults to 0. + + Returns: + np.ndarray: Mel frequency cepstral coefficients array with shape `(n_mfcc, num_frames)`. + """ + if spect is None: + spect = melspectrogram(x, sr=sr, **kwargs) + + M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc] + + if lifter > 0: + factor = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) / + lifter) + return M * factor[:, np.newaxis] + elif lifter == 0: + return M + else: + raise ParameterError( + f"MFCC lifter={lifter} must be a non-negative number") + + +def melspectrogram(x: np.ndarray, + sr: int=16000, + window_size: int=512, + hop_length: int=320, + n_mels: int=64, + fmin: float=50.0, + fmax: Optional[float]=None, + window: str='hann', + center: bool=True, + pad_mode: str='reflect', + power: float=2.0, + to_db: bool=True, + ref: float=1.0, + amin: float=1e-10, + top_db: Optional[float]=None) -> np.ndarray: + """Compute mel-spectrogram. + + Args: + x (np.ndarray): Input waveform in one dimension. + sr (int, optional): Sample rate. Defaults to 16000. + window_size (int, optional): Size of FFT and window length. Defaults to 512. + hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320. + n_mels (int, optional): Number of mel bins. Defaults to 64. + fmin (float, optional): Minimum frequency in Hz. Defaults to 50.0. + fmax (Optional[float], optional): Maximum frequency in Hz. Defaults to None. + window (str, optional): A string of window specification. Defaults to "hann". + center (bool, optional): Whether to pad `x` to make that the :math:`t \times hop\\_length` at the center of `t`-th frame. Defaults to True. + pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect". + power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0. + to_db (bool, optional): Enable db scale. Defaults to True. + ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. + amin (float, optional): Minimum threshold. Defaults to 1e-10. + top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None. 
+
+    Returns:
+        np.ndarray: The mel-spectrogram in power scale or db scale with shape `(n_mels, num_frames)`.
+    """
+    _check_audio(x, mono=True)
+    if len(x) <= 0:
+        raise ParameterError('The input waveform is empty')
+
+    if fmax is None:
+        fmax = sr // 2
+    if fmin < 0 or fmin >= fmax:
+        raise ParameterError('fmin and fmax must satisfy 0 < fmin < fmax')
+
+    s = stft(
+        x,
+        n_fft=window_size,
+        hop_length=hop_length,
+        win_length=window_size,
+        window=window,
+        center=center,
+        pad_mode=pad_mode)
+
+    spect = np.abs(s)**power
+
+    # Build the mel filter bank and project the power spectrogram onto it.
+    fb_matrix = compute_fbank_matrix(
+        sr=sr, n_fft=window_size, n_mels=n_mels, fmin=fmin, fmax=fmax)
+    mel_spect = np.matmul(fb_matrix, spect)
+    if to_db:
+        return power_to_db(mel_spect, ref=ref, amin=amin, top_db=top_db)
+    else:
+        return mel_spect
+
+
+def spectrogram(x: np.ndarray,
+                sr: int=16000,
+                window_size: int=512,
+                hop_length: int=320,
+                window: str='hann',
+                center: bool=True,
+                pad_mode: str='reflect',
+                power: float=2.0) -> np.ndarray:
+    """Compute spectrogram.
+
+    Args:
+        x (np.ndarray): Input waveform in one dimension.
+        sr (int, optional): Sample rate. Defaults to 16000.
+        window_size (int, optional): Size of FFT and window length. Defaults to 512.
+        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
+        window (str, optional): A string of window specification. Defaults to "hann".
+        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at `x[t * hop_length]`. Defaults to True.
+        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
+        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
+
+    Returns:
+        np.ndarray: The STFT spectrogram in power scale `(n_fft//2 + 1, num_frames)`.
+    """
+
+    s = stft(
+        x,
+        n_fft=window_size,
+        hop_length=hop_length,
+        win_length=window_size,
+        window=window,
+        center=center,
+        pad_mode=pad_mode)
+
+    return np.abs(s)**power
+
+
+def mu_encode(x: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
+    """Mu-law encoding. Encode waveform based on mu-law companding. When quantized is True, the result will be converted to integer in range `[0,mu-1]`. Otherwise, the resulting waveform is in range `[-1,1]`.
+
+    Args:
+        x (np.ndarray): The input waveform to encode.
+        mu (int, optional): The encoding parameter. Defaults to 255.
+        quantized (bool, optional): If `True`, quantize the encoded values into `1 + mu` distinct integer values. Defaults to True.
+
+    Returns:
+        np.ndarray: The mu-law encoded waveform.
+    """
+    # mu-law companding; use the `mu` argument rather than a hard-coded 255
+    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
+    if quantized:
+        y = np.floor((y + 1) / 2 * mu + 0.5)  # convert to [0, mu-1]
+    return y
+
+
+def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
+    """Mu-law decoding. Compute the mu-law decoding given an input code. It assumes that the input `y` is in range `[0,mu-1]` when `quantized` is True and `[-1,1]` otherwise.
+
+    Args:
+        y (np.ndarray): The encoded waveform.
+        mu (int, optional): The encoding parameter. Defaults to 255.
+        quantized (bool, optional): If `True`, the input is assumed to be quantized to `1 + mu` distinct integer values. Defaults to True.
+
+    Returns:
+        np.ndarray: The mu-law decoded waveform.
+    """
+    if mu < 1:
+        raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...')
+
+    mu = mu - 1
+    if quantized:  # undo the quantization
+        y = y * 2 / mu - 1
+    x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
+    return x
+
+
+def _randint(high: int) -> int:
+    """Generate one random integer in range [0, high)
+
+    This is a helper function for random data augmentation
+    """
+    return int(np.random.randint(0, high=high))
+
+
+def depth_augment(y: np.ndarray,
+                  choices: List=['int8', 'int16'],
+                  probs: List[float]=[0.5, 0.5]) -> np.ndarray:
+    """Audio depth augmentation: simulate the distortion brought by quantization by converting the waveform to a lower bit depth and back.
+
+    Args:
+        y (np.ndarray): Input waveform array in 1D or 2D.
+        choices (List, optional): A list of data types for depth conversion. Defaults to ['int8', 'int16'].
+        probs (List[float], optional): Probabilities of each depth conversion. Defaults to [0.5, 0.5].
+
+    Returns:
+        np.ndarray: The augmented waveform.
+    """
+    assert len(probs) == len(
+        choices
+    ), 'number of choices {} must be equal to size of probs {}'.format(
+        len(choices), len(probs))
+    depth = np.random.choice(choices, p=probs)
+    src_depth = y.dtype
+    y1 = depth_convert(y, depth)
+    y2 = depth_convert(y1, src_depth)
+
+    return y2
+
+
+def adaptive_spect_augment(spect: np.ndarray,
+                           tempo_axis: int=0,
+                           level: float=0.1) -> np.ndarray:
+    """Do adaptive spectrogram augmentation. The level of the augmentation is governed by the parameter `level`, ranging from 0 to 1, where 0 means no augmentation.
+
+    Args:
+        spect (np.ndarray): Input spectrogram.
+        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
+        level (float, optional): The level factor of masking. Defaults to 0.1.
+
+    Returns:
+        np.ndarray: The augmented spectrogram.
+    """
+    assert spect.ndim == 2, 'only supports 2d tensor or numpy array'
+    if tempo_axis == 0:
+        nt, nf = spect.shape
+    else:
+        nf, nt = spect.shape
+
+    time_mask_width = int(nt * level * 0.5)
+    freq_mask_width = int(nf * level * 0.5)
+
+    num_time_mask = int(10 * level)
+    num_freq_mask = int(10 * level)
+
+    if tempo_axis == 0:
+        for _ in range(num_time_mask):
+            start = _randint(nt - time_mask_width)
+            spect[start:start + time_mask_width, :] = 0
+        for _ in range(num_freq_mask):
+            start = _randint(nf - freq_mask_width)
+            spect[:, start:start + freq_mask_width] = 0
+    else:
+        for _ in range(num_time_mask):
+            start = _randint(nt - time_mask_width)
+            spect[:, start:start + time_mask_width] = 0
+        for _ in range(num_freq_mask):
+            start = _randint(nf - freq_mask_width)
+            spect[start:start + freq_mask_width, :] = 0
+
+    return spect
+
+
+def spect_augment(spect: np.ndarray,
+                  tempo_axis: int=0,
+                  max_time_mask: int=3,
+                  max_freq_mask: int=3,
+                  max_time_mask_width: int=30,
+                  max_freq_mask_width: int=20) -> np.ndarray:
+    """Do spectrogram augmentation in both time and frequency axes.
+
+    Args:
+        spect (np.ndarray): Input spectrogram.
+        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
+        max_time_mask (int, optional): Maximum number of time masking. Defaults to 3.
+        max_freq_mask (int, optional): Maximum number of frequency masking. Defaults to 3.
+        max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30.
+        max_freq_mask_width (int, optional): Maximum width of frequency masking. Defaults to 20.
+
+    Returns:
+        np.ndarray: The augmented spectrogram.
+    """
+    assert spect.ndim == 2, 'only supports 2d tensor or numpy array'
+    if tempo_axis == 0:
+        nt, nf = spect.shape
+    else:
+        nf, nt = spect.shape
+
+    num_time_mask = _randint(max_time_mask)
+    num_freq_mask = _randint(max_freq_mask)
+
+    time_mask_width = _randint(max_time_mask_width)
+    freq_mask_width = _randint(max_freq_mask_width)
+
+    if tempo_axis == 0:
+        for _ in range(num_time_mask):
+            start = _randint(nt - time_mask_width)
+            spect[start:start + time_mask_width, :] = 0
+        for _ in range(num_freq_mask):
+            start = _randint(nf - freq_mask_width)
+            spect[:, start:start + freq_mask_width] = 0
+    else:
+        for _ in range(num_time_mask):
+            start = _randint(nt - time_mask_width)
+            spect[:, start:start + time_mask_width] = 0
+        for _ in range(num_freq_mask):
+            start = _randint(nf - freq_mask_width)
+            spect[start:start + freq_mask_width, :] = 0
+
+    return spect
+
+
+def random_crop1d(y: np.ndarray, crop_len: int) -> np.ndarray:
+    """ Random cropping on an input waveform.
+
+    Args:
+        y (np.ndarray): Input waveform array in 1D.
+        crop_len (int): Length of waveform to crop.
+
+    Returns:
+        np.ndarray: The cropped waveform.
+    """
+    if y.ndim != 1:
+        raise ParameterError('only accept 1d tensor or numpy array')
+    n = len(y)
+    idx = _randint(n - crop_len)
+    return y[idx:idx + crop_len]
+
+
+def random_crop2d(s: np.ndarray, crop_len: int,
+                  tempo_axis: int=0) -> np.ndarray:
+    """ Random cropping on a spectrogram.
+
+    Args:
+        s (np.ndarray): Input spectrogram in 2D.
+        crop_len (int): Length of spectrogram to crop.
+        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
+
+    Returns:
+        np.ndarray: The cropped spectrogram.
+    """
+    if tempo_axis >= s.ndim:
+        raise ParameterError('axis out of range')
+
+    n = s.shape[tempo_axis]
+    idx = _randint(high=n - crop_len)
+    sli = [slice(None) for i in range(s.ndim)]
+    sli[tempo_axis] = slice(idx, idx + crop_len)
+    out = s[tuple(sli)]
+    return out
diff --git a/paddlespeech/audio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py
new file mode 100644
index 000000000..8068fa9d3
--- /dev/null
+++ b/paddlespeech/audio/datasets/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .esc50 import ESC50
+from .voxceleb import VoxCeleb
diff --git a/paddlespeech/audio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py
new file mode 100644
index 000000000..170e91669
--- /dev/null
+++ b/paddlespeech/audio/datasets/dataset.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+
+import numpy as np
+import paddle
+
+from ..backends.soundfile_backend import soundfile_load as load_audio
+from ..compliance.kaldi import fbank as kaldi_fbank
+from ..compliance.kaldi import mfcc as kaldi_mfcc
+from ..compliance.librosa import melspectrogram
+from ..compliance.librosa import mfcc
+
+feat_funcs = {
+    'raw': None,
+    'melspectrogram': melspectrogram,
+    'mfcc': mfcc,
+    'kaldi_fbank': kaldi_fbank,
+    'kaldi_mfcc': kaldi_mfcc,
+}
+
+
+class AudioClassificationDataset(paddle.io.Dataset):
+    """
+    Base class of audio classification dataset.
+    """
+
+    def __init__(self,
+                 files: List[str],
+                 labels: List[int],
+                 feat_type: str='raw',
+                 sample_rate: int=None,
+                 **kwargs):
+        """
+        Args:
+            files (:obj:`List[str]`): A list of absolute paths of audio files.
+            labels (:obj:`List[int]`): Labels of audio files.
+ feat_type (:obj:`str`, `optional`, defaults to `raw`): + It identifies the feature type that user wants to extract of an audio file. + """ + super(AudioClassificationDataset, self).__init__() + + if feat_type not in feat_funcs.keys(): + raise RuntimeError( + f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}" + ) + + self.files = files + self.labels = labels + + self.feat_type = feat_type + self.sample_rate = sample_rate + self.feat_config = kwargs # Pass keyword arguments to customize feature config + + def _get_data(self, input_file: str): + raise NotImplementedError + + def _convert_to_record(self, idx): + file, label = self.files[idx], self.labels[idx] + + if self.sample_rate is None: + waveform, sample_rate = load_audio(file) + else: + waveform, sample_rate = load_audio(file, sr=self.sample_rate) + + feat_func = feat_funcs[self.feat_type] + + record = {} + if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']: + waveform = paddle.to_tensor(waveform).unsqueeze(0) # (C, T) + record['feat'] = feat_func( + waveform=waveform, sr=self.sample_rate, **self.feat_config) + else: + record['feat'] = feat_func( + waveform, sample_rate, + **self.feat_config) if feat_func else waveform + record['label'] = label + return record + + def __getitem__(self, idx): + record = self._convert_to_record(idx) + if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']: + return self.keys[idx], record['feat'], record['label'] + else: + return np.array(record['feat']).transpose(), np.array( + record['label'], dtype=np.int64) + + def __len__(self): + return len(self.files) diff --git a/paddlespeech/audio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py new file mode 100644 index 000000000..684a8b8f5 --- /dev/null +++ b/paddlespeech/audio/datasets/esc50.py @@ -0,0 +1,152 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import collections +import os +from typing import List +from typing import Tuple + +from ...utils.env import DATA_HOME +from ..utils.download import download_and_decompress +from .dataset import AudioClassificationDataset + +__all__ = ['ESC50'] + + +class ESC50(AudioClassificationDataset): + """ + The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings + suitable for benchmarking methods of environmental sound classification. 
The dataset + consists of 5-second-long recordings organized into 50 semantical classes (with + 40 examples per class) + + Reference: + ESC: Dataset for Environmental Sound Classification + http://dx.doi.org/10.1145/2733373.2806390 + """ + + archieves = [ + { + 'url': + 'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip', + 'md5': '7771e4b9d86d0945acce719c7a59305a', + }, + ] + label_list = [ + # Animals + 'Dog', + 'Rooster', + 'Pig', + 'Cow', + 'Frog', + 'Cat', + 'Hen', + 'Insects (flying)', + 'Sheep', + 'Crow', + # Natural soundscapes & water sounds + 'Rain', + 'Sea waves', + 'Crackling fire', + 'Crickets', + 'Chirping birds', + 'Water drops', + 'Wind', + 'Pouring water', + 'Toilet flush', + 'Thunderstorm', + # Human, non-speech sounds + 'Crying baby', + 'Sneezing', + 'Clapping', + 'Breathing', + 'Coughing', + 'Footsteps', + 'Laughing', + 'Brushing teeth', + 'Snoring', + 'Drinking, sipping', + # Interior/domestic sounds + 'Door knock', + 'Mouse click', + 'Keyboard typing', + 'Door, wood creaks', + 'Can opening', + 'Washing machine', + 'Vacuum cleaner', + 'Clock alarm', + 'Clock tick', + 'Glass breaking', + # Exterior/urban noises + 'Helicopter', + 'Chainsaw', + 'Siren', + 'Car horn', + 'Engine', + 'Train', + 'Church bells', + 'Airplane', + 'Fireworks', + 'Hand saw', + ] + meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv') + meta_info = collections.namedtuple( + 'META_INFO', + ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take')) + audio_path = os.path.join('ESC-50-master', 'audio') + + def __init__(self, + mode: str='train', + split: int=1, + feat_type: str='raw', + **kwargs): + """ + Ags: + mode (:obj:`str`, `optional`, defaults to `train`): + It identifies the dataset mode (train or dev). + split (:obj:`int`, `optional`, defaults to 1): + It specify the fold of dev dataset. + feat_type (:obj:`str`, `optional`, defaults to `raw`): + It identifies the feature type that user wants to extract of an audio file. + """ + files, labels = self._get_data(mode, split) + super(ESC50, self).__init__( + files=files, labels=labels, feat_type=feat_type, **kwargs) + + def _get_meta_info(self) -> List[collections.namedtuple]: + ret = [] + with open(os.path.join(DATA_HOME, self.meta), 'r') as rf: + for line in rf.readlines()[1:]: + ret.append(self.meta_info(*line.strip().split(','))) + return ret + + def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]: + if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ + not os.path.isfile(os.path.join(DATA_HOME, self.meta)): + download_and_decompress(self.archieves, DATA_HOME) + + meta_info = self._get_meta_info() + + files = [] + labels = [] + for sample in meta_info: + filename, fold, target, _, _, _, _ = sample + if mode == 'train' and int(fold) != split: + files.append(os.path.join(DATA_HOME, self.audio_path, filename)) + labels.append(int(target)) + + if mode != 'train' and int(fold) == split: + files.append(os.path.join(DATA_HOME, self.audio_path, filename)) + labels.append(int(target)) + + return files, labels diff --git a/paddlespeech/audio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py new file mode 100644 index 000000000..4daa6bf6f --- /dev/null +++ b/paddlespeech/audio/datasets/voxceleb.py @@ -0,0 +1,356 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import collections +import csv +import glob +import os +import random +from multiprocessing import cpu_count +from typing import List + +from paddle.io import Dataset +from pathos.multiprocessing import Pool +from tqdm import tqdm + +from ...utils.env import DATA_HOME +from ..backends.soundfile_backend import soundfile_load as load_audio +from ..utils.download import decompress +from ..utils.download import download_and_decompress +from .dataset import feat_funcs + +__all__ = ['VoxCeleb'] + + +class VoxCeleb(Dataset): + source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/' + archieves_audio_dev = [ + { + 'url': source_url + 'vox1_dev_wav_partaa', + 'md5': 'e395d020928bc15670b570a21695ed96', + }, + { + 'url': source_url + 'vox1_dev_wav_partab', + 'md5': 'bbfaaccefab65d82b21903e81a8a8020', + }, + { + 'url': source_url + 'vox1_dev_wav_partac', + 'md5': '017d579a2a96a077f40042ec33e51512', + }, + { + 'url': source_url + 'vox1_dev_wav_partad', + 'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19', + }, + ] + archieves_audio_test = [ + { + 'url': source_url + 'vox1_test_wav.zip', + 'md5': '185fdc63c3c739954633d50379a3d102', + }, + ] + archieves_meta = [ + { + 'url': + 'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt', + 'md5': + 'b73110731c9223c1461fe49cb48dddfc', + }, + ] + + num_speakers = 1211 # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41 + sample_rate = 16000 + meta_info = collections.namedtuple( + 'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id')) + base_path = os.path.join(DATA_HOME, 'vox1') + wav_path = os.path.join(base_path, 'wav') + meta_path = os.path.join(base_path, 'meta') + veri_test_file = os.path.join(meta_path, 'veri_test2.txt') + csv_path = os.path.join(base_path, 'csv') + subsets = ['train', 'dev', 'enroll', 'test'] + + def __init__( + self, + subset: str='train', + feat_type: str='raw', + random_chunk: bool=True, + chunk_duration: float=3.0, # seconds + split_ratio: float=0.9, # train split ratio + seed: int=0, + target_dir: str=None, + vox2_base_path=None, + **kwargs): + """VoxCeleb data prepare and get the specific dataset audio info + + Args: + subset (str, optional): dataset name, such as train, dev, enroll or test. Defaults to 'train'. + feat_type (str, optional): feat type, such raw, melspectrogram(fbank) or mfcc . Defaults to 'raw'. + random_chunk (bool, optional): random select a duration from audio. Defaults to True. + chunk_duration (float, optional): chunk duration if random_chunk flag is set. Defaults to 3.0. + target_dir (str, optional): data dir, audio info will be stored in this directory. Defaults to None. + vox2_base_path (_type_, optional): vox2 directory. vox2 data must be converted from m4a to wav. Defaults to None. 
+ """ + assert subset in self.subsets, \ + 'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset) + + self.subset = subset + self.spk_id2label = {} + self.feat_type = feat_type + self.feat_config = kwargs + self.random_chunk = random_chunk + self.chunk_duration = chunk_duration + self.split_ratio = split_ratio + self.target_dir = target_dir if target_dir else VoxCeleb.base_path + self.vox2_base_path = vox2_base_path + + # if we set the target dir, we will change the vox data info data from base path to target dir + VoxCeleb.csv_path = os.path.join( + target_dir, "voxceleb", 'csv') if target_dir else VoxCeleb.csv_path + VoxCeleb.meta_path = os.path.join( + target_dir, "voxceleb", + 'meta') if target_dir else VoxCeleb.meta_path + VoxCeleb.veri_test_file = os.path.join(VoxCeleb.meta_path, + 'veri_test2.txt') + # self._data = self._get_data()[:1000] # KP: Small dataset test. + self._data = self._get_data() + super(VoxCeleb, self).__init__() + + # Set up a seed to reproduce training or predicting result. + # random.seed(seed) + + def _get_data(self): + # Download audio files. + # We need the users to decompress all vox1/dev/wav and vox1/test/wav/ to vox1/wav/ dir + # so, we check the vox1/wav dir status + print(f"wav base path: {self.wav_path}") + if not os.path.isdir(self.wav_path): + print("start to download the voxceleb1 dataset") + download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip + self.archieves_audio_dev, + self.base_path, + decompress=False) + download_and_decompress( # download the vox1_test_wav.zip and unzip + self.archieves_audio_test, + self.base_path, + decompress=True) + + # Download all parts and concatenate the files into one zip file. + dev_zipfile = os.path.join(self.base_path, 'vox1_dev_wav.zip') + print(f'Concatenating all parts to: {dev_zipfile}') + os.system( + f'cat {os.path.join(self.base_path, "vox1_dev_wav_parta*")} > {dev_zipfile}' + ) + + # Extract all audio files of dev and test set. + decompress(dev_zipfile, self.base_path) + + # Download meta files. + if not os.path.isdir(self.meta_path): + print("prepare the meta data") + download_and_decompress( + self.archieves_meta, self.meta_path, decompress=False) + + # Data preparation. 
+ if not os.path.isdir(self.csv_path): + os.makedirs(self.csv_path) + self.prepare_data() + + data = [] + print( + f"read the {self.subset} from {os.path.join(self.csv_path, f'{self.subset}.csv')}" + ) + with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf: + for line in rf.readlines()[1:]: + audio_id, duration, wav, start, stop, spk_id = line.strip( + ).split(',') + data.append( + self.meta_info(audio_id, + float(duration), wav, + int(start), int(stop), spk_id)) + + with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'r') as f: + for line in f.readlines(): + spk_id, label = line.strip().split(' ') + self.spk_id2label[spk_id] = int(label) + + return data + + def _convert_to_record(self, idx: int): + sample = self._data[idx] + + record = {} + # To show all fields in a namedtuple: `type(sample)._fields` + for field in type(sample)._fields: + record[field] = getattr(sample, field) + + waveform, sr = load_audio(record['wav']) + + # random select a chunk audio samples from the audio + if self.random_chunk: + num_wav_samples = waveform.shape[0] + num_chunk_samples = int(self.chunk_duration * sr) + start = random.randint(0, num_wav_samples - num_chunk_samples - 1) + stop = start + num_chunk_samples + else: + start = record['start'] + stop = record['stop'] + + waveform = waveform[start:stop] + + assert self.feat_type in feat_funcs.keys(), \ + f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}" + feat_func = feat_funcs[self.feat_type] + feat = feat_func( + waveform, sr=sr, **self.feat_config) if feat_func else waveform + + record.update({'feat': feat}) + if self.subset in ['train', + 'dev']: # Labels are available in train and dev. + record.update({'label': self.spk_id2label[record['spk_id']]}) + + return record + + @staticmethod + def _get_chunks(seg_dur, audio_id, audio_duration): + num_chunks = int(audio_duration / seg_dur) # all in milliseconds + + chunk_lst = [ + audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur) + for i in range(num_chunks) + ] + return chunk_lst + + def _get_audio_info(self, wav_file: str, + split_chunks: bool) -> List[List[str]]: + waveform, sr = load_audio(wav_file) + spk_id, sess_id, utt_id = wav_file.split("/")[-3:] + audio_id = '-'.join([spk_id, sess_id, utt_id.split(".")[0]]) + audio_duration = waveform.shape[0] / sr + + ret = [] + if split_chunks: # Split into pieces of self.chunk_duration seconds. + uniq_chunks_list = self._get_chunks(self.chunk_duration, audio_id, + audio_duration) + + for chunk in uniq_chunks_list: + s, e = chunk.split("_")[-2:] # Timestamps of start and end + start_sample = int(float(s) * sr) + end_sample = int(float(e) * sr) + # id, duration, wav, start, stop, spk_id + ret.append([ + chunk, audio_duration, wav_file, start_sample, end_sample, + spk_id + ]) + else: # Keep whole audio. 
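Editor's aside on the chunking scheme used by `_get_chunks` above: each training utterance is sliced into fixed-length windows identified by `<audio_id>_<start>_<stop>` strings, with offsets in seconds. A self-contained sketch of the same arithmetic:

```python
# Sketch of the chunk-id arithmetic in _get_chunks(); trailing audio
# shorter than seg_dur is dropped.
def get_chunks(seg_dur: float, audio_id: str, audio_duration: float):
    num_chunks = int(audio_duration / seg_dur)
    return [
        f"{audio_id}_{i * seg_dur}_{i * seg_dur + seg_dur}"
        for i in range(num_chunks)
    ]

print(get_chunks(3.0, 'id10001-abc-00001', 10.0))
# ['id10001-abc-00001_0.0_3.0', 'id10001-abc-00001_3.0_6.0',
#  'id10001-abc-00001_6.0_9.0']
```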
+ ret.append([ + audio_id, audio_duration, wav_file, 0, waveform.shape[0], spk_id + ]) + return ret + + def generate_csv(self, + wav_files: List[str], + output_file: str, + split_chunks: bool=True): + print(f'Generating csv: {output_file}') + header = ["id", "duration", "wav", "start", "stop", "spk_id"] + # Note: this may occurs c++ exception, but the program will execute fine + # so we can ignore the exception + with Pool(cpu_count()) as p: + infos = list( + tqdm( + p.imap(lambda x: self._get_audio_info(x, split_chunks), + wav_files), + total=len(wav_files))) + + csv_lines = [] + for info in infos: + csv_lines.extend(info) + + with open(output_file, mode="w") as csv_f: + csv_writer = csv.writer( + csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL) + csv_writer.writerow(header) + for line in csv_lines: + csv_writer.writerow(line) + + def prepare_data(self): + # Audio of speakers in veri_test_file should not be included in training set. + print("start to prepare the data csv file") + enroll_files = set() + test_files = set() + # get the enroll and test audio file path + with open(self.veri_test_file, 'r') as f: + for line in f.readlines(): + _, enrol_file, test_file = line.strip().split(' ') + enroll_files.add(os.path.join(self.wav_path, enrol_file)) + test_files.add(os.path.join(self.wav_path, test_file)) + enroll_files = sorted(enroll_files) + test_files = sorted(test_files) + + # get the enroll and test speakers + test_spks = set() + for file in (enroll_files + test_files): + spk = file.split('/wav/')[1].split('/')[0] + test_spks.add(spk) + + # get all the train and dev audios file path + audio_files = [] + speakers = set() + print("Getting file list...") + for path in [self.wav_path, self.vox2_base_path]: + # if vox2 directory is not set and vox2 is not a directory + # we will not process this directory + if not path or not os.path.exists(path): + print(f"{path} is an invalid path, please check again, " + "and we will ignore the vox2 base path") + continue + for file in glob.glob( + os.path.join(path, "**", "*.wav"), recursive=True): + spk = file.split('/wav/')[1].split('/')[0] + if spk in test_spks: + continue + speakers.add(spk) + audio_files.append(file) + + print( + f"start to generate the {os.path.join(self.meta_path, 'spk_id2label.txt')}" + ) + # encode the train and dev speakers label to spk_id2label.txt + with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'w') as f: + for label, spk_id in enumerate( + sorted(speakers)): # 1211 vox1, 5994 vox2, 7205 vox1+2 + f.write(f'{spk_id} {label}\n') + + audio_files = sorted(audio_files) + random.shuffle(audio_files) + split_idx = int(self.split_ratio * len(audio_files)) + # split_ratio to train + train_files, dev_files = audio_files[:split_idx], audio_files[ + split_idx:] + + self.generate_csv(train_files, os.path.join(self.csv_path, 'train.csv')) + self.generate_csv(dev_files, os.path.join(self.csv_path, 'dev.csv')) + + self.generate_csv( + enroll_files, + os.path.join(self.csv_path, 'enroll.csv'), + split_chunks=False) + self.generate_csv( + test_files, + os.path.join(self.csv_path, 'test.csv'), + split_chunks=False) + + def __getitem__(self, idx): + return self._convert_to_record(idx) + + def __len__(self): + return len(self._data) diff --git a/paddlespeech/audio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py new file mode 100644 index 000000000..c85232df1 --- /dev/null +++ b/paddlespeech/audio/functional/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2022 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .functional import compute_fbank_matrix +from .functional import create_dct +from .functional import fft_frequencies +from .functional import hz_to_mel +from .functional import mel_frequencies +from .functional import mel_to_hz +from .functional import power_to_db diff --git a/paddlespeech/audio/functional/functional.py b/paddlespeech/audio/functional/functional.py new file mode 100644 index 000000000..7c20f9013 --- /dev/null +++ b/paddlespeech/audio/functional/functional.py @@ -0,0 +1,266 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Modified from librosa(https://github.com/librosa/librosa) +import math +from typing import Optional +from typing import Union + +import paddle +from paddle import Tensor + +__all__ = [ + 'hz_to_mel', + 'mel_to_hz', + 'mel_frequencies', + 'fft_frequencies', + 'compute_fbank_matrix', + 'power_to_db', + 'create_dct', +] + + +def hz_to_mel(freq: Union[Tensor, float], + htk: bool=False) -> Union[Tensor, float]: + """Convert Hz to Mels. + + Args: + freq (Union[Tensor, float]): The input tensor with arbitrary shape. + htk (bool, optional): Use htk scaling. Defaults to False. + + Returns: + Union[Tensor, float]: Frequency in mels. + """ + + if htk: + if isinstance(freq, Tensor): + return 2595.0 * paddle.log10(1.0 + freq / 700.0) + else: + return 2595.0 * math.log10(1.0 + freq / 700.0) + + # Fill in the linear part + f_min = 0.0 + f_sp = 200.0 / 3 + + mels = (freq - f_min) / f_sp + + # Fill in the log-scale part + + min_log_hz = 1000.0 # beginning of log region (Hz) + min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) + logstep = math.log(6.4) / 27.0 # step size for log region + + if isinstance(freq, Tensor): + target = min_log_mel + paddle.log( + freq / min_log_hz + 1e-10) / logstep # prevent nan with 1e-10 + mask = (freq > min_log_hz).astype(freq.dtype) + mels = target * mask + mels * ( + 1 - mask) # will replace by masked_fill OP in future + else: + if freq >= min_log_hz: + mels = min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep + + return mels + + +def mel_to_hz(mel: Union[float, Tensor], + htk: bool=False) -> Union[float, Tensor]: + """Convert mel bin numbers to frequencies. + + Args: + mel (Union[float, Tensor]): The mel frequency represented as a tensor with arbitrary shape. + htk (bool, optional): Use htk scaling. Defaults to False. + + Returns: + Union[float, Tensor]: Frequencies in Hz. 
+ """ + if htk: + return 700.0 * (10.0**(mel / 2595.0) - 1.0) + + f_min = 0.0 + f_sp = 200.0 / 3 + freqs = f_min + f_sp * mel + # And now the nonlinear scale + min_log_hz = 1000.0 # beginning of log region (Hz) + min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) + logstep = math.log(6.4) / 27.0 # step size for log region + if isinstance(mel, Tensor): + target = min_log_hz * paddle.exp(logstep * (mel - min_log_mel)) + mask = (mel > min_log_mel).astype(mel.dtype) + freqs = target * mask + freqs * ( + 1 - mask) # will replace by masked_fill OP in future + else: + if mel >= min_log_mel: + freqs = min_log_hz * math.exp(logstep * (mel - min_log_mel)) + + return freqs + + +def mel_frequencies(n_mels: int=64, + f_min: float=0.0, + f_max: float=11025.0, + htk: bool=False, + dtype: str='float32') -> Tensor: + """Compute mel frequencies. + + Args: + n_mels (int, optional): Number of mel bins. Defaults to 64. + f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0. + fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0. + htk (bool, optional): Use htk scaling. Defaults to False. + dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'. + + Returns: + Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`. + """ + # 'Center freqs' of mel bands - uniformly spaced between limits + min_mel = hz_to_mel(f_min, htk=htk) + max_mel = hz_to_mel(f_max, htk=htk) + mels = paddle.linspace(min_mel, max_mel, n_mels, dtype=dtype) + freqs = mel_to_hz(mels, htk=htk) + return freqs + + +def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor: + """Compute fourier frequencies. + + Args: + sr (int): Sample rate. + n_fft (int): Number of fft bins. + dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'. + + Returns: + Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`. + """ + return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype) + + +def compute_fbank_matrix(sr: int, + n_fft: int, + n_mels: int=64, + f_min: float=0.0, + f_max: Optional[float]=None, + htk: bool=False, + norm: Union[str, float]='slaney', + dtype: str='float32') -> Tensor: + """Compute fbank matrix. + + Args: + sr (int): Sample rate. + n_fft (int): Number of fft bins. + n_mels (int, optional): Number of mel bins. Defaults to 64. + f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0. + f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None. + htk (bool, optional): Use htk scaling. Defaults to False. + norm (Union[str, float], optional): Type of normalization. Defaults to 'slaney'. + dtype (str, optional): The data type of the return matrix. Defaults to 'float32'. + + Returns: + Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`. + """ + + if f_max is None: + f_max = float(sr) / 2 + + # Initialize the weights + weights = paddle.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) + + # Center freqs of each FFT bin + fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype) + + # 'Center freqs' of mel bands - uniformly spaced between limits + mel_f = mel_frequencies( + n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype) + + fdiff = mel_f[1:] - mel_f[:-1] #np.diff(mel_f) + ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0) + #ramps = np.subtract.outer(mel_f, fftfreqs) + + for i in range(n_mels): + # lower and upper slopes for all bins + lower = -ramps[i] / fdiff[i] + upper = ramps[i + 2] / fdiff[i + 1] + + # .. 
then intersect them with each other and zero + weights[i] = paddle.maximum( + paddle.zeros_like(lower), paddle.minimum(lower, upper)) + + # Slaney-style mel is scaled to be approx constant energy per channel + if norm == 'slaney': + enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels]) + weights *= enorm.unsqueeze(1) + elif isinstance(norm, int) or isinstance(norm, float): + weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1) + + return weights + + +def power_to_db(spect: Tensor, + ref_value: float=1.0, + amin: float=1e-10, + top_db: Optional[float]=None) -> Tensor: + """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way. + + Args: + spect (Tensor): STFT power spectrogram. + ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0. + amin (float, optional): Minimum threshold. Defaults to 1e-10. + top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None. + + Returns: + Tensor: Power spectrogram in db scale. + """ + if amin <= 0: + raise Exception("amin must be strictly positive") + + if ref_value <= 0: + raise Exception("ref_value must be strictly positive") + + ones = paddle.ones_like(spect) + log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect)) + log_spec -= 10.0 * math.log10(max(ref_value, amin)) + + if top_db is not None: + if top_db < 0: + raise Exception("top_db must be non-negative") + log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db)) + + return log_spec + + +def create_dct(n_mfcc: int, + n_mels: int, + norm: Optional[str]='ortho', + dtype: str='float32') -> Tensor: + """Create a discrete cosine transform(DCT) matrix. + + Args: + n_mfcc (int): Number of mel frequency cepstral coefficients. + n_mels (int): Number of mel filterbanks. + norm (Optional[str], optional): Normalization type. Defaults to 'ortho'. + dtype (str, optional): The data type of the return matrix. Defaults to 'float32'. + + Returns: + Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`. + """ + n = paddle.arange(n_mels, dtype=dtype) + k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1) + dct = paddle.cos(math.pi / float(n_mels) * (n + 0.5) * + k) # size (n_mfcc, n_mels) + if norm is None: + dct *= 2.0 + else: + assert norm == "ortho" + dct[0] *= 1.0 / math.sqrt(2.0) + dct *= math.sqrt(2.0 / float(n_mels)) + return dct.T diff --git a/paddlespeech/audio/functional/window.py b/paddlespeech/audio/functional/window.py new file mode 100644 index 000000000..c518dbab3 --- /dev/null +++ b/paddlespeech/audio/functional/window.py @@ -0,0 +1,373 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
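Taken together, the helpers above cover the classic log-mel pipeline: mel scale conversion, filterbank construction, and dB scaling. A minimal end-to-end sketch (editor's example; it imports from the `paddlespeech.audio.functional` package added by this patch):

```python
# Minimal sketch combining hz_to_mel/mel_to_hz, compute_fbank_matrix
# and power_to_db; shapes follow the docstrings above.
import paddle
from paddlespeech.audio.functional import (compute_fbank_matrix, hz_to_mel,
                                           mel_to_hz, power_to_db)

sr, n_fft, n_mels = 16000, 512, 64

# hz_to_mel/mel_to_hz should round-trip (Slaney scale by default):
f = paddle.to_tensor([440.0, 1000.0, 4000.0])
assert paddle.allclose(mel_to_hz(hz_to_mel(f)), f, atol=1e-3)

# (n_mels, n_fft//2 + 1) filterbank applied to a dummy power spectrum:
fbank = compute_fbank_matrix(sr=sr, n_fft=n_fft, n_mels=n_mels)
spectrum = paddle.rand([n_fft // 2 + 1, 100])  # (freq_bins, frames)
log_mel = power_to_db(paddle.matmul(fbank, spectrum), top_db=80.0)
print(log_mel.shape)  # [64, 100]
```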
+# See the License for the specific language governing permissions and +import math +from typing import List +from typing import Tuple +from typing import Union + +import paddle +from paddle import Tensor + + +class WindowFunctionRegister(object): + def __init__(self): + self._functions_dict = dict() + + def register(self): + def add_subfunction(func): + name = func.__name__ + self._functions_dict[name] = func + return func + + return add_subfunction + + def get(self, name): + return self._functions_dict[name] + + +window_function_register = WindowFunctionRegister() + + +@window_function_register.register() +def _cat(x: List[Tensor], data_type: str) -> Tensor: + l = [paddle.to_tensor(_, data_type) for _ in x] + return paddle.concat(l) + + +@window_function_register.register() +def _acosh(x: Union[Tensor, float]) -> Tensor: + if isinstance(x, float): + return math.log(x + math.sqrt(x**2 - 1)) + return paddle.log(x + paddle.sqrt(paddle.square(x) - 1)) + + +@window_function_register.register() +def _extend(M: int, sym: bool) -> bool: + """Extend window by 1 sample if needed for DFT-even symmetry.""" + if not sym: + return M + 1, True + else: + return M, False + + +@window_function_register.register() +def _len_guards(M: int) -> bool: + """Handle small or incorrect window lengths.""" + if int(M) != M or M < 0: + raise ValueError('Window length M must be a non-negative integer') + + return M <= 1 + + +@window_function_register.register() +def _truncate(w: Tensor, needed: bool) -> Tensor: + """Truncate window by 1 sample if needed for DFT-even symmetry.""" + if needed: + return w[:-1] + else: + return w + + +@window_function_register.register() +def _general_gaussian(M: int, p, sig, sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a window with a generalized Gaussian shape. + This function is consistent with scipy.signal.windows.general_gaussian(). + """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + + n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 + w = paddle.exp(-0.5 * paddle.abs(n / sig)**(2 * p)) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _general_cosine(M: int, a: float, sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a generic weighted sum of cosine terms window. + This function is consistent with scipy.signal.windows.general_cosine(). + """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype) + w = paddle.zeros((M, ), dtype=dtype) + for k in range(len(a)): + w += a[k] * paddle.cos(k * fac) + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _general_hamming(M: int, alpha: float, sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a generalized Hamming window. + This function is consistent with scipy.signal.windows.general_hamming() + """ + return _general_cosine(M, [alpha, 1.0 - alpha], sym, dtype=dtype) + + +@window_function_register.register() +def _taylor(M: int, + nbar=4, + sll=30, + norm=True, + sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a Taylor window. + The Taylor window taper function approximates the Dolph-Chebyshev window's + constant sidelobe level for a parameterized number of near-in sidelobes. 
+ """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + # Original text uses a negative sidelobe level parameter and then negates + # it in the calculation of B. To keep consistent with other methods we + # assume the sidelobe level parameter to be positive. + B = 10**(sll / 20) + A = _acosh(B) / math.pi + s2 = nbar**2 / (A**2 + (nbar - 0.5)**2) + ma = paddle.arange(1, nbar, dtype=dtype) + + Fm = paddle.empty((nbar - 1, ), dtype=dtype) + signs = paddle.empty_like(ma) + signs[::2] = 1 + signs[1::2] = -1 + m2 = ma * ma + for mi in range(len(ma)): + numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 / (A**2 + (ma - 0.5)**2 + )) + if mi == 0: + denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:]) + elif mi == len(ma) - 1: + denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) + else: + denom = (2 * paddle.prod(1 - m2[mi] / m2[:mi]) * + paddle.prod(1 - m2[mi] / m2[mi + 1:])) + + Fm[mi] = numer / denom + + def W(n): + return 1 + 2 * paddle.matmul( + Fm.unsqueeze(0), + paddle.cos(2 * math.pi * ma.unsqueeze(1) * + (n - M / 2.0 + 0.5) / M), ) + + w = W(paddle.arange(0, M, dtype=dtype)) + + # normalize (Note that this is not described in the original text [1]) + if norm: + scale = 1.0 / W((M - 1) / 2) + w *= scale + w = w.squeeze() + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a Hamming window. + The Hamming window is a taper formed by using a raised cosine with + non-zero endpoints, optimized to minimize the nearest side lobe. + """ + return _general_hamming(M, 0.54, sym, dtype=dtype) + + +@window_function_register.register() +def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a Hann window. + The Hann window is a taper formed by using a raised cosine or sine-squared + with ends that touch zero. + """ + return _general_hamming(M, 0.5, sym, dtype=dtype) + + +@window_function_register.register() +def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a Tukey window. + The Tukey window is also known as a tapered cosine window. + """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + + if alpha <= 0: + return paddle.ones((M, ), dtype=dtype) + elif alpha >= 1.0: + return hann(M, sym=sym) + + M, needs_trunc = _extend(M, sym) + + n = paddle.arange(0, M, dtype=dtype) + width = int(alpha * (M - 1) / 2.0) + n1 = n[0:width + 1] + n2 = n[width + 1:M - width - 1] + n3 = n[M - width - 1:] + + w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1)))) + w2 = paddle.ones(n2.shape, dtype=dtype) + w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha / + (M - 1)))) + w = paddle.concat([w1, w2, w3]) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _gaussian(M: int, std: float, sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a Gaussian window. + The Gaussian widows has a Gaussian shape defined by the standard deviation(std). 
+ """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + + n = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0 + sig2 = 2 * std * std + w = paddle.exp(-(n**2) / sig2) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _exponential(M: int, + center=None, + tau=1.0, + sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute an exponential (or Poisson) window.""" + if sym and center is not None: + raise ValueError("If sym==True, center must be None.") + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + + if center is None: + center = (M - 1) / 2 + + n = paddle.arange(0, M, dtype=dtype) + w = paddle.exp(-paddle.abs(n - center) / tau) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a triangular window.""" + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + + n = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype) + if M % 2 == 0: + w = (2 * n - 1.0) / M + w = paddle.concat([w, w[::-1]]) + else: + w = 2 * n / (M + 1.0) + w = paddle.concat([w, w[-2::-1]]) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a Bohman window. + The Bohman window is the autocorrelation of a cosine window. + """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + + fac = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1]) + w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin( + math.pi * fac) + w = _cat([0, w, 0], dtype) + + return _truncate(w, needs_trunc) + + +@window_function_register.register() +def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a Blackman window. + The Blackman window is a taper formed by using the first three terms of + a summation of cosines. It was designed to have close to the minimal + leakage possible. It is close to optimal, only slightly worse than a + Kaiser window. + """ + return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype) + + +@window_function_register.register() +def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor: + """Compute a window with a simple cosine shape.""" + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + w = paddle.sin(math.pi / M * (paddle.arange(0, M, dtype=dtype) + 0.5)) + + return _truncate(w, needs_trunc) + + +def get_window( + window: Union[str, Tuple[str, float]], + win_length: int, + fftbins: bool=True, + dtype: str='float64', ) -> Tensor: + """Return a window of a given length and type. + + Args: + window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. + win_length (int): Number of samples. + fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True. + dtype (str, optional): The data type of the return window. Defaults to 'float64'. + + Returns: + Tensor: The window represented as a tensor. + + Examples: + .. 
code-block:: python + + import paddle + + n_fft = 512 + cosine_window = paddle.audio.functional.get_window('cosine', n_fft) + + std = 7 + gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft) + """ + sym = not fftbins + + args = () + if isinstance(window, tuple): + winstr = window[0] + if len(window) > 1: + args = window[1:] + elif isinstance(window, str): + if window in ['gaussian', 'exponential']: + raise ValueError("The '" + window + "' window needs one or " + "more parameters -- pass a tuple.") + else: + winstr = window + else: + raise ValueError("%s as window type is not supported." % + str(type(window))) + + try: + winfunc = window_function_register.get('_' + winstr) + except KeyError as e: + raise ValueError("Unknown window type.") from e + + params = (win_length, ) + args + kwargs = {'sym': sym} + return winfunc(*params, dtype=dtype, **kwargs) diff --git a/paddlespeech/audio/streamdata/autodecode.py b/paddlespeech/audio/streamdata/autodecode.py index 2e82226df..664509842 100644 --- a/paddlespeech/audio/streamdata/autodecode.py +++ b/paddlespeech/audio/streamdata/autodecode.py @@ -304,13 +304,11 @@ def paddle_audio(key, data): if extension not in ["flac", "mp3", "sox", "wav", "m4a", "ogg", "wma"]: return None - import paddleaudio - with tempfile.TemporaryDirectory() as dirname: fname = os.path.join(dirname, f"file.{extension}") with open(fname, "wb") as stream: stream.write(data) - return paddleaudio.backends.soundfile_load(fname) + return paddlespeech.audio.backends.soundfile_load(fname) ################################################################ diff --git a/paddlespeech/audio/streamdata/filters.py b/paddlespeech/audio/streamdata/filters.py index 110b4a304..9a00c2dc6 100644 --- a/paddlespeech/audio/streamdata/filters.py +++ b/paddlespeech/audio/streamdata/filters.py @@ -22,8 +22,6 @@ from fnmatch import fnmatch from functools import reduce import paddle -from paddleaudio import backends -from paddleaudio.compliance import kaldi from . import autodecode from . import utils @@ -33,6 +31,8 @@ from ..transform.spec_augment import time_mask from ..transform.spec_augment import time_warp from ..utils.tensor_utils import pad_sequence from .utils import PipelineStage +from paddlespeech.audio import backends +from paddlespeech.audio.compliance import kaldi class FilterFunction(object): diff --git a/paddlespeech/audio/streamdata/soundfile.py b/paddlespeech/audio/streamdata/soundfile.py new file mode 100644 index 000000000..7611fd297 --- /dev/null +++ b/paddlespeech/audio/streamdata/soundfile.py @@ -0,0 +1,677 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
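Since each private window in `window.py` above claims consistency with its `scipy.signal.windows` counterpart, a quick parity check makes a good smoke test. Editor's sketch (assumes SciPy is installed; the module path is the one this patch adds):

```python
# Parity sketch: compare the paddle windows against scipy.signal.
import numpy as np
import scipy.signal as ss

from paddlespeech.audio.functional.window import get_window

M = 400
for name in ['hamming', 'hann', 'blackman', 'triang', 'bohman', 'cosine']:
    w_paddle = get_window(name, M, fftbins=True).numpy()
    w_scipy = ss.get_window(name, M, fftbins=True)
    np.testing.assert_allclose(w_paddle, w_scipy, atol=1e-6)

# Parameterized windows are passed as (name, param) tuples:
gaussian = get_window(('gaussian', 7.0), M)
```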
+import os
+import warnings
+from typing import Optional
+from typing import Tuple
+
+import numpy as np
+import paddle
+import resampy
+import soundfile
+from scipy.io import wavfile
+
+from ..utils import depth_convert
+from ..utils import ParameterError
+from .common import AudioInfo
+
+__all__ = [
+    'resample',
+    'to_mono',
+    'normalize',
+    'save',
+    'soundfile_save',
+    'load',
+    'soundfile_load',
+    'info',
+]
+NORMALMIZE_TYPES = ['linear', 'gaussian']
+MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
+RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
+EPS = 1e-8
+
+
+def resample(y: np.ndarray,
+             src_sr: int,
+             target_sr: int,
+             mode: str='kaiser_fast') -> np.ndarray:
+    """Audio resampling.
+
+    Args:
+        y (np.ndarray): Input waveform array in 1D or 2D.
+        src_sr (int): Source sample rate.
+        target_sr (int): Target sample rate.
+        mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.
+
+    Returns:
+        np.ndarray: `y` resampled to `target_sr`
+    """
+
+    if mode == 'kaiser_best':
+        warnings.warn(
+            f'Using resampy in kaiser_best mode to resample {src_sr}=>{target_sr}. This mode is pretty slow; \
+    we recommend the kaiser_fast mode for large-scale audio training')
+
+    if not isinstance(y, np.ndarray):
+        raise ParameterError(
+            f'Only numpy np.ndarray is supported, but received y of {type(y)}')
+
+    if mode not in RESAMPLE_MODES:
+        raise ParameterError(f'resample mode must be in {RESAMPLE_MODES}')
+
+    return resampy.resample(y, src_sr, target_sr, filter=mode)
+
+
+def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
+    """Convert stereo audio to mono.
+
+    Args:
+        y (np.ndarray): Input waveform array in 1D or 2D.
+        merge_type (str, optional): Merge type to generate mono waveform. Defaults to 'average'.
+
+    Returns:
+        np.ndarray: `y` with mono channel.
+    """
+
+    if merge_type not in MERGE_TYPES:
+        raise ParameterError(
+            f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}'
+        )
+    if y.ndim > 2:
+        raise ParameterError(
+            f'Unsupported audio array, y.ndim > 2, the shape is {y.shape}')
+    if y.ndim == 1:  # nothing to merge
+        return y
+
+    if merge_type == 'ch0':
+        return y[0]
+    if merge_type == 'ch1':
+        return y[1]
+    if merge_type == 'random':
+        return y[np.random.randint(0, 2)]
+
+    # need to do averaging according to dtype
+
+    if y.dtype == 'float32':
+        y_out = (y[0] + y[1]) * 0.5
+    elif y.dtype == 'int16':
+        y_out = y.astype('int32')
+        y_out = (y_out[0] + y_out[1]) // 2
+        y_out = np.clip(y_out,
+                        np.iinfo(y.dtype).min,
+                        np.iinfo(y.dtype).max).astype(y.dtype)
+
+    elif y.dtype == 'int8':
+        y_out = y.astype('int16')
+        y_out = (y_out[0] + y_out[1]) // 2
+        y_out = np.clip(y_out,
+                        np.iinfo(y.dtype).min,
+                        np.iinfo(y.dtype).max).astype(y.dtype)
+    else:
+        raise ParameterError(f'Unsupported dtype: {y.dtype}')
+    return y_out
+
+
+def soundfile_load_(file: os.PathLike,
+                    offset: Optional[float]=None,
+                    dtype: str='int16',
+                    duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
+    """Load audio using the soundfile library, which is backed by libsndfile.
+
+    Args:
+        file (os.PathLike): File of waveform.
+        offset (Optional[float], optional): Offset to the start of waveform. Defaults to None.
+        dtype (str, optional): Data type of waveform. Defaults to 'int16'.
+        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
+
+    Returns:
+        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
+ """ + with soundfile.SoundFile(file) as sf_desc: + sr_native = sf_desc.samplerate + if offset: + sf_desc.seek(int(offset * sr_native)) + if duration is not None: + frame_duration = int(duration * sr_native) + else: + frame_duration = -1 + y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T + + return y, sf_desc.samplerate + + +def normalize(y: np.ndarray, norm_type: str='linear', + mul_factor: float=1.0) -> np.ndarray: + """Normalize an input audio with additional multiplier. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + norm_type (str, optional): Type of normalization. Defaults to 'linear'. + mul_factor (float, optional): Scaling factor. Defaults to 1.0. + + Returns: + np.ndarray: `y` after normalization. + """ + + if norm_type == 'linear': + amax = np.max(np.abs(y)) + factor = 1.0 / (amax + EPS) + y = y * factor * mul_factor + elif norm_type == 'gaussian': + amean = np.mean(y) + astd = np.std(y) + astd = max(astd, EPS) + y = mul_factor * (y - amean) / astd + else: + raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}') + + return y + + +def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: + """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16. + + Args: + y (np.ndarray): Input waveform array in 1D or 2D. + sr (int): Sample rate. + file (os.PathLike): Path of audio file to save. + """ + if not file.endswith('.wav'): + raise ParameterError( + f'only .wav file supported, but dst file name is: {file}') + + if sr <= 0: + raise ParameterError( + f'Sample rate should be larger than 0, received sr = {sr}') + + if y.dtype not in ['int16', 'int8']: + warnings.warn( + f'input data type is {y.dtype}, will convert data to int16 format before saving' + ) + y_out = depth_convert(y, 'int16') + else: + y_out = y + + wavfile.write(file, sr, y_out) + + +def soundfile_load( + file: os.PathLike, + sr: Optional[int]=None, + mono: bool=True, + merge_type: str='average', # ch0,ch1,random,average + normal: bool=True, + norm_type: str='linear', + norm_mul_factor: float=1.0, + offset: float=0.0, + duration: Optional[int]=None, + dtype: str='float32', + resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]: + """Load audio file from disk. This function loads audio from disk using using audio backend. + + Args: + file (os.PathLike): Path of audio file to load. + sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None. + mono (bool, optional): Return waveform with mono channel. Defaults to True. + merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'. + normal (bool, optional): Waveform normalization. Defaults to True. + norm_type (str, optional): Type of normalization. Defaults to 'linear'. + norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0. + offset (float, optional): Offset to the start of waveform. Defaults to 0.0. + duration (Optional[int], optional): Duration of waveform to read. Defaults to None. + dtype (str, optional): Data type of waveform. Defaults to 'float32'. + resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'. + + Returns: + Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate. 
+ """ + + y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration) + + if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): + raise ParameterError(f'audio file {file} looks empty') + + if mono: + y = to_mono(y, merge_type) + + if sr is not None and sr != r: + y = resample(y, r, sr, mode=resample_mode) + r = sr + + if normal: + y = normalize(y, norm_type, norm_mul_factor) + elif dtype in ['int8', 'int16']: + # still need to do normalization, before depth conversion + y = normalize(y, 'linear', 1.0) + + y = depth_convert(y, dtype) + return y, r + + +#The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications. + + +def _get_subtype_for_wav(dtype: paddle.dtype, + encoding: str, + bits_per_sample: int): + if not encoding: + if not bits_per_sample: + subtype = { + paddle.uint8: "PCM_U8", + paddle.int16: "PCM_16", + paddle.int32: "PCM_32", + paddle.float32: "FLOAT", + paddle.float64: "DOUBLE", + }.get(dtype) + if not subtype: + raise ValueError(f"Unsupported dtype for wav: {dtype}") + return subtype + if bits_per_sample == 8: + return "PCM_U8" + return f"PCM_{bits_per_sample}" + if encoding == "PCM_S": + if not bits_per_sample: + return "PCM_32" + if bits_per_sample == 8: + raise ValueError("wav does not support 8-bit signed PCM encoding.") + return f"PCM_{bits_per_sample}" + if encoding == "PCM_U": + if bits_per_sample in (None, 8): + return "PCM_U8" + raise ValueError("wav only supports 8-bit unsigned PCM encoding.") + if encoding == "PCM_F": + if bits_per_sample in (None, 32): + return "FLOAT" + if bits_per_sample == 64: + return "DOUBLE" + raise ValueError("wav only supports 32/64-bit float PCM encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("wav only supports 8-bit mu-law encoding.") + if encoding == "ALAW": + if bits_per_sample in (None, 8): + return "ALAW" + raise ValueError("wav only supports 8-bit a-law encoding.") + raise ValueError(f"wav does not support {encoding}.") + + +def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): + if encoding in (None, "PCM_S"): + return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32" + if encoding in ("PCM_U", "PCM_F"): + raise ValueError(f"sph does not support {encoding} encoding.") + if encoding == "ULAW": + if bits_per_sample in (None, 8): + return "ULAW" + raise ValueError("sph only supports 8-bit for mu-law encoding.") + if encoding == "ALAW": + return "ALAW" + raise ValueError(f"sph does not support {encoding}.") + + +def _get_subtype(dtype: paddle.dtype, + format: str, + encoding: str, + bits_per_sample: int): + if format == "wav": + return _get_subtype_for_wav(dtype, encoding, bits_per_sample) + if format == "flac": + if encoding: + raise ValueError("flac does not support encoding.") + if not bits_per_sample: + return "PCM_16" + if bits_per_sample > 24: + raise ValueError("flac does not support bits_per_sample > 24.") + return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" + if format in ("ogg", "vorbis"): + if encoding or bits_per_sample: + raise ValueError( + "ogg/vorbis does not support encoding/bits_per_sample.") + return "VORBIS" + if format == "sph": + return _get_subtype_for_sphere(encoding, bits_per_sample) + if format in ("nis", "nist"): + return "PCM_16" + raise ValueError(f"Unsupported format: {format}") + + +def save( + filepath: str, + src: paddle.Tensor, + sample_rate: int, + channels_first: bool=True, + compression: 
Optional[float]=None, + format: Optional[str]=None, + encoding: Optional[str]=None, + bits_per_sample: Optional[int]=None, ): + """Save audio data to file. + + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * FLAC + * OGG/VORBIS + * SPHERE + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, + + Args: + filepath (str or pathlib.Path): Path to audio file. + src (paddle.Tensor): Audio data to save. must be 2D tensor. + sample_rate (int): sampling rate + channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, + otherwise `[time, channel]`. + compression (float of None, optional): Not used. + It is here only for interface compatibility reason with "sox_io" backend. + format (str or None, optional): Override the audio format. + When ``filepath`` argument is path-like object, audio format is + inferred from file extension. If the file extension is missing or + different, you can specify the correct format with this argument. + + When ``filepath`` argument is file-like object, + this argument is required. + + Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, + ``"flac"`` and ``"sph"``. + encoding (str or None, optional): Changes the encoding for supported formats. + This argument is effective only for supported formats, such as + ``"wav"``, ``""flac"`` and ``"sph"``. Valid values are: + + - ``"PCM_S"`` (signed integer Linear PCM) + - ``"PCM_U"`` (unsigned integer Linear PCM) + - ``"PCM_F"`` (floating point PCM) + - ``"ULAW"`` (mu-law) + - ``"ALAW"`` (a-law) + + bits_per_sample (int or None, optional): Changes the bit depth for the + supported formats. + When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``, + you can change the bit depth. + Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``. + + Supported formats/encodings/bit depth/compression are: + + ``"wav"`` + - 32-bit floating-point PCM + - 32-bit signed integer PCM + - 24-bit signed integer PCM + - 16-bit signed integer PCM + - 8-bit unsigned integer PCM + - 8-bit mu-law + - 8-bit a-law + + Note: + Default encoding/bit depth is determined by the dtype of + the input Tensor. + + ``"flac"`` + - 8-bit + - 16-bit (default) + - 24-bit + + ``"ogg"``, ``"vorbis"`` + - Doesn't accept changing configuration. + + ``"sph"`` + - 8-bit signed integer PCM + - 16-bit signed integer PCM + - 24-bit signed integer PCM + - 32-bit signed integer PCM (default) + - 8-bit mu-law + - 8-bit a-law + - 16-bit a-law + - 24-bit a-law + - 32-bit a-law + + """ + if src.ndim != 2: + raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.") + if compression is not None: + warnings.warn( + '`save` function of "soundfile" backend does not support "compression" parameter. ' + "The argument is silently ignored.") + if hasattr(filepath, "write"): + if format is None: + raise RuntimeError( + "`format` is required when saving to file object.") + ext = format.lower() + else: + ext = str(filepath).split(".")[-1].lower() + + if bits_per_sample not in (None, 8, 16, 24, 32, 64): + raise ValueError("Invalid bits_per_sample.") + if bits_per_sample == 24: + warnings.warn( + "Saving audio with 24 bits per sample might warp samples near -1. 
" + "Using 16 bits per sample might be able to avoid this.") + subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) + + # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, + # so we extend the extensions manually here + if ext in ["nis", "nist", "sph"] and format is None: + format = "NIST" + + if channels_first: + src = src.t() + + soundfile.write( + file=filepath, + data=src, + samplerate=sample_rate, + subtype=subtype, + format=format) + + +_SUBTYPE2DTYPE = { + "PCM_S8": "int8", + "PCM_U8": "uint8", + "PCM_16": "int16", + "PCM_32": "int32", + "FLOAT": "float32", + "DOUBLE": "float64", +} + + +def load( + filepath: str, + frame_offset: int=0, + num_frames: int=-1, + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]: + """Load audio data from file. + + Note: + The formats this function can handle depend on the soundfile installation. + This function is tested on the following formats; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * FLAC + * OGG/VORBIS + * SPHERE + + By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with + ``float32`` dtype and the shape of `[channel, time]`. + The samples are normalized to fit in the range of ``[-1.0, 1.0]``. + + When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit + signed integer and 8-bit unsigned integer (24-bit signed integer is not supported), + by providing ``normalize=False``, this function can return integer Tensor, where the samples + are expressed within the whole range of the corresponding dtype, that is, ``int32`` tensor + for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. + + ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as + ``flac`` and ``mp3``. + For these formats, this function always returns ``float32`` Tensor with values normalized to + ``[-1.0, 1.0]``. + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend. + + Args: + filepath (path-like object or file-like object): + Source of audio data. + frame_offset (int, optional): + Number of frames to skip before start reading data. + num_frames (int, optional): + Maximum number of frames to read. ``-1`` reads all the remaining samples, + starting from ``frame_offset``. + This function may return the less number of frames if there is not enough + frames in the given file. + normalize (bool, optional): + When ``True``, this function always return ``float32``, and sample values are + normalized to ``[-1.0, 1.0]``. + If input file is integer WAV, giving ``False`` will change the resulting Tensor type to + integer type. + This argument has no effect for formats other than integer WAV type. + channels_first (bool, optional): + When True, the returned Tensor has dimension `[channel, time]`. + Otherwise, the returned Tensor's dimension is `[time, channel]`. + format (str or None, optional): + Not used. PySoundFile does not accept format hint. + + Returns: + (paddle.Tensor, int): Resulting Tensor and sample rate. + If the input file has integer wav format and normalization is off, then it has + integer type, else ``float32`` type. If ``channels_first=True``, it has + `[channel, time]` else `[time, channel]`. 
+ """ + with soundfile.SoundFile(filepath, "r") as file_: + if file_.format != "WAV" or normalize: + dtype = "float32" + elif file_.subtype not in _SUBTYPE2DTYPE: + raise ValueError(f"Unsupported subtype: {file_.subtype}") + else: + dtype = _SUBTYPE2DTYPE[file_.subtype] + + frames = file_._prepare_read(frame_offset, None, num_frames) + waveform = file_.read(frames, dtype, always_2d=True) + sample_rate = file_.samplerate + + waveform = paddle.to_tensor(waveform) + if channels_first: + waveform = paddle.transpose(waveform, perm=[1, 0]) + return waveform, sample_rate + + +# Mapping from soundfile subtype to number of bits per sample. +# This is mostly heuristical and the value is set to 0 when it is irrelevant +# (lossy formats) or when it can't be inferred. +# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: +# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, +# the default seems to be 8 bits but it can be compressed further to 4 bits. +# The dict is inspired from +# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 +_SUBTYPE_TO_BITS_PER_SAMPLE = { + "PCM_S8": 8, # Signed 8 bit data + "PCM_16": 16, # Signed 16 bit data + "PCM_24": 24, # Signed 24 bit data + "PCM_32": 32, # Signed 32 bit data + "PCM_U8": 8, # Unsigned 8 bit data (WAV and RAW only) + "FLOAT": 32, # 32 bit float data + "DOUBLE": 64, # 64 bit float data + "ULAW": 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + "ALAW": 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + "IMA_ADPCM": 0, # IMA ADPCM. + "MS_ADPCM": 0, # Microsoft ADPCM. + "GSM610": + 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) + "VOX_ADPCM": 0, # OKI / Dialogix ADPCM + "G721_32": 0, # 32kbs G721 ADPCM encoding. + "G723_24": 0, # 24kbs G723 ADPCM encoding. + "G723_40": 0, # 40kbs G723 ADPCM encoding. + "DWVW_12": 12, # 12 bit Delta Width Variable Word encoding. + "DWVW_16": 16, # 16 bit Delta Width Variable Word encoding. + "DWVW_24": 24, # 24 bit Delta Width Variable Word encoding. + "DWVW_N": 0, # N bit Delta Width Variable Word encoding. + "DPCM_8": 8, # 8 bit differential PCM (XI only) + "DPCM_16": 16, # 16 bit differential PCM (XI only) + "VORBIS": 0, # Xiph Vorbis encoding. (lossy) + "ALAC_16": 16, # Apple Lossless Audio Codec (16 bit). + "ALAC_20": 20, # Apple Lossless Audio Codec (20 bit). + "ALAC_24": 24, # Apple Lossless Audio Codec (24 bit). + "ALAC_32": 32, # Apple Lossless Audio Codec (32 bit). +} + + +def _get_bit_depth(subtype): + if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: + warnings.warn( + f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample " + "attribute will be set to 0. If you are seeing this warning, please " + "report by opening an issue on github (after checking for existing/closed ones). 
" + "You may otherwise ignore this warning.") + return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) + + +_SUBTYPE_TO_ENCODING = { + "PCM_S8": "PCM_S", + "PCM_16": "PCM_S", + "PCM_24": "PCM_S", + "PCM_32": "PCM_S", + "PCM_U8": "PCM_U", + "FLOAT": "PCM_F", + "DOUBLE": "PCM_F", + "ULAW": "ULAW", + "ALAW": "ALAW", + "VORBIS": "VORBIS", +} + + +def _get_encoding(format: str, subtype: str): + if format == "FLAC": + return "FLAC" + return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN") + + +def info(filepath: str, format: Optional[str]=None) -> AudioInfo: + """Get signal information of an audio file. + + Note: + ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts + ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend, + + Args: + filepath (path-like object or file-like object): + Source of audio data. + format (str or None, optional): + Not used. PySoundFile does not accept format hint. + + Returns: + AudioInfo: meta data of the given audio. + + """ + sinfo = soundfile.info(filepath) + return AudioInfo( + sinfo.samplerate, + sinfo.frames, + sinfo.channels, + bits_per_sample=_get_bit_depth(sinfo.subtype), + encoding=_get_encoding(sinfo.format, sinfo.subtype), ) diff --git a/paddlespeech/audio/streamdata/tariterators.py b/paddlespeech/audio/streamdata/tariterators.py index 3adf4892a..8429e6f77 100644 --- a/paddlespeech/audio/streamdata/tariterators.py +++ b/paddlespeech/audio/streamdata/tariterators.py @@ -20,9 +20,9 @@ trace = False meta_prefix = "__" meta_suffix = "__" -import paddleaudio import paddle import numpy as np +from paddlespeech.audio.backends import soundfile_load AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma']) @@ -111,7 +111,7 @@ def tar_file_iterator(fileobj, assert pos > 0 prefix, postfix = name[:pos], name[pos + 1:] if postfix == 'wav': - waveform, sample_rate = paddleaudio.backends.soundfile_load( + waveform, sample_rate = soundfile_load( stream.extractfile(tarinfo), normal=False) result = dict( fname=prefix, wav=waveform, sample_rate=sample_rate) @@ -163,7 +163,7 @@ def tar_file_and_group_iterator(fileobj, if postfix == 'txt': example['txt'] = file_obj.read().decode('utf8').strip() elif postfix in AUDIO_FORMAT_SETS: - waveform, sample_rate = paddleaudio.backends.soundfile_load( + waveform, sample_rate = soundfile_load( file_obj, normal=False) waveform = paddle.to_tensor( np.expand_dims(np.array(waveform), 0), diff --git a/paddlespeech/audio/transform/spectrogram.py b/paddlespeech/audio/transform/spectrogram.py index f2dab3169..a4da86ec7 100644 --- a/paddlespeech/audio/transform/spectrogram.py +++ b/paddlespeech/audio/transform/spectrogram.py @@ -15,9 +15,10 @@ import librosa import numpy as np import paddle -from paddleaudio.compliance import kaldi from python_speech_features import logfbank +from paddlespeech.audio.compliance import kaldi + def stft(x, n_fft, diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 5e2168e3d..fa49f7bdb 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -22,11 +22,11 @@ import numpy as np import paddle import yaml from paddle.audio.features import LogMelSpectrogram -from paddleaudio.backends import soundfile_load as load from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import soundfile_load as load __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index 
ce2f3f461..6dee4cc84 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -20,12 +20,12 @@ from typing import Union import paddle import yaml -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.kaldi import fbank as kaldi_fbank from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 57a781656..c4ae11c75 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index a6b735335..3085a8482 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -19,10 +19,10 @@ import paddle from paddle import inference from paddle.audio.datasets import ESC50 from paddle.audio.features import LogMelSpectrogram -from paddleaudio.backends import soundfile_load as load_audio from scipy.special import softmax import paddlespeech.utils +from paddlespeech.audio.backends import soundfile_load as load_audio # yapf: disable parser = argparse.ArgumentParser() diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index e860b54aa..5163dbacf 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle -from paddleaudio.datasets import ESC50 +from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 4681e4dc9..6b0eb9f68 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -18,12 +18,11 @@ import paddle import paddle.nn.functional as F import yaml from paddle.audio.features import LogMelSpectrogram -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.utils import logger +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import -#from paddleaudio.features import LogMelSpectrogram # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index b768919be..5e5e0809d 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -17,9 +17,9 @@ import os import paddle import yaml from paddle.audio.features import 
LogMelSpectrogram -from paddleaudio.utils import logger -from paddleaudio.utils import Timer +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index 6f9af9b52..37deae80c 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F -from paddleaudio.utils.download import load_state_dict_from_url +from paddlespeech.audio.utils.download import load_state_dict_from_url from paddlespeech.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index bb727d36a..d5bb5e020 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle -from paddleaudio.utils import logger -from paddleaudio.utils import Timer from yacs.config import CfgNode +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 22329d5e0..ac5720fd5 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,10 +14,11 @@ """Contains the audio featurizer class.""" import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc +import paddlespeech.audio.compliance.kaldi as kaldi + class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/modules/fbank.py b/paddlespeech/s2t/modules/fbank.py index 30671c274..8d76a4727 100644 --- a/paddlespeech/s2t/modules/fbank.py +++ b/paddlespeech/s2t/modules/fbank.py @@ -1,7 +1,7 @@ import paddle from paddle import nn -from paddleaudio.compliance import kaldi +from paddlespeech.audio.compliance import kaldi from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 7d86f3df7..f02a942fb 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 6aa6fd589..47871922b 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,13 +24,13 @@ from typing import Any from 
typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load from .entry import client_commands from .entry import server_commands +from paddlespeech.audio.backends import soundfile_load from paddlespeech.cli import download try: from .. import __version__ @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.backends.soundfile_load(params['audio_file']) + _, sr = soundfile_load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py index 5901c805a..b29d0863e 100644 --- a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py +++ b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py @@ -13,9 +13,9 @@ # limitations under the License. import paddle import paddle.nn.functional as F -import paddleaudio.functional as audio_F from paddle import nn +from paddlespeech.audio.functional import create_dct from paddlespeech.utils.initialize import _calculate_gain from paddlespeech.utils.initialize import xavier_uniform_ @@ -243,7 +243,7 @@ class MFCC(nn.Layer): self.n_mfcc = n_mfcc self.n_mels = n_mels self.norm = 'ortho' - dct_mat = audio_F.create_dct(self.n_mfcc, self.n_mels, self.norm) + dct_mat = create_dct(self.n_mfcc, self.n_mels, self.norm) self.register_buffer('dct_mat', dct_mat) def forward(self, mel_specgram: paddle.Tensor): diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index 821b1deed..a2a19cb66 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index f15dbf9b7..167b82422 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,7 +18,7 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader -from paddleaudio.metric import compute_eer +from sklearn.metrics import roc_curve from tqdm import tqdm from yacs.config import CfgNode @@ -129,6 +129,23 @@ def compute_verification_scores(id2embedding, train_cohort, config): return scores, labels +def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]: + """Compute EER and return score threshold. 
+ + Args: + labels (np.ndarray): the trial label, shape: [N], one-dimension, N refer to the samples num + scores (np.ndarray): the trial scores, shape: [N], one-dimension, N refer to the samples num + + Returns: + List[float]: eer and the specific threshold + """ + fpr, tpr, threshold = roc_curve(y_true=labels, y_score=scores) + fnr = 1 - tpr + eer_threshold = threshold[np.nanargmin(np.absolute((fnr - fpr)))] + eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + return eer, eer_threshold + + def main(args, config): """The main process for test the speaker verification model diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 2dc7a7164..3966a900d 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index dff8ad9fd..ae5c83637 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,9 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 852f39a94..1d1a4ad9c 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,9 +16,10 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram -from paddleaudio.compliance.librosa import mfcc + +from paddlespeech.audio.backends import soundfile_load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram +from paddlespeech.audio.compliance.librosa import mfcc @dataclass diff --git a/setup.py b/setup.py index 8c2a4c1b7..5996ff178 100644 --- a/setup.py +++ b/setup.py @@ -99,7 +99,6 @@ base = [ determine_opencc_version(), # opencc or opencc==1.1.6 "opencc-python-reimplemented", "pandas", - "paddleaudio>=1.1.0", "paddlenlp>=2.4.8", "paddleslim>=2.3.4", "ppdiffusers>=0.9.0", @@ -122,6 +121,9 @@ base = [ "webrtcvad", "yacs>=0.1.8", "zhon", + "scikit-learn", + "pathos", + "kaldiio", ] server = ["pattern_singleton", "websockets"] diff --git a/tests/unit/audiotools/core/test_audio_signal.py b/tests/unit/audiotools/core/test_audio_signal.py index 0e82ae9d5..19575828c 100644 --- a/tests/unit/audiotools/core/test_audio_signal.py +++ b/tests/unit/audiotools/core/test_audio_signal.py @@ -26,14 +26,14 @@ def test_io(): signal_from_file = AudioSignal(f.name) mp3_signal = AudioSignal(audio_path.replace("wav", "mp3")) - print(mp3_signal) assert signal == 
signal_from_file - print(signal) - print(signal.markdown()) mp3_signal = AudioSignal.excerpt( audio_path.replace("wav", "mp3"), offset=5, duration=5) + + assert mp3_signal.sample_rate == 44100 + assert mp3_signal.signal_length == 220500 assert mp3_signal.signal_duration == 5.0 assert mp3_signal.duration == 5.0 assert mp3_signal.length == mp3_signal.signal_length From 367b665ca17a209b3c0ab6746efa137c1d77d7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:25:11 +0800 Subject: [PATCH 14/46] lower the install requirements (#3985) * lower the install requirements * Update setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5996ff178..881b06deb 100644 --- a/setup.py +++ b/setup.py @@ -54,8 +54,8 @@ def determine_opencc_version(): # determine opencc version if gcc_version: - if int(gcc_version.split(".")[0]) <= 9: - return "opencc==1.1.6" # GCC<=9 need opencc==1.1.6 + if int(gcc_version.split(".")[0]) < 9: + return "opencc==1.1.6" # GCC<9 need opencc==1.1.6 return "opencc" # default From 793a89d53c8904103488ab806b255f2a5467ea86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Thu, 20 Feb 2025 20:30:36 +0800 Subject: [PATCH 15/46] fit with librosa (#3989) * fit with librosa * Update base_commands.py * Apply suggestions from code review * Apply suggestions from code review --- paddlespeech/cli/base_commands.py | 3 +++ setup.py | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/paddlespeech/cli/base_commands.py b/paddlespeech/cli/base_commands.py index dfeb5cae5..fb5a190ed 100644 --- a/paddlespeech/cli/base_commands.py +++ b/paddlespeech/cli/base_commands.py @@ -122,6 +122,9 @@ class StatsCommand: elif "multilingual" in key: line[4], line[1] = line[1].split("_")[0], line[1].split( "_")[1:] + # Avoid having arrays within the elements of the input parameters when passing them to numpy.array + if type(line[1]) is list: + line[1] = "/".join(line[1]) tmp = numpy.array(line) idx = [0, 5, 3, 4, 1, 2] line = tmp[idx] diff --git a/setup.py b/setup.py index 881b06deb..71e7aaf2a 100644 --- a/setup.py +++ b/setup.py @@ -88,9 +88,8 @@ base = [ "hyperpyyaml", "inflect", "jsonlines", - # paddleaudio align with librosa==0.8.1, which need numpy==1.23.x - "numpy==1.23.5", - "librosa==0.8.1", + "numpy", + "librosa", determine_scipy_version(), # scipy or scipy>=1.4.0, <=1.12.0 "loguru", determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4 From afa6f12ba14d6f7abddbc6faaa93dbfcc9581033 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Wed, 26 Feb 2025 11:16:14 +0800 Subject: [PATCH 16/46] paddlespeech/audiotools/ml/basemodel.py (#3994) --- paddlespeech/audiotools/ml/basemodel.py | 37 ++++++++++--------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/paddlespeech/audiotools/ml/basemodel.py b/paddlespeech/audiotools/ml/basemodel.py index 97c31ff7a..2d5683266 100644 --- a/paddlespeech/audiotools/ml/basemodel.py +++ b/paddlespeech/audiotools/ml/basemodel.py @@ -110,7 +110,8 @@ class BaseModel(nn.Layer): state_dict = {"state_dict": self.state_dict(), "metadata": metadata} paddle.save(state_dict, str(path)) else: - self._save_package(path, intern=intern, extern=extern, mock=mock) + raise NotImplementedError( + "Currently Paddle does not support packaging") return path @@ -151,31 +152,21 @@ class BaseModel(nn.Layer): BaseModel 
A model that inherits from BaseModel. """ - try: - model = cls._load_package(location, package_name=package_name) - except: - model_dict = paddle.load(location) - metadata = model_dict["metadata"] - metadata["kwargs"].update(kwargs) - - sig = inspect.signature(cls) - class_keys = list(sig.parameters.keys()) - for k in list(metadata["kwargs"].keys()): - if k not in class_keys: - metadata["kwargs"].pop(k) - - model = cls(*args, **metadata["kwargs"]) - model.set_state_dict(model_dict["state_dict"]) - model.metadata = metadata + model_dict = paddle.load(location) + metadata = model_dict["metadata"] + metadata["kwargs"].update(kwargs) - return model + sig = inspect.signature(cls) + class_keys = list(sig.parameters.keys()) + for k in list(metadata["kwargs"].keys()): + if k not in class_keys: + metadata["kwargs"].pop(k) - def _save_package(self, path, intern=[], extern=[], mock=[], **kwargs): - raise NotImplementedError("Currently Paddle does not support packaging") + model = cls(*args, **metadata["kwargs"]) + model.set_state_dict(model_dict["state_dict"]) + model.metadata = metadata - @classmethod - def _load_package(cls, path, package_name=None): - raise NotImplementedError("Currently Paddle does not support packaging") + return model def save_to_folder( self, From d7bf91561d5a8a025f3cfc4bd7b28368fd98d102 Mon Sep 17 00:00:00 2001 From: cchenhaifeng <134115991+cchenhaifeng@users.noreply.github.com> Date: Wed, 26 Feb 2025 16:46:34 +0800 Subject: [PATCH 17/46] =?UTF-8?q?=E3=80=90Hackathon=208th=20No.9=E3=80=91?= =?UTF-8?q?=E5=9C=A8=20PaddleSpeech=20=E4=B8=AD=E5=A4=8D=E7=8E=B0=20DAC=20?= =?UTF-8?q?=E8=AE=AD=E7=BB=83=E9=9C=80=E8=A6=81=E7=94=A8=E5=88=B0=E7=9A=84?= =?UTF-8?q?=20loss=20(#3988)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add DAC loss * fix bug * fix codestyle * fix codestyle * fix codestyle * fix codestyle * fix codestyle * fix codestyle --- paddlespeech/__init__.py | 4 - paddlespeech/audiotools/core/__init__.py | 4 +- paddlespeech/audiotools/core/_julius.py | 3 +- paddlespeech/audiotools/core/util.py | 8 +- paddlespeech/t2s/modules/losses.py | 279 +++++++++++++++++++++++ tests/unit/audiotools/core/test_util.py | 5 +- tests/unit/audiotools/test_audiotools.sh | 1 - tests/unit/ci.sh | 2 + tests/unit/tts/test_losses.py | 61 +++++ 9 files changed, 351 insertions(+), 16 deletions(-) create mode 100644 tests/unit/tts/test_losses.py diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py index 969d189f5..6c7e75c1f 100644 --- a/paddlespeech/__init__.py +++ b/paddlespeech/__init__.py @@ -13,7 +13,3 @@ # limitations under the License. import _locale _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) - -__version__ = '0.0.0' - -__commit__ = '9cf8c1985a98bb380c183116123672976bdfe5c9' diff --git a/paddlespeech/audiotools/core/__init__.py b/paddlespeech/audiotools/core/__init__.py index 609d6a34a..3443a7676 100644 --- a/paddlespeech/audiotools/core/__init__.py +++ b/paddlespeech/audiotools/core/__init__.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. from . 
import util -from ._julius import fft_conv1d -from ._julius import FFTConv1D +from ...t2s.modules import fft_conv1d +from ...t2s.modules import FFTConv1D from ._julius import highpass_filter from ._julius import highpass_filters from ._julius import lowpass_filter diff --git a/paddlespeech/audiotools/core/_julius.py b/paddlespeech/audiotools/core/_julius.py index aef51f98f..113475cdd 100644 --- a/paddlespeech/audiotools/core/_julius.py +++ b/paddlespeech/audiotools/core/_julius.py @@ -20,8 +20,6 @@ import paddle import paddle.nn as nn import paddle.nn.functional as F -from paddlespeech.t2s.modules import fft_conv1d -from paddlespeech.t2s.modules import FFTConv1D from paddlespeech.utils import satisfy_paddle_version __all__ = [ @@ -312,6 +310,7 @@ class LowPassFilters(nn.Layer): mode="replicate", data_format="NCL") if self.fft: + from paddlespeech.t2s.modules import fft_conv1d out = fft_conv1d(_input, self.filters, stride=self.stride) else: out = F.conv1d(_input, self.filters, stride=self.stride) diff --git a/paddlespeech/audiotools/core/util.py b/paddlespeech/audiotools/core/util.py index 6da927a6f..676d57704 100644 --- a/paddlespeech/audiotools/core/util.py +++ b/paddlespeech/audiotools/core/util.py @@ -32,7 +32,6 @@ import soundfile from flatten_dict import flatten from flatten_dict import unflatten -from .audio_signal import AudioSignal from paddlespeech.utils import satisfy_paddle_version from paddlespeech.vector.training.seeding import seed_everything @@ -232,8 +231,7 @@ def ensure_tensor( def _get_value(other): # - from . import AudioSignal - + from .audio_signal import AudioSignal if isinstance(other, AudioSignal): return other.audio_data return other @@ -784,6 +782,8 @@ def collate(list_of_dicts: list, n_splits: int=None): Dictionary containing batched data. """ + from .audio_signal import AudioSignal + batches = [] list_len = len(list_of_dicts) @@ -873,7 +873,7 @@ def generate_chord_dataset( """ import librosa - from . import AudioSignal + from .audio_signal import AudioSignal from ..data.preprocess import create_csv min_midi = librosa.note_to_midi(min_note) diff --git a/paddlespeech/t2s/modules/losses.py b/paddlespeech/t2s/modules/losses.py index f819352d6..a1a65a9dc 100644 --- a/paddlespeech/t2s/modules/losses.py +++ b/paddlespeech/t2s/modules/losses.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import math +from typing import Callable +from typing import List +from typing import Optional from typing import Tuple +from typing import Union import librosa import numpy as np @@ -23,6 +27,8 @@ from scipy import signal from scipy.stats import betabinom from typeguard import typechecked +from paddlespeech.audiotools.core.audio_signal import AudioSignal +from paddlespeech.audiotools.core.audio_signal import STFTParams from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask from paddlespeech.t2s.modules.predictor.duration_predictor import ( DurationPredictorLoss, # noqa: H301 @@ -1326,3 +1332,276 @@ class ForwardSumLoss(nn.Layer): bb_prior[bidx, :T, :N] = prob return bb_prior + + +class MultiScaleSTFTLoss(nn.Layer): + """Computes the multi-scale STFT loss from [1]. + + References + ---------- + + 1. Engel, Jesse, Chenjie Gu, and Adam Roberts. + "DDSP: Differentiable Digital Signal Processing." + International Conference on Learning Representations. 2019. 
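+
+    Informally (a reading of the ``forward`` method below, not a formula
+    from the cited paper): for each window length ``w`` the loss adds
+    ``log_weight * L1(log10(clip(|STFT_w(x)|, clamp_eps) ** pow),
+    log10(clip(|STFT_w(y)|, clamp_eps) ** pow))`` plus
+    ``mag_weight * L1(|STFT_w(x)|, |STFT_w(y)|)``, summed over all
+    window lengths.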
+
+    Implementation copied from: https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/spectral.py
+    """
+
+    def __init__(
+            self,
+            window_lengths: List[int]=[2048, 512],
+            loss_fn: Callable=nn.L1Loss(),
+            clamp_eps: float=1e-5,
+            mag_weight: float=1.0,
+            log_weight: float=1.0,
+            pow: float=2.0,
+            weight: float=1.0,
+            match_stride: bool=False,
+            window_type: Optional[str]=None, ):
+        """
+        Args:
+            window_lengths : List[int], optional
+                Length of each window of each STFT, by default [2048, 512]
+            loss_fn : typing.Callable, optional
+                How to compare each loss, by default nn.L1Loss()
+            clamp_eps : float, optional
+                Clamp on the log magnitude, below, by default 1e-5
+            mag_weight : float, optional
+                Weight of raw magnitude portion of loss, by default 1.0
+            log_weight : float, optional
+                Weight of log magnitude portion of loss, by default 1.0
+            pow : float, optional
+                Power to raise magnitude to before taking log, by default 2.0
+            weight : float, optional
+                Weight of this loss, by default 1.0
+            match_stride : bool, optional
+                Whether to match the stride of convolutional layers, by default False
+            window_type : str, optional
+                Type of window to use, by default None.
+        """
+        super().__init__()
+
+        self.stft_params = [
+            STFTParams(
+                window_length=w,
+                hop_length=w // 4,
+                match_stride=match_stride,
+                window_type=window_type, ) for w in window_lengths
+        ]
+        self.loss_fn = loss_fn
+        self.log_weight = log_weight
+        self.mag_weight = mag_weight
+        self.clamp_eps = clamp_eps
+        self.weight = weight
+        self.pow = pow
+
+    def forward(self, x: AudioSignal, y: AudioSignal):
+        """Computes the multi-scale STFT loss between an estimate and a
+        reference signal.
+
+        Args:
+            x : AudioSignal
+                Estimate signal
+            y : AudioSignal
+                Reference signal
+
+        Returns:
+            paddle.Tensor
+                Multi-scale STFT loss.
+
+        Example:
+            >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
+            >>> import paddle
+
+            >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05)
+            >>> y = x * 0.01
+            >>> loss = MultiScaleSTFTLoss()
+            >>> loss(x, y).numpy()
+            7.562150
+        """
+        # Accumulate the loss over all STFT resolutions; it must start from
+        # zero so the in-place additions below are well defined.
+        loss = 0.0
+        for s in self.stft_params:
+            x.stft(s.window_length, s.hop_length, s.window_type)
+            y.stft(s.window_length, s.hop_length, s.window_type)
+            loss += self.log_weight * self.loss_fn(
+                x.magnitude.clip(self.clamp_eps).pow(self.pow).log10(),
+                y.magnitude.clip(self.clamp_eps).pow(self.pow).log10(), )
+            loss += self.mag_weight * self.loss_fn(x.magnitude, y.magnitude)
+        return loss
+
+
+class GANLoss(nn.Layer):
+    """
+    Computes a discriminator loss, given a discriminator applied to
+    generated waveforms/spectrograms and to ground truth
+    waveforms/spectrograms. Computes the loss for both the
+    discriminator and the generator in separate functions.
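+
+    Note:
+        This is the least-squares GAN objective as written in the methods
+        below: ``discriminator_loss`` computes ``mean(D(fake)^2) +
+        mean((1 - D(real))^2)``, and ``generator_loss`` returns
+        ``mean((1 - D(fake))^2)`` together with an L1 feature-matching
+        term over the intermediate discriminator features.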
+
+    Example:
+        >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
+        >>> import paddle
+
+        >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05)
+        >>> y = x * 0.01
+        >>> class My_discriminator0:
+        ...     def __call__(self, x):
+        ...         return x.sum()
+        >>> loss = GANLoss(My_discriminator0())
+        >>> [loss(x, y)[0].numpy(), loss(x, y)[1].numpy()]
+        [-0.102722, -0.001027]
+
+        >>> class My_discriminator1:
+        ...     def __call__(self, x):
+        ...         return x * (-0.2)
+        >>> loss = GANLoss(My_discriminator1())
+        >>> [loss.generator_loss(x, y)[0].numpy(), loss.generator_loss(x, y)[1].numpy()]
+        [1.00019, 0]
+
+        >>> loss.discriminator_loss(x, y)
+        1.000200
+    """
+
+    def __init__(self, discriminator):
+        """
+        Args:
+            discriminator : paddle.nn.Layer
+                Discriminator model
+        """
+        super().__init__()
+        self.discriminator = discriminator
+
+    def forward(self,
+                fake: Union[AudioSignal, paddle.Tensor],
+                real: Union[AudioSignal, paddle.Tensor]):
+        if isinstance(fake, AudioSignal):
+            d_fake = self.discriminator(fake.audio_data)
+        else:
+            d_fake = self.discriminator(fake)
+
+        if isinstance(real, AudioSignal):
+            d_real = self.discriminator(real.audio_data)
+        else:
+            d_real = self.discriminator(real)
+        return d_fake, d_real
+
+    def discriminator_loss(self, fake, real):
+        d_fake, d_real = self.forward(fake, real)
+
+        loss_d = 0
+        for x_fake, x_real in zip(d_fake, d_real):
+            loss_d += paddle.mean(x_fake[-1]**2)
+            loss_d += paddle.mean((1 - x_real[-1])**2)
+        return loss_d
+
+    def generator_loss(self, fake, real):
+        d_fake, d_real = self.forward(fake, real)
+
+        loss_g = 0
+        for x_fake in d_fake:
+            loss_g += paddle.mean((1 - x_fake[-1])**2)
+
+        loss_feature = 0
+
+        for i in range(len(d_fake)):
+            for j in range(len(d_fake[i]) - 1):
+                # feature-matching target is detached so no gradient flows
+                # through the real branch
+                loss_feature += F.l1_loss(d_fake[i][j], d_real[i][j].detach())
+        return loss_g, loss_feature
+
+
+class SISDRLoss(nn.Layer):
+    """
+    Computes the Scale-Invariant Source-to-Distortion Ratio between a batch
+    of estimated and reference audio signals or aligned features.
+
+    Implementation copied from: https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/distance.py
+
+    Example:
+        >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
+        >>> import paddle
+
+        >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05)
+        >>> y = x * 0.01
+        >>> sisdr = SISDRLoss()
+        >>> sisdr(x, y).numpy()
+        -145.377640
+    """
+
+    def __init__(
+            self,
+            scaling: bool=True,
+            reduction: str="mean",
+            zero_mean: bool=True,
+            clip_min: Optional[int]=None,
+            weight: float=1.0, ):
+        """
+        Args:
+            scaling : bool, optional
+                Whether to use scale-invariant (True) or
+                signal-to-noise ratio (False), by default True
+            reduction : str, optional
+                How to reduce across the batch (either 'mean',
+                'sum', or 'none'), by default 'mean'
+            zero_mean : bool, optional
+                Zero mean the references and estimates before
+                computing the loss, by default True
+            clip_min : int, optional
+                The minimum possible loss value. Helps the network
+                not focus on making already good examples better, by default None
+            weight : float, optional
+                Weight of this loss, defaults to 1.0.
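+
+        Note:
+            With ``scaling=True`` the value computed in ``forward`` is,
+            informally, ``-10 * log10(||s_t||^2 / ||e||^2)`` with
+            ``s_t = (<est, ref> / ||ref||^2) * ref`` and ``e = est - s_t``;
+            lower (more negative) values indicate a closer match.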
+ """ + self.scaling = scaling + self.reduction = reduction + self.zero_mean = zero_mean + self.clip_min = clip_min + self.weight = weight + super().__init__() + + def forward(self, + x: Union[AudioSignal, paddle.Tensor], + y: Union[AudioSignal, paddle.Tensor]): + eps = 1e-8 + # B, C, T + if isinstance(x, AudioSignal): + references = x.audio_data + estimates = y.audio_data + else: + references = x + estimates = y + + nb = references.shape[0] + references = references.reshape([nb, 1, -1]).transpose([0, 2, 1]) + estimates = estimates.reshape([nb, 1, -1]).transpose([0, 2, 1]) + + # samples now on axis 1 + if self.zero_mean: + mean_reference = references.mean(axis=1, keepdim=True) + mean_estimate = estimates.mean(axis=1, keepdim=True) + else: + mean_reference = 0 + mean_estimate = 0 + + _references = references - mean_reference + _estimates = estimates - mean_estimate + + references_projection = (_references**2).sum(axis=-2) + eps + references_on_estimates = (_estimates * _references).sum(axis=-2) + eps + + scale = ( + (references_on_estimates / references_projection).unsqueeze(axis=1) + if self.scaling else 1) + + e_true = scale * _references + e_res = _estimates - e_true + + signal = (e_true**2).sum(axis=1) + noise = (e_res**2).sum(axis=1) + sdr = -10 * paddle.log10(signal / noise + eps) + + if self.clip_min != None: + sdr = paddle.clip(sdr, min=self.clip_min) + + if self.reduction == "mean": + sdr = sdr.mean() + elif self.reduction == "sum": + sdr = sdr.sum() + return sdr diff --git a/tests/unit/audiotools/core/test_util.py b/tests/unit/audiotools/core/test_util.py index 155686acd..16e5d5e92 100644 --- a/tests/unit/audiotools/core/test_util.py +++ b/tests/unit/audiotools/core/test_util.py @@ -13,7 +13,6 @@ import pytest from paddlespeech.audiotools import util from paddlespeech.audiotools.core.audio_signal import AudioSignal -from paddlespeech.vector.training.seeding import seed_everything def test_check_random_state(): @@ -36,12 +35,12 @@ def test_check_random_state(): def test_seed(): - seed_everything(0) + util.seed_everything(0) paddle_result_a = paddle.randn([1]) np_result_a = np.random.randn(1) py_result_a = random.random() - seed_everything(0) + util.seed_everything(0) paddle_result_b = paddle.randn([1]) np_result_b = np.random.randn(1) py_result_b = random.random() diff --git a/tests/unit/audiotools/test_audiotools.sh b/tests/unit/audiotools/test_audiotools.sh index 3a0161900..f69447d62 100644 --- a/tests/unit/audiotools/test_audiotools.sh +++ b/tests/unit/audiotools/test_audiotools.sh @@ -1,4 +1,3 @@ -python -m pip install -r ../../../paddlespeech/audiotools/requirements.txt wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/audio.tar.gz wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/regression.tar.gz tar -zxvf audio.tar.gz diff --git a/tests/unit/ci.sh b/tests/unit/ci.sh index 6beff0707..567af2210 100644 --- a/tests/unit/ci.sh +++ b/tests/unit/ci.sh @@ -1,6 +1,7 @@ function main(){ set -ex speech_ci_path=`pwd` + python -m pip install -r ../../paddlespeech/audiotools/requirements.txt echo "Start asr" cd ${speech_ci_path}/asr @@ -16,6 +17,7 @@ function main(){ python test_enfrontend.py python test_fftconv1d.py python test_mixfrontend.py + python test_losses.py echo "End TTS" echo "Start Vector" diff --git a/tests/unit/tts/test_losses.py b/tests/unit/tts/test_losses.py new file mode 100644 index 000000000..f99d15d1c --- /dev/null +++ b/tests/unit/tts/test_losses.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np + +from paddlespeech.audiotools.core.audio_signal import AudioSignal +from paddlespeech.t2s.modules.losses import GANLoss +from paddlespeech.t2s.modules.losses import MultiScaleSTFTLoss +from paddlespeech.t2s.modules.losses import SISDRLoss + + +def get_input(): + x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", + 2_05) + y = x * 0.01 + return x, y + + +def test_multi_scale_stft_loss(): + x, y = get_input() + loss = MultiScaleSTFTLoss() + pd_loss = loss(x, y) + assert np.abs(pd_loss.numpy() - 7.562150) < 1e-06 + + +def test_sisdr_loss(): + x, y = get_input() + loss = SISDRLoss() + pd_loss = loss(x, y) + assert np.abs(pd_loss.numpy() - (-145.377640)) < 1e-06 + + +def test_gan_loss(): + class My_discriminator0: + def __call__(self, x): + return x.sum() + + class My_discriminator1: + def __call__(self, x): + return x * (-0.2) + + x, y = get_input() + loss = GANLoss(My_discriminator0()) + pd_loss0, pd_loss1 = loss(x, y) + assert np.abs(pd_loss0.numpy() - (-0.102722)) < 1e-06 + assert np.abs(pd_loss1.numpy() - (-0.001027)) < 1e-06 + loss = GANLoss(My_discriminator1()) + pd_loss0, _ = loss.generator_loss(x, y) + assert np.abs(pd_loss0.numpy() - 1.000199) < 1e-06 + pd_loss = loss.discriminator_loss(x, y) + assert np.abs(pd_loss.numpy() - 1.000200) < 1e-06 From 48583b453aa590d1027643c8bf6316d8bdc7a772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Thu, 27 Feb 2025 11:12:30 +0800 Subject: [PATCH 18/46] Update setup.py (#3995) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 71e7aaf2a..8039d9245 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ base = [ "inflect", "jsonlines", "numpy", - "librosa", + "librosa>=0.9", determine_scipy_version(), # scipy or scipy>=1.4.0, <=1.12.0 "loguru", determine_matplotlib_version(), # matplotlib or matplotlib<=3.8.4 From 45f439ad32fdfa8182056da49c748e34f4d18a36 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Mon, 3 Mar 2025 15:29:50 +0800 Subject: [PATCH 19/46] mv audiotools requirement to setup.py (#3999) --- paddlespeech/audiotools/requirements.txt | 5 ----- setup.py | 5 +++++ tests/unit/ci.sh | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) delete mode 100644 paddlespeech/audiotools/requirements.txt diff --git a/paddlespeech/audiotools/requirements.txt b/paddlespeech/audiotools/requirements.txt deleted file mode 100644 index 0a018002e..000000000 --- a/paddlespeech/audiotools/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -ffmpeg-python -ffmpy -flatten_dict -pyloudnorm -rich \ No newline at end of file diff --git a/setup.py b/setup.py index 8039d9245..47e543ce4 100644 --- a/setup.py +++ b/setup.py @@ -123,6 +123,11 @@ base = [ "scikit-learn", "pathos", "kaldiio", + "ffmpeg-python", + "ffmpy", + "flatten_dict", + "pyloudnorm", + "rich", ] server = ["pattern_singleton", "websockets"] diff --git a/tests/unit/ci.sh 
b/tests/unit/ci.sh index 567af2210..c298e3ae8 100644 --- a/tests/unit/ci.sh +++ b/tests/unit/ci.sh @@ -1,7 +1,6 @@ function main(){ set -ex speech_ci_path=`pwd` - python -m pip install -r ../../paddlespeech/audiotools/requirements.txt echo "Start asr" cd ${speech_ci_path}/asr From 9c01a0b980aeca72c87172d0453f3f3b140a6659 Mon Sep 17 00:00:00 2001 From: cyberslack_lee Date: Tue, 4 Mar 2025 14:39:50 +0800 Subject: [PATCH 20/46] =?UTF-8?q?=E3=80=90Doc=E3=80=91=E8=A1=A5=E5=85=A8?= =?UTF-8?q?=E5=90=88=E6=88=90=E7=B3=BB=E5=88=97=E4=B8=AD=E7=9A=84=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E4=B8=AD=E5=8F=82=E6=95=B0=E7=BC=BA=E5=A4=B1=20No.4?= =?UTF-8?q?=20(#3998)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix * CI --- examples/aishell3/tts3/README.md | 6 ++++-- examples/aishell3/tts3/run.sh | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md index f97a84b50..8f3f66dac 100644 --- a/examples/aishell3/tts3/README.md +++ b/examples/aishell3/tts3/README.md @@ -109,8 +109,9 @@ pwg_aishell3_ckpt_0.5 ``` `./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`. ```bash -CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} +CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} ``` +`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder. ```text usage: synthesize.py [-h] [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}] @@ -157,8 +158,9 @@ optional arguments: ``` `./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file. ```bash -CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} +CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} ``` +`--stage` controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder. 
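+For example, to synthesize with the `hifigan` vocoder (a sketch; the variables are the same ones used in the commands above):
+```bash
+CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 1 ${conf_path} ${train_output_path} ${ckpt_name}
+```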
```text usage: synthesize_e2e.py [-h] [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}] diff --git a/examples/aishell3/tts3/run.sh b/examples/aishell3/tts3/run.sh index 8dcecaa03..3fd5d73c6 100755 --- a/examples/aishell3/tts3/run.sh +++ b/examples/aishell3/tts3/run.sh @@ -27,13 +27,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - # synthesize, vocoder is pwgan by default - CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1 + # synthesize, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder + CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1 fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then - # synthesize_e2e, vocoder is pwgan by default - CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1 + # synthesize_e2e, vocoder is pwgan by default stage 0, stage 1 will use hifigan as vocoder + CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then From f54df909d0520ec1933192d54142d0c8bfc393f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Mar 2025 11:10:29 +0800 Subject: [PATCH 21/46] Bump axios from 0.26.1 to 1.8.2 in /demos/speech_web/web_client (#4001) Bumps [axios](https://github.com/axios/axios) from 0.26.1 to 1.8.2. - [Release notes](https://github.com/axios/axios/releases) - [Changelog](https://github.com/axios/axios/blob/v1.x/CHANGELOG.md) - [Commits](https://github.com/axios/axios/compare/v0.26.1...v1.8.2) --- updated-dependencies: - dependency-name: axios dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- demos/speech_web/web_client/package-lock.json | 596 +++++++++++++++--- demos/speech_web/web_client/package.json | 2 +- demos/speech_web/web_client/yarn.lock | 301 ++++++++- 3 files changed, 791 insertions(+), 108 deletions(-) diff --git a/demos/speech_web/web_client/package-lock.json b/demos/speech_web/web_client/package-lock.json index 509be385c..95e082ecd 100644 --- a/demos/speech_web/web_client/package-lock.json +++ b/demos/speech_web/web_client/package-lock.json @@ -8,8 +8,9 @@ "name": "paddlespeechwebclient", "version": "0.0.0", "dependencies": { + "@element-plus/icons-vue": "^2.0.9", "ant-design-vue": "^2.2.8", - "axios": "^0.26.1", + "axios": "^1.8.2", "element-plus": "^2.1.9", "js-audio-recorder": "0.5.7", "lamejs": "^1.2.1", @@ -18,7 +19,8 @@ }, "devDependencies": { "@vitejs/plugin-vue": "^2.3.0", - "vite": "^2.9.0" + "@vue/compiler-sfc": "^3.1.0", + "vite": "^2.9.13" } }, "node_modules/@ant-design/colors": { @@ -79,9 +81,9 @@ } }, "node_modules/@element-plus/icons-vue": { - "version": "1.1.4", - "resolved": "https://registry.npmmirror.com/@element-plus/icons-vue/-/icons-vue-1.1.4.tgz", - "integrity": "sha512-Iz/nHqdp1sFPmdzRwHkEQQA3lKvoObk8azgABZ81QUOpW9s/lUyQVUSh0tNtEPZXQlKwlSh7SPgoVxzrE0uuVQ==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@element-plus/icons-vue/-/icons-vue-2.3.1.tgz", + "integrity": "sha512-XxVUZv48RZAd87ucGS48jPf6pKu0yV5UCg9f4FFwtrYxXOwWuVJo6wOvSLKEoMQKjv8GsX/mhP6UsC1lRwbUWg==", "license": "MIT", "peerDependencies": { "vue": "^3.2.0" @@ -364,33 +366,46 @@ "integrity": "sha512-Pj2IR7u8hmUEDOwB++su6baaRi+QvsgajuFB9j95foM1N2gy5HM4z60hfusIO0fBPG5uLAEl6yCJr1jNSVugEQ==", "license": "MIT" }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, "node_modules/axios": { - "version": "0.26.1", - "resolved": "https://registry.npmmirror.com/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", + "version": "1.8.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.2.tgz", + "integrity": "sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg==", "license": "MIT", "dependencies": { - "follow-redirects": "^1.14.8" + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, - "node_modules/axios/node_modules/follow-redirects": { - "version": "1.14.9", - "resolved": "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.14.9.tgz", - "integrity": "sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, "engines": { - "node": ">=4.0" + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": 
"https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } + "engines": { + "node": ">= 0.8" } }, "node_modules/compute-scroll-into-view": { @@ -424,6 +439,15 @@ "integrity": "sha512-JLC809s6Y948/FuCZPm5IX8rRhQwOiyMb2TfVVQEixG7P8Lm/gt5S7yoQZmC8x1UehI9Pb7sksEt4xx14m+7Ug==", "license": "MIT" }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/dom-align": { "version": "1.12.3", "resolved": "https://registry.npmmirror.com/dom-align/-/dom-align-1.12.3.tgz", @@ -434,6 +458,20 @@ "resolved": "https://registry.npmmirror.com/dom-scroll-into-view/-/dom-scroll-into-view-2.0.1.tgz", "integrity": "sha512-bvVTQe1lfaUr1oFzZX80ce9KLDlZ3iU+XGNE/bz9HnGdklTieqsbmsLHe+rT2XWqopvL0PckkYqN7ksmm5pe3w==" }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/element-plus": { "version": "2.1.9", "resolved": "https://registry.npmmirror.com/element-plus/-/element-plus-2.1.9.tgz", @@ -460,6 +498,15 @@ "vue": "^3.2.0" } }, + "node_modules/element-plus/node_modules/@element-plus/icons-vue": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@element-plus/icons-vue/-/icons-vue-1.1.4.tgz", + "integrity": "sha512-Iz/nHqdp1sFPmdzRwHkEQQA3lKvoObk8azgABZ81QUOpW9s/lUyQVUSh0tNtEPZXQlKwlSh7SPgoVxzrE0uuVQ==", + "license": "MIT", + "peerDependencies": { + "vue": "^3.2.0" + } + }, "node_modules/errno": { "version": "0.1.8", "resolved": "https://registry.npmmirror.com/errno/-/errno-0.1.8.tgz", @@ -472,6 +519,51 @@ "errno": "cli.js" } }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/esbuild": { "version": "0.14.36", "resolved": "https://registry.npmmirror.com/esbuild/-/esbuild-0.14.36.tgz", @@ -537,6 +629,41 @@ "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", "license": "MIT" }, + "node_modules/follow-redirects": { + "version": "1.15.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz", + "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/fsevents": { "version": "2.3.2", "resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.2.tgz", @@ -552,11 +679,62 @@ } }, "node_modules/function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmmirror.com/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true, - "license": "MIT" + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": 
"sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } }, "node_modules/graceful-fs": { "version": "4.2.10", @@ -577,6 +755,45 @@ "node": ">= 0.4.0" } }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -728,6 +945,15 @@ "node": ">=6" } }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/memoize-one": { "version": "6.0.0", "resolved": "https://registry.npmmirror.com/memoize-one/-/memoize-one-6.0.0.tgz", @@ -746,6 +972,27 @@ "node": ">=4" } }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/moment": { "version": "2.29.4", "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", @@ -755,9 +1002,15 @@ } }, "node_modules/nanoid": { - "version": "3.3.2", - "resolved": "https://registry.npmmirror.com/nanoid/-/nanoid-3.3.2.tgz", - "integrity": "sha512-CuHBogktKwpm5g2sRgv83jEy2ijFzBwMoYA60orPDR7ynsLijJDqgsi4RDGj3OJpy3Ieb+LYwiRmIOGyytgITA==", + "version": "3.3.9", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.9.tgz", + "integrity": "sha512-SppoicMGpZvbF1l3z4x7No3OlIjP7QJvC9XR7AhZr1kL133KHnKPztkKDc+Ir4aJ/1VhTySrtKhrsycmrMQfvg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], "license": "MIT", "bin": { "nanoid": "bin/nanoid.cjs" @@ -830,9 
+1083,9 @@ "license": "MIT" }, "node_modules/picocolors": { - "version": "1.0.0", - "resolved": "https://registry.npmmirror.com/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", "license": "ISC" }, "node_modules/pify": { @@ -845,9 +1098,9 @@ } }, "node_modules/postcss": { - "version": "8.4.12", - "resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.4.12.tgz", - "integrity": "sha512-lg6eITwYe9v6Hr5CncVbK70SoioNQIq81nsaG86ev5hAidQvmOeETBqs7jm43K2F5/Ley3ytDtriImV6TpNiSg==", + "version": "8.5.3", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.3.tgz", + "integrity": "sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A==", "funding": [ { "type": "opencollective", @@ -856,18 +1109,28 @@ { "type": "tidelift", "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" } ], "license": "MIT", "dependencies": { - "nanoid": "^3.3.1", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" + "nanoid": "^3.3.8", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" }, "engines": { "node": "^10 || ^12 || >=14" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, "node_modules/prr": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/prr/-/prr-1.0.1.tgz", @@ -962,9 +1225,9 @@ } }, "node_modules/source-map-js": { - "version": "1.0.2", - "resolved": "https://registry.npmmirror.com/source-map-js/-/source-map-js-1.0.2.tgz", - "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", "license": "BSD-3-Clause", "engines": { "node": ">=0.10.0" @@ -1001,16 +1264,16 @@ "license": "ISC" }, "node_modules/vite": { - "version": "2.9.1", - "resolved": "https://registry.npmmirror.com/vite/-/vite-2.9.1.tgz", - "integrity": "sha512-vSlsSdOYGcYEJfkQ/NeLXgnRv5zZfpAsdztkIrs7AZHV8RCMZQkwjo4DS5BnrYTqoWqLoUe1Cah4aVO4oNNqCQ==", + "version": "2.9.18", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.9.18.tgz", + "integrity": "sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ==", "dev": true, "license": "MIT", "dependencies": { "esbuild": "^0.14.27", - "postcss": "^8.4.12", + "postcss": "^8.4.13", "resolve": "^1.22.0", - "rollup": "^2.59.0" + "rollup": ">=2.59.0 <2.78.0" }, "bin": { "vite": "bin/vite.js" @@ -1142,9 +1405,9 @@ "integrity": "sha512-ej5oVy6lykXsvieQtqZxCOaLT+xD4+QNarq78cIYISHmZXshCvROLudpQN3lfL8G0NL7plMSSK+zlyvCaIJ4Iw==" }, "@element-plus/icons-vue": { - "version": "1.1.4", - "resolved": "https://registry.npmmirror.com/@element-plus/icons-vue/-/icons-vue-1.1.4.tgz", - "integrity": "sha512-Iz/nHqdp1sFPmdzRwHkEQQA3lKvoObk8azgABZ81QUOpW9s/lUyQVUSh0tNtEPZXQlKwlSh7SPgoVxzrE0uuVQ==", + "version": "2.3.1", + "resolved": 
"https://registry.npmjs.org/@element-plus/icons-vue/-/icons-vue-2.3.1.tgz", + "integrity": "sha512-XxVUZv48RZAd87ucGS48jPf6pKu0yV5UCg9f4FFwtrYxXOwWuVJo6wOvSLKEoMQKjv8GsX/mhP6UsC1lRwbUWg==", "requires": {} }, "@floating-ui/core": { @@ -1356,19 +1619,36 @@ "resolved": "https://registry.npmmirror.com/async-validator/-/async-validator-4.0.7.tgz", "integrity": "sha512-Pj2IR7u8hmUEDOwB++su6baaRi+QvsgajuFB9j95foM1N2gy5HM4z60hfusIO0fBPG5uLAEl6yCJr1jNSVugEQ==" }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, "axios": { - "version": "0.26.1", - "resolved": "https://registry.npmmirror.com/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", + "version": "1.8.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.2.tgz", + "integrity": "sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg==", "requires": { - "follow-redirects": "^1.14.8" - }, - "dependencies": { - "follow-redirects": { - "version": "1.14.9", - "resolved": "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.14.9.tgz", - "integrity": "sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w==" - } + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, + "call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "requires": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + } + }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "requires": { + "delayed-stream": "~1.0.0" } }, "compute-scroll-into-view": { @@ -1399,6 +1679,11 @@ "resolved": "https://registry.npmmirror.com/dayjs/-/dayjs-1.11.0.tgz", "integrity": "sha512-JLC809s6Y948/FuCZPm5IX8rRhQwOiyMb2TfVVQEixG7P8Lm/gt5S7yoQZmC8x1UehI9Pb7sksEt4xx14m+7Ug==" }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" + }, "dom-align": { "version": "1.12.3", "resolved": "https://registry.npmmirror.com/dom-align/-/dom-align-1.12.3.tgz", @@ -1409,6 +1694,16 @@ "resolved": "https://registry.npmmirror.com/dom-scroll-into-view/-/dom-scroll-into-view-2.0.1.tgz", "integrity": "sha512-bvVTQe1lfaUr1oFzZX80ce9KLDlZ3iU+XGNE/bz9HnGdklTieqsbmsLHe+rT2XWqopvL0PckkYqN7ksmm5pe3w==" }, + "dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "requires": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + } + }, "element-plus": { "version": "2.1.9", "resolved": "https://registry.npmmirror.com/element-plus/-/element-plus-2.1.9.tgz", @@ -1429,6 +1724,14 @@ "lodash-unified": "^1.0.2", 
"memoize-one": "^6.0.0", "normalize-wheel-es": "^1.1.2" + }, + "dependencies": { + "@element-plus/icons-vue": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@element-plus/icons-vue/-/icons-vue-1.1.4.tgz", + "integrity": "sha512-Iz/nHqdp1sFPmdzRwHkEQQA3lKvoObk8azgABZ81QUOpW9s/lUyQVUSh0tNtEPZXQlKwlSh7SPgoVxzrE0uuVQ==", + "requires": {} + } } }, "errno": { @@ -1440,6 +1743,35 @@ "prr": "~1.0.1" } }, + "es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==" + }, + "es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==" + }, + "es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "requires": { + "es-errors": "^1.3.0" + } + }, + "es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "requires": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + } + }, "esbuild": { "version": "0.14.36", "resolved": "https://registry.npmmirror.com/esbuild/-/esbuild-0.14.36.tgz", @@ -1485,6 +1817,22 @@ "resolved": "https://registry.npmmirror.com/estree-walker/-/estree-walker-2.0.2.tgz", "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==" }, + "follow-redirects": { + "version": "1.15.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==" + }, + "form-data": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz", + "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "mime-types": "^2.1.12" + } + }, "fsevents": { "version": "2.3.2", "resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.2.tgz", @@ -1493,10 +1841,40 @@ "optional": true }, "function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmmirror.com/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==" + }, + "get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "requires": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + 
"es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + } + }, + "get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "requires": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + } + }, + "gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==" }, "graceful-fs": { "version": "4.2.10", @@ -1513,6 +1891,27 @@ "function-bind": "^1.1.1" } }, + "has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==" + }, + "has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "requires": { + "has-symbols": "^1.0.3" + } + }, + "hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "requires": { + "function-bind": "^1.1.2" + } + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -1624,6 +2023,11 @@ "semver": "^5.6.0" } }, + "math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==" + }, "memoize-one": { "version": "6.0.0", "resolved": "https://registry.npmmirror.com/memoize-one/-/memoize-one-6.0.0.tgz", @@ -1635,15 +2039,28 @@ "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", "optional": true }, + "mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==" + }, + "mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "requires": { + "mime-db": "1.52.0" + } + }, "moment": { "version": "2.29.4", "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==" }, "nanoid": { - "version": "3.3.2", - "resolved": "https://registry.npmmirror.com/nanoid/-/nanoid-3.3.2.tgz", - "integrity": "sha512-CuHBogktKwpm5g2sRgv83jEy2ijFzBwMoYA60orPDR7ynsLijJDqgsi4RDGj3OJpy3Ieb+LYwiRmIOGyytgITA==" + "version": "3.3.9", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.9.tgz", + "integrity": "sha512-SppoicMGpZvbF1l3z4x7No3OlIjP7QJvC9XR7AhZr1kL133KHnKPztkKDc+Ir4aJ/1VhTySrtKhrsycmrMQfvg==" }, "nanopop": { "version": "2.1.0", @@ -1700,9 +2117,9 @@ "dev": true }, "picocolors": { - 
"version": "1.0.0", - "resolved": "https://registry.npmmirror.com/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" }, "pify": { "version": "4.0.1", @@ -1711,15 +2128,20 @@ "optional": true }, "postcss": { - "version": "8.4.12", - "resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.4.12.tgz", - "integrity": "sha512-lg6eITwYe9v6Hr5CncVbK70SoioNQIq81nsaG86ev5hAidQvmOeETBqs7jm43K2F5/Ley3ytDtriImV6TpNiSg==", + "version": "8.5.3", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.3.tgz", + "integrity": "sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A==", "requires": { - "nanoid": "^3.3.1", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" + "nanoid": "^3.3.8", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" } }, + "proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "prr": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/prr/-/prr-1.0.1.tgz", @@ -1793,9 +2215,9 @@ "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" }, "source-map-js": { - "version": "1.0.2", - "resolved": "https://registry.npmmirror.com/source-map-js/-/source-map-js-1.0.2.tgz", - "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==" + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==" }, "sourcemap-codec": { "version": "1.4.8", @@ -1819,16 +2241,16 @@ "integrity": "sha512-IeiWvvEXfW5ltKVMkxq6FvNf2LojMKvB2OCeja6+ct24S1XOmQw2dGr2JyndwACWAGJva9B7yPHwAmeA9QCqAQ==" }, "vite": { - "version": "2.9.1", - "resolved": "https://registry.npmmirror.com/vite/-/vite-2.9.1.tgz", - "integrity": "sha512-vSlsSdOYGcYEJfkQ/NeLXgnRv5zZfpAsdztkIrs7AZHV8RCMZQkwjo4DS5BnrYTqoWqLoUe1Cah4aVO4oNNqCQ==", + "version": "2.9.18", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.9.18.tgz", + "integrity": "sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ==", "dev": true, "requires": { "esbuild": "^0.14.27", "fsevents": "~2.3.2", - "postcss": "^8.4.12", + "postcss": "^8.4.13", "resolve": "^1.22.0", - "rollup": "^2.59.0" + "rollup": ">=2.59.0 <2.78.0" } }, "vue": { diff --git a/demos/speech_web/web_client/package.json b/demos/speech_web/web_client/package.json index d8c213e4a..f00afbd25 100644 --- a/demos/speech_web/web_client/package.json +++ b/demos/speech_web/web_client/package.json @@ -10,7 +10,7 @@ "dependencies": { "@element-plus/icons-vue": "^2.0.9", "ant-design-vue": "^2.2.8", - "axios": "^0.26.1", + "axios": "^1.8.2", "element-plus": "^2.1.9", "js-audio-recorder": "0.5.7", "lamejs": "^1.2.1", diff --git a/demos/speech_web/web_client/yarn.lock b/demos/speech_web/web_client/yarn.lock index 7f07daa06..741bfc005 100644 --- a/demos/speech_web/web_client/yarn.lock +++ b/demos/speech_web/web_client/yarn.lock @@ -22,11 +22,28 @@ 
"@ant-design/colors" "^6.0.0" "@ant-design/icons-svg" "^4.2.1" +"@babel/helper-string-parser@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz#1aabb72ee72ed35789b4bbcad3ca2862ce614e8c" + integrity sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA== + +"@babel/helper-validator-identifier@^7.25.9": + version "7.25.9" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz#24b64e2c3ec7cd3b3c547729b8d16871f22cbdc7" + integrity sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ== + "@babel/parser@^7.16.4": version "7.17.9" resolved "https://registry.npmmirror.com/@babel/parser/-/parser-7.17.9.tgz" integrity sha512-vqUSBLP8dQHFPdPi9bc5GK9vRkYHJ49fsZdtoJ8EQ8ibpwk5rPKfvNIwChB0KVXcIjcepEBBd2VHC5r9Gy8ueg== +"@babel/parser@^7.25.3": + version "7.26.9" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.26.9.tgz#d9e78bee6dc80f9efd8f2349dcfbbcdace280fd5" + integrity sha512-81NWa1njQblgZbQHxWHpxxCzNsa3ZwvFqpUg7P+NNUU6f3UU2jBEg4OlF/J6rl8+PQGh1q6/zWScd001YwcA5A== + dependencies: + "@babel/types" "^7.26.9" + "@babel/runtime@^7.10.5": version "7.17.9" resolved "https://registry.npmmirror.com/@babel/runtime/-/runtime-7.17.9.tgz" @@ -34,6 +51,14 @@ dependencies: regenerator-runtime "^0.13.4" +"@babel/types@^7.26.9": + version "7.26.9" + resolved "https://registry.yarnpkg.com/@babel/types/-/types-7.26.9.tgz#08b43dec79ee8e682c2ac631c010bdcac54a21ce" + integrity sha512-Y3IR1cRnOxOCDvMmNiym7XpXQ93iGDDPHx+Zj+NM+rg0fBaShfQLkg+hKPaZCEvg5N/LeCo4+Rj/i3FuJsIQaw== + dependencies: + "@babel/helper-string-parser" "^7.25.9" + "@babel/helper-validator-identifier" "^7.25.9" + "@ctrl/tinycolor@^3.4.0": version "3.4.1" resolved "https://registry.npmmirror.com/@ctrl/tinycolor/-/tinycolor-3.4.1.tgz" @@ -61,6 +86,11 @@ dependencies: "@floating-ui/core" "^0.6.1" +"@jridgewell/sourcemap-codec@^1.5.0": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz#3188bcb273a414b0d215fd22a58540b989b9409a" + integrity sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ== + "@popperjs/core@^2.11.4": version "2.11.5" resolved "https://registry.npmmirror.com/@popperjs/core/-/core-2.11.5.tgz" @@ -101,6 +131,17 @@ estree-walker "^2.0.2" source-map "^0.6.1" +"@vue/compiler-core@3.5.13": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@vue/compiler-core/-/compiler-core-3.5.13.tgz#b0ae6c4347f60c03e849a05d34e5bf747c9bda05" + integrity sha512-oOdAkwqUfW1WqpwSYJce06wvt6HljgY3fGeM9NcVA1HaYOij3mZG9Rkysn0OHuyUAGMbEbARIpsG+LPVlBJ5/Q== + dependencies: + "@babel/parser" "^7.25.3" + "@vue/shared" "3.5.13" + entities "^4.5.0" + estree-walker "^2.0.2" + source-map-js "^1.2.0" + "@vue/compiler-dom@3.2.32": version "3.2.32" resolved "https://registry.npmmirror.com/@vue/compiler-dom/-/compiler-dom-3.2.32.tgz" @@ -109,6 +150,14 @@ "@vue/compiler-core" "3.2.32" "@vue/shared" "3.2.32" +"@vue/compiler-dom@3.5.13": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@vue/compiler-dom/-/compiler-dom-3.5.13.tgz#bb1b8758dbc542b3658dda973b98a1c9311a8a58" + integrity sha512-ZOJ46sMOKUjO3e94wPdCzQ6P1Lx/vhp2RSvfaab88Ajexs0AHeV0uasYhi99WPaogmBlRHNRuly8xV75cNTMDA== + dependencies: + "@vue/compiler-core" "3.5.13" + "@vue/shared" "3.5.13" + "@vue/compiler-sfc@3.2.32": version "3.2.32" resolved 
"https://registry.npmmirror.com/@vue/compiler-sfc/-/compiler-sfc-3.2.32.tgz" @@ -125,6 +174,21 @@ postcss "^8.1.10" source-map "^0.6.1" +"@vue/compiler-sfc@^3.1.0": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@vue/compiler-sfc/-/compiler-sfc-3.5.13.tgz#461f8bd343b5c06fac4189c4fef8af32dea82b46" + integrity sha512-6VdaljMpD82w6c2749Zhf5T9u5uLBWKnVue6XWxprDobftnletJ8+oel7sexFfM3qIxNmVE7LSFGTpv6obNyaQ== + dependencies: + "@babel/parser" "^7.25.3" + "@vue/compiler-core" "3.5.13" + "@vue/compiler-dom" "3.5.13" + "@vue/compiler-ssr" "3.5.13" + "@vue/shared" "3.5.13" + estree-walker "^2.0.2" + magic-string "^0.30.11" + postcss "^8.4.48" + source-map-js "^1.2.0" + "@vue/compiler-ssr@3.2.32": version "3.2.32" resolved "https://registry.npmmirror.com/@vue/compiler-ssr/-/compiler-ssr-3.2.32.tgz" @@ -133,6 +197,14 @@ "@vue/compiler-dom" "3.2.32" "@vue/shared" "3.2.32" +"@vue/compiler-ssr@3.5.13": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@vue/compiler-ssr/-/compiler-ssr-3.5.13.tgz#e771adcca6d3d000f91a4277c972a996d07f43ba" + integrity sha512-wMH6vrYHxQl/IybKJagqbquvxpWCuVYpoUJfCqFZwa/JY1GdATAQ+TgVtgrwwMZ0D07QhA99rs/EAAWfvG6KpA== + dependencies: + "@vue/compiler-dom" "3.5.13" + "@vue/shared" "3.5.13" + "@vue/reactivity-transform@3.2.32": version "3.2.32" resolved "https://registry.npmmirror.com/@vue/reactivity-transform/-/reactivity-transform-3.2.32.tgz" @@ -181,6 +253,11 @@ resolved "https://registry.npmmirror.com/@vue/shared/-/shared-3.2.32.tgz" integrity sha512-bjcixPErUsAnTQRQX4Z5IQnICYjIfNCyCl8p29v1M6kfVzvwOICPw+dz48nNuWlTOOx2RHhzHdazJibE8GSnsw== +"@vue/shared@3.5.13": + version "3.5.13" + resolved "https://registry.yarnpkg.com/@vue/shared/-/shared-3.5.13.tgz#87b309a6379c22b926e696893237826f64339b6f" + integrity sha512-/hnE/qP5ZoGpol0a5mDi45bOd7t3tjYJBjsgCsivow7D48cJeV5l05RD82lPqi7gRiphZM37rnhW1l6ZoCNNnQ== + "@vueuse/core@^8.2.4": version "8.2.5" resolved "https://registry.npmmirror.com/@vueuse/core/-/core-8.2.5.tgz" @@ -239,12 +316,34 @@ async-validator@^4.0.7: resolved "https://registry.npmmirror.com/async-validator/-/async-validator-4.0.7.tgz" integrity sha512-Pj2IR7u8hmUEDOwB++su6baaRi+QvsgajuFB9j95foM1N2gy5HM4z60hfusIO0fBPG5uLAEl6yCJr1jNSVugEQ== -axios@^0.26.1: - version "0.26.1" - resolved "https://registry.npmmirror.com/axios/-/axios-0.26.1.tgz" - integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA== +asynckit@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" + integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q== + +axios@^1.8.2: + version "1.8.2" + resolved "https://registry.yarnpkg.com/axios/-/axios-1.8.2.tgz#fabe06e241dfe83071d4edfbcaa7b1c3a40f7979" + integrity sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg== + dependencies: + follow-redirects "^1.15.6" + form-data "^4.0.0" + proxy-from-env "^1.1.0" + +call-bind-apply-helpers@^1.0.1, call-bind-apply-helpers@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz#4b5428c222be985d79c3d82657479dbe0b59b2d6" + integrity sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ== dependencies: - follow-redirects "^1.14.8" + es-errors "^1.3.0" + function-bind "^1.1.2" + +combined-stream@^1.0.8: + version "1.0.8" + resolved 
"https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" compute-scroll-into-view@^1.0.17: version "1.0.17" @@ -280,6 +379,11 @@ debug@^3.2.6: dependencies: ms "^2.1.1" +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" + integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== + dom-align@^1.12.1: version "1.12.3" resolved "https://registry.npmmirror.com/dom-align/-/dom-align-1.12.3.tgz" @@ -290,6 +394,15 @@ dom-scroll-into-view@^2.0.0: resolved "https://registry.npmmirror.com/dom-scroll-into-view/-/dom-scroll-into-view-2.0.1.tgz" integrity sha512-bvVTQe1lfaUr1oFzZX80ce9KLDlZ3iU+XGNE/bz9HnGdklTieqsbmsLHe+rT2XWqopvL0PckkYqN7ksmm5pe3w== +dunder-proto@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/dunder-proto/-/dunder-proto-1.0.1.tgz#d7ae667e1dc83482f8b70fd0f6eefc50da30f58a" + integrity sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A== + dependencies: + call-bind-apply-helpers "^1.0.1" + es-errors "^1.3.0" + gopd "^1.2.0" + element-plus@^2.1.9: version "2.1.9" resolved "https://registry.npmmirror.com/element-plus/-/element-plus-2.1.9.tgz" @@ -311,6 +424,11 @@ element-plus@^2.1.9: memoize-one "^6.0.0" normalize-wheel-es "^1.1.2" +entities@^4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + errno@^0.1.1: version "0.1.8" resolved "https://registry.npmmirror.com/errno/-/errno-0.1.8.tgz" @@ -318,6 +436,33 @@ errno@^0.1.1: dependencies: prr "~1.0.1" +es-define-property@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/es-define-property/-/es-define-property-1.0.1.tgz#983eb2f9a6724e9303f61addf011c72e09e0b0fa" + integrity sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g== + +es-errors@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/es-errors/-/es-errors-1.3.0.tgz#05f75a25dab98e4fb1dcd5e1472c0546d5057c8f" + integrity sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw== + +es-object-atoms@^1.0.0, es-object-atoms@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/es-object-atoms/-/es-object-atoms-1.1.1.tgz#1c4f2c4837327597ce69d2ca190a7fdd172338c1" + integrity sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA== + dependencies: + es-errors "^1.3.0" + +es-set-tostringtag@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz#f31dbbe0c183b00a6d26eb6325c810c0fd18bd4d" + integrity sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA== + dependencies: + es-errors "^1.3.0" + get-intrinsic "^1.2.6" + has-tostringtag "^1.0.2" + hasown "^2.0.2" + esbuild-android-64@0.14.36: version "0.14.36" resolved "https://registry.yarnpkg.com/esbuild-android-64/-/esbuild-android-64-0.14.36.tgz#fc5f95ce78c8c3d790fa16bc71bd904f2bb42aa1" @@ -454,10 +599,20 @@ estree-walker@^2.0.2: resolved 
"https://registry.npmmirror.com/estree-walker/-/estree-walker-2.0.2.tgz" integrity sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w== -follow-redirects@^1.14.8: - version "1.14.9" - resolved "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.14.9.tgz" - integrity sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w== +follow-redirects@^1.15.6: + version "1.15.9" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.9.tgz#a604fa10e443bf98ca94228d9eebcc2e8a2c8ee1" + integrity sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ== + +form-data@^4.0.0: + version "4.0.2" + resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.2.tgz#35cabbdd30c3ce73deb2c42d3c8d3ed9ca51794c" + integrity sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.8" + es-set-tostringtag "^2.1.0" + mime-types "^2.1.12" fsevents@~2.3.2: version "2.3.2" @@ -469,11 +624,57 @@ function-bind@^1.1.1: resolved "https://registry.npmmirror.com/function-bind/-/function-bind-1.1.1.tgz" integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== +function-bind@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c" + integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA== + +get-intrinsic@^1.2.6: + version "1.3.0" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.3.0.tgz#743f0e3b6964a93a5491ed1bffaae054d7f98d01" + integrity sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ== + dependencies: + call-bind-apply-helpers "^1.0.2" + es-define-property "^1.0.1" + es-errors "^1.3.0" + es-object-atoms "^1.1.1" + function-bind "^1.1.2" + get-proto "^1.0.1" + gopd "^1.2.0" + has-symbols "^1.1.0" + hasown "^2.0.2" + math-intrinsics "^1.1.0" + +get-proto@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/get-proto/-/get-proto-1.0.1.tgz#150b3f2743869ef3e851ec0c49d15b1d14d00ee1" + integrity sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g== + dependencies: + dunder-proto "^1.0.1" + es-object-atoms "^1.0.0" + +gopd@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.2.0.tgz#89f56b8217bdbc8802bd299df6d7f1081d7e51a1" + integrity sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg== + graceful-fs@^4.1.2: version "4.2.10" resolved "https://registry.npmmirror.com/graceful-fs/-/graceful-fs-4.2.10.tgz" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== +has-symbols@^1.0.3, has-symbols@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.1.0.tgz#fc9c6a783a084951d0b971fe1018de813707a338" + integrity sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ== + +has-tostringtag@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.2.tgz#2cdc42d40bef2e5b4eeab7c01a73c54ce7ab5abc" + integrity sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw== + dependencies: + has-symbols 
"^1.0.3" + has@^1.0.3: version "1.0.3" resolved "https://registry.npmmirror.com/has/-/has-1.0.3.tgz" @@ -481,6 +682,13 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" +hasown@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003" + integrity sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ== + dependencies: + function-bind "^1.1.2" + iconv-lite@^0.4.4: version "0.4.24" resolved "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.4.24.tgz" @@ -573,6 +781,13 @@ magic-string@^0.25.7: dependencies: sourcemap-codec "^1.4.8" +magic-string@^0.30.11: + version "0.30.17" + resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.30.17.tgz#450a449673d2460e5bbcfba9a61916a1714c7453" + integrity sha512-sNPKHvyjVf7gyjwS4xGTaW/mCnF8wnjtifKBEhxfZ7E/S8tQ0rssrwGNn6q8JH/ohItJfSQp9mBtQYuTlH5QnA== + dependencies: + "@jridgewell/sourcemap-codec" "^1.5.0" + make-dir@^2.1.0: version "2.1.0" resolved "https://registry.npmmirror.com/make-dir/-/make-dir-2.1.0.tgz" @@ -581,11 +796,28 @@ make-dir@^2.1.0: pify "^4.0.1" semver "^5.6.0" +math-intrinsics@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz#a0dd74be81e2aa5c2f27e65ce283605ee4e2b7f9" + integrity sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g== + memoize-one@^6.0.0: version "6.0.0" resolved "https://registry.npmmirror.com/memoize-one/-/memoize-one-6.0.0.tgz" integrity sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw== +mime-db@1.52.0: + version "1.52.0" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" + integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== + +mime-types@^2.1.12: + version "2.1.35" + resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" + integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== + dependencies: + mime-db "1.52.0" + mime@^1.4.1: version "1.6.0" resolved "https://registry.npmmirror.com/mime/-/mime-1.6.0.tgz" @@ -606,6 +838,11 @@ nanoid@^3.3.1: resolved "https://registry.npmmirror.com/nanoid/-/nanoid-3.3.2.tgz" integrity sha512-CuHBogktKwpm5g2sRgv83jEy2ijFzBwMoYA60orPDR7ynsLijJDqgsi4RDGj3OJpy3Ieb+LYwiRmIOGyytgITA== +nanoid@^3.3.8: + version "3.3.9" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.9.tgz#e0097d8e026b3343ff053e9ccd407360a03f503a" + integrity sha512-SppoicMGpZvbF1l3z4x7No3OlIjP7QJvC9XR7AhZr1kL133KHnKPztkKDc+Ir4aJ/1VhTySrtKhrsycmrMQfvg== + nanopop@^2.1.0: version "2.1.0" resolved "https://registry.npmmirror.com/nanopop/-/nanopop-2.1.0.tgz" @@ -645,12 +882,17 @@ picocolors@^1.0.0: resolved "https://registry.npmmirror.com/picocolors/-/picocolors-1.0.0.tgz" integrity sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ== +picocolors@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.1.1.tgz#3d321af3eab939b083c8f929a1d12cda81c26b6b" + integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA== + pify@^4.0.1: version "4.0.1" resolved "https://registry.npmmirror.com/pify/-/pify-4.0.1.tgz" integrity 
sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g== -postcss@^8.1.10, postcss@^8.4.12: +postcss@^8.1.10: version "8.4.12" resolved "https://registry.npmmirror.com/postcss/-/postcss-8.4.12.tgz" integrity sha512-lg6eITwYe9v6Hr5CncVbK70SoioNQIq81nsaG86ev5hAidQvmOeETBqs7jm43K2F5/Ley3ytDtriImV6TpNiSg== @@ -659,6 +901,20 @@ postcss@^8.1.10, postcss@^8.4.12: picocolors "^1.0.0" source-map-js "^1.0.2" +postcss@^8.4.13, postcss@^8.4.48: + version "8.5.3" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.5.3.tgz#1463b6f1c7fb16fe258736cba29a2de35237eafb" + integrity sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A== + dependencies: + nanoid "^3.3.8" + picocolors "^1.1.1" + source-map-js "^1.2.1" + +proxy-from-env@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" + integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg== + prr@~1.0.1: version "1.0.1" resolved "https://registry.npmmirror.com/prr/-/prr-1.0.1.tgz" @@ -683,10 +939,10 @@ resolve@^1.22.0: path-parse "^1.0.7" supports-preserve-symlinks-flag "^1.0.0" -rollup@^2.59.0: - version "2.70.1" - resolved "https://registry.npmmirror.com/rollup/-/rollup-2.70.1.tgz" - integrity sha512-CRYsI5EuzLbXdxC6RnYhOuRdtz4bhejPMSWjsFLfVM/7w/85n2szZv6yExqUXsBdz5KT8eoubeyDUDjhLHEslA== +"rollup@>=2.59.0 <2.78.0": + version "2.77.3" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-2.77.3.tgz#8f00418d3a2740036e15deb653bed1a90ee0cc12" + integrity sha512-/qxNTG7FbmefJWoeeYJFbHehJ2HNWnjkAFRKzWN/45eNBBF/r8lo992CwcJXEzyVxs5FmfId+vTSTQDb+bxA+g== optionalDependencies: fsevents "~2.3.2" @@ -722,6 +978,11 @@ source-map-js@^1.0.2: resolved "https://registry.npmmirror.com/source-map-js/-/source-map-js-1.0.2.tgz" integrity sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw== +source-map-js@^1.2.0, source-map-js@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.1.tgz#1ce5650fddd87abc099eda37dcff024c2667ae46" + integrity sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA== + source-map@^0.6.1, source-map@~0.6.0: version "0.6.1" resolved "https://registry.npmmirror.com/source-map/-/source-map-0.6.1.tgz" @@ -747,15 +1008,15 @@ use-strict@1.0.1: resolved "https://registry.npmmirror.com/use-strict/-/use-strict-1.0.1.tgz" integrity sha512-IeiWvvEXfW5ltKVMkxq6FvNf2LojMKvB2OCeja6+ct24S1XOmQw2dGr2JyndwACWAGJva9B7yPHwAmeA9QCqAQ== -vite@^2.9.0: - version "2.9.1" - resolved "https://registry.npmmirror.com/vite/-/vite-2.9.1.tgz" - integrity sha512-vSlsSdOYGcYEJfkQ/NeLXgnRv5zZfpAsdztkIrs7AZHV8RCMZQkwjo4DS5BnrYTqoWqLoUe1Cah4aVO4oNNqCQ== +vite@^2.9.13: + version "2.9.18" + resolved "https://registry.yarnpkg.com/vite/-/vite-2.9.18.tgz#74e2a83b29da81e602dac4c293312cc575f091c7" + integrity sha512-sAOqI5wNM9QvSEE70W3UGMdT8cyEn0+PmJMTFvTB8wB0YbYUWw3gUbY62AOyrXosGieF2htmeLATvNxpv/zNyQ== dependencies: esbuild "^0.14.27" - postcss "^8.4.12" + postcss "^8.4.13" resolve "^1.22.0" - rollup "^2.59.0" + rollup ">=2.59.0 <2.78.0" optionalDependencies: fsevents "~2.3.2" From f8dc3252568dbe25b76f6662a16b7c04b66c70a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:55:15 +0800 Subject: [PATCH 22/46] add docker (#4000) * add docker * 
fix unit error > Type promotion

* fix url
---
 docker/ubuntu20-cpu/Dockerfile     | 17 +++++++++++++++++
 tests/unit/asr/reverse_pad_list.py | 11 +++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)
 create mode 100644 docker/ubuntu20-cpu/Dockerfile

diff --git a/docker/ubuntu20-cpu/Dockerfile b/docker/ubuntu20-cpu/Dockerfile
new file mode 100644
index 000000000..bb113b2f2
--- /dev/null
+++ b/docker/ubuntu20-cpu/Dockerfile
@@ -0,0 +1,17 @@
+FROM registry.baidubce.com/paddlepaddle/paddle:3.0.0b1
+LABEL maintainer="ext_paddle_oss@baidu.com"
+
+RUN apt-get update \
+    && apt-get install -y libsndfile1-dev libsndfile1 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone --depth 1 https://github.com/PaddlePaddle/PaddleSpeech.git /home/PaddleSpeech
+RUN pip3 uninstall mccabe -y ; exit 0;
+RUN pip3 install multiprocess==0.70.12 importlib-metadata==4.2.0 dill==0.3.4
+
+WORKDIR /home/PaddleSpeech/
+RUN python setup.py bdist_wheel
+RUN pip install dist/*.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+CMD ["bash"]
diff --git a/tests/unit/asr/reverse_pad_list.py b/tests/unit/asr/reverse_pad_list.py
index 215ed5ceb..1b63890a0 100644
--- a/tests/unit/asr/reverse_pad_list.py
+++ b/tests/unit/asr/reverse_pad_list.py
@@ -65,14 +65,16 @@ def reverse_pad_list_with_sos_eos(r_hyps,
     max_len = paddle.max(r_hyps_lens)
     index_range = paddle.arange(0, max_len, 1)
     seq_len_expand = r_hyps_lens.unsqueeze(1)
-    seq_mask = seq_len_expand > index_range  # (beam, max_len)
+    seq_mask = seq_len_expand > index_range.astype(
+        seq_len_expand.dtype)  # (beam, max_len)
 
-    index = (seq_len_expand - 1) - index_range  # (beam, max_len)
+    index = (seq_len_expand - 1) - index_range.astype(
+        seq_len_expand.dtype)  # (beam, max_len)
     #   >>> index
     #   >>> tensor([[ 2, 1, 0],
     #   >>>         [ 2, 1, 0],
     #   >>>         [ 0, -1, -2]])
-    index = index * seq_mask
+    index = index * seq_mask.astype(index.dtype)
 
     #   >>> index
     #   >>> tensor([[2, 1, 0],
@@ -103,7 +105,8 @@ def reverse_pad_list_with_sos_eos(r_hyps,
     #   >>> tensor([[3, 2, 1],
     #   >>>         [4, 8, 9],
     #   >>>         [2, 2, 2]])
-    r_hyps = paddle.where(seq_mask, r_hyps, eos)
+    r_hyps = paddle.where(seq_mask, r_hyps,
+                          paddle.to_tensor(eos, dtype=r_hyps.dtype))
     #   >>> r_hyps
     #   >>> tensor([[3, 2, 1],
     #   >>>         [4, 8, 9],

From f357ec61720b37f3d51c49854ccdfe365debd451 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com>
Date: Tue, 18 Mar 2025 11:43:12 +0800
Subject: [PATCH 23/46] =?UTF-8?q?Docker=3D-=3D=20=E5=85=BC=E5=AE=B9?=
 =?UTF-8?q?=E6=80=A7=E9=AA=8C=E8=AF=81=20(#4018)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add docker

* fix unit error > Type promotion

* fix url

* add gpu docker

* Update Dockerfile

* fix pp3.0 0-d tensor problem

* Compatibility verification
---
 paddlespeech/s2t/models/whisper/whisper.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/paddlespeech/s2t/models/whisper/whisper.py b/paddlespeech/s2t/models/whisper/whisper.py
index d20cc04b6..fdd3a6974 100644
--- a/paddlespeech/s2t/models/whisper/whisper.py
+++ b/paddlespeech/s2t/models/whisper/whisper.py
@@ -835,8 +835,14 @@ class BeamSearchDecoder(TokenDecoder):
                 logprob, token = paddle.topk(
                     logprobs[idx], k=self.beam_size + 1)
                 for logprob, token in zip(logprob, token):
-                    new_logprob = (sum_logprobs[idx] + logprob).tolist()[0]
-                    sequence = tuple(prefix + [token.tolist()[0]])
+                    # after Paddle 3.0, tolist() on a 0-D tensor returns a float/int value instead of a list
+                    new_logprob = (sum_logprobs[idx] + logprob).tolist()
+                    
new_logprob = new_logprob if isinstance(
+                        new_logprob, float) else new_logprob[0]
+                    new_token = token.tolist()
+                    new_token = new_token if isinstance(new_token,
+                                                        int) else new_token[0]
+                    sequence = tuple(prefix + [new_token])
                     scores[sequence] = new_logprob
                     sources[sequence] = idx

From ca03f4db214b7300a00fd7159ee3a5b0a8fada91 Mon Sep 17 00:00:00 2001
From: Echo-Nie <157974576+Echo-Nie@users.noreply.github.com>
Date: Tue, 18 Mar 2025 14:50:37 +0800
Subject: [PATCH 24/46] =?UTF-8?q?=E3=80=90PaddleSpeech=20No.6=E3=80=91?=
 =?UTF-8?q?=E8=A1=A5=E5=85=A8=E5=90=88=E6=88=90=E7=B3=BB=E5=88=97=E4=B8=AD?=
 =?UTF-8?q?=E7=9A=84=E8=84=9A=E6=9C=AC=E4=B8=AD=E5=8F=82=E6=95=B0=E7=BC=BA?=
 =?UTF-8?q?=E5=A4=B1=20(#4004)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* run.sh: add a --stage argument to synthesize and synthesize_e2e to control vocoder model selection; README.md: document the stage argument and clarify the vocoder selection logic

* Add comments for the stage argument in run.sh

* Change HiFiGAN to MultiBand MelGAN

* Move the csmsc files back to their original location (No.15 is not modified); only No.6 is changed here
---
 examples/canton/tts3/README.md | 1 +
 examples/canton/tts3/run.sh    | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/canton/tts3/README.md b/examples/canton/tts3/README.md
index 87ef40907..d2c46f642 100644
--- a/examples/canton/tts3/README.md
+++ b/examples/canton/tts3/README.md
@@ -37,6 +37,7 @@ Run the command below to
 3. train the model.
 4. synthesize wavs.
     - synthesize waveform from `metadata.jsonl`.
+    - `--stage` controls the vocoder model during synthesis (0 = pwgan, 1 = hifigan).
     - synthesize waveform from text file.
 ```bash
 ./run.sh
diff --git a/examples/canton/tts3/run.sh b/examples/canton/tts3/run.sh
index acfc50223..0e1f52a1c 100755
--- a/examples/canton/tts3/run.sh
+++ b/examples/canton/tts3/run.sh
@@ -28,13 +28,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # synthesize, vocoder is pwgan by default
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize; the vocoder is pwgan by default (stage 0), stage 1 uses hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # synthesize_e2e, vocoder is pwgan by default
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
+    # synthesize_e2e; the vocoder is pwgan by default (stage 0), stage 1 uses hifigan
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh --stage 0 ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then

From 05cdbd8d5ebd1189b32b059b76f6201495b56c28 Mon Sep 17 00:00:00 2001
From: zxcd <228587199@qq.com>
Date: Tue, 18 Mar 2025 19:39:28 +0800
Subject: [PATCH 25/46] =?UTF-8?q?=E3=80=90doc=E3=80=91fix=20download=20lin?=
 =?UTF-8?q?k=20case=20abnormal=20traffic=20(#4020)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix download links that caused abnormal traffic

* fix code style
---
 README.md | 22 +-
 README_cn.md | 22 +-
 audio/tests/backends/base.py | 4 +-
 audio/tests/backends/soundfile/base.py | 4 +-
 audio/tests/benchmark/log_melspectrogram.py | 5 +-
 audio/tests/benchmark/melspectrogram.py | 5 +-
 audio/tests/benchmark/mfcc.py | 5 +-
 audio/tests/features/base.py | 2 +-
 demos/TTSAndroid/README.md | 8 +-
 demos/TTSAndroid/app/build.gradle | 4 +-
demos/TTSArmLinux/README.md | 2 +- demos/TTSArmLinux/download.sh | 6 +- demos/TTSCppFrontend/download.sh | 8 +- demos/audio_content_search/README.md | 2 +- demos/audio_content_search/README_cn.md | 2 +- demos/audio_searching/README.md | 4 +- demos/audio_searching/README_cn.md | 4 +- .../audio_searching/src/test_audio_search.py | 2 +- demos/audio_searching/src/test_vpr_search.py | 2 +- demos/audio_tagging/README.md | 2 +- demos/audio_tagging/README_cn.md | 2 +- demos/audio_tagging/run.sh | 2 +- demos/automatic_video_subtitiles/README.md | 2 +- demos/automatic_video_subtitiles/README_cn.md | 2 +- demos/automatic_video_subtitiles/run.sh | 2 +- .../custom_streaming_asr/websocket_server.sh | 2 +- demos/keyword_spotting/README.md | 2 +- demos/keyword_spotting/README_cn.md | 2 +- demos/keyword_spotting/run.sh | 2 +- demos/metaverse/run.sh | 6 +- demos/speaker_verification/README.md | 4 +- demos/speaker_verification/README_cn.md | 4 +- demos/speaker_verification/run.sh | 4 +- demos/speech_recognition/README.md | 2 +- demos/speech_recognition/README_cn.md | 2 +- demos/speech_recognition/run.sh | 6 +- demos/speech_server/README.md | 12 +- demos/speech_server/README_cn.md | 12 +- demos/speech_server/asr_client.sh | 2 +- demos/speech_server/cls_client.sh | 2 +- demos/speech_server/sid_client.sh | 4 +- demos/speech_ssl/README.md | 2 +- demos/speech_ssl/README_cn.md | 2 +- demos/speech_ssl/run.sh | 2 +- demos/speech_translation/README.md | 2 +- demos/speech_translation/README_cn.md | 2 +- demos/speech_translation/run.sh | 2 +- demos/speech_web/README.md | 20 +- demos/story_talker/run.sh | 8 +- demos/streaming_asr_server/README.md | 2 +- demos/streaming_asr_server/README_cn.md | 2 +- demos/streaming_asr_server/test.sh | 2 +- .../README.md | 4 +- .../README_cn.md | 4 +- demos/style_fs2/run.sh | 4 +- demos/whisper/README.md | 2 +- demos/whisper/README_cn.md | 2 +- demos/whisper/run.sh | 2 +- docs/source/demo_video.rst | 2 +- docs/source/install.md | 2 +- docs/source/install_cn.md | 2 +- docs/source/released_model.md | 114 +++--- docs/source/streaming_asr_demo_video.rst | 2 +- docs/source/streaming_tts_demo_video.rst | 2 +- docs/source/tts/README.md | 24 +- docs/source/tts/demo.rst | 356 +++++++++--------- docs/source/tts/demo_2.rst | 56 +-- docs/source/tts/svs_music_score.md | 26 +- docs/source/tts_demo_video.rst | 2 +- docs/topic/ctc/ctc_loss_speed_compare.ipynb | 2 +- docs/topic/gan_vocoder/gan_vocoder.ipynb | 2 +- docs/tutorial/asr/tutorial_deepspeech2.ipynb | 6 +- docs/tutorial/asr/tutorial_transformer.ipynb | 6 +- docs/tutorial/cls/cls_tutorial.ipynb | 4 +- docs/tutorial/st/st_tutorial.ipynb | 10 +- docs/tutorial/tts/tts_tutorial.ipynb | 22 +- examples/aishell/asr0/README.md | 6 +- examples/aishell/asr0/local/test_wav.sh | 2 +- examples/aishell/asr1/README.md | 6 +- examples/aishell/asr1/local/test_wav.sh | 2 +- examples/aishell/asr3/README.md | 10 +- examples/aishell/asr3/local/data.sh | 2 +- examples/aishell/asr3/local/test_wav.sh | 2 +- examples/aishell3/ernie_sat/README.md | 32 +- examples/aishell3/tts3/README.md | 16 +- examples/aishell3/vc0/README.md | 6 +- examples/aishell3/vc1/README.md | 6 +- examples/aishell3/vc2/README.md | 6 +- examples/aishell3/vits-vc/README.md | 4 +- examples/aishell3/vits/README.md | 4 +- examples/aishell3/voc1/README.md | 12 +- examples/aishell3/voc5/README.md | 12 +- examples/aishell3_vctk/ernie_sat/README.md | 34 +- examples/ami/sd0/run.sh | 2 +- examples/canton/tts3/README.md | 10 +- examples/csmsc/jets/README.md | 6 +- examples/csmsc/tts0/README.md | 8 +- 
examples/csmsc/tts2/README.md | 16 +- examples/csmsc/tts3/README.md | 30 +- examples/csmsc/tts3/README_cn.md | 10 +- examples/csmsc/tts3_rhy/README.md | 4 +- examples/csmsc/vits/README.md | 4 +- examples/csmsc/voc1/README.md | 22 +- examples/csmsc/voc3/README.md | 28 +- examples/csmsc/voc4/README.md | 4 +- examples/csmsc/voc5/README.md | 24 +- examples/csmsc/voc5/iSTFTNet.md | 2 +- examples/csmsc/voc6/README.md | 8 +- examples/hey_snips/README.md | 2 +- examples/iwslt2012/punc0/README.md | 14 +- examples/iwslt2012/punc0/local/data.sh | 2 +- examples/librispeech/asr0/README.md | 2 +- examples/librispeech/asr0/local/test_wav.sh | 2 +- examples/librispeech/asr1/README.md | 6 +- examples/librispeech/asr1/local/test_wav.sh | 2 +- examples/librispeech/asr2/README.md | 2 +- examples/librispeech/asr3/README.md | 10 +- examples/librispeech/asr3/local/data.sh | 2 +- examples/librispeech/asr3/local/test_wav.sh | 2 +- examples/librispeech/asr4/README.md | 10 +- examples/librispeech/asr4/local/data.sh | 2 +- examples/librispeech/asr4/local/test_wav.sh | 2 +- examples/librispeech/asr5/README.md | 10 +- examples/librispeech/asr5/local/data.sh | 2 +- examples/librispeech/asr5/local/test_wav.sh | 2 +- examples/ljspeech/tts0/README.md | 6 +- examples/ljspeech/tts1/README.md | 4 +- examples/ljspeech/tts3/README.md | 12 +- examples/ljspeech/voc0/README.md | 2 +- examples/ljspeech/voc1/README.md | 10 +- examples/ljspeech/voc5/README.md | 10 +- examples/opencpop/svs1/README.md | 4 +- examples/opencpop/svs1/README_cn.md | 4 +- examples/opencpop/voc1/README.md | 2 +- examples/other/ge2e/README.md | 2 +- examples/other/rhy/README.md | 2 +- examples/other/rhy/local/data.sh | 4 +- examples/other/tts_finetune/tts3/README.md | 40 +- examples/tal_cs/asr1/README.md | 6 +- examples/tal_cs/asr1/local/test_wav.sh | 2 +- .../ted_en_zh/st1/local/download_pretrain.sh | 4 +- examples/thchs30/align0/README.md | 6 +- examples/vctk/ernie_sat/README.md | 32 +- examples/vctk/tts3/README.md | 14 +- examples/vctk/vc3/README.md | 6 +- examples/vctk/voc1/README.md | 10 +- examples/vctk/voc5/README.md | 12 +- examples/voxceleb/sv0/README.md | 2 +- examples/wenetspeech/asr1/README.md | 6 +- examples/wenetspeech/asr1/RESULTS.md | 4 +- examples/wenetspeech/asr1/local/test_wav.sh | 2 +- examples/zh_en_tts/tts3/README.md | 22 +- examples/zh_en_tts/tts3/local/mfa_download.sh | 8 +- .../zh_en_tts/tts3/local/model_download.sh | 4 +- paddlespeech/cli/st/infer.py | 2 +- paddlespeech/resource/pretrained_models.py | 292 +++++++------- .../server/tests/asr/online/README.md | 2 +- .../server/tests/asr/online/README_cn.md | 2 +- paddlespeech/t2s/exps/stream_play_tts.py | 4 +- paddlespeech/t2s/modules/losses.py | 6 +- runtime/cmake/fastdeploy.cmake | 2 +- runtime/examples/codelab/decoder/run.sh | 4 +- runtime/examples/codelab/feat/run.sh | 4 +- runtime/examples/codelab/nnet/run.sh | 2 +- runtime/examples/codelab/u2/run.sh | 4 +- runtime/examples/custom_asr/run.sh | 2 +- .../wenetspeech/local/recognizer_wfst.sh | 2 +- .../local/recognizer_wfst_fastdeploy.sh | 2 +- .../wenetspeech/local/run_build_tlg.sh | 2 +- runtime/examples/u2pp_ol/wenetspeech/run.sh | 8 +- tests/benchmark/pwgan/run_all.sh | 2 +- tests/chains/speedyspeech/prepare.sh | 12 +- tests/test_tipc/prepare.sh | 8 +- .../unit/asr/deepspeech2_online_model_test.sh | 2 +- tests/unit/audiotools/test_audiotools.sh | 4 +- tests/unit/cli/test_cli.sh | 10 +- .../unit/server/offline/test_server_client.sh | 2 +- tests/unit/tts/test_losses.py | 2 +- tools/Makefile | 2 +- 179 files changed, 973 
insertions(+), 964 deletions(-) diff --git a/README.md b/README.md index 6594a4b8f..ace7f7c57 100644 --- a/README.md +++ b/README.md @@ -46,14 +46,14 @@ - +
I knocked at the door on the ancient side of the building. - +
我认为跑步最重要的就是给我带来了身体健康。 @@ -76,7 +76,7 @@ - +
我 在 这栋 建筑 的 古老 门上 敲门。 @@ -99,42 +99,42 @@ Life was like a box of chocolates, you never know what you're gonna get. - +
早上好,今天是2020/10/29,最低温度是-3°C。 - +
季姬寂,集鸡,鸡即棘鸡。棘鸡饥叽,季姬及箕稷济鸡。鸡既济,跻姬笈,季姬忌,急咭鸡,鸡急,继圾几,季姬急,即籍箕击鸡,箕疾击几伎,伎即齑,鸡叽集几基,季姬急极屐击鸡,鸡既殛,季姬激,即记《季姬击鸡记》。 - +
大家好,我是 parrot 虚拟老师,我们来读一首诗,我与春风皆过客,I and the spring breeze are passing by,你携秋水揽星河,you take the autumn water to take the galaxy。 - +
宜家唔系事必要你讲,但系你所讲嘅说话将会变成呈堂证供。 - +
各个国家有各个国家嘅国歌 - +
@@ -283,8 +283,8 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl Test audio sample download ```shell -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### Automatic Speech Recognition diff --git a/README_cn.md b/README_cn.md index 5b95a2879..491c61f39 100644 --- a/README_cn.md +++ b/README_cn.md @@ -51,14 +51,14 @@ - +
I knocked at the door on the ancient side of the building. - +
我认为跑步最重要的就是给我带来了身体健康。 @@ -81,7 +81,7 @@ - +
我 在 这栋 建筑 的 古老 门上 敲门。 @@ -104,42 +104,42 @@ Life was like a box of chocolates, you never know what you're gonna get. - +
早上好,今天是2020/10/29,最低温度是-3°C。 - +
季姬寂,集鸡,鸡即棘鸡。棘鸡饥叽,季姬及箕稷济鸡。鸡既济,跻姬笈,季姬忌,急咭鸡,鸡急,继圾几,季姬急,即籍箕击鸡,箕疾击几伎,伎即齑,鸡叽集几基,季姬急极屐击鸡,鸡既殛,季姬激,即记《季姬击鸡记》。 - +
大家好,我是 parrot 虚拟老师,我们来读一首诗,我与春风皆过客,I and the spring breeze are passing by,你携秋水揽星河,you take the autumn water to take the galaxy。 - +
宜家唔系事必要你讲,但系你所讲嘅说话将会变成呈堂证供。 - +
各个国家有各个国家嘅国歌 - +
@@ -286,8 +286,8 @@ pip install . 测试音频示例下载 ```shell -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### 语音识别 diff --git a/audio/tests/backends/base.py b/audio/tests/backends/base.py index c2d53d209..b4f97e89b 100644 --- a/audio/tests/backends/base.py +++ b/audio/tests/backends/base.py @@ -15,8 +15,8 @@ import os import unittest import urllib.request -mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' -multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav' +mono_channel_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' +multi_channels_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav' class BackendTest(unittest.TestCase): diff --git a/audio/tests/backends/soundfile/base.py b/audio/tests/backends/soundfile/base.py index c2d53d209..b4f97e89b 100644 --- a/audio/tests/backends/soundfile/base.py +++ b/audio/tests/backends/soundfile/base.py @@ -15,8 +15,8 @@ import os import unittest import urllib.request -mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' -multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav' +mono_channel_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' +multi_channels_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav' class BackendTest(unittest.TestCase): diff --git a/audio/tests/benchmark/log_melspectrogram.py b/audio/tests/benchmark/log_melspectrogram.py index 1d03c1df3..1c772b421 100644 --- a/audio/tests/benchmark/log_melspectrogram.py +++ b/audio/tests/benchmark/log_melspectrogram.py @@ -21,11 +21,12 @@ import paddleaudio import torch import torchaudio -wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.backends.soundfile_load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddleaudio.backends.soundfile_load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) diff --git a/audio/tests/benchmark/melspectrogram.py b/audio/tests/benchmark/melspectrogram.py index 28c4ac806..9df6ce092 100644 --- a/audio/tests/benchmark/melspectrogram.py +++ b/audio/tests/benchmark/melspectrogram.py @@ -21,11 +21,12 @@ import paddleaudio import torch import torchaudio -wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.backends.soundfile_load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddleaudio.backends.soundfile_load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) diff --git a/audio/tests/benchmark/mfcc.py b/audio/tests/benchmark/mfcc.py index 544a5371b..7b1ecbe03 100644 --- a/audio/tests/benchmark/mfcc.py +++ b/audio/tests/benchmark/mfcc.py @@ -21,11 +21,12 @@ import paddleaudio import torch import torchaudio -wav_url = 
'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.backends.soundfile_load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddleaudio.backends.soundfile_load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) diff --git a/audio/tests/features/base.py b/audio/tests/features/base.py index 4a44e04bb..1d36c13d4 100644 --- a/audio/tests/features/base.py +++ b/audio/tests/features/base.py @@ -19,7 +19,7 @@ import numpy as np import paddle from paddleaudio.backends import soundfile_load as load -wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' +wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav' class FeatTest(unittest.TestCase): diff --git a/demos/TTSAndroid/README.md b/demos/TTSAndroid/README.md index 36848cbe3..a26172cb6 100644 --- a/demos/TTSAndroid/README.md +++ b/demos/TTSAndroid/README.md @@ -70,8 +70,8 @@ TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/Predictor.java ``` 2. `fastspeech2_csmsc_arm.nb` 和 `mb_melgan_csmsc_arm.nb`: 模型文件 (opt 工具转化后 Paddle Lite 模型) - ,分别来自 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip) - 和 [mb_melgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_pdlite_1.3.0.zip)。 + ,分别来自 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip) + 和 [mb_melgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_pdlite_1.3.0.zip)。 ```bash # 位置: @@ -161,7 +161,7 @@ Android 示例基于 Java API 开发,调用 Paddle Lite `Java API` 包括以 - C++ 中文前端 [lym0302/paddlespeech_tts_cpp](https://github.com/lym0302/paddlespeech_tts_cpp) - C++ 英文 g2p [yazone/g2pE_mobile](https://github.com/yazone/g2pE_mobile) -`phone_id_map.txt` 请参考 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip)。 +`phone_id_map.txt` 请参考 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip)。 ## 通过 setting 界面更新语音合成的相关参数 @@ -186,7 +186,7 @@ Android 示例基于 Java API 开发,调用 Paddle Lite `Java API` 包括以 ## Release -[2022-11-29-app-release.apk](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/2022-11-29-app-release.apk) +[2022-11-29-app-release.apk](https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/2022-11-29-app-release.apk) ## More 本 Demo 合并自 [yt605155624/TTSAndroid](https://github.com/yt605155624/TTSAndroid)。 diff --git a/demos/TTSAndroid/app/build.gradle b/demos/TTSAndroid/app/build.gradle index 40ee5e123..ee493d622 100644 --- a/demos/TTSAndroid/app/build.gradle +++ b/demos/TTSAndroid/app/build.gradle @@ -31,7 +31,7 @@ dependencies { implementation files('libs/PaddlePredictor.jar') } -def paddleLiteLibs = 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/paddle_lite_libs_68b66fd3.tar.gz' +def paddleLiteLibs = 
'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/paddle_lite_libs_68b66fd3.tar.gz' task downloadAndExtractPaddleLiteLibs(type: DefaultTask) { doFirst { println "Downloading and extracting Paddle Lite libs" @@ -73,7 +73,7 @@ task downloadAndExtractPaddleLiteLibs(type: DefaultTask) { } preBuild.dependsOn downloadAndExtractPaddleLiteLibs -def paddleLiteModels = [['src' : 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz', +def paddleLiteModels = [['src' : 'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz', 'dest': 'src/main/assets/models'],] task downloadAndExtractPaddleLiteModels(type: DefaultTask) { doFirst { diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md index a4ccba6c8..cd331155b 100644 --- a/demos/TTSArmLinux/README.md +++ b/demos/TTSArmLinux/README.md @@ -21,7 +21,7 @@ sudo yum install cmake wget tar unzip ### 下载 Paddle Lite 库文件和模型文件 -预编译的二进制使用与安卓 Demo 版本相同的 Paddle Lite 推理库([Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449))和模型([fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz))。 +预编译的二进制使用与安卓 Demo 版本相同的 Paddle Lite 推理库([Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449))和模型([fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz))。 可用以下命令下载: diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh index 7eaa836a5..2a0b23b27 100755 --- a/demos/TTSArmLinux/download.sh +++ b/demos/TTSArmLinux/download.sh @@ -45,17 +45,17 @@ download() { echo "Download models..." download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \ '39e0c6604f97c70f5d13c573d7e709b9' \ "$LIBS_DIR" download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \ 'f5ceb509f0b610dafb8379889c5f36f8' \ "$LIBS_DIR" download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \ '93ef17d44b498aff3bea93e2c5c09a1e' \ "$MODELS_DIR" diff --git a/demos/TTSCppFrontend/download.sh b/demos/TTSCppFrontend/download.sh index 0953e3a59..3051ce3f9 100755 --- a/demos/TTSCppFrontend/download.sh +++ b/demos/TTSCppFrontend/download.sh @@ -40,22 +40,22 @@ DIST_DIR="$PWD/front_demo/dict" mkdir -p "$DIST_DIR" download 'fastspeech2_nosil_baker_ckpt_0.4.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/t2s/text_frontend/fastspeech2_nosil_baker_ckpt_0.4.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/t2s/text_frontend/fastspeech2_nosil_baker_ckpt_0.4.tar.gz' \ '7bf1bab1737375fa123c413eb429c573' \ "$DIST_DIR" download 'speedyspeech_nosil_baker_ckpt_0.5.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/t2s/text_frontend/speedyspeech_nosil_baker_ckpt_0.5.tar.gz' \ + 
'https://paddlespeech.cdn.bcebos.com/t2s/text_frontend/speedyspeech_nosil_baker_ckpt_0.5.tar.gz' \ '0b7754b21f324789aef469c61f4d5b8f' \ "$DIST_DIR" download 'jieba.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/t2s/text_frontend/jieba.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/t2s/text_frontend/jieba.tar.gz' \ '6d30f426bd8c0025110a483f051315ca' \ "$DIST_DIR" download 'tranditional_to_simplified.tar.gz' \ - 'https://paddlespeech.bj.bcebos.com/t2s/text_frontend/tranditional_to_simplified.tar.gz' \ + 'https://paddlespeech.cdn.bcebos.com/t2s/text_frontend/tranditional_to_simplified.tar.gz' \ '258f5b59d5ebfe96d02007ca1d274a7f' \ "$DIST_DIR" diff --git a/demos/audio_content_search/README.md b/demos/audio_content_search/README.md index 89b1c0d89..d090fdf1e 100644 --- a/demos/audio_content_search/README.md +++ b/demos/audio_content_search/README.md @@ -27,7 +27,7 @@ The input of this demo should be a WAV file(`.wav`), and the sample rate must be Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. run paddlespeech_server diff --git a/demos/audio_content_search/README_cn.md b/demos/audio_content_search/README_cn.md index 16c1a3dd7..a3f20c7e1 100644 --- a/demos/audio_content_search/README_cn.md +++ b/demos/audio_content_search/README_cn.md @@ -27,7 +27,7 @@ pip install -r requirements.txt 可以下载此 demo 的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. 启动 server diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md index 528fce9e8..5b3890382 100644 --- a/demos/audio_searching/README.md +++ b/demos/audio_searching/README.md @@ -128,7 +128,7 @@ Then to start the system server, and it provides HTTP backend services. Output: ```bash - Downloading https://paddlespeech.bj.bcebos.com/vector/audio/example_audio.tar.gz ... + Downloading https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz ... ... Unpacking ./example_audio.tar.gz ... [2022-03-26 22:50:54,987] [ INFO] - checking the aduio file format...... @@ -136,7 +136,7 @@ Then to start the system server, and it provides HTTP backend services. [2022-03-26 22:50:54,987] [ INFO] - The audio file format is right [2022-03-26 22:50:54,988] [ INFO] - device type: cpu [2022-03-26 22:50:54,988] [ INFO] - load the pretrained model: ecapatdnn_voxceleb12-16k - [2022-03-26 22:50:54,990] [ INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz + [2022-03-26 22:50:54,990] [ INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz ... [2022-03-26 22:51:17,285] [ INFO] - start to dynamic import the model class [2022-03-26 22:51:17,285] [ INFO] - model name ecapatdnn diff --git a/demos/audio_searching/README_cn.md b/demos/audio_searching/README_cn.md index 6d38b91f5..30ec2a97e 100644 --- a/demos/audio_searching/README_cn.md +++ b/demos/audio_searching/README_cn.md @@ -130,7 +130,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" 输出: ```bash - Downloading https://paddlespeech.bj.bcebos.com/vector/audio/example_audio.tar.gz ... + Downloading https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz ... ... 
Unpacking ./example_audio.tar.gz ... [2022-03-26 22:50:54,987] [ INFO] - checking the aduio file format...... @@ -138,7 +138,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" [2022-03-26 22:50:54,987] [ INFO] - The audio file format is right [2022-03-26 22:50:54,988] [ INFO] - device type: cpu [2022-03-26 22:50:54,988] [ INFO] - load the pretrained model: ecapatdnn_voxceleb12-16k - [2022-03-26 22:50:54,990] [ INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz + [2022-03-26 22:50:54,990] [ INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz ... [2022-03-26 22:51:17,285] [ INFO] - start to dynamic import the model class [2022-03-26 22:51:17,285] [ INFO] - model name ecapatdnn diff --git a/demos/audio_searching/src/test_audio_search.py b/demos/audio_searching/src/test_audio_search.py index f9ea2929e..5136c0192 100644 --- a/demos/audio_searching/src/test_audio_search.py +++ b/demos/audio_searching/src/test_audio_search.py @@ -24,7 +24,7 @@ def download_audio_data(): """ Download audio data """ - url = "https://paddlespeech.bj.bcebos.com/vector/audio/example_audio.tar.gz" + url = "https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz" md5sum = "52ac69316c1aa1fdef84da7dd2c67b39" target_dir = "./" filepath = download(url, md5sum, target_dir) diff --git a/demos/audio_searching/src/test_vpr_search.py b/demos/audio_searching/src/test_vpr_search.py index cc795564e..67442c9d5 100644 --- a/demos/audio_searching/src/test_vpr_search.py +++ b/demos/audio_searching/src/test_vpr_search.py @@ -24,7 +24,7 @@ def download_audio_data(): """ Download audio data """ - url = "https://paddlespeech.bj.bcebos.com/vector/audio/example_audio.tar.gz" + url = "https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz" md5sum = "52ac69316c1aa1fdef84da7dd2c67b39" target_dir = "./" filepath = download(url, md5sum, target_dir) diff --git a/demos/audio_tagging/README.md b/demos/audio_tagging/README.md index b602c6022..89f4a944d 100644 --- a/demos/audio_tagging/README.md +++ b/demos/audio_tagging/README.md @@ -18,7 +18,7 @@ The input of this demo should be a WAV file(`.wav`). Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/dog.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav ``` ### 3. Usage diff --git a/demos/audio_tagging/README_cn.md b/demos/audio_tagging/README_cn.md index 36b5d8aaf..1a46abd62 100644 --- a/demos/audio_tagging/README_cn.md +++ b/demos/audio_tagging/README_cn.md @@ -18,7 +18,7 @@ 可以下载此 demo 的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/dog.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav ``` ### 3. 
使用方法 diff --git a/demos/audio_tagging/run.sh b/demos/audio_tagging/run.sh index b30eba35f..3841af10c 100755 --- a/demos/audio_tagging/run.sh +++ b/demos/audio_tagging/run.sh @@ -1,4 +1,4 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/dog.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav paddlespeech cls --input ./cat.wav --topk 10 diff --git a/demos/automatic_video_subtitiles/README.md b/demos/automatic_video_subtitiles/README.md index 89d8c73c9..0649f77f9 100644 --- a/demos/automatic_video_subtitiles/README.md +++ b/demos/automatic_video_subtitiles/README.md @@ -15,7 +15,7 @@ You can choose one way from easy, medium and hard to install paddlespeech. ### 2. Prepare Input Get a video file with the speech of the specific language: ```bash -wget -c https://paddlespeech.bj.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 +wget -c https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 ``` Extract `.wav` with one channel and 16000 sample rate from the video: diff --git a/demos/automatic_video_subtitiles/README_cn.md b/demos/automatic_video_subtitiles/README_cn.md index 990ff6dbd..7a44ff107 100644 --- a/demos/automatic_video_subtitiles/README_cn.md +++ b/demos/automatic_video_subtitiles/README_cn.md @@ -13,7 +13,7 @@ ### 2. 准备输入 获取包含特定语言语音的视频文件: ```bash -wget -c https://paddlespeech.bj.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 +wget -c https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 ``` 从视频文件中提取单通道的 16kHz 采样率的 `.wav` 文件: ```bash diff --git a/demos/automatic_video_subtitiles/run.sh b/demos/automatic_video_subtitiles/run.sh index 9b9fd2ccc..943109099 100755 --- a/demos/automatic_video_subtitiles/run.sh +++ b/demos/automatic_video_subtitiles/run.sh @@ -1,6 +1,6 @@ #!/bin/bash -video_url=https://paddlespeech.bj.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 +video_url=https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 video_file=$(basename ${video_url}) audio_file=$(echo ${video_file} | awk -F'.' '{print $1}').wav num_channels=1 diff --git a/demos/custom_streaming_asr/websocket_server.sh b/demos/custom_streaming_asr/websocket_server.sh index 041c345be..a7ee39636 100755 --- a/demos/custom_streaming_asr/websocket_server.sh +++ b/demos/custom_streaming_asr/websocket_server.sh @@ -14,7 +14,7 @@ cmvn=./data/cmvn.ark #paddle_asr_online/resource.tar.gz if [ ! -f $cmvn ]; then - wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/resource.tar.gz + wget -c https://paddlespeech.cdn.bcebos.com/s2t/paddle_asr_online/resource.tar.gz tar xzfv resource.tar.gz ln -s ./resource/data . fi diff --git a/demos/keyword_spotting/README.md b/demos/keyword_spotting/README.md index b55c71124..02291ff11 100644 --- a/demos/keyword_spotting/README.md +++ b/demos/keyword_spotting/README.md @@ -17,7 +17,7 @@ The input of this demo should be a WAV file(`.wav`), and the sample rate must be Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/kws/hey_snips.wav https://paddlespeech.bj.bcebos.com/kws/non-keyword.wav +wget -c https://paddlespeech.cdn.bcebos.com/kws/hey_snips.wav https://paddlespeech.cdn.bcebos.com/kws/non-keyword.wav ``` ### 3. 
Usage diff --git a/demos/keyword_spotting/README_cn.md b/demos/keyword_spotting/README_cn.md index 0d8f44a53..b35d22f25 100644 --- a/demos/keyword_spotting/README_cn.md +++ b/demos/keyword_spotting/README_cn.md @@ -16,7 +16,7 @@ 可以下载此 demo 的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/kws/hey_snips.wav https://paddlespeech.bj.bcebos.com/kws/non-keyword.wav +wget -c https://paddlespeech.cdn.bcebos.com/kws/hey_snips.wav https://paddlespeech.cdn.bcebos.com/kws/non-keyword.wav ``` ### 3. 使用方法 - 命令行 (推荐使用) diff --git a/demos/keyword_spotting/run.sh b/demos/keyword_spotting/run.sh index 7f9e0ebba..dec3cb9e5 100755 --- a/demos/keyword_spotting/run.sh +++ b/demos/keyword_spotting/run.sh @@ -1,6 +1,6 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/kws/hey_snips.wav https://paddlespeech.bj.bcebos.com/kws/non-keyword.wav +wget -c https://paddlespeech.cdn.bcebos.com/kws/hey_snips.wav https://paddlespeech.cdn.bcebos.com/kws/non-keyword.wav # kws paddlespeech kws --input ./hey_snips.wav diff --git a/demos/metaverse/run.sh b/demos/metaverse/run.sh index 551f0b4e5..02cca15f8 100755 --- a/demos/metaverse/run.sh +++ b/demos/metaverse/run.sh @@ -25,12 +25,12 @@ fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then # download pretrained tts models and unzip - wget -P download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip unzip -d download download/pwg_baker_ckpt_0.4.zip - wget -P download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip unzip -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip # donload sources - wget -P download https://paddlespeech.bj.bcebos.com/demos/metaverse/Lamarr.png + wget -P download https://paddlespeech.cdn.bcebos.com/demos/metaverse/Lamarr.png fi diff --git a/demos/speaker_verification/README.md b/demos/speaker_verification/README.md index 37c6bf3b9..c3055f3d4 100644 --- a/demos/speaker_verification/README.md +++ b/demos/speaker_verification/README.md @@ -18,8 +18,8 @@ The input of this cli demo should be a WAV file(`.wav`), and the sample rate mus Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav ``` ### 3. Usage diff --git a/demos/speaker_verification/README_cn.md b/demos/speaker_verification/README_cn.md index 85224699c..71cb54c02 100644 --- a/demos/speaker_verification/README_cn.md +++ b/demos/speaker_verification/README_cn.md @@ -18,8 +18,8 @@ 可以下载此 demo 的示例音频: ```bash # 该音频的内容是数字串 85236145389 -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav ``` ### 3. 
使用方法 - 命令行 (推荐使用) diff --git a/demos/speaker_verification/run.sh b/demos/speaker_verification/run.sh index 6140f7f38..c7c589e82 100755 --- a/demos/speaker_verification/run.sh +++ b/demos/speaker_verification/run.sh @@ -1,7 +1,7 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav # vector paddlespeech vector --task spk --input ./85236145389.wav diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md index e406590d2..cd8d69c7e 100644 --- a/demos/speech_recognition/README.md +++ b/demos/speech_recognition/README.md @@ -17,7 +17,7 @@ The input of this demo should be a WAV file(`.wav`), and the sample rate must be Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav ``` ### 3. Usage diff --git a/demos/speech_recognition/README_cn.md b/demos/speech_recognition/README_cn.md index 62dce3bc9..496f91ca2 100644 --- a/demos/speech_recognition/README_cn.md +++ b/demos/speech_recognition/README_cn.md @@ -17,7 +17,7 @@ 可以下载此 demo 的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav ``` ### 3. 
使用方法 - 命令行 (推荐使用) diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh index 8ba6e4c3e..20fdb7aaf 100755 --- a/demos/speech_recognition/run.sh +++ b/demos/speech_recognition/run.sh @@ -1,8 +1,8 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav # asr paddlespeech asr --input ./zh.wav diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 08788a89e..178374428 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -85,9 +85,9 @@ The input of ASR client demo should be a WAV file(`.wav`), and the sample rate Here are sample files for this ASR client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav ``` **Note:** The response time will be slightly longer when using the client for the first time @@ -204,7 +204,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav Here are sample files for this CLS Client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` **Note:** The response time will be slightly longer when using the client for the first time @@ -257,8 +257,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav Here are sample files for this Speaker Verification Client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav ``` #### 7.1 Extract speaker embedding diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index f2cb349e3..f724e9770 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -89,9 +89,9 @@ ASR 客户端的输入是一个 WAV 文件(`.wav`),并且采样率必须 可以下载 ASR 客户端的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav ``` **注意:** 初次使用客户端时响应时间会略长 @@ -211,7 +211,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav 可以下载 CLS 客户端的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` **注意:** 初次使用客户端时响应时间会略长 @@ -264,8 +264,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav 可以下载声纹客户端的示例音频: ```bash -wget -c 
https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav ``` #### 7.1 提取声纹特征 diff --git a/demos/speech_server/asr_client.sh b/demos/speech_server/asr_client.sh index 37a7ab0b0..47ae1baba 100755 --- a/demos/speech_server/asr_client.sh +++ b/demos/speech_server/asr_client.sh @@ -1,6 +1,6 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav # If `127.0.0.1` is not accessible, you need to use the actual service IP address. paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav diff --git a/demos/speech_server/cls_client.sh b/demos/speech_server/cls_client.sh index 67012648c..6a9e414f5 100755 --- a/demos/speech_server/cls_client.sh +++ b/demos/speech_server/cls_client.sh @@ -1,6 +1,6 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav # If `127.0.0.1` is not accessible, you need to use the actual service IP address. paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav --topk 1 diff --git a/demos/speech_server/sid_client.sh b/demos/speech_server/sid_client.sh index 99bab21ae..c9b75f4e0 100755 --- a/demos/speech_server/sid_client.sh +++ b/demos/speech_server/sid_client.sh @@ -1,7 +1,7 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav -wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav +wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav # sid extract paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav diff --git a/demos/speech_ssl/README.md b/demos/speech_ssl/README.md index 8677ebc57..42449147f 100644 --- a/demos/speech_ssl/README.md +++ b/demos/speech_ssl/README.md @@ -17,7 +17,7 @@ The input of this demo should be a WAV file(`.wav`), and the sample rate must be Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### 3. Usage diff --git a/demos/speech_ssl/README_cn.md b/demos/speech_ssl/README_cn.md index 5b209419a..42ffd634d 100644 --- a/demos/speech_ssl/README_cn.md +++ b/demos/speech_ssl/README_cn.md @@ -17,7 +17,7 @@ 可以下载此 demo 的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### 3. 
使用方法 - 命令行 (推荐使用) diff --git a/demos/speech_ssl/run.sh b/demos/speech_ssl/run.sh index ca94bc5cc..9940207f8 100644 --- a/demos/speech_ssl/run.sh +++ b/demos/speech_ssl/run.sh @@ -1,7 +1,7 @@ #!/bin/bash # audio download -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav # to recognize text paddlespeech ssl --task asr --lang en --input ./en.wav diff --git a/demos/speech_translation/README.md b/demos/speech_translation/README.md index 4866336c0..df75cd353 100644 --- a/demos/speech_translation/README.md +++ b/demos/speech_translation/README.md @@ -17,7 +17,7 @@ The input of this demo should be a WAV file(`.wav`). Here are sample files for this demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### 3. Usage (not support for Windows now) diff --git a/demos/speech_translation/README_cn.md b/demos/speech_translation/README_cn.md index 5119bf9f4..617340f5e 100644 --- a/demos/speech_translation/README_cn.md +++ b/demos/speech_translation/README_cn.md @@ -17,7 +17,7 @@ 这里给出一些样例文件供 Demo 使用: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav ``` ### 3. 使用方法 (暂不支持Windows) diff --git a/demos/speech_translation/run.sh b/demos/speech_translation/run.sh index 6619bd91f..6f316b355 100755 --- a/demos/speech_translation/run.sh +++ b/demos/speech_translation/run.sh @@ -1,4 +1,4 @@ #!/bin/bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav paddlespeech st --input ./en.wav diff --git a/demos/speech_web/README.md b/demos/speech_web/README.md index fc1fe7105..507d82186 100644 --- a/demos/speech_web/README.md +++ b/demos/speech_web/README.md @@ -100,43 +100,43 @@ cd speech_server mkdir -p source/model cd source # 下载 & 解压 wav (包含VC测试音频) -wget https://paddlespeech.bj.bcebos.com/demos/speech_web/wav_vc.zip +wget https://paddlespeech.cdn.bcebos.com/demos/speech_web/wav_vc.zip unzip wav_vc.zip cd model # 下载 GE2E 相关模型 wget https://bj.bcebos.com/paddlespeech/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip unzip ge2e_ckpt_0.3.zip -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip unzip pwg_aishell3_ckpt_0.5.zip -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip unzip fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip # 下载 ECAPA-TDNN 相关模型 -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_vc2_1.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_vc2_1.2.0.zip unzip fastspeech2_aishell3_ckpt_vc2_1.2.0.zip # 下载 ERNIE-SAT 相关模型 # aishell3 ERNIE-SAT -wget 
https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_ckpt_1.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_ckpt_1.2.0.zip unzip erniesat_aishell3_ckpt_1.2.0.zip # vctk ERNIE-SAT -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_vctk_ckpt_1.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_vctk_ckpt_1.2.0.zip unzip erniesat_vctk_ckpt_1.2.0.zip # aishell3_vctk ERNIE-SAT -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_vctk_ckpt_1.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_vctk_ckpt_1.2.0.zip unzip erniesat_aishell3_vctk_ckpt_1.2.0.zip # 下载 finetune 相关模型 -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip unzip fastspeech2_aishell3_ckpt_1.1.0.zip # 下载声码器 -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip unzip hifigan_aishell3_ckpt_0.2.0.zip -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip unzip hifigan_vctk_ckpt_0.2.0.zip cd ../../../ diff --git a/demos/story_talker/run.sh b/demos/story_talker/run.sh index 50335e73b..dadfacba3 100755 --- a/demos/story_talker/run.sh +++ b/demos/story_talker/run.sh @@ -19,13 +19,13 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # download pretrained tts models and unzip - wget -P download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip unzip -d download download/pwg_baker_ckpt_0.4.zip - wget -P download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip unzip -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip # download sources - wget -P download https://paddlespeech.bj.bcebos.com/demos/story_talker/simfang.ttf - wget -P download/imgs https://paddlespeech.bj.bcebos.com/demos/story_talker/000.jpg + wget -P download https://paddlespeech.cdn.bcebos.com/demos/story_talker/simfang.ttf + wget -P download/imgs https://paddlespeech.cdn.bcebos.com/demos/story_talker/000.jpg fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 423485466..670dce193 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -32,7 +32,7 @@ The input of ASR client demo should be a WAV file(`.wav`), and the sample rate Here are sample files for thisASR client demo that can be downloaded: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. 
Server Usage diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md index f5f477ea1..2bb3f83fa 100644 --- a/demos/streaming_asr_server/README_cn.md +++ b/demos/streaming_asr_server/README_cn.md @@ -35,7 +35,7 @@ 可以下载此 ASR client的示例音频: ```bash -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. 服务端使用方法 diff --git a/demos/streaming_asr_server/test.sh b/demos/streaming_asr_server/test.sh index 386c7f894..68eb7567f 100755 --- a/demos/streaming_asr_server/test.sh +++ b/demos/streaming_asr_server/test.sh @@ -1,5 +1,5 @@ # download the test wav -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav # read the wav and pass it to only streaming asr service # If `127.0.0.1` is not accessible, you need to use the actual service IP address. diff --git a/demos/streaming_tts_serving_fastdeploy/README.md b/demos/streaming_tts_serving_fastdeploy/README.md index 3e983a06d..460e257b4 100644 --- a/demos/streaming_tts_serving_fastdeploy/README.md +++ b/demos/streaming_tts_serving_fastdeploy/README.md @@ -31,8 +31,8 @@ export LANGUAGE="zh_CN:zh:en_US:en" #### 1.3 Download models(inside the docker) ```bash cd /models/streaming_tts_serving/1 -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip unzip fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip unzip mb_melgan_csmsc_onnx_0.2.0.zip ``` diff --git a/demos/streaming_tts_serving_fastdeploy/README_cn.md b/demos/streaming_tts_serving_fastdeploy/README_cn.md index 7edd32830..5675867b0 100644 --- a/demos/streaming_tts_serving_fastdeploy/README_cn.md +++ b/demos/streaming_tts_serving_fastdeploy/README_cn.md @@ -31,8 +31,8 @@ export LANGUAGE="zh_CN:zh:en_US:en" #### 1.3 下载模型(在docker内) ```bash cd /models/streaming_tts_serving/1 -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip -wget https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip +wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip unzip fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip unzip mb_melgan_csmsc_onnx_0.2.0.zip ``` diff --git a/demos/style_fs2/run.sh b/demos/style_fs2/run.sh index 45fc0c104..fe86822d0 100755 --- a/demos/style_fs2/run.sh +++ b/demos/style_fs2/run.sh @@ -14,9 +14,9 @@ mkdir -p download if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # download pretrained tts models and unzip - wget -P download https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip unzip -d download download/pwg_baker_ckpt_0.4.zip - wget -P download 
https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip + wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip unzip -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip fi diff --git a/demos/whisper/README.md b/demos/whisper/README.md index 6e1b8011f..ccd695d5e 100644 --- a/demos/whisper/README.md +++ b/demos/whisper/README.md @@ -16,7 +16,7 @@ Whisper model trained by OpenAI whisper https://github.com/openai/whisper Here are sample files for this demo that can be downloaded: ```bash - wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav + wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. Usage diff --git a/demos/whisper/README_cn.md b/demos/whisper/README_cn.md index 6f7c35f04..868a5579c 100644 --- a/demos/whisper/README_cn.md +++ b/demos/whisper/README_cn.md @@ -17,7 +17,7 @@ Whisper模型由OpenAI Whisper训练 https://github.com/openai/whisper 可以下载此 demo 的示例音频: ```bash - wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav + wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav ``` ### 3. 使用方法 diff --git a/demos/whisper/run.sh b/demos/whisper/run.sh index b9595735f..7049192cf 100644 --- a/demos/whisper/run.sh +++ b/demos/whisper/run.sh @@ -1,7 +1,7 @@ #!/bin/bash # audio download -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav +wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav # to recognize text paddlespeech whisper --task transcribe --input ./zh.wav diff --git a/docs/source/demo_video.rst b/docs/source/demo_video.rst index dc7e718a6..cd56d4bfc 100644 --- a/docs/source/demo_video.rst +++ b/docs/source/demo_video.rst @@ -5,7 +5,7 @@ Demo Video