From 03022f2170ce76d2ca8385a92aa8df3519e2366b Mon Sep 17 00:00:00 2001 From: mjxs <52824616+kk-2000@users.noreply.github.com> Date: Tue, 4 Jun 2024 10:34:39 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E3=80=90Fix=20Speech=20Issue=20No.5?= =?UTF-8?q?=E3=80=91issue=203444=20transformation=20import=20error=20(#377?= =?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix paddlespeech.s2t.transform.transformation import error * fix paddlespeech.s2t.transform import error --- audio/tests/features/base.py | 2 +- audio/tests/features/test_istft.py | 4 ++-- audio/tests/features/test_log_melspectrogram.py | 2 +- audio/tests/features/test_spectrogram.py | 2 +- audio/tests/features/test_stft.py | 2 +- docs/tutorial/asr/tutorial_transformer.ipynb | 4 ++-- utils/apply-cmvn.py | 2 +- utils/compute-cmvn-stats.py | 2 +- utils/copy-feats.py | 2 +- utils/feat-to-shape.py | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/audio/tests/features/base.py b/audio/tests/features/base.py index d183b72ad..3bb1d1dde 100644 --- a/audio/tests/features/base.py +++ b/audio/tests/features/base.py @@ -37,7 +37,7 @@ class FeatTest(unittest.TestCase): self.waveform, self.sr = load(os.path.abspath(os.path.basename(url))) self.waveform = self.waveform.astype( np.float32 - ) # paddlespeech.s2t.transform.spectrogram only supports float32 + ) # paddlespeech.audio.transform.spectrogram only supports float32 dim = len(self.waveform.shape) assert dim in [1, 2] diff --git a/audio/tests/features/test_istft.py b/audio/tests/features/test_istft.py index 9cf8cdd65..ea1ee5cb6 100644 --- a/audio/tests/features/test_istft.py +++ b/audio/tests/features/test_istft.py @@ -18,8 +18,8 @@ import paddle from paddleaudio.functional.window import get_window from .base import FeatTest -from paddlespeech.s2t.transform.spectrogram import IStft -from paddlespeech.s2t.transform.spectrogram import Stft +from paddlespeech.audio.transform.spectrogram import IStft +from paddlespeech.audio.transform.spectrogram import Stft class TestIstft(FeatTest): diff --git a/audio/tests/features/test_log_melspectrogram.py b/audio/tests/features/test_log_melspectrogram.py index 7d5680387..b2765d3be 100644 --- a/audio/tests/features/test_log_melspectrogram.py +++ b/audio/tests/features/test_log_melspectrogram.py @@ -18,7 +18,7 @@ import paddle import paddleaudio from .base import FeatTest -from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram +from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram class TestLogMelSpectrogram(FeatTest): diff --git a/audio/tests/features/test_spectrogram.py b/audio/tests/features/test_spectrogram.py index 5fe5afee1..6f4609632 100644 --- a/audio/tests/features/test_spectrogram.py +++ b/audio/tests/features/test_spectrogram.py @@ -18,7 +18,7 @@ import paddle import paddleaudio from .base import FeatTest -from paddlespeech.s2t.transform.spectrogram import Spectrogram +from paddlespeech.audio.transform.spectrogram import Spectrogram class TestSpectrogram(FeatTest): diff --git a/audio/tests/features/test_stft.py b/audio/tests/features/test_stft.py index 58792ffe2..9511a2926 100644 --- a/audio/tests/features/test_stft.py +++ b/audio/tests/features/test_stft.py @@ -18,7 +18,7 @@ import paddle from paddleaudio.functional.window import get_window from .base import FeatTest -from paddlespeech.s2t.transform.spectrogram import Stft +from paddlespeech.audio.transform.spectrogram import Stft class TestStft(FeatTest): diff --git a/docs/tutorial/asr/tutorial_transformer.ipynb b/docs/tutorial/asr/tutorial_transformer.ipynb index dc3030061..77aed4bf8 100644 --- a/docs/tutorial/asr/tutorial_transformer.ipynb +++ b/docs/tutorial/asr/tutorial_transformer.ipynb @@ -236,8 +236,8 @@ "warnings.filterwarnings('ignore')\n", "\n", "from yacs.config import CfgNode\n", - "from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogramKaldi\n", - "from paddlespeech.s2t.transform.cmvn import GlobalCMVN\n", + "from paddlespeech.audio.transform.spectrogram import LogMelSpectrogramKaldi\n", + "from paddlespeech.audio.transform.cmvn import GlobalCMVN\n", "from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n", "from paddlespeech.s2t.models.u2 import U2Model\n", "\n", diff --git a/utils/apply-cmvn.py b/utils/apply-cmvn.py index cf91bdfcd..fa69ff8e0 100755 --- a/utils/apply-cmvn.py +++ b/utils/apply-cmvn.py @@ -6,7 +6,7 @@ import kaldiio import numpy from distutils.util import strtobool -from paddlespeech.s2t.transform.cmvn import CMVN +from paddlespeech.audio.transform.cmvn import CMVN from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style diff --git a/utils/compute-cmvn-stats.py b/utils/compute-cmvn-stats.py index 276bcd36e..763347ce8 100755 --- a/utils/compute-cmvn-stats.py +++ b/utils/compute-cmvn-stats.py @@ -5,7 +5,7 @@ import logging import kaldiio import numpy as np -from paddlespeech.s2t.transform.transformation import Transformation +from paddlespeech.audio.transform.transformation import Transformation from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style diff --git a/utils/copy-feats.py b/utils/copy-feats.py index dc7a70b45..89ea30f97 100755 --- a/utils/copy-feats.py +++ b/utils/copy-feats.py @@ -4,7 +4,7 @@ import logging from distutils.util import strtobool -from paddlespeech.s2t.transform.transformation import Transformation +from paddlespeech.audio.transform.transformation import Transformation from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style diff --git a/utils/feat-to-shape.py b/utils/feat-to-shape.py index bbc9242f4..e5e014ded 100755 --- a/utils/feat-to-shape.py +++ b/utils/feat-to-shape.py @@ -3,7 +3,7 @@ import argparse import logging import sys -from paddlespeech.s2t.transform.transformation import Transformation +from paddlespeech.audio.transform.transformation import Transformation from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style From 09e5d8a4ac03f29c2ce6511e1a3c39136cd3e29b Mon Sep 17 00:00:00 2001 From: Mattheliu Date: Wed, 5 Jun 2024 10:41:32 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E3=80=90Fix=20Speech=20Issue=20No.8?= =?UTF-8?q?=E3=80=91issue=203652=20merge=5Fyi=20function=20has=20a=20bug?= =?UTF-8?q?=20(#3786)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 【Fix Speech Issue No.8】issue 3652 merge_yi function has a bug * 【Fix Speech Issue No.8】issue 3652 merge_yi function has a bug --- paddlespeech/t2s/frontend/tone_sandhi.py | 25 ++++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/paddlespeech/t2s/frontend/tone_sandhi.py b/paddlespeech/t2s/frontend/tone_sandhi.py index 690f69aa2..3558064cd 100644 --- a/paddlespeech/t2s/frontend/tone_sandhi.py +++ b/paddlespeech/t2s/frontend/tone_sandhi.py @@ -237,30 +237,25 @@ class ToneSandhi(): # output seg: [['听一听', 'v']] def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: new_seg = [] + skip_next = False # function 1 for i, (word, pos) in enumerate(seg): - if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][ - 0] == seg[i + 1][0] and seg[i - 1][1] == "v": - if i - 1 < len(new_seg): - new_seg[i - - 1][0] = new_seg[i - 1][0] + "一" + new_seg[i - 1][0] - else: - new_seg.append([word, pos]) - new_seg.append([seg[i + 1][0], pos]) + if skip_next: + skip_next = False + continue + if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v": + new_seg[-1] = (new_seg[-1][0] + "一" + seg[i + 1][0], new_seg[-1][1]) + skip_next = True else: - if i - 2 >= 0 and seg[i - 1][0] == "一" and seg[i - 2][ - 0] == word and pos == "v": - continue - else: - new_seg.append([word, pos]) + new_seg.append((word, pos)) seg = new_seg new_seg = [] # function 2 for i, (word, pos) in enumerate(seg): if new_seg and new_seg[-1][0] == "一": - new_seg[-1][0] = new_seg[-1][0] + word + new_seg[-1] = (new_seg[-1][0] + word, new_seg[-1][1]) else: - new_seg.append([word, pos]) + new_seg.append((word, pos)) return new_seg # the first and the second words are all_tone_three From 05660a62cb2f56c1af0773be06a75d8dbc18df20 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Wed, 5 Jun 2024 14:28:14 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E3=80=90test=E3=80=91add=20cli=20test=20re?= =?UTF-8?q?adme=20(#3784)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add cli test readme * fix code style --- tests/unit/cli/test_cli.sh | 2 ++ tests/unit/doc/test_cli.md | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 tests/unit/doc/test_cli.md diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index a7f7d11e4..3bc2eae2f 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -110,5 +110,7 @@ paddlespeech whisper --task transcribe --input ./zh.wav # whisper recognize text and translate to English paddlespeech whisper --task translate --input ./zh.wav +# to change model English-Only model +paddlespeech whisper --lang en --size base --task transcribe --input ./en.wav echo -e "\033[32mTest success !!!\033[0m" diff --git a/tests/unit/doc/test_cli.md b/tests/unit/doc/test_cli.md new file mode 100644 index 000000000..34a0c016a --- /dev/null +++ b/tests/unit/doc/test_cli.md @@ -0,0 +1,29 @@ +# test CLI 测试文档 + + 该文档为 CLI 测试说明,该测试目前覆盖大部分 paddlespeech 中的 CLI 推理。该 CI 建立后用于快速验证修复是否正确。 + + # 测试流程 + ## 1. 环境安装 + + CI 重建时在已有通过版本 paddlepaddle-gpu==2.5.1, paddlepseech==develop 下运行。 + + CI 重建后在 paddlepaddle-gpu==develop, paddlepseech==develop 下运行。 + + ### 其他相关依赖 + + gcc >= 4.8.5, + python >= 3.8 + + ## 2. 功能测试 + + 在 repo 的 tests/unit/cli 中运行: + + ```shell + + source path.sh + bash test_cli.sh + + ``` +## 3. 预期结果 + + 输出 "Test success",且运行过程中无报错或 Error 即为成功。 From 72ce8861779cc7fef9eb3277217878fd65375c58 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Thu, 6 Jun 2024 15:26:16 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E3=80=90test=E3=80=91fix=20test=20cli=20bu?= =?UTF-8?q?g=20(#3793)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add cli test readme * fix code style * fix bug --- tests/unit/cli/test_cli.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 3bc2eae2f..3903e6597 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -10,11 +10,12 @@ paddlespeech cls --input ./cat.wav --topk 10 paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast # Speech SSL +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav paddlespeech ssl --task asr --lang en --input ./en.wav paddlespeech ssl --task vector --lang en --input ./en.wav # Speech_recognition -wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav +wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav paddlespeech asr --input ./zh.wav paddlespeech asr --model conformer_aishell --input ./zh.wav paddlespeech asr --model conformer_online_aishell --input ./zh.wav