Merge branch 'PaddlePaddle:develop' into fix_view

pull/3794/head
zxcd 1 year ago committed by GitHub
commit 7c73010c29
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -37,7 +37,7 @@ class FeatTest(unittest.TestCase):
self.waveform, self.sr = load(os.path.abspath(os.path.basename(url))) self.waveform, self.sr = load(os.path.abspath(os.path.basename(url)))
self.waveform = self.waveform.astype( self.waveform = self.waveform.astype(
np.float32 np.float32
) # paddlespeech.s2t.transform.spectrogram only supports float32 ) # paddlespeech.audio.transform.spectrogram only supports float32
dim = len(self.waveform.shape) dim = len(self.waveform.shape)
assert dim in [1, 2] assert dim in [1, 2]

@ -18,8 +18,8 @@ import paddle
from paddleaudio.functional.window import get_window from paddleaudio.functional.window import get_window
from .base import FeatTest from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import IStft from paddlespeech.audio.transform.spectrogram import IStft
from paddlespeech.s2t.transform.spectrogram import Stft from paddlespeech.audio.transform.spectrogram import Stft
class TestIstft(FeatTest): class TestIstft(FeatTest):

@ -18,7 +18,7 @@ import paddle
import paddleaudio import paddleaudio
from .base import FeatTest from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram
class TestLogMelSpectrogram(FeatTest): class TestLogMelSpectrogram(FeatTest):

@ -18,7 +18,7 @@ import paddle
import paddleaudio import paddleaudio
from .base import FeatTest from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import Spectrogram from paddlespeech.audio.transform.spectrogram import Spectrogram
class TestSpectrogram(FeatTest): class TestSpectrogram(FeatTest):

@ -18,7 +18,7 @@ import paddle
from paddleaudio.functional.window import get_window from paddleaudio.functional.window import get_window
from .base import FeatTest from .base import FeatTest
from paddlespeech.s2t.transform.spectrogram import Stft from paddlespeech.audio.transform.spectrogram import Stft
class TestStft(FeatTest): class TestStft(FeatTest):

@ -236,8 +236,8 @@
"warnings.filterwarnings('ignore')\n", "warnings.filterwarnings('ignore')\n",
"\n", "\n",
"from yacs.config import CfgNode\n", "from yacs.config import CfgNode\n",
"from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogramKaldi\n", "from paddlespeech.audio.transform.spectrogram import LogMelSpectrogramKaldi\n",
"from paddlespeech.s2t.transform.cmvn import GlobalCMVN\n", "from paddlespeech.audio.transform.cmvn import GlobalCMVN\n",
"from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n", "from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n",
"from paddlespeech.s2t.models.u2 import U2Model\n", "from paddlespeech.s2t.models.u2 import U2Model\n",
"\n", "\n",

@ -237,30 +237,25 @@ class ToneSandhi():
# output seg: [['听一听', 'v']] # output seg: [['听一听', 'v']]
def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]: def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
new_seg = [] new_seg = []
skip_next = False
# function 1 # function 1
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):
if i - 1 >= 0 and word == "" and i + 1 < len(seg) and seg[i - 1][ if skip_next:
0] == seg[i + 1][0] and seg[i - 1][1] == "v": skip_next = False
if i - 1 < len(new_seg):
new_seg[i -
1][0] = new_seg[i - 1][0] + "" + new_seg[i - 1][0]
else:
new_seg.append([word, pos])
new_seg.append([seg[i + 1][0], pos])
else:
if i - 2 >= 0 and seg[i - 1][0] == "" and seg[i - 2][
0] == word and pos == "v":
continue continue
if i - 1 >= 0 and word == "" and i + 1 < len(seg) and seg[i - 1][0] == seg[i + 1][0] and seg[i - 1][1] == "v":
new_seg[-1] = (new_seg[-1][0] + "" + seg[i + 1][0], new_seg[-1][1])
skip_next = True
else: else:
new_seg.append([word, pos]) new_seg.append((word, pos))
seg = new_seg seg = new_seg
new_seg = [] new_seg = []
# function 2 # function 2
for i, (word, pos) in enumerate(seg): for i, (word, pos) in enumerate(seg):
if new_seg and new_seg[-1][0] == "": if new_seg and new_seg[-1][0] == "":
new_seg[-1][0] = new_seg[-1][0] + word new_seg[-1] = (new_seg[-1][0] + word, new_seg[-1][1])
else: else:
new_seg.append([word, pos]) new_seg.append((word, pos))
return new_seg return new_seg
# the first and the second words are all_tone_three # the first and the second words are all_tone_three

@ -10,11 +10,12 @@ paddlespeech cls --input ./cat.wav --topk 10
paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast
# Speech SSL # Speech SSL
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
paddlespeech ssl --task asr --lang en --input ./en.wav paddlespeech ssl --task asr --lang en --input ./en.wav
paddlespeech ssl --task vector --lang en --input ./en.wav paddlespeech ssl --task vector --lang en --input ./en.wav
# Speech_recognition # Speech_recognition
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/ch_zh_mix.wav
paddlespeech asr --input ./zh.wav paddlespeech asr --input ./zh.wav
paddlespeech asr --model conformer_aishell --input ./zh.wav paddlespeech asr --model conformer_aishell --input ./zh.wav
paddlespeech asr --model conformer_online_aishell --input ./zh.wav paddlespeech asr --model conformer_online_aishell --input ./zh.wav
@ -110,5 +111,7 @@ paddlespeech whisper --task transcribe --input ./zh.wav
# whisper recognize text and translate to English # whisper recognize text and translate to English
paddlespeech whisper --task translate --input ./zh.wav paddlespeech whisper --task translate --input ./zh.wav
# switch to the English-only model
paddlespeech whisper --lang en --size base --task transcribe --input ./en.wav
echo -e "\033[32mTest success !!!\033[0m" echo -e "\033[32mTest success !!!\033[0m"

@ -0,0 +1,29 @@
# test CLI 测试文档
该文档为 CLI 测试说明,该测试目前覆盖大部分 paddlespeech 中的 CLI 推理。该 CI 建立后用于快速验证修复是否正确。
# 测试流程
## 1. 环境安装
CI 重建时在已有通过版本 paddlepaddle-gpu==2.5.1, paddlespeech==develop 下运行。
CI 重建后在 paddlepaddle-gpu==develop, paddlespeech==develop 下运行。
### 其他相关依赖
gcc >= 4.8.5,
python >= 3.8
## 2. 功能测试
在 repo 的 tests/unit/cli 中运行:
```shell
source path.sh
bash test_cli.sh
```
## 3. 预期结果
输出 "Test success",且运行过程中无报错或 Error 即为成功。

@ -6,7 +6,7 @@ import kaldiio
import numpy import numpy
from distutils.util import strtobool from distutils.util import strtobool
from paddlespeech.s2t.transform.cmvn import CMVN from paddlespeech.audio.transform.cmvn import CMVN
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style

@ -5,7 +5,7 @@ import logging
import kaldiio import kaldiio
import numpy as np import numpy as np
from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style

@ -4,7 +4,7 @@ import logging
from distutils.util import strtobool from distutils.util import strtobool
from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style

@ -3,7 +3,7 @@ import argparse
import logging import logging
import sys import sys
from paddlespeech.s2t.transform.transformation import Transformation from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.utils.cli_readers import file_reader_helper from paddlespeech.s2t.utils.cli_readers import file_reader_helper
from paddlespeech.s2t.utils.cli_utils import get_commandline_args from paddlespeech.s2t.utils.cli_utils import get_commandline_args
from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style from paddlespeech.s2t.utils.cli_utils import is_scipy_wav_style

Loading…
Cancel
Save