fix specaug

pull/768/head
Hui Zhang 3 years ago
parent 4725bace4e
commit c09b0e8940

@ -1,12 +1,12 @@
[中文版](README_cn.md) [中文版](README_cn.md)
# PaddlePaddle ASR toolkit # PaddlePaddle Speech to Any toolkit
![License](https://img.shields.io/badge/license-Apache%202-red.svg) ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg) ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux-yellow.svg) ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
*PaddleASR* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient, smaller and scalable implementation, including training, inference & testing module, and deployment. *DeepSpeech* is an open-source implementation of end-to-end Automatic Speech Recognition engine, with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient, smaller and scalable implementation, including training, inference & testing module, and deployment.
## Features ## Features
@ -15,6 +15,7 @@
## Setup ## Setup
* Ubuntu 16.04
* python>=3.7 * python>=3.7
* paddlepaddle>=2.1.2 * paddlepaddle>=2.1.2

@ -1,12 +1,12 @@
[English](README.md) [English](README.md)
# PaddlePaddle ASR toolkit # PaddlePaddle Speech to Any toolkit
![License](https://img.shields.io/badge/license-Apache%202-red.svg) ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg) ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux-yellow.svg) ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
*PaddleASR*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别ASR引擎的开源项目, *DeepSpeech*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别引擎的开源项目,
我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效、小型化和可扩展的工具,包括训练,推理,以及部署。 我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效、小型化和可扩展的工具,包括训练,推理,以及部署。
## 特性 ## 特性
@ -16,6 +16,7 @@
## 安装 ## 安装
* Ubuntu 16.04
* python>=3.7 * python>=3.7
* paddlepaddle>=2.1.2 * paddlepaddle>=2.1.2

@ -30,7 +30,7 @@ class AugmentorBase():
@abstractmethod @abstractmethod
def __call__(self, xs): def __call__(self, xs):
raise NotImplementedError raise NotImplementedError("AugmentorBase: Not impl __call__")
@abstractmethod @abstractmethod
def transform_audio(self, audio_segment): def transform_audio(self, audio_segment):
@ -44,7 +44,7 @@ class AugmentorBase():
:param audio_segment: Audio segment to add effects to. :param audio_segment: Audio segment to add effects to.
:type audio_segment: AudioSegment|SpeechSegment :type audio_segment: AudioSegment|SpeechSegment
""" """
raise NotImplementedError raise NotImplementedError("AugmentorBase: Not impl transform_audio")
@abstractmethod @abstractmethod
def transform_feature(self, spec_segment): def transform_feature(self, spec_segment):
@ -56,4 +56,4 @@ class AugmentorBase():
Args: Args:
spec_segment (Spectrogram): Spectrogram segment to add effects to. spec_segment (Spectrogram): Spectrogram segment to add effects to.
""" """
raise NotImplementedError raise NotImplementedError("AugmentorBase: Not impl transform_feature")

@ -64,7 +64,7 @@ class SpecAugmentor(AugmentorBase):
self.n_freq_masks = n_freq_masks self.n_freq_masks = n_freq_masks
self.n_time_masks = n_time_masks self.n_time_masks = n_time_masks
self.p = p self.p = p
#logger.info(f"specaug: F-{F}, T-{T}, F-n-{n_freq_masks}, T-n-{n_time_masks}")
# adaptive SpecAugment # adaptive SpecAugment
self.adaptive_number_ratio = adaptive_number_ratio self.adaptive_number_ratio = adaptive_number_ratio
@ -120,6 +120,9 @@ class SpecAugmentor(AugmentorBase):
@property @property
def time_mask(self): def time_mask(self):
return self._time_mask return self._time_mask
def __repr__(self):
    """Return a short summary of the SpecAugment masking configuration.

    Returns:
        str: Text of the form
            ``specaug: F-<F>, T-<T>, F-n-<n_freq_masks>, T-n-<n_time_masks>``.
    """
    # The values must be read from the instance: bare names such as ``F`` or
    # ``n_freq_masks`` are not defined in this method's scope and would raise
    # NameError as soon as repr() is called.
    # NOTE(review): assumes __init__ stores its F/T arguments as self.F and
    # self.T, the same way it stores n_freq_masks/n_time_masks -- confirm.
    return (f"specaug: F-{self.F}, T-{self.T}, "
            f"F-n-{self.n_freq_masks}, T-n-{self.n_time_masks}")
def time_warp(xs, W=40): def time_warp(xs, W=40):
raise NotImplementedError raise NotImplementedError
@ -160,7 +163,7 @@ class SpecAugmentor(AugmentorBase):
def __call__(self, x, train=True): def __call__(self, x, train=True):
if not train: if not train:
return return
self.transform_audio(x) self.transform_feature(x)
def transform_feature(self, xs: np.ndarray): def transform_feature(self, xs: np.ndarray):
""" """

@ -1,21 +1,4 @@
[ [
{
"type": "shift",
"params": {
"min_shift_ms": -5,
"max_shift_ms": 5
},
"prob": 1.0
},
{
"type": "speed",
"params": {
"min_speed_rate": 0.9,
"max_speed_rate": 1.1,
"num_rates": 3
},
"prob": 0.0
},
{ {
"type": "specaug", "type": "specaug",
"params": { "params": {

Loading…
Cancel
Save