remove debug info and format code

pull/1012/head
Hui Zhang 3 years ago
parent 8b0e344c69
commit 9a71c091c5

@ -10,16 +10,16 @@ process:
cmvn_path: data/mean_std.json
# these three processes are a.k.a. SpecAugment
- type: time_warp
max_time_warp: 0
max_time_warp: 5
inplace: true
mode: PIL
- type: freq_mask
F: 10
F: 30
n_mask: 2
inplace: true
replace_with_zero: true
replace_with_zero: false
- type: time_mask
T: 50
T: 40
n_mask: 2
inplace: true
replace_with_zero: true
replace_with_zero: false

@ -24,9 +24,9 @@ import soundfile
import soxbindings as sox
from scipy import signal
from .utility import subfile_from_tar
from .utility import convert_samples_to_float32
from .utility import convert_samples_from_float32
from .utility import convert_samples_to_float32
from .utility import subfile_from_tar
class AudioSegment():

@ -390,4 +390,3 @@ def convert_samples_from_float32(samples, dtype):
else:
raise TypeError("Unsupported sample type: %s." % samples.dtype)
return output_samples.astype(dtype)

@ -34,6 +34,9 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
:returns numpy.ndarray: time warped spectrogram (time, freq)
"""
window = max_time_warp
if window == 0:
return x
if mode == "PIL":
t = x.shape[0]
if t - window <= window:

@ -307,9 +307,6 @@ class IStft():
center=self.center, )
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()
class LogMelSpectrogramKaldi():
def __init__(
self,
@ -347,22 +344,22 @@ class LogMelSpectrogramKaldi():
self.dither = dither
def __repr__(self):
return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
"n_shift={n_shift}, win_length={win_length}, window={window}, "
"fmin={fmin}, fmax={fmax}, eps={eps}, preemph={preemph}, window={window}, dither={dither}))".format(
return (
"{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
"n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, "
"fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither}))".format(
name=self.__class__.__name__,
fs=self.fs,
n_mels=self.n_mels,
n_fft=self.n_fft,
n_shift=self.n_shift,
preemph=self.preemph,
win_length=self.win_length,
window=self.window,
fmin=self.fmin,
fmax=self.fmax,
eps=self.eps,
preemph=self.preemph,
window=self.window,
dither=self.dither))
dither=self.dither, ))
def __call__(self, x):
"""
@ -379,12 +376,10 @@ class LogMelSpectrogramKaldi():
if x.ndim != 1:
raise ValueError("Not support x: [Time, Channel]")
logger.info(f"in {x}")
if x.dtype in np.sctypes['float']:
# float samples -> PCM16 amplitude scale (multiply by 2**(int16 bits - 1))
bits = np.iinfo(np.int16).bits
x = x * 2**(bits - 1)
logger.info(f"b {x}")
# logfbank needs PCM16 input
y = logfbank(
@ -400,7 +395,4 @@ class LogMelSpectrogramKaldi():
remove_dc_offset=self.remove_dc_offset,
preemph=self.preemph,
wintype=self.window)
logger.info(f"a {y}")
return y

Loading…
Cancel
Save