remove debug info and format code

pull/1012/head
Hui Zhang 3 years ago
parent 8b0e344c69
commit 9a71c091c5

@ -10,16 +10,16 @@ process:
cmvn_path: data/mean_std.json cmvn_path: data/mean_std.json
# these three processes are a.k.a. SpecAugment # these three processes are a.k.a. SpecAugment
- type: time_warp - type: time_warp
max_time_warp: 0 max_time_warp: 5
inplace: true inplace: true
mode: PIL mode: PIL
- type: freq_mask - type: freq_mask
F: 10 F: 30
n_mask: 2 n_mask: 2
inplace: true inplace: true
replace_with_zero: true replace_with_zero: false
- type: time_mask - type: time_mask
T: 50 T: 40
n_mask: 2 n_mask: 2
inplace: true inplace: true
replace_with_zero: true replace_with_zero: false

@ -24,9 +24,9 @@ import soundfile
import soxbindings as sox import soxbindings as sox
from scipy import signal from scipy import signal
from .utility import subfile_from_tar
from .utility import convert_samples_to_float32
from .utility import convert_samples_from_float32 from .utility import convert_samples_from_float32
from .utility import convert_samples_to_float32
from .utility import subfile_from_tar
class AudioSegment(): class AudioSegment():

@ -390,4 +390,3 @@ def convert_samples_from_float32(samples, dtype):
else: else:
raise TypeError("Unsupported sample type: %s." % samples.dtype) raise TypeError("Unsupported sample type: %s." % samples.dtype)
return output_samples.astype(dtype) return output_samples.astype(dtype)

@ -34,6 +34,9 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
:returns numpy.ndarray: time warped spectrogram (time, freq) :returns numpy.ndarray: time warped spectrogram (time, freq)
""" """
window = max_time_warp window = max_time_warp
if window == 0:
return x
if mode == "PIL": if mode == "PIL":
t = x.shape[0] t = x.shape[0]
if t - window <= window: if t - window <= window:

@ -307,9 +307,6 @@ class IStft():
center=self.center, ) center=self.center, )
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()
class LogMelSpectrogramKaldi(): class LogMelSpectrogramKaldi():
def __init__( def __init__(
self, self,
@ -347,22 +344,22 @@ class LogMelSpectrogramKaldi():
self.dither = dither self.dither = dither
def __repr__(self): def __repr__(self):
return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, " return (
"n_shift={n_shift}, win_length={win_length}, window={window}, " "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
"fmin={fmin}, fmax={fmax}, eps={eps}, preemph={preemph}, window={window}, dither={dither}))".format( "n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, "
name=self.__class__.__name__, "fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither}))".format(
fs=self.fs, name=self.__class__.__name__,
n_mels=self.n_mels, fs=self.fs,
n_fft=self.n_fft, n_mels=self.n_mels,
n_shift=self.n_shift, n_fft=self.n_fft,
win_length=self.win_length, n_shift=self.n_shift,
window=self.window, preemph=self.preemph,
fmin=self.fmin, win_length=self.win_length,
fmax=self.fmax, window=self.window,
eps=self.eps, fmin=self.fmin,
preemph=self.preemph, fmax=self.fmax,
window=self.window, eps=self.eps,
dither=self.dither)) dither=self.dither, ))
def __call__(self, x): def __call__(self, x):
""" """
@ -379,12 +376,10 @@ class LogMelSpectrogramKaldi():
if x.ndim != 1: if x.ndim != 1:
raise ValueError("Not support x: [Time, Channel]") raise ValueError("Not support x: [Time, Channel]")
logger.info(f"in {x}")
if x.dtype in np.sctypes['float']: if x.dtype in np.sctypes['float']:
# PCM32 -> PCM16 # PCM32 -> PCM16
bits = np.iinfo(np.int16).bits bits = np.iinfo(np.int16).bits
x = x * 2**(bits - 1) x = x * 2**(bits - 1)
logger.info(f"b {x}")
# logfbank needs PCM16 input # logfbank needs PCM16 input
y = logfbank( y = logfbank(
@ -400,7 +395,4 @@ class LogMelSpectrogramKaldi():
remove_dc_offset=self.remove_dc_offset, remove_dc_offset=self.remove_dc_offset,
preemph=self.preemph, preemph=self.preemph,
wintype=self.window) wintype=self.window)
logger.info(f"a {y}")
return y return y

Loading…
Cancel
Save