* Fix

* Fix
pull/3985/head
co63oc 7 months ago committed by GitHub
parent 4e5181c949
commit f3a5df2049
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -183,7 +183,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
Args: Args:
y (np.ndarray): Input waveform array in 1D or 2D. y (np.ndarray): Input waveform array in 1D or 2D.
sr (int): Sample rate. sr (int): Sample rate.
file (os.PathLike): Path of auido file to save. file (os.PathLike): Path of audio file to save.
""" """
if not file.endswith('.wav'): if not file.endswith('.wav'):
raise ParameterError( raise ParameterError(
@ -216,10 +216,10 @@ def soundfile_load(
duration: Optional[int]=None, duration: Optional[int]=None,
dtype: str='float32', dtype: str='float32',
resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]: resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]:
"""Load audio file from disk. This function loads audio from disk using using audio beackend. """Load audio file from disk. This function loads audio from disk using the audio backend.
Args: Args:
file (os.PathLike): Path of auido file to load. file (os.PathLike): Path of audio file to load.
sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None. sr (Optional[int], optional): Sample rate of loaded waveform. Defaults to None.
mono (bool, optional): Return waveform with mono channel. Defaults to True. mono (bool, optional): Return waveform with mono channel. Defaults to True.
merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'. merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'.
@ -250,14 +250,14 @@ def soundfile_load(
if normal: if normal:
y = normalize(y, norm_type, norm_mul_factor) y = normalize(y, norm_type, norm_mul_factor)
elif dtype in ['int8', 'int16']: elif dtype in ['int8', 'int16']:
# still need to do normalization, before depth convertion # still need to do normalization, before depth conversion
y = normalize(y, 'linear', 1.0) y = normalize(y, 'linear', 1.0)
y = depth_convert(y, dtype) y = depth_convert(y, dtype)
return y, r return y, r
#the code below token form: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py with modificaion. #The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications.
def _get_subtype_for_wav(dtype: paddle.dtype, def _get_subtype_for_wav(dtype: paddle.dtype,
@ -382,7 +382,7 @@ def save(
channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`, channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
otherwise `[time, channel]`. otherwise `[time, channel]`.
compression (float or None, optional): Not used. compression (float or None, optional): Not used.
It is here only for interface compatibility reson with "sox_io" backend. It is here only for interface compatibility reason with "sox_io" backend.
format (str or None, optional): Override the audio format. format (str or None, optional): Override the audio format.
When ``filepath`` argument is path-like object, audio format is When ``filepath`` argument is path-like object, audio format is
inferred from file extension. If the file extension is missing or inferred from file extension. If the file extension is missing or
@ -394,8 +394,8 @@ def save(
Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``, Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
``"flac"`` and ``"sph"``. ``"flac"`` and ``"sph"``.
encoding (str or None, optional): Changes the encoding for supported formats. encoding (str or None, optional): Changes the encoding for supported formats.
This argument is effective only for supported formats, sush as This argument is effective only for supported formats, such as
``"wav"``, ``""flac"`` and ``"sph"``. Valid values are; ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:
- ``"PCM_S"`` (signed integer Linear PCM) - ``"PCM_S"`` (signed integer Linear PCM)
- ``"PCM_U"`` (unsigned integer Linear PCM) - ``"PCM_U"`` (unsigned integer Linear PCM)

@ -626,7 +626,7 @@ def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
def _randint(high: int) -> int: def _randint(high: int) -> int:
"""Generate one random integer in range [0 high) """Generate one random integer in range [0 high)
This is a helper function for random data augmentaiton This is a helper function for random data augmentation
""" """
return int(np.random.randint(0, high=high)) return int(np.random.randint(0, high=high))
@ -659,7 +659,7 @@ def depth_augment(y: np.ndarray,
def adaptive_spect_augment(spect: np.ndarray, def adaptive_spect_augment(spect: np.ndarray,
tempo_axis: int=0, tempo_axis: int=0,
level: float=0.1) -> np.ndarray: level: float=0.1) -> np.ndarray:
"""Do adpative spectrogram augmentation. The level of the augmentation is gowern by the paramter level, ranging from 0 to 1, with 0 represents no augmentation. """Do adaptive spectrogram augmentation. The level of the augmentation is governed by the parameter level, ranging from 0 to 1, with 0 representing no augmentation.
Args: Args:
spect (np.ndarray): Input spectrogram. spect (np.ndarray): Input spectrogram.
@ -711,9 +711,9 @@ def spect_augment(spect: np.ndarray,
spect (np.ndarray): Input spectrogram. spect (np.ndarray): Input spectrogram.
tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0. tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
max_time_mask (int, optional): Maximum number of time masking. Defaults to 3. max_time_mask (int, optional): Maximum number of time masking. Defaults to 3.
max_freq_mask (int, optional): Maximum number of frenquence masking. Defaults to 3. max_freq_mask (int, optional): Maximum number of frequency masking. Defaults to 3.
max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30. max_time_mask_width (int, optional): Maximum width of time masking. Defaults to 30.
max_freq_mask_width (int, optional): Maximum width of frenquence masking. Defaults to 20. max_freq_mask_width (int, optional): Maximum width of frequency masking. Defaults to 20.
Returns: Returns:
np.ndarray: The augmented spectrogram. np.ndarray: The augmented spectrogram.

@ -449,7 +449,7 @@ unsigned get_precision(const std::string filetype, py::dtype dtype) {
return SOX_UNSPEC; return SOX_UNSPEC;
if (filetype == "wav" || filetype == "amb") { if (filetype == "wav" || filetype == "amb") {
switch (dtype.num()) { switch (dtype.num()) {
case 1: // byte in numpy dype num case 1: // byte in numpy dtype num
return 8; return 8;
case 3: // short, in numpy dtype num case 3: // short, in numpy dtype num
return 16; return 16;

@ -58,7 +58,7 @@ class MockedSaveTest(unittest.TestCase):
encoding=encoding, encoding=encoding,
bits_per_sample=bits_per_sample, ) bits_per_sample=bits_per_sample, )
# on +Py3.8 call_args.kwargs is more descreptive # on +Py3.8 call_args.kwargs is more descriptive
args = mocked_write.call_args[1] args = mocked_write.call_args[1]
assert args["file"] == filepath assert args["file"] == filepath
assert args["samplerate"] == sample_rate assert args["samplerate"] == sample_rate

@ -58,7 +58,7 @@ def download(url, md5sum, target_dir, filename=None):
if not (os.path.exists(filepath) and md5file(filepath) == md5sum): if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
print("Downloading %s ..." % url) print("Downloading %s ..." % url)
wget.download(url, target_dir) wget.download(url, target_dir)
print("\nMD5 Chesksum %s ..." % filepath) print("\nMD5 Checksum %s ..." % filepath)
if not md5file(filepath) == md5sum: if not md5file(filepath) == md5sum:
raise RuntimeError("MD5 checksum failed.") raise RuntimeError("MD5 checksum failed.")
else: else:

@ -108,7 +108,7 @@ def create_manifest(data_dir, manifest_path):
def prepare_dataset(url, md5sum, target_dir, manifest_path): def prepare_dataset(url, md5sum, target_dir, manifest_path):
"""Download, unpack and create summmary manifest file. """Download, unpack and create summary manifest file.
""" """
if not os.path.exists(os.path.join(target_dir, "LibriSpeech")): if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
# download # download

Loading…
Cancel
Save