diff --git a/paddlespeech/vector/io/augment.py b/paddlespeech/vector/io/augment.py
index d6bbc8a9..af7aeb22 100644
--- a/paddlespeech/vector/io/augment.py
+++ b/paddlespeech/vector/io/augment.py
@@ -178,7 +178,8 @@ class DropChunk(nn.Layer):
             # Update waveform
             if not self.noise_factor:
                 for j in range(drop_times[i]):
-                    dropped_waveform[i, start[j]:end[j]] = 0.0
+                    if start[j] < end[j]:
+                        dropped_waveform[i, start[j]:end[j]] = 0.0
             else:
                 # Uniform distribution of -2 to +2 * avg amplitude should
                 # preserve the average for normalization
diff --git a/tests/unit/vector/conftest.py b/tests/unit/vector/conftest.py
new file mode 100644
index 00000000..7cac519b
--- /dev/null
+++ b/tests/unit/vector/conftest.py
@@ -0,0 +1,16 @@
+def pytest_addoption(parser):
+    """Register the ``--device`` command-line option (default: ``cpu``)."""
+    parser.addoption(
+        "--device",
+        action="store",
+        default="cpu",
+        help="Device passed to paddle.device.set_device by the tests.")
+
+
+def pytest_generate_tests(metafunc):
+    """Parametrize ``device`` with the ``--device`` option value."""
+    # This is called for every test. Only get/set command line arguments
+    # if the argument is specified in the list of test "fixturenames".
+    option_value = metafunc.config.option.device
+    if "device" in metafunc.fixturenames and option_value is not None:
+        metafunc.parametrize("device", [option_value])
diff --git a/tests/unit/vector/test_augment.py b/tests/unit/vector/test_augment.py
new file mode 100644
index 00000000..21d75bb3
--- /dev/null
+++ b/tests/unit/vector/test_augment.py
@@ -0,0 +1,141 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.io import BatchSampler
+from paddle.io import DataLoader
+from paddle.io import Dataset
+
+
+def test_add_noise(tmpdir, device):
+    """AddNoise with ``mix_prob=0.0`` must return the input unchanged."""
+    paddle.device.set_device(device)
+    from paddlespeech.vector.io.augment import AddNoise
+
+    test_waveform = paddle.sin(
+        paddle.arange(16000.0, dtype="float32")).unsqueeze(0)
+    wav_lens = paddle.ones([1], dtype="float32")
+
+    # Edge cases
+    no_noise = AddNoise(mix_prob=0.0)
+    assert no_noise(test_waveform, wav_lens).allclose(test_waveform)
+
+
+def test_speed_perturb(device):
+    """Check SpeedPerturb no-op settings and the ``speeds=[50]`` output."""
+    paddle.device.set_device(device)
+    from paddlespeech.vector.io.augment import SpeedPerturb
+
+    test_waveform = paddle.sin(
+        paddle.arange(16000.0, dtype="float32")).unsqueeze(0)
+
+    # Edge cases
+    no_perturb = SpeedPerturb(16000, perturb_prob=0.0)
+    assert no_perturb(test_waveform).allclose(test_waveform)
+    no_perturb = SpeedPerturb(16000, speeds=[100])
+    assert no_perturb(test_waveform).allclose(test_waveform)
+
+    # Half speed
+    half_speed = SpeedPerturb(16000, speeds=[50])
+    assert half_speed(test_waveform).allclose(test_waveform[:, ::2], atol=3e-1)
+
+
+def test_babble(device):
+    """Check AddBabble no-op settings and the one-babbler average."""
+    paddle.device.set_device(device)
+    from paddlespeech.vector.io.augment import AddBabble
+
+    test_waveform = paddle.stack(
+        (paddle.sin(paddle.arange(16000.0, dtype="float32")),
+         paddle.cos(paddle.arange(16000.0, dtype="float32")), ))
+    lengths = paddle.ones([2])
+
+    # Edge cases
+    no_babble = AddBabble(mix_prob=0.0)
+    assert no_babble(test_waveform, lengths).allclose(test_waveform)
+    no_babble = AddBabble(speaker_count=1, snr_low=1000, snr_high=1000)
+    assert no_babble(test_waveform, lengths).allclose(test_waveform)
+
+    # One babbler just averages the two speakers
+    babble = AddBabble(speaker_count=1).to(device)
+    expected = (test_waveform + test_waveform.roll(1, 0)) / 2
+    assert babble(test_waveform, lengths).allclose(expected, atol=1e-4)
+
+
+def test_drop_freq(device):
+    """Check DropFreq no-op settings and drops off/on the signal frequency."""
+    paddle.device.set_device(device)
+    from paddlespeech.vector.io.augment import DropFreq
+
+    test_waveform = paddle.sin(
+        paddle.arange(16000.0, dtype="float32")).unsqueeze(0)
+
+    # Edge cases
+    no_drop = DropFreq(drop_prob=0.0)
+    assert no_drop(test_waveform).allclose(test_waveform)
+    no_drop = DropFreq(drop_count_low=0, drop_count_high=0)
+    assert no_drop(test_waveform).allclose(test_waveform)
+
+    # Check case where frequency range *does not* include signal frequency
+    drop_diff_freq = DropFreq(drop_freq_low=0.5, drop_freq_high=0.9)
+    assert drop_diff_freq(test_waveform).allclose(test_waveform, atol=1e-1)
+
+    # Check case where frequency range *does* include signal frequency
+    drop_same_freq = DropFreq(drop_freq_low=0.28, drop_freq_high=0.28)
+    assert drop_same_freq(test_waveform).allclose(
+        paddle.zeros([1, 16000]), atol=4e-1)
+
+
+def test_drop_chunk(device):
+    """Check DropChunk no-op settings, a fixed drop, and noise amplitude."""
+    paddle.device.set_device(device)
+    from paddlespeech.vector.io.augment import DropChunk
+
+    test_waveform = paddle.sin(
+        paddle.arange(16000.0, dtype="float32")).unsqueeze(0)
+    lengths = paddle.ones([1])
+
+    # Edge cases
+    no_drop = DropChunk(drop_prob=0.0)
+    assert no_drop(test_waveform, lengths).allclose(test_waveform)
+    no_drop = DropChunk(drop_length_low=0, drop_length_high=0)
+    assert no_drop(test_waveform, lengths).allclose(test_waveform)
+    no_drop = DropChunk(drop_count_low=0, drop_count_high=0)
+    assert no_drop(test_waveform, lengths).allclose(test_waveform)
+    no_drop = DropChunk(drop_start=0, drop_end=0)
+    assert no_drop(test_waveform, lengths).allclose(test_waveform)
+
+    # Specify all parameters to ensure it is deterministic
+    dropper = DropChunk(
+        drop_length_low=100,
+        drop_length_high=100,
+        drop_count_low=1,
+        drop_count_high=1,
+        drop_start=100,
+        drop_end=200,
+        noise_factor=0.0, )
+    expected_waveform = test_waveform.clone()
+    expected_waveform[:, 100:200] = 0.0
+
+    assert dropper(test_waveform, lengths).allclose(expected_waveform)
+
+    # Make sure amplitude is similar before and after
+    dropper = DropChunk(noise_factor=1.0)
+    drop_amplitude = dropper(test_waveform, lengths).abs().mean()
+    orig_amplitude = test_waveform.abs().mean()
+    assert drop_amplitude.allclose(orig_amplitude, atol=1e-2)