You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/tests/unit/vector/test_augment.py

139 lines
5.0 KiB

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import Dataset
def test_add_noise(tmpdir, device):
    """Check that ``AddNoise`` returns its input when ``mix_prob`` is 0.0.

    Args:
        tmpdir: Not used by this test; kept so the signature is unchanged.
        device: Device identifier forwarded to ``paddle.device.set_device``.
    """
    paddle.device.set_device(device)
    # Imported only after the device has been selected.
    from paddlespeech.vector.io.augment import AddNoise

    # One-row batch holding the sine of the sample indices 0..15999.
    test_waveform = paddle.sin(
        paddle.arange(16000.0, dtype="float32")).unsqueeze(0)
    # Length tensor of ones, passed as the second argument to the augmenter.
    wav_lens = paddle.ones([1], dtype="float32")

    # Edge case: with mix_prob=0.0 the output must match the input.
    no_noise = AddNoise(mix_prob=0.0)
    assert no_noise(test_waveform, wav_lens).allclose(test_waveform)
def test_speed_perturb(device):
    """Exercise ``SpeedPerturb`` with two no-op setups and a speed of 50.

    Args:
        device: Device identifier forwarded to ``paddle.device.set_device``.
    """
    paddle.device.set_device(device)
    from paddlespeech.vector.io.augment import SpeedPerturb

    # One-row batch holding the sine of the sample indices 0..15999.
    sample_index = paddle.arange(16000.0, dtype="float32")
    sine_batch = paddle.sin(sample_index).unsqueeze(0)

    # No-op setup 1: perturbation probability of zero.
    never_perturb = SpeedPerturb(16000, perturb_prob=0.0)
    untouched = never_perturb(sine_batch)
    assert untouched.allclose(sine_batch)

    # No-op setup 2: the only candidate speed is 100.
    same_speed = SpeedPerturb(16000, speeds=[100])
    untouched = same_speed(sine_batch)
    assert untouched.allclose(sine_batch)

    # Speed 50: the result is compared, with a loose tolerance, against
    # every second sample of the input.
    half_speed = SpeedPerturb(16000, speeds=[50])
    resampled = half_speed(sine_batch)
    every_other_sample = sine_batch[:, ::2]
    assert resampled.allclose(every_other_sample, atol=3e-1)
def test_babble(device):
    """Exercise ``AddBabble`` on a two-row batch (one sine row, one cosine row).

    Args:
        device: Device identifier forwarded to ``paddle.device.set_device``
            and to the augmenter's ``.to(...)`` call in the last check.
    """
    paddle.device.set_device(device)
    from paddlespeech.vector.io.augment import AddBabble

    # Build the two rows separately, then stack them into one batch.
    sine_row = paddle.sin(paddle.arange(16000.0, dtype="float32"))
    cosine_row = paddle.cos(paddle.arange(16000.0, dtype="float32"))
    two_row_batch = paddle.stack((sine_row, cosine_row, ))
    batch_lengths = paddle.ones([2])

    # No-op setup 1: mixing probability of zero.
    never_mix = AddBabble(mix_prob=0.0)
    mixed = never_mix(two_row_batch, batch_lengths)
    assert mixed.allclose(two_row_batch)

    # No-op setup 2: SNR pinned at 1000, so the output is expected to
    # match the input.
    high_snr = AddBabble(speaker_count=1, snr_low=1000, snr_high=1000)
    mixed = high_snr(two_row_batch, batch_lengths)
    assert mixed.allclose(two_row_batch)

    # With a single babbler, the expected result is the mean of each row
    # and the other row (the batch rolled by one along axis 0).
    single_babbler = AddBabble(speaker_count=1).to(device)
    rolled_batch = two_row_batch.roll(1, 0)
    row_average = (two_row_batch + rolled_batch) / 2
    mixed = single_babbler(two_row_batch, batch_lengths)
    assert mixed.allclose(row_average, atol=1e-4)
def test_drop_freq(device):
    """Exercise ``DropFreq`` with no-op setups and two fixed frequency bands.

    Args:
        device: Device identifier forwarded to ``paddle.device.set_device``.
    """
    paddle.device.set_device(device)
    from paddlespeech.vector.io.augment import DropFreq

    # One-row batch holding the sine of the sample indices 0..15999.
    sample_index = paddle.arange(16000.0, dtype="float32")
    sine_batch = paddle.sin(sample_index).unsqueeze(0)

    # No-op setup 1: drop probability of zero.
    never_drop = DropFreq(drop_prob=0.0)
    filtered = never_drop(sine_batch)
    assert filtered.allclose(sine_batch)

    # No-op setup 2: zero frequencies to drop.
    zero_bands = DropFreq(drop_count_low=0, drop_count_high=0)
    filtered = zero_bands(sine_batch)
    assert filtered.allclose(sine_batch)

    # Band 0.5-0.9 is meant to exclude the signal frequency, so the
    # output should stay close to the input.
    off_signal = DropFreq(drop_freq_low=0.5, drop_freq_high=0.9)
    filtered = off_signal(sine_batch)
    assert filtered.allclose(sine_batch, atol=1e-1)

    # Band fixed at 0.28 is meant to cover the signal frequency, so the
    # output should be close to all zeros.
    on_signal = DropFreq(drop_freq_low=0.28, drop_freq_high=0.28)
    filtered = on_signal(sine_batch)
    silence = paddle.zeros([1, 16000])
    assert filtered.allclose(silence, atol=4e-1)
def test_drop_chunk(device):
    """Exercise ``DropChunk``: no-op setups, a fixed drop, and amplitude.

    Args:
        device: Device identifier forwarded to ``paddle.device.set_device``.
    """
    paddle.device.set_device(device)
    from paddlespeech.vector.io.augment import DropChunk

    # One-row batch holding the sine of the sample indices 0..15999.
    sample_index = paddle.arange(16000.0, dtype="float32")
    sine_batch = paddle.sin(sample_index).unsqueeze(0)
    batch_lengths = paddle.ones([1])

    # Each of these configurations must leave the waveform unchanged.
    no_op_settings = (
        {"drop_prob": 0.0},
        {"drop_length_low": 0, "drop_length_high": 0},
        {"drop_count_low": 0, "drop_count_high": 0},
        {"drop_start": 0, "drop_end": 0},
    )
    for settings in no_op_settings:
        identity_dropper = DropChunk(**settings)
        assert identity_dropper(sine_batch, batch_lengths).allclose(sine_batch)

    # Every parameter is pinned so the dropped region is deterministic:
    # one chunk of length 100 in the window 100..200, with no noise.
    fixed_dropper = DropChunk(
        drop_length_low=100,
        drop_length_high=100,
        drop_count_low=1,
        drop_count_high=1,
        drop_start=100,
        drop_end=200,
        noise_factor=0.0, )
    zeroed_reference = sine_batch.clone()
    zeroed_reference[:, 100:200] = 0.0
    dropped = fixed_dropper(sine_batch, batch_lengths)
    assert dropped.allclose(zeroed_reference)

    # With noise_factor=1.0 the mean absolute amplitude should stay
    # close to that of the original signal.
    noisy_dropper = DropChunk(noise_factor=1.0)
    amplitude_after = noisy_dropper(sine_batch, batch_lengths).abs().mean()
    amplitude_before = sine_batch.abs().mean()
    assert amplitude_after.allclose(amplitude_before, atol=1e-2)