From 1b787411d332bc0eec265c354562576b33112e91 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Tue, 8 Aug 2017 21:53:26 +0800 Subject: [PATCH] Update noise and impulse augmentor according to code review. --- conf/augmentation.config | 8 ++++++++ .../augmentation.config.example | 18 +++++++++++++++--- data_utils/augmentor/augmentation.py | 2 +- data_utils/augmentor/impulse_response.py | 15 ++++++++------- data_utils/augmentor/noise_perturb.py | 11 ++++++----- datasets/run_all.sh | 9 --------- datasets/run_noise.sh | 10 ++++++++++ train.py | 2 +- 8 files changed, 49 insertions(+), 26 deletions(-) create mode 100644 conf/augmentation.config rename augmentation.config => conf/augmentation.config.example (56%) create mode 100644 datasets/run_noise.sh diff --git a/conf/augmentation.config b/conf/augmentation.config new file mode 100644 index 00000000..6c24da54 --- /dev/null +++ b/conf/augmentation.config @@ -0,0 +1,8 @@ +[ + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + } +] diff --git a/augmentation.config b/conf/augmentation.config.example similarity index 56% rename from augmentation.config rename to conf/augmentation.config.example index 263af586..21ed6ee1 100644 --- a/augmentation.config +++ b/conf/augmentation.config.example @@ -3,14 +3,19 @@ "type": "noise", "params": {"min_snr_dB": 40, "max_snr_dB": 50, - "noise_manifest": "datasets/manifest.noise"}, - "prob": 0.0 + "noise_manifest_path": "datasets/manifest.noise"}, + "prob": 0.6 + }, + { + "type": "impulse", + "params": {"impulse_manifest_path": "datasets/manifest.impulse"}, + "prob": 0.5 }, { "type": "speed", "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05}, - "prob": 0.0 + "prob": 0.5 }, { "type": "shift", @@ -24,4 +29,11 @@ "max_gain_dBFS": 10}, "prob": 0.0 }, + { + "type": "bayesian_normal", + "params": {"target_db": -20, + "prior_db": -20, + "prior_samples": 100}, + "prob": 0.0 + } ] diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index c9e36031..5c30b627 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -30,7 +30,7 @@ class AugmentationPipeline(object): "type": "noise", "params": {"min_snr_dB": 10, "max_snr_dB": 20, - "noise_manifest": "datasets/manifest.noise"}, + "noise_manifest_path": "datasets/manifest.noise"}, "prob": 0.0 }, { diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index d868c3a1..c3de0fdb 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -13,13 +13,14 @@ class ImpulseResponseAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random - :param impulse_manifest: Manifest path for impulse audio data. - :type impulse_manifest: basestring + :param impulse_manifest_path: Manifest path for impulse audio data. + :type impulse_manifest_path: basestring """ - def __init__(self, rng, impulse_manifest): + def __init__(self, rng, impulse_manifest_path): self._rng = rng - self._manifest = utils.read_manifest(manifest_path=impulse_manifest) + self._impulse_manifest = utils.read_manifest( + manifest_path=impulse_manifest_path) def transform_audio(self, audio_segment): """Add impulse response effect. @@ -29,6 +30,6 @@ class ImpulseResponseAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] - noise_segment = AudioSegment.from_file(noise_json['audio_filepath']) - audio_segment.convolve(noise_segment, allow_resample=True) + impulse_json = self._rng.sample(self._impulse_manifest, 1)[0] + impulse_segment = AudioSegment.from_file(impulse_json['audio_filepath']) + audio_segment.convolve(impulse_segment, allow_resample=True) diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index b4fa18e1..281174af 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -17,15 +17,16 @@ class NoisePerturbAugmentor(AugmentorBase): :type min_snr_dB: float :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float - :param noise_manifest: Manifest path for noise audio data. - :type noise_manifest: basestring + :param noise_manifest_path: Manifest path for noise audio data. + :type noise_manifest_path: basestring """ - def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest): + def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path): self._min_snr_dB = min_snr_dB self._max_snr_dB = max_snr_dB self._rng = rng - self._manifest = utils.read_manifest(manifest_path=noise_manifest) + self._noise_manifest = utils.read_manifest( + manifest_path=noise_manifest_path) def transform_audio(self, audio_segment): """Add background noise audio. @@ -35,7 +36,7 @@ class NoisePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] + noise_json = self._rng.sample(self._noise_manifest, 1)[0] if noise_json['duration'] < audio_segment.duration: raise RuntimeError("The duration of sampled noise audio is smaller " "than the audio segment to add effects to.") diff --git a/datasets/run_all.sh b/datasets/run_all.sh index 61747a50..ef2b721f 100644 --- a/datasets/run_all.sh +++ b/datasets/run_all.sh @@ -6,17 +6,8 @@ if [ $? -ne 0 ]; then fi cd - -cd noise -python chime3_background.py -if [ $? -ne 0 ]; then - echo "Prepare CHiME3 background noise failed. Terminated." - exit 1 -fi -cd - - cat librispeech/manifest.train* | shuf > manifest.train cat librispeech/manifest.dev-clean > manifest.dev cat librispeech/manifest.test-clean > manifest.test -cat noise/manifest.* > manifest.noise echo "All done." diff --git a/datasets/run_noise.sh b/datasets/run_noise.sh new file mode 100644 index 00000000..7b27abde --- /dev/null +++ b/datasets/run_noise.sh @@ -0,0 +1,10 @@ +cd noise +python chime3_background.py +if [ $? -ne 0 ]; then + echo "Prepare CHiME3 background noise failed. Terminated." + exit 1 +fi +cd - + +cat noise/manifest.* > manifest.noise +echo "All done." diff --git a/train.py b/train.py index 34c40601..0d4e2508 100644 --- a/train.py +++ b/train.py @@ -123,7 +123,7 @@ parser.add_argument( help="Directory for saving models. (default: %(default)s)") parser.add_argument( "--augmentation_config", - default=open('augmentation.config', 'r').read(), + default=open('conf/augmentation.config', 'r').read(), type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)")