From 7b3a901b0826bcacc1920930f301077817b100a8 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 5 Nov 2021 10:31:35 +0000 Subject: [PATCH] more conf with preprocess.yaml --- examples/aishell/s1/conf/chunk_conformer.yaml | 4 +-- examples/aishell/s1/conf/conformer.yaml | 4 +-- examples/aishell/s1/conf/preprocess.yaml | 29 +++++++++++++++++++ .../callcenter/s1/conf/chunk_conformer.yaml | 4 +-- examples/callcenter/s1/conf/conformer.yaml | 4 +-- examples/callcenter/s1/conf/preprocess.yaml | 29 +++++++++++++++++++ examples/timit/s1/conf/preprocess.yaml | 29 +++++++++++++++++++ examples/timit/s1/conf/transformer.yaml | 4 +-- 8 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 examples/aishell/s1/conf/preprocess.yaml create mode 100644 examples/callcenter/s1/conf/preprocess.yaml create mode 100644 examples/timit/s1/conf/preprocess.yaml diff --git a/examples/aishell/s1/conf/chunk_conformer.yaml b/examples/aishell/s1/conf/chunk_conformer.yaml index 8682538b..336a6c46 100644 --- a/examples/aishell/s1/conf/chunk_conformer.yaml +++ b/examples/aishell/s1/conf/chunk_conformer.yaml @@ -15,7 +15,7 @@ collator: vocab_filepath: data/vocab.txt unit_type: 'char' spm_model_prefix: '' - augmentation_config: conf/augmentation.json + augmentation_config: conf/preprocess.yaml batch_size: 32 raw_wav: True # use raw_wav or kaldi feature spectrum_type: fbank #linear, mfcc, fbank @@ -38,7 +38,7 @@ collator: # network architecture model: - cmvn_file: "data/mean_std.json" + cmvn_file: cmvn_file_type: "json" # encoder related encoder: conformer diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml index 71cd044e..0e9d79d8 100644 --- a/examples/aishell/s1/conf/conformer.yaml +++ b/examples/aishell/s1/conf/conformer.yaml @@ -15,7 +15,7 @@ collator: vocab_filepath: data/vocab.txt unit_type: 'char' spm_model_prefix: '' - augmentation_config: conf/augmentation.json + augmentation_config: conf/preprocess.yaml batch_size: 64 raw_wav: True # use raw_wav or kaldi feature spectrum_type: fbank #linear, mfcc, fbank @@ -37,7 +37,7 @@ collator: # network architecture model: - cmvn_file: "data/mean_std.json" + cmvn_file: cmvn_file_type: "json" # encoder related encoder: conformer diff --git a/examples/aishell/s1/conf/preprocess.yaml b/examples/aishell/s1/conf/preprocess.yaml new file mode 100644 index 00000000..dd4cfd27 --- /dev/null +++ b/examples/aishell/s1/conf/preprocess.yaml @@ -0,0 +1,29 @@ +process: + # extract kaldi fbank from PCM + - type: fbank_kaldi + fs: 16000 + n_mels: 80 + n_shift: 160 + win_length: 400 + dither: true + - type: cmvn_json + cmvn_path: data/mean_std.json + # these three processes are a.k.a. SpecAugument + - type: time_warp + max_time_warp: 5 + inplace: true + mode: PIL + - type: freq_mask + F: 30 + n_mask: 2 + inplace: true + replace_with_zero: false + - type: time_mask + T: 40 + n_mask: 2 + inplace: true + replace_with_zero: false + + + + diff --git a/examples/callcenter/s1/conf/chunk_conformer.yaml b/examples/callcenter/s1/conf/chunk_conformer.yaml index a853658a..b18b46fe 100644 --- a/examples/callcenter/s1/conf/chunk_conformer.yaml +++ b/examples/callcenter/s1/conf/chunk_conformer.yaml @@ -15,7 +15,7 @@ collator: vocab_filepath: data/vocab.txt unit_type: 'char' spm_model_prefix: '' - augmentation_config: conf/augmentation.json + augmentation_config: conf/preprocess.yaml batch_size: 32 raw_wav: True # use raw_wav or kaldi feature spectrum_type: fbank #linear, mfcc, fbank @@ -38,7 +38,7 @@ collator: # network architecture model: - cmvn_file: "data/mean_std.json" + cmvn_file: cmvn_file_type: "json" # encoder related encoder: conformer diff --git a/examples/callcenter/s1/conf/conformer.yaml b/examples/callcenter/s1/conf/conformer.yaml index bd4f4578..47c438a6 100644 --- a/examples/callcenter/s1/conf/conformer.yaml +++ b/examples/callcenter/s1/conf/conformer.yaml @@ -15,7 +15,7 @@ collator: vocab_filepath: data/vocab.txt unit_type: 'char' spm_model_prefix: '' - augmentation_config: conf/augmentation.json + augmentation_config: conf/preprocess.yaml batch_size: 32 raw_wav: True # use raw_wav or kaldi feature spectrum_type: fbank #linear, mfcc, fbank @@ -37,7 +37,7 @@ collator: # network architecture model: - cmvn_file: "data/mean_std.json" + cmvn_file: cmvn_file_type: "json" # encoder related encoder: conformer diff --git a/examples/callcenter/s1/conf/preprocess.yaml b/examples/callcenter/s1/conf/preprocess.yaml new file mode 100644 index 00000000..dd4cfd27 --- /dev/null +++ b/examples/callcenter/s1/conf/preprocess.yaml @@ -0,0 +1,29 @@ +process: + # extract kaldi fbank from PCM + - type: fbank_kaldi + fs: 16000 + n_mels: 80 + n_shift: 160 + win_length: 400 + dither: true + - type: cmvn_json + cmvn_path: data/mean_std.json + # these three processes are a.k.a. SpecAugument + - type: time_warp + max_time_warp: 5 + inplace: true + mode: PIL + - type: freq_mask + F: 30 + n_mask: 2 + inplace: true + replace_with_zero: false + - type: time_mask + T: 40 + n_mask: 2 + inplace: true + replace_with_zero: false + + + + diff --git a/examples/timit/s1/conf/preprocess.yaml b/examples/timit/s1/conf/preprocess.yaml new file mode 100644 index 00000000..dd4cfd27 --- /dev/null +++ b/examples/timit/s1/conf/preprocess.yaml @@ -0,0 +1,29 @@ +process: + # extract kaldi fbank from PCM + - type: fbank_kaldi + fs: 16000 + n_mels: 80 + n_shift: 160 + win_length: 400 + dither: true + - type: cmvn_json + cmvn_path: data/mean_std.json + # these three processes are a.k.a. SpecAugument + - type: time_warp + max_time_warp: 5 + inplace: true + mode: PIL + - type: freq_mask + F: 30 + n_mask: 2 + inplace: true + replace_with_zero: false + - type: time_mask + T: 40 + n_mask: 2 + inplace: true + replace_with_zero: false + + + + diff --git a/examples/timit/s1/conf/transformer.yaml b/examples/timit/s1/conf/transformer.yaml index d3ced898..1d18468b 100644 --- a/examples/timit/s1/conf/transformer.yaml +++ b/examples/timit/s1/conf/transformer.yaml @@ -14,7 +14,7 @@ collator: vocab_filepath: data/vocab.txt unit_type: "word" mean_std_filepath: "" - augmentation_config: "" + augmentation_config: conf/preprocess.yaml batch_size: 64 raw_wav: True # use raw_wav or kaldi feature spectrum_type: fbank #linear, mfcc, fbank @@ -37,7 +37,7 @@ collator: # network architecture model: - cmvn_file: "data/mean_std.json" + cmvn_file: cmvn_file_type: "json" # encoder related encoder: transformer