diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc
index 305449cd..f452636f 100644
--- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc
@@ -30,10 +30,10 @@ DEFINE_string(dict_file, "vocab.txt", "vocabulary of lm");
 DEFINE_string(lm_path, "", "language model");
 DEFINE_int32(receptive_field_length,
              7,
-             "receptive field of two CNN(kernel=5) downsampling module.");
+             "receptive field of two CNN(kernel=3) downsampling module.");
 DEFINE_int32(downsampling_rate,
              4,
-             "two CNN(kernel=5) module downsampling rate.");
+             "two CNN(kernel=3) module downsampling rate.");
 DEFINE_string(
     model_input_names,
     "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/decoder/param.h
index 780a1d6e..4c5abfad 100644
--- a/speechx/speechx/decoder/param.h
+++ b/speechx/speechx/decoder/param.h
@@ -28,10 +28,10 @@ DEFINE_string(cmvn_file, "", "read cmvn");
 // feature sliding window
 DEFINE_int32(receptive_field_length,
              7,
-             "receptive field of two CNN(kernel=5) downsampling module.");
+             "receptive field of two CNN(kernel=3) downsampling module.");
 DEFINE_int32(downsampling_rate,
              4,
-             "two CNN(kernel=5) module downsampling rate.");
+             "two CNN(kernel=3) module downsampling rate.");
 DEFINE_int32(nnet_decoder_chunk, 1, "paddle nnet forward chunk");
 // nnet
 DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
diff --git a/speechx/speechx/decoder/tlg_decoder_main.cc b/speechx/speechx/decoder/tlg_decoder_main.cc
index 9cf10a18..010acccf 100644
--- a/speechx/speechx/decoder/tlg_decoder_main.cc
+++ b/speechx/speechx/decoder/tlg_decoder_main.cc
@@ -33,10 +33,10 @@ DEFINE_int32(max_active, 7500, "decoder graph");
 DEFINE_int32(nnet_decoder_chunk, 1, "paddle nnet forward chunk");
 DEFINE_int32(receptive_field_length,
              7,
-             "receptive field of two CNN(kernel=5) downsampling module.");
+             "receptive field of two CNN(kernel=3) downsampling module.");
 DEFINE_int32(downsampling_rate,
              4,
-             "two CNN(kernel=5) module downsampling rate.");
+             "two CNN(kernel=3) module downsampling rate.");
 DEFINE_string(
     model_input_names,
     "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
diff --git a/speechx/speechx/nnet/nnet_forward_main.cc b/speechx/speechx/nnet/nnet_forward_main.cc
index 170b74a5..0c5a55a7 100644
--- a/speechx/speechx/nnet/nnet_forward_main.cc
+++ b/speechx/speechx/nnet/nnet_forward_main.cc
@@ -27,10 +27,10 @@ DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
 DEFINE_int32(nnet_decoder_chunk, 1, "paddle nnet forward chunk");
 DEFINE_int32(receptive_field_length,
              7,
-             "receptive field of two CNN(kernel=5) downsampling module.");
+             "receptive field of two CNN(kernel=3) downsampling module.");
 DEFINE_int32(downsampling_rate,
              4,
-             "two CNN(kernel=5) module downsampling rate.");
+             "two CNN(kernel=3) module downsampling rate.");
 DEFINE_string(
     model_input_names,
     "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
@@ -159,4 +159,4 @@ int main(int argc, char* argv[]) {
     KALDI_LOG << "Done " << num_done << " utterances, " << num_err
               << " with errors.";
     return (num_done != 0 ? 0 : 1);
-}
\ No newline at end of file
+}