Add fill_zero option for the frontend (zero-fill the last chunk when it is shorter than chunk_size)

pull/2524/head
Hui Zhang 2 years ago
parent 83f885c6cc
commit 28dafea0e0

@ -20,6 +20,7 @@
// feature // feature
DEFINE_bool(use_fbank, false, "False for fbank; or linear feature"); DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
DEFINE_bool(fill_zero, false, "fill zero at last chunk, when chunk < chunk_size");
// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear // DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
// feature, or fbank"); // feature, or fbank");
DEFINE_int32(num_bins, 161, "num bins of mel"); DEFINE_int32(num_bins, 161, "num bins of mel");

@ -47,17 +47,16 @@ bool Assembler::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
// read frame by frame from base_feature_extractor_ into cache_ // read frame by frame from base_feature_extractor_ into cache_
bool Assembler::Compute(Vector<BaseFloat>* feats) { bool Assembler::Compute(Vector<BaseFloat>* feats) {
// compute and feed frame by frame // compute and feed frame by frame
bool result = false;
while (feature_cache_.size() < frame_chunk_size_) { while (feature_cache_.size() < frame_chunk_size_) {
Vector<BaseFloat> feature; Vector<BaseFloat> feature;
result = base_extractor_->Read(&feature); bool result = base_extractor_->Read(&feature);
if (result == false || feature.Dim() == 0) { if (result == false || feature.Dim() == 0) {
VLOG(1) << "result: " << result << "feature dim: " << feature.Dim(); VLOG(1) << "result: " << result << " feature dim: " << feature.Dim();
if (IsFinished() == false) { if (IsFinished() == false) {
LOG(INFO) << "finished reading feature. cache size: " << feature_cache_.size(); VLOG(1) << "finished reading feature. cache size: " << feature_cache_.size();
return false; return false;
} else { } else {
LOG(INFO) << "break"; VLOG(1) << "break";
break; break;
} }
} }
@ -103,7 +102,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
counter++; counter++;
} }
CHECK(feature_cache_.size() == cache_size_ ); CHECK(feature_cache_.size() == cache_size_);
return true; return true;
} }

@ -27,6 +27,7 @@
// feature // feature
DECLARE_bool(use_fbank); DECLARE_bool(use_fbank);
DECLARE_bool(fill_zero);
DECLARE_int32(num_bins); DECLARE_int32(num_bins);
DECLARE_string(cmvn_file); DECLARE_string(cmvn_file);
@ -80,15 +81,18 @@ struct FeaturePipelineOptions {
// assembler opts // assembler opts
opts.assembler_opts.subsampling_rate = FLAGS_subsampling_rate; opts.assembler_opts.subsampling_rate = FLAGS_subsampling_rate;
LOG(INFO) << "subsampling rate: "
<< opts.assembler_opts.subsampling_rate;
opts.assembler_opts.receptive_filed_length = opts.assembler_opts.receptive_filed_length =
FLAGS_receptive_field_length; FLAGS_receptive_field_length;
opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
opts.assembler_opts.fill_zero = FLAGS_fill_zero;
LOG(INFO) << "subsampling rate: "
<< opts.assembler_opts.subsampling_rate;
LOG(INFO) << "nnet receptive filed length: " LOG(INFO) << "nnet receptive filed length: "
<< opts.assembler_opts.receptive_filed_length; << opts.assembler_opts.receptive_filed_length;
opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
LOG(INFO) << "nnet chunk size: " LOG(INFO) << "nnet chunk size: "
<< opts.assembler_opts.nnet_decoder_chunk; << opts.assembler_opts.nnet_decoder_chunk;
LOG(INFO) << "frontend fill zeros: "
<< opts.assembler_opts.fill_zero;
return opts; return opts;
} }
}; };

@ -114,7 +114,7 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
// read one frame likelihood // read one frame likelihood
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) { bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
if (EnsureFrameHaveComputed(frame) == false) { if (EnsureFrameHaveComputed(frame) == false) {
LOG(INFO) << "framelikehood exit."; VLOG(1) << "framelikehood exit.";
return false; return false;
} }

@ -38,6 +38,8 @@ struct RecognizerResource {
resource.acoustic_scale = FLAGS_acoustic_scale; resource.acoustic_scale = FLAGS_acoustic_scale;
resource.feature_pipeline_opts = resource.feature_pipeline_opts =
FeaturePipelineOptions::InitFromFlags(); FeaturePipelineOptions::InitFromFlags();
resource.feature_pipeline_opts.assembler_opts.fill_zero = true;
LOG(INFO) << "ds2 need fill zero be true: " << resource.feature_pipeline_opts.assembler_opts.fill_zero;
resource.model_opts = ModelOptions::InitFromFlags(); resource.model_opts = ModelOptions::InitFromFlags();
resource.tlg_opts = TLGDecoderOptions::InitFromFlags(); resource.tlg_opts = TLGDecoderOptions::InitFromFlags();
return resource; return resource;

@ -101,6 +101,8 @@ struct U2RecognizerResource {
resource.feature_pipeline_opts = resource.feature_pipeline_opts =
ppspeech::FeaturePipelineOptions::InitFromFlags(); ppspeech::FeaturePipelineOptions::InitFromFlags();
resource.feature_pipeline_opts.assembler_opts.fill_zero = false;
LOG(INFO) << "u2 need fill zero be false: " << resource.feature_pipeline_opts.assembler_opts.fill_zero;
resource.model_opts = ppspeech::ModelOptions::InitFromFlags(); resource.model_opts = ppspeech::ModelOptions::InitFromFlags();
resource.decoder_opts = ppspeech::DecodeOptions::InitFromFlags(); resource.decoder_opts = ppspeech::DecodeOptions::InitFromFlags();
return resource; return resource;

@ -85,9 +85,6 @@ int main(int argc, char* argv[]) {
cnt++; cnt++;
} }
CHECK(sample_offset == tot_samples); CHECK(sample_offset == tot_samples);
VLOG(1) << "num decode: " << cnt;
// recognizer.SetFinished();
// second pass decoding // second pass decoding
recognizer.Rescoring(); recognizer.Rescoring();

Loading…
Cancel
Save