// PaddleSpeech/runtime/engine/common/frontend/feature_pipeline.h
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO(SGoat): refactor later.
#pragma once
#include "frontend/assembler.h"
#include "frontend/audio_cache.h"
#include "frontend/cmvn.h"
#include "frontend/data_cache.h"
#include "frontend/fbank.h"
#include "frontend/feature_cache.h"
#include "frontend/frontend_itf.h"
3 years ago
2 years ago
// Command-line flags consumed by FeaturePipelineOptions::InitFromFlags().
// They are only declared here; the definitions live in another translation
// unit.

// feature
DECLARE_bool(fill_zero);    // copied into assembler_opts.fill_zero
DECLARE_int32(num_bins);    // copied into fbank_opts.mel_opts.num_bins
DECLARE_string(cmvn_file);  // copied into FeaturePipelineOptions::cmvn_file

// feature sliding window (all three are copied into assembler_opts)
DECLARE_int32(receptive_field_length);
DECLARE_int32(subsampling_rate);
DECLARE_int32(nnet_decoder_chunk);
3 years ago
namespace ppspeech {
2 years ago
3 years ago
struct FeaturePipelineOptions {
2 years ago
std::string cmvn_file{};
knf::FbankOptions fbank_opts{};
2 years ago
AssemblerOptions assembler_opts{};
2 years ago
static FeaturePipelineOptions InitFromFlags() {
2 years ago
FeaturePipelineOptions opts;
opts.cmvn_file = FLAGS_cmvn_file;
2 years ago
LOG(INFO) << "cmvn file: " << opts.cmvn_file;
2 years ago
// frame options
knf::FrameExtractionOptions frame_opts;
2 years ago
frame_opts.dither = 0.0;
2 years ago
LOG(INFO) << "dither: " << frame_opts.dither;
2 years ago
frame_opts.frame_shift_ms = 10;
2 years ago
LOG(INFO) << "frame shift ms: " << frame_opts.frame_shift_ms;
frame_opts.window_type = "povey";
frame_opts.frame_length_ms = 25;
opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
LOG(INFO) << "num bins: " << opts.fbank_opts.mel_opts.num_bins;
opts.fbank_opts.frame_opts = frame_opts;
2 years ago
LOG(INFO) << "frame length ms: " << frame_opts.frame_length_ms;
2 years ago
// assembler opts
opts.assembler_opts.subsampling_rate = FLAGS_subsampling_rate;
2 years ago
opts.assembler_opts.receptive_filed_length =
FLAGS_receptive_field_length;
opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
opts.assembler_opts.fill_zero = FLAGS_fill_zero;
LOG(INFO) << "subsampling rate: "
<< opts.assembler_opts.subsampling_rate;
2 years ago
LOG(INFO) << "nnet receptive filed length: "
<< opts.assembler_opts.receptive_filed_length;
LOG(INFO) << "nnet chunk size: "
<< opts.assembler_opts.nnet_decoder_chunk;
2 years ago
LOG(INFO) << "frontend fill zeros: " << opts.assembler_opts.fill_zero;
2 years ago
return opts;
}
3 years ago
};
2 years ago
3 years ago
class FeaturePipeline : public FrontendInterface {
public:
explicit FeaturePipeline(const FeaturePipelineOptions& opts);
virtual void Accept(const std::vector<kaldi::BaseFloat>& waves) {
3 years ago
base_extractor_->Accept(waves);
}
virtual bool Read(std::vector<kaldi::BaseFloat>* feats) {
3 years ago
return base_extractor_->Read(feats);
}
virtual size_t Dim() const { return base_extractor_->Dim(); }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() { base_extractor_->Reset(); }
2 years ago
const FeaturePipelineOptions& Config() { return opts_; }
const BaseFloat FrameShift() const {
return opts_.fbank_opts.frame_opts.frame_shift_ms;
}
const BaseFloat FrameLength() const {
return opts_.fbank_opts.frame_opts.frame_length_ms;
}
const BaseFloat SampleRate() const {
return opts_.fbank_opts.frame_opts.samp_freq;
}
3 years ago
private:
2 years ago
FeaturePipelineOptions opts_;
3 years ago
std::unique_ptr<FrontendInterface> base_extractor_;
};
2 years ago
} // namespace ppspeech