|
|
@ -31,24 +31,28 @@ class Decodable : public kaldi::DecodableInterface {
|
|
|
|
// Returns the log-likelihood value for the given (frame, index) pair.
// NOTE(review): presumably implements kaldi::DecodableInterface::LogLikelihood — confirm.
virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
|
|
|
|
// Returns the log-likelihood value for the given (frame, index) pair.
// NOTE(review): presumably implements kaldi::DecodableInterface::LogLikelihood — confirm.
virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
|
|
|
|
// Reports whether `frame` is the last frame.
// NOTE(review): whether "last" means last computed or last of the whole input
// is not visible in this header — confirm against the implementation.
virtual bool IsLastFrame(int32 frame);
|
|
|
|
// Reports whether `frame` is the last frame.
// NOTE(review): whether "last" means last computed or last of the whole input
// is not visible in this header — confirm against the implementation.
virtual bool IsLastFrame(int32 frame);
|
|
|
|
// Returns the number of indices.
// NOTE(review): presumably the range of `index` accepted by LogLikelihood() — confirm.
virtual int32 NumIndices() const;
|
|
|
|
// Returns the number of indices.
// NOTE(review): presumably the range of `index` accepted by LogLikelihood() — confirm.
virtual int32 NumIndices() const;
|
|
|
|
// Takes a frame index and an output vector pointer `likelihood`.
// NOTE(review): presumably fills `*likelihood` with the values for `frame` and
// returns false when the frame is unavailable — confirm against the implementation.
virtual bool FrameLogLikelihood(int32 frame,
|
|
|
|
// not logprob
|
|
|
|
std::vector<kaldi::BaseFloat>* likelihood);
|
|
|
|
// Takes a frame index and an output vector pointer `likelihood`.
// NOTE(review): presumably fills `*likelihood` with plain (non-log) values for
// `frame` and returns false when the frame is unavailable — confirm.
virtual bool FrameLikelihood(int32 frame,
|
|
|
|
|
|
|
|
std::vector<kaldi::BaseFloat>* likelihood);
|
|
|
|
// Returns the number of frames that are ready.
// NOTE(review): presumably reports frames_ready_ — confirm against the implementation.
virtual int32 NumFramesReady() const;
|
|
|
|
// Returns the number of frames that are ready.
// NOTE(review): presumably reports frames_ready_ — confirm against the implementation.
virtual int32 NumFramesReady() const;
|
|
|
|
// for offline test
|
|
|
|
// for offline test
|
|
|
|
// Offline-test entry point: accepts a likelihood matrix directly.
// NOTE(review): presumably stores it so that no nnet computation is needed — confirm.
void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
|
|
|
|
// Offline-test entry point: accepts a likelihood matrix directly.
// NOTE(review): presumably stores it so that no nnet computation is needed — confirm.
void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
|
|
|
|
// Resets this decodable.
// NOTE(review): which members are cleared is not visible in this header — confirm.
void Reset();
|
|
|
|
// Resets this decodable.
// NOTE(review): which members are cleared is not visible in this header — confirm.
void Reset();
|
|
|
|
// Reports whether the input is finished, as answered by frontend_->IsFinished().
bool IsInputFinished() const {
  const bool input_done = frontend_->IsFinished();
  return input_done;
}
|
|
|
|
// Reports whether the input is finished, as answered by frontend_->IsFinished().
bool IsInputFinished() const {
  const bool input_done = frontend_->IsFinished();
  return input_done;
}
|
|
|
|
// NOTE(review): presumably makes sure the output for `frame` has been computed
// (possibly by advancing chunks) and returns whether that succeeded — confirm.
bool EnsureFrameHaveComputed(int32 frame);
|
|
|
|
// NOTE(review): presumably makes sure the output for `frame` has been computed
// (possibly by advancing chunks) and returns whether that succeeded — confirm.
bool EnsureFrameHaveComputed(int32 frame);
|
|
|
|
|
|
|
|
// Converts a token id into an nnet id.
// NOTE(review): the mapping itself is not visible in this header — confirm.
int32 TokenId2NnetId(int32 token_id);
|
|
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
private:
|
|
|
|
// NOTE(review): presumably computes the next chunk of nnet output and returns
// false when no further chunk could be produced — confirm.
bool AdvanceChunk();
|
|
|
|
// NOTE(review): presumably computes the next chunk of nnet output and returns
// false when no further chunk could be produced — confirm.
bool AdvanceChunk();
|
|
|
|
// Shared handle to the frontend; IsInputFinished() queries its IsFinished().
std::shared_ptr<FrontendInterface> frontend_;
|
|
|
|
// Shared handle to the frontend; IsInputFinished() queries its IsFinished().
std::shared_ptr<FrontendInterface> frontend_;
|
|
|
|
// Shared handle to the nnet.
// NOTE(review): presumably the model run on frontend output — confirm.
std::shared_ptr<NnetInterface> nnet_;
|
|
|
|
// Shared handle to the nnet.
// NOTE(review): presumably the model run on frontend output — confirm.
std::shared_ptr<NnetInterface> nnet_;
|
|
|
|
// Cached matrix of nnet values.
// NOTE(review): presumably holds the most recently computed chunk — confirm.
kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
|
|
|
|
// Cached matrix of nnet values.
// NOTE(review): presumably holds the most recently computed chunk — confirm.
kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
|
|
|
|
|
|
|
|
// "frame" here means an nnet probability frame, not an audio feature frame
|
|
|
|
|
|
|
|
// nnet frames are a subsampling of the feature frames
|
|
|
|
|
|
|
|
// e.g. 35 feature frames produce 8 inference frames
|
|
|
|
// NOTE(review): presumably the frame index at which nnet_cache_ starts — confirm.
int32 frame_offset_;
|
|
|
|
// NOTE(review): presumably the frame index at which nnet_cache_ starts — confirm.
int32 frame_offset_;
|
|
|
|
// NOTE(review): presumably the count reported by NumFramesReady() — confirm.
int32 frames_ready_;
|
|
|
|
// NOTE(review): presumably the count reported by NumFramesReady() — confirm.
int32 frames_ready_;
|
|
|
|
// TODO: feature frames do not match nnet inference frames one-to-one
|
|
|
|
// TODO: feature frames do not match nnet inference frames one-to-one
|
|
|
|
// e.g. 35 feature frames produce 8 inference frames
|
|
|
|
|
|
|
|
// so use subsampled_frame
|
|
|
|
// so use subsampled_frame
|
|
|
|
// Offset expressed in subsampled frames.
// NOTE(review): presumably the position of the current log-posterior block — confirm.
int32 current_log_post_subsampled_offset_;
|
|
|
|
// Offset expressed in subsampled frames.
// NOTE(review): presumably the position of the current log-posterior block — confirm.
int32 current_log_post_subsampled_offset_;
|
|
|
|
// NOTE(review): presumably the number of chunks computed so far (see AdvanceChunk()) — confirm.
int32 num_chunk_computed_;
|
|
|
|
// NOTE(review): presumably the number of chunks computed so far (see AdvanceChunk()) — confirm.
int32 num_chunk_computed_;
|
|
|
|