add log & rename FrameLogLikelihood to FrameLikelihood

pull/1599/head
Yang Zhou 2 years ago
parent 1f23c4bd24
commit 3456ae4a51

@ -93,7 +93,7 @@ void CTCBeamSearch::AdvanceDecode(
vector<vector<BaseFloat>> likelihood;
vector<BaseFloat> frame_prob;
bool flag =
decodable->FrameLogLikelihood(num_frame_decoded_, &frame_prob);
decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
if (flag == false) break;
likelihood.push_back(frame_prob);
AdvanceDecoding(likelihood);

@ -143,7 +143,7 @@ class DecodableInterface {
/// this is for compatibility with OpenFst).
virtual int32 NumIndices() const = 0;
virtual bool FrameLogLikelihood(
virtual bool FrameLikelihood(
int32 frame, std::vector<kaldi::BaseFloat>* likelihood) = 0;

@ -49,11 +49,18 @@ bool Decodable::IsLastFrame(int32 frame) {
// Always returns 0 in this implementation.
int32 Decodable::NumIndices() const { return 0; }
// The ilabel (TokenId) of the WFST (TLG) has <eps> (id = 0) inserted in front
// of the nnet prob ids, so a token id maps to its nnet id by subtracting 1.
int32 Decodable::TokenId2NnetId(int32 token_id) {
return token_id - 1;
}
// Returns acoustic_scale_ times the log of the cached nnet output for the
// given frame and index. The frame is turned into a cache row by subtracting
// frame_offset_; std::numeric_limits<float>::min() is added before the log
// (presumably to guard against log(0)).
BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
CHECK_LE(index, nnet_cache_.NumCols());
CHECK_LE(frame, frames_ready_);
int32 frame_idx = frame - frame_offset_;
return acoustic_scale_ * std::log(nnet_cache_(frame_idx, index - 1) +
// the nnet output is a prob rather than a log prob, so the log is taken here
// index - 1 is used because the ilabel (token id) is one ahead of the nnet
// output id; see TokenId2NnetId
return acoustic_scale_ * std::log(nnet_cache_(frame_idx, TokenId2NnetId(index)) +
std::numeric_limits<float>::min());
}
@ -81,7 +88,7 @@ bool Decodable::AdvanceChunk() {
return true;
}
bool Decodable::FrameLogLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
std::vector<BaseFloat> result;
if (EnsureFrameHaveComputed(frame) == false) return false;
likelihood->resize(nnet_cache_.NumCols());

@ -31,24 +31,28 @@ class Decodable : public kaldi::DecodableInterface {
virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
virtual bool IsLastFrame(int32 frame);
virtual int32 NumIndices() const;
virtual bool FrameLogLikelihood(int32 frame,
std::vector<kaldi::BaseFloat>* likelihood);
// not logprob
virtual bool FrameLikelihood(int32 frame,
std::vector<kaldi::BaseFloat>* likelihood);
virtual int32 NumFramesReady() const;
// for offline test
void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
void Reset();
// Forwards to frontend_->IsFinished() to report whether the input is finished.
bool IsInputFinished() const { return frontend_->IsFinished(); }
bool EnsureFrameHaveComputed(int32 frame);
int32 TokenId2NnetId(int32 token_id);
private:
bool AdvanceChunk();
std::shared_ptr<FrontendInterface> frontend_;
std::shared_ptr<NnetInterface> nnet_;
kaldi::Matrix<kaldi::BaseFloat> nnet_cache_;
// frames here are nnet output (prob) frames rather than audio feature frames;
// the nnet subsamples the feature frames,
// e.g. 35 feature frames yield 8 inference frames
int32 frame_offset_;
int32 frames_ready_;
// TODO: the feature frame count does not match the nnet inference frame count,
// e.g. 35 feature frames yield 8 inference frames,
// so use the subsampled frame
int32 current_log_post_subsampled_offset_;
int32 num_chunk_computed_;

Loading…
Cancel
Save