|
|
@ -1,17 +1,3 @@
|
|
|
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// itf/decodable-itf.h
|
|
|
|
// itf/decodable-itf.h
|
|
|
|
|
|
|
|
|
|
|
|
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
|
|
|
|
// Copyright 2009-2011 Microsoft Corporation; Saarland University;
|
|
|
@ -56,10 +42,8 @@ namespace kaldi {
|
|
|
|
|
|
|
|
|
|
|
|
For online decoding, where the features are coming in in real time, it is
|
|
|
|
For online decoding, where the features are coming in in real time, it is
|
|
|
|
important to understand the IsLastFrame() and NumFramesReady() functions.
|
|
|
|
important to understand the IsLastFrame() and NumFramesReady() functions.
|
|
|
|
There are two ways these are used: the old online-decoding code, in
|
|
|
|
There are two ways these are used: the old online-decoding code, in ../online/,
|
|
|
|
../online/,
|
|
|
|
and the new online-decoding code, in ../online2/. In the old online-decoding
|
|
|
|
and the new online-decoding code, in ../online2/. In the old
|
|
|
|
|
|
|
|
online-decoding
|
|
|
|
|
|
|
|
code, the decoder would do:
|
|
|
|
code, the decoder would do:
|
|
|
|
\code{.cc}
|
|
|
|
\code{.cc}
|
|
|
|
for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
|
|
|
|
for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
|
|
|
@ -68,16 +52,13 @@ namespace kaldi {
|
|
|
|
\endcode
|
|
|
|
\endcode
|
|
|
|
and the call to IsLastFrame would block if the features had not arrived yet.
|
|
|
|
and the call to IsLastFrame would block if the features had not arrived yet.
|
|
|
|
The decodable object would have to know when to terminate the decoding. This
|
|
|
|
The decodable object would have to know when to terminate the decoding. This
|
|
|
|
online-decoding mode is still supported; it is what happens when you call,
|
|
|
|
online-decoding mode is still supported; it is what happens when you call, for
|
|
|
|
for
|
|
|
|
|
|
|
|
example, LatticeFasterDecoder::Decode().
|
|
|
|
example, LatticeFasterDecoder::Decode().
|
|
|
|
|
|
|
|
|
|
|
|
We realized that this "blocking" mode of decoding is not very convenient
|
|
|
|
We realized that this "blocking" mode of decoding is not very convenient
|
|
|
|
because it forces the program to be multi-threaded and makes it complex to
|
|
|
|
because it forces the program to be multi-threaded and makes it complex to
|
|
|
|
control endpointing. In the "new" decoding code, you don't call (for
|
|
|
|
control endpointing. In the "new" decoding code, you don't call (for example)
|
|
|
|
example)
|
|
|
|
LatticeFasterDecoder::Decode(); instead you call LatticeFasterDecoder::InitDecoding(),
|
|
|
|
LatticeFasterDecoder::Decode(); instead you call
|
|
|
|
|
|
|
|
LatticeFasterDecoder::InitDecoding(),
|
|
|
|
|
|
|
|
and then each time you get more features, you provide them to the decodable
|
|
|
|
and then each time you get more features, you provide them to the decodable
|
|
|
|
object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
|
|
|
|
object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
|
|
|
|
something like this:
|
|
|
|
something like this:
|
|
|
@ -87,8 +68,7 @@ namespace kaldi {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
\endcode
|
|
|
|
\endcode
|
|
|
|
So the decodable object never has IsLastFrame() called. For decoding where
|
|
|
|
So the decodable object never has IsLastFrame() called. For decoding where
|
|
|
|
you are starting with a matrix of features, the NumFramesReady() function
|
|
|
|
you are starting with a matrix of features, the NumFramesReady() function will
|
|
|
|
will
|
|
|
|
|
|
|
|
always just return the number of frames in the file, and IsLastFrame() will
|
|
|
|
always just return the number of frames in the file, and IsLastFrame() will
|
|
|
|
return true for the last frame.
|
|
|
|
return true for the last frame.
|
|
|
|
|
|
|
|
|
|
|
@ -100,52 +80,45 @@ namespace kaldi {
|
|
|
|
frame of the file once we've decided to terminate decoding.
|
|
|
|
frame of the file once we've decided to terminate decoding.
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
class DecodableInterface {
|
|
|
|
class DecodableInterface {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
/// Returns the log likelihood, which will be negated in the decoder.
|
|
|
|
/// Returns the log likelihood, which will be negated in the decoder.
|
|
|
|
/// The "frame" starts from zero. You should verify that NumFramesReady() >
|
|
|
|
/// The "frame" starts from zero. You should verify that NumFramesReady() > frame
|
|
|
|
/// frame
|
|
|
|
/// before calling this.
|
|
|
|
/// before calling this.
|
|
|
|
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
|
|
|
|
virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns true if this is the last frame. Frames are zero-based, so the
|
|
|
|
/// Returns true if this is the last frame. Frames are zero-based, so the
|
|
|
|
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
|
|
|
|
/// first frame is zero. IsLastFrame(-1) will return false, unless the file
|
|
|
|
/// is empty (which is a case that I'm not sure all the code will handle, so
|
|
|
|
/// is empty (which is a case that I'm not sure all the code will handle, so
|
|
|
|
/// be careful). Caution: the behavior of this function in an online setting
|
|
|
|
/// be careful). Caution: the behavior of this function in an online
|
|
|
|
/// is being changed somewhat. In the future it may return false in cases where
|
|
|
|
/// setting
|
|
|
|
/// we haven't yet decided to terminate decoding, but later true if we decide
|
|
|
|
/// is being changed somewhat. In the future it may return false in cases where
|
|
|
|
/// to terminate decoding. The plan in future is to rely more on
|
|
|
|
/// we haven't yet decided to terminate decoding, but later true if we
|
|
|
|
/// NumFramesReady(), and in future, IsLastFrame() would always return false
|
|
|
|
/// decide
|
|
|
|
/// in an online-decoding setting, and would only return true in a
|
|
|
|
/// to terminate decoding. The plan in future is to rely more on
|
|
|
|
/// decoding-from-matrix setting where we want to allow the last delta or LDA
|
|
|
|
/// NumFramesReady(), and in future, IsLastFrame() would always return false
|
|
|
|
/// features to be flushed out for compatibility with the baseline setup.
|
|
|
|
/// in an online-decoding setting, and would only return true in a
|
|
|
|
virtual bool IsLastFrame(int32 frame) const = 0;
|
|
|
|
/// decoding-from-matrix setting where we want to allow the last delta or
|
|
|
|
|
|
|
|
/// LDA
|
|
|
|
/// The call NumFramesReady() will return the number of frames currently available
|
|
|
|
/// features to be flushed out for compatibility with the baseline setup.
|
|
|
|
/// for this decodable object. This is for use in setups where you don't want the
|
|
|
|
virtual bool IsLastFrame(int32 frame) const = 0;
|
|
|
|
/// decoder to block while waiting for input. This is newly added as of Jan 2014,
|
|
|
|
|
|
|
|
/// and I hope, going forward, to rely on this mechanism more than IsLastFrame to
|
|
|
|
/// The call NumFramesReady() will return the number of frames currently
|
|
|
|
/// know when to stop decoding.
|
|
|
|
/// available
|
|
|
|
virtual int32 NumFramesReady() const {
|
|
|
|
/// for this decodable object. This is for use in setups where you don't
|
|
|
|
KALDI_ERR << "NumFramesReady() not implemented for this decodable type.";
|
|
|
|
/// want the
|
|
|
|
return -1;
|
|
|
|
/// decoder to block while waiting for input. This is newly added as of Jan
|
|
|
|
}
|
|
|
|
/// 2014,
|
|
|
|
|
|
|
|
/// and I hope, going forward, to rely on this mechanism more than
|
|
|
|
/// Returns the number of states in the acoustic model
|
|
|
|
/// IsLastFrame to
|
|
|
|
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
|
|
|
|
/// know when to stop decoding.
|
|
|
|
/// this is for compatibility with OpenFst).
|
|
|
|
virtual int32 NumFramesReady() const {
|
|
|
|
virtual int32 NumIndices() const = 0;
|
|
|
|
KALDI_ERR
|
|
|
|
|
|
|
|
<< "NumFramesReady() not implemented for this decodable type.";
|
|
|
|
virtual bool FrameLogLikelihood(int32 frame,
|
|
|
|
return -1;
|
|
|
|
std::vector<kaldi::BaseFloat>* likelihood) = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the number of states in the acoustic model
|
|
|
|
virtual ~DecodableInterface() {}
|
|
|
|
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
|
|
|
|
|
|
|
|
/// this is for compatibility with OpenFst).
|
|
|
|
|
|
|
|
virtual int32 NumIndices() const = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
virtual std::vector<BaseFloat> FrameLogLikelihood(int32 frame) = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
virtual ~DecodableInterface() {}
|
|
|
|
|
|
|
|
};
|
|
|
|
};
|
|
|
|
/// @}
|
|
|
|
/// @}
|
|
|
|
} // namespace kaldi
|
|
|
|
} // namespace kaldi
|
|
|
|