Merge branch 'speechx' into speechx_refactor

pull/2768/head
YangZhou 3 years ago committed by GitHub
commit a92aa6436e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "absl/strings/str_split.h"
#include "base/common.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "frontend/audio/data_cache.h"
#include "fst/symbol-table.h"
#include "kaldi/util/table-types.h"
@ -118,9 +119,9 @@ int main(int argc, char* argv[]) {
ori_feature_len - chunk_idx * chunk_stride, chunk_size);
}
if (this_chunk_size < receptive_field_length) {
LOG(WARNING) << "utt: " << utt << " skip last "
<< this_chunk_size << " frames, expect is "
<< receptive_field_length;
LOG(WARNING)
<< "utt: " << utt << " skip last " << this_chunk_size
<< " frames, expect is " << receptive_field_length;
break;
}

@ -1,3 +1,4 @@
set(srcs decodable.cc nnet_producer.cc)
if(USING_DS2)

@ -62,12 +62,14 @@ class Decodable : public kaldi::DecodableInterface {
int32 TokenId2NnetId(int32 token_id);
// for offline test
void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
private:
std::shared_ptr<NnetProducer> nnet_producer_;
// "frame" here means an nnet probability frame, not an audio feature frame;
// nnet frames are a subsampling of the feature frames,
// e.g. 35 feature frames yield 8 inference frames
@ -79,7 +81,6 @@ class Decodable : public kaldi::DecodableInterface {
int32 current_log_post_subsampled_offset_;
int32 num_chunk_computed_;
std::vector<kaldi::BaseFloat> framelikelihood_;
kaldi::BaseFloat acoustic_scale_;
};

@ -50,6 +50,7 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
void U2Recognizer::Reset() {
global_frame_offset_ = 0;
input_finished_ = false;
num_frames_ = 0;
result_.clear();

@ -1 +1,2 @@
#add_subdirectory(websocket)

Loading…
Cancel
Save