Merge branch 'speechx' into speechx_refactor

pull/2768/head
YangZhou 3 years ago committed by GitHub
commit a92aa6436e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -12,9 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "absl/strings/str_split.h" #include "absl/strings/str_split.h"
#include "base/common.h" #include "base/common.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "frontend/audio/data_cache.h" #include "frontend/audio/data_cache.h"
#include "fst/symbol-table.h" #include "fst/symbol-table.h"
#include "kaldi/util/table-types.h" #include "kaldi/util/table-types.h"
@ -118,9 +119,9 @@ int main(int argc, char* argv[]) {
ori_feature_len - chunk_idx * chunk_stride, chunk_size); ori_feature_len - chunk_idx * chunk_stride, chunk_size);
} }
if (this_chunk_size < receptive_field_length) { if (this_chunk_size < receptive_field_length) {
LOG(WARNING) << "utt: " << utt << " skip last " LOG(WARNING)
<< this_chunk_size << " frames, expect is " << "utt: " << utt << " skip last " << this_chunk_size
<< receptive_field_length; << " frames, expect is " << receptive_field_length;
break; break;
} }

@ -1,3 +1,4 @@
set(srcs decodable.cc nnet_producer.cc) set(srcs decodable.cc nnet_producer.cc)
if(USING_DS2) if(USING_DS2)

@ -134,4 +134,4 @@ void Decodable::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
VLOG(1) << "Attention Rescoring cost: " << timer.Elapsed() << " sec."; VLOG(1) << "Attention Rescoring cost: " << timer.Elapsed() << " sec.";
} }
} // namespace ppspeech } // namespace ppspeech

@ -62,12 +62,14 @@ class Decodable : public kaldi::DecodableInterface {
int32 TokenId2NnetId(int32 token_id); int32 TokenId2NnetId(int32 token_id);
// for offline test // for offline test
void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood); void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
private: private:
std::shared_ptr<NnetProducer> nnet_producer_; std::shared_ptr<NnetProducer> nnet_producer_;
// the frame is nnet prob frame rather than audio feature frame // the frame is nnet prob frame rather than audio feature frame
// nnet frame subsample the feature frame // nnet frame subsample the feature frame
// eg: 35 frame features output 8 frame inferences // eg: 35 frame features output 8 frame inferences
@ -79,7 +81,6 @@ class Decodable : public kaldi::DecodableInterface {
int32 current_log_post_subsampled_offset_; int32 current_log_post_subsampled_offset_;
int32 num_chunk_computed_; int32 num_chunk_computed_;
std::vector<kaldi::BaseFloat> framelikelihood_; std::vector<kaldi::BaseFloat> framelikelihood_;
kaldi::BaseFloat acoustic_scale_; kaldi::BaseFloat acoustic_scale_;
}; };

@ -50,6 +50,7 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
void U2Recognizer::Reset() { void U2Recognizer::Reset() {
global_frame_offset_ = 0; global_frame_offset_ = 0;
input_finished_ = false; input_finished_ = false;
num_frames_ = 0; num_frames_ = 0;
result_.clear(); result_.clear();

@ -1 +1,2 @@
#add_subdirectory(websocket) #add_subdirectory(websocket)

Loading…
Cancel
Save