Merge branch 'speechx' into speechx_refactor

3 years ago · a92aa6436e
parent cd49b31a18 869f4267d5
commit a92aa6436e
6 changed files with 11 additions and 6 deletions
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
@@ -12,9 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "decoder/ctc_prefix_beam_search_decoder.h"
+
 #include "absl/strings/str_split.h"
 #include "base/common.h"
+#include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "frontend/audio/data_cache.h"
 #include "fst/symbol-table.h"
 #include "kaldi/util/table-types.h"
@@ -118,9 +119,9 @@ int main(int argc, char* argv[]) {
                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
            }
            if (this_chunk_size < receptive_field_length) {
-                LOG(WARNING) << "utt: " << utt << " skip last "
-                             << this_chunk_size << " frames, expect is "
-                             << receptive_field_length;
+                LOG(WARNING)
+                    << "utt: " << utt << " skip last " << this_chunk_size
+                    << " frames, expect is " << receptive_field_length;
                break;
            }

--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
@@ -1,3 +1,4 @@
+
 set(srcs decodable.cc nnet_producer.cc)

 if(USING_DS2)
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
@@ -62,12 +62,14 @@ class Decodable : public kaldi::DecodableInterface {

    int32 TokenId2NnetId(int32 token_id);

+
    // for offline test
    void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);

  private:
    std::shared_ptr<NnetProducer> nnet_producer_;

+
    // the frame is nnet prob frame rather than audio feature frame
    // nnet frame subsample the feature frame
    // eg: 35 frame features output 8 frame inferences
@@ -79,7 +81,6 @@ class Decodable : public kaldi::DecodableInterface {
    int32 current_log_post_subsampled_offset_;
    int32 num_chunk_computed_;
    std::vector<kaldi::BaseFloat> framelikelihood_;
-
    kaldi::BaseFloat acoustic_scale_;
 };

--- a/speechx/speechx/asr/recognizer/u2_recognizer.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.cc
@@ -50,6 +50,7 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
 void U2Recognizer::Reset() {
    global_frame_offset_ = 0;
    input_finished_ = false;
+
    num_frames_ = 0;
    result_.clear();

--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
@@ -1 +1,2 @@
 #add_subdirectory(websocket)
+