refactor speechx egs

4 years ago · cad09b4910
parent ec469179bf
commit cad09b4910
53 changed files with 358 additions and 1810 deletions
--- a/speechx/examples/CMakeLists.txt
+++ b/speechx/examples/CMakeLists.txt
@ -1,7 +1,4 @@
 cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

-add_subdirectory(feat)
-add_subdirectory(nnet)
-add_subdirectory(decoder)
-
-add_subdirectory(glog)
+add_subdirectory(ds2_ol)
+add_subdirectory(dev)
--- a/speechx/examples/aishell/local/compute-wer.py
+++ b/speechx/examples/aishell/local/compute-wer.py
@ -1,500 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-
-import re, sys, unicodedata
-import codecs
-
-remove_tag = True
-spacelist= [' ', '\t', '\r', '\n']
-puncts = ['!', ',', '?',
-          '、', '。', '！', '，', '；', '？',
-          '：', '「', '」', '︰',  '『', '』', '《', '》']
-
-def characterize(string) :
-  res = []
-  i = 0
-  while i < len(string):
-    char = string[i]
-    if char in puncts:
-      i += 1
-      continue
-    cat1 = unicodedata.category(char)
-    #https://unicodebook.readthedocs.io/unicode.html#unicode-categories
-    if cat1 == 'Zs' or cat1 == 'Cn' or char in spacelist: # space or not assigned
-       i += 1
-       continue
-    if cat1 == 'Lo': # letter-other
-       res.append(char)
-       i += 1
-    else:
-       # some input looks like: <unk><noise>, we want to separate it to two words.
-       sep = ' '
-       if char == '<': sep = '>'
-       j = i+1
-       while j < len(string):
-         c = string[j]
-         if ord(c) >= 128 or (c in spacelist) or (c==sep):
-           break
-         j += 1
-       if j < len(string) and string[j] == '>':
-         j += 1
-       res.append(string[i:j])
-       i = j
-  return res
-
-def stripoff_tags(x):
-  if not x: return ''
-  chars = []
-  i = 0; T=len(x)
-  while i < T:
-    if x[i] == '<':
-      while i < T and x[i] != '>':
-         i += 1
-      i += 1
-    else:
-      chars.append(x[i])
-      i += 1
-  return ''.join(chars)
-
-
-def normalize(sentence, ignore_words, cs, split=None):
-    """ sentence, ignore_words are both in unicode
-    """
-    new_sentence = []
-    for token in sentence:
-        x = token
-        if not cs:
-           x = x.upper()
-        if x in ignore_words:
-           continue
-        if remove_tag:
-          x = stripoff_tags(x)
-        if not x:
-          continue
-        if split and x in split:
-          new_sentence += split[x]
-        else:
-          new_sentence.append(x)
-    return new_sentence
-
-class Calculator :
-  def __init__(self) :
-    self.data = {}
-    self.space = []
-    self.cost = {}
-    self.cost['cor'] = 0
-    self.cost['sub'] = 1
-    self.cost['del'] = 1
-    self.cost['ins'] = 1
-  def calculate(self, lab, rec) :
-    # Initialization
-    lab.insert(0, '')
-    rec.insert(0, '')
-    while len(self.space) < len(lab) :
-      self.space.append([])
-    for row in self.space :
-      for element in row :
-        element['dist'] = 0
-        element['error'] = 'non'
-      while len(row) < len(rec) :
-        row.append({'dist' : 0, 'error' : 'non'})
-    for i in range(len(lab)) :
-      self.space[i][0]['dist'] = i
-      self.space[i][0]['error'] = 'del'
-    for j in range(len(rec)) :
-      self.space[0][j]['dist'] = j
-      self.space[0][j]['error'] = 'ins'
-    self.space[0][0]['error'] = 'non'
-    for token in lab :
-      if token not in self.data and len(token) > 0 :
-        self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0}
-    for token in rec :
-      if token not in self.data and len(token) > 0 :
-        self.data[token] = {'all' : 0, 'cor' : 0, 'sub' : 0, 'ins' : 0, 'del' : 0}
-    # Computing edit distance
-    for i, lab_token in enumerate(lab) :
-      for j, rec_token in enumerate(rec) :
-        if i == 0 or j == 0 :
-          continue
-        min_dist = sys.maxsize
-        min_error = 'none'
-        dist = self.space[i-1][j]['dist'] + self.cost['del']
-        error = 'del'
-        if dist < min_dist :
-          min_dist = dist
-          min_error = error
-        dist = self.space[i][j-1]['dist'] + self.cost['ins']
-        error = 'ins'
-        if dist < min_dist :
-          min_dist = dist
-          min_error = error
-        if lab_token == rec_token :
-          dist = self.space[i-1][j-1]['dist'] + self.cost['cor']
-          error = 'cor'
-        else :
-          dist = self.space[i-1][j-1]['dist'] + self.cost['sub']
-          error = 'sub'
-        if dist < min_dist :
-          min_dist = dist
-          min_error = error
-        self.space[i][j]['dist'] = min_dist
-        self.space[i][j]['error'] = min_error
-    # Tracing back
-    result = {'lab':[], 'rec':[], 'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0}
-    i = len(lab) - 1
-    j = len(rec) - 1
-    while True :
-      if self.space[i][j]['error'] == 'cor' : # correct
-        if len(lab[i]) > 0 :
-          self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1
-          self.data[lab[i]]['cor'] = self.data[lab[i]]['cor'] + 1
-          result['all'] = result['all'] + 1
-          result['cor'] = result['cor'] + 1
-        result['lab'].insert(0, lab[i])
-        result['rec'].insert(0, rec[j])
-        i = i - 1
-        j = j - 1
-      elif self.space[i][j]['error'] == 'sub' : # substitution
-        if len(lab[i]) > 0 :
-          self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1
-          self.data[lab[i]]['sub'] = self.data[lab[i]]['sub'] + 1
-          result['all'] = result['all'] + 1
-          result['sub'] = result['sub'] + 1
-        result['lab'].insert(0, lab[i])
-        result['rec'].insert(0, rec[j])
-        i = i - 1
-        j = j - 1
-      elif self.space[i][j]['error'] == 'del' : # deletion
-        if len(lab[i]) > 0 :
-          self.data[lab[i]]['all'] = self.data[lab[i]]['all'] + 1
-          self.data[lab[i]]['del'] = self.data[lab[i]]['del'] + 1
-          result['all'] = result['all'] + 1
-          result['del'] = result['del'] + 1
-        result['lab'].insert(0, lab[i])
-        result['rec'].insert(0, "")
-        i = i - 1
-      elif self.space[i][j]['error'] == 'ins' : # insertion
-        if len(rec[j]) > 0 :
-          self.data[rec[j]]['ins'] = self.data[rec[j]]['ins'] + 1
-          result['ins'] = result['ins'] + 1
-        result['lab'].insert(0, "")
-        result['rec'].insert(0, rec[j])
-        j = j - 1
-      elif self.space[i][j]['error'] == 'non' : # starting point
-        break
-      else : # shouldn't reach here
-        print('this should not happen , i = {i} , j = {j} , error = {error}'.format(i = i, j = j, error = self.space[i][j]['error']))
-    return result
-  def overall(self) :
-    result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0}
-    for token in self.data :
-      result['all'] = result['all'] + self.data[token]['all']
-      result['cor'] = result['cor'] + self.data[token]['cor']
-      result['sub'] = result['sub'] + self.data[token]['sub']
-      result['ins'] = result['ins'] + self.data[token]['ins']
-      result['del'] = result['del'] + self.data[token]['del']
-    return result
-  def cluster(self, data) :
-    result = {'all':0, 'cor':0, 'sub':0, 'ins':0, 'del':0}
-    for token in data :
-      if token in self.data :
-        result['all'] = result['all'] + self.data[token]['all']
-        result['cor'] = result['cor'] + self.data[token]['cor']
-        result['sub'] = result['sub'] + self.data[token]['sub']
-        result['ins'] = result['ins'] + self.data[token]['ins']
-        result['del'] = result['del'] + self.data[token]['del']
-    return result
-  def keys(self) :
-      return list(self.data.keys())
-
-def width(string):
-  return sum(1 + (unicodedata.east_asian_width(c) in "AFW") for c in string)
-
-def default_cluster(word) :
-  unicode_names = [ unicodedata.name(char) for char in word ]
-  for i in reversed(range(len(unicode_names))) :
-    if unicode_names[i].startswith('DIGIT') :  # 1
-      unicode_names[i] = 'Number'  # 'DIGIT'
-    elif (unicode_names[i].startswith('CJK UNIFIED IDEOGRAPH') or
-          unicode_names[i].startswith('CJK COMPATIBILITY IDEOGRAPH')) :
-      # 明 / 郎
-      unicode_names[i] = 'Mandarin'  # 'CJK IDEOGRAPH'
-    elif (unicode_names[i].startswith('LATIN CAPITAL LETTER') or
-          unicode_names[i].startswith('LATIN SMALL LETTER')) :
-      # A / a
-      unicode_names[i] = 'English'  # 'LATIN LETTER'
-    elif unicode_names[i].startswith('HIRAGANA LETTER') :  # は こ め
-      unicode_names[i] = 'Japanese'  # 'GANA LETTER'
-    elif (unicode_names[i].startswith('AMPERSAND') or
-          unicode_names[i].startswith('APOSTROPHE') or
-          unicode_names[i].startswith('COMMERCIAL AT') or
-          unicode_names[i].startswith('DEGREE CELSIUS') or
-          unicode_names[i].startswith('EQUALS SIGN') or
-          unicode_names[i].startswith('FULL STOP') or
-          unicode_names[i].startswith('HYPHEN-MINUS') or
-          unicode_names[i].startswith('LOW LINE') or
-          unicode_names[i].startswith('NUMBER SIGN') or
-          unicode_names[i].startswith('PLUS SIGN') or
-          unicode_names[i].startswith('SEMICOLON')) :
-      # & / ' / @ / ℃ / = / . / - / _ / # / + / ;
-      del unicode_names[i]
-    else :
-      return 'Other'
-  if len(unicode_names) == 0 :
-      return 'Other'
-  if len(unicode_names) == 1 :
-      return unicode_names[0]
-  for i in range(len(unicode_names)-1) :
-    if unicode_names[i] != unicode_names[i+1] :
-      return 'Other'
-  return unicode_names[0]
-
-def usage() :
-  print("compute-wer.py : compute word error rate (WER) and align recognition results and references.")
-  print("         usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer")
-
-if __name__ == '__main__':
-  if len(sys.argv) == 1 :
-    usage()
-    sys.exit(0)
-  calculator = Calculator()
-  cluster_file = ''
-  ignore_words = set()
-  tochar = False
-  verbose= 1
-  padding_symbol= ' '
-  case_sensitive = False
-  max_words_per_line = sys.maxsize
-  split = None
-  while len(sys.argv) > 3:
-     a = '--maxw='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):]
-        del sys.argv[1]
-        max_words_per_line = int(b)
-        continue
-     a = '--rt='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):].lower()
-        del sys.argv[1]
-        remove_tag = (b == 'true') or (b != '0')
-        continue
-     a = '--cs='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):].lower()
-        del sys.argv[1]
-        case_sensitive = (b == 'true') or (b != '0')
-        continue
-     a = '--cluster='
-     if sys.argv[1].startswith(a):
-       cluster_file = sys.argv[1][len(a):]
-       del sys.argv[1]
-       continue
-     a = '--splitfile='
-     if sys.argv[1].startswith(a):
-       split_file = sys.argv[1][len(a):]
-       del sys.argv[1]
-       split = dict()
-       with codecs.open(split_file, 'r', 'utf-8') as fh:
-         for line in fh:  # line in unicode
-           words = line.strip().split()
-           if len(words) >= 2:
-             split[words[0]] = words[1:]
-       continue
-     a = '--ig='
-     if sys.argv[1].startswith(a):
-       ignore_file = sys.argv[1][len(a):]
-       del sys.argv[1]
-       with codecs.open(ignore_file, 'r', 'utf-8') as fh:
-         for line in fh:  # line in unicode
-           line = line.strip()
-           if len(line) > 0:
-             ignore_words.add(line)
-       continue
-     a = '--char='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):].lower()
-        del sys.argv[1]
-        tochar = (b == 'true') or (b != '0')
-        continue
-     a = '--v='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):].lower()
-        del sys.argv[1]
-        verbose=0
-        try:
-          verbose=int(b)
-        except:
-           if b == 'true' or b != '0':
-              verbose = 1
-        continue
-     a = '--padding-symbol='
-     if sys.argv[1].startswith(a):
-        b = sys.argv[1][len(a):].lower()
-        del sys.argv[1]
-        if b == 'space':
-          padding_symbol= ' '
-        elif b == 'underline':
-          padding_symbol= '_'
-        continue
-     if True or sys.argv[1].startswith('-'):
-        #ignore invalid switch
-        del sys.argv[1]
-        continue
-
-  if not case_sensitive:
-     ig=set([w.upper() for w in ignore_words])
-     ignore_words = ig
-
-  default_clusters = {}
-  default_words = {}
-
-  ref_file = sys.argv[1]
-  hyp_file = sys.argv[2]
-  rec_set = {}
-  if split and not case_sensitive:
-     newsplit = dict()
-     for w in split:
-        words = split[w]
-        for i in range(len(words)):
-           words[i] = words[i].upper()
-        newsplit[w.upper()] = words
-     split = newsplit
-
-  with codecs.open(hyp_file, 'r', 'utf-8') as fh:
-     for line in fh:
-        if tochar:
-            array = characterize(line)
-        else:
-            array = line.strip().split()
-        if len(array)==0: continue
-        fid = array[0]
-        rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive, split)
-
-  # compute error rate on the interaction of reference file and hyp file
-  for line in open(ref_file, 'r', encoding='utf-8') :
-    if tochar:
-          array = characterize(line)
-    else:
-          array = line.rstrip('\n').split()
-    if len(array)==0: continue
-    fid = array[0]
-    if fid not in rec_set:
-       continue
-    lab = normalize(array[1:], ignore_words, case_sensitive, split)
-    rec = rec_set[fid]
-    if verbose:
-      print('\nutt: %s' % fid)
-
-    for word in rec + lab :
-      if word not in default_words :
-         default_cluster_name = default_cluster(word)
-         if default_cluster_name not in default_clusters :
-           default_clusters[default_cluster_name] = {}
-         if word not in default_clusters[default_cluster_name] :
-           default_clusters[default_cluster_name][word] = 1
-         default_words[word] = default_cluster_name
-
-    result = calculator.calculate(lab, rec)
-    if verbose:
-      if result['all'] != 0 :
-        wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-      else :
-        wer = 0.0
-      print('WER: %4.2f %%' % wer, end = ' ')
-      print('N=%d C=%d S=%d D=%d I=%d' %
-          (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-      space = {}
-      space['lab'] = []
-      space['rec'] = []
-      for idx in range(len(result['lab'])) :
-        len_lab = width(result['lab'][idx])
-        len_rec = width(result['rec'][idx])
-        length = max(len_lab, len_rec)
-        space['lab'].append(length-len_lab)
-        space['rec'].append(length-len_rec)
-      upper_lab = len(result['lab'])
-      upper_rec = len(result['rec'])
-      lab1, rec1 = 0, 0
-      while lab1 < upper_lab or rec1 < upper_rec:
-         if verbose > 1:
-             print('lab(%s):' % fid.encode('utf-8'), end = ' ')
-         else:
-             print('lab:', end = ' ')
-         lab2 = min(upper_lab, lab1 + max_words_per_line)
-         for idx in range(lab1, lab2):
-           token = result['lab'][idx]
-           print('{token}'.format(token = token), end = '')
-           for n in range(space['lab'][idx]) :
-             print(padding_symbol, end = '')
-           print(' ',end='')
-         print()
-         if verbose > 1:
-            print('rec(%s):' % fid.encode('utf-8'), end = ' ')
-         else:
-            print('rec:', end = ' ')
-         rec2 = min(upper_rec, rec1 + max_words_per_line)
-         for idx in range(rec1, rec2):
-           token = result['rec'][idx]
-           print('{token}'.format(token = token), end = '')
-           for n in range(space['rec'][idx]) :
-             print(padding_symbol, end = '')
-           print(' ',end='')
-         print('\n', end='\n')
-         lab1 = lab2
-         rec1 = rec2
-
-  if verbose:
-    print('===========================================================================')
-    print()
-
-  result = calculator.overall()
-  if result['all'] != 0 :
-    wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-  else :
-    wer = 0.0
-  print('Overall -> %4.2f %%' % wer, end = ' ')
-  print('N=%d C=%d S=%d D=%d I=%d' %
-        (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-  if not verbose:
-     print()
-
-  if verbose:
-   for cluster_id in default_clusters :
-     result = calculator.cluster([ k for k in default_clusters[cluster_id] ])
-     if result['all'] != 0 :
-        wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-     else :
-        wer = 0.0
-     print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ')
-     print('N=%d C=%d S=%d D=%d I=%d' %
-          (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-   if len(cluster_file) > 0 : # compute separated WERs for word clusters
-     cluster_id = ''
-     cluster = []
-     for line in open(cluster_file, 'r', encoding='utf-8') :
-       for token in line.decode('utf-8').rstrip('\n').split() :
-        # end of cluster reached, like </Keyword>
-        if token[0:2] == '</' and token[len(token)-1] == '>' and \
-           token.lstrip('</').rstrip('>') == cluster_id :
-          result = calculator.cluster(cluster)
-          if result['all'] != 0 :
-            wer = float(result['ins'] + result['sub'] + result['del']) * 100.0 / result['all']
-          else :
-            wer = 0.0
-          print('%s -> %4.2f %%' % (cluster_id, wer), end = ' ')
-          print('N=%d C=%d S=%d D=%d I=%d' %
-                (result['all'], result['cor'], result['sub'], result['del'], result['ins']))
-          cluster_id = ''
-          cluster = []
-        # begin of cluster reached, like <Keyword>
-        elif token[0] == '<' and token[len(token)-1] == '>' and \
-             cluster_id == '' :
-          cluster_id = token.lstrip('<').rstrip('>')
-          cluster = []
-        # general terms, like WEATHER / CAR / ...
-        else :
-          cluster.append(token)
-   print()
-   print('===========================================================================')
--- a/speechx/examples/aishell/utils
+++ b/speechx/examples/aishell/utils
@ -1 +0,0 @@
-../../../utils
--- a/speechx/examples/decoder/CMakeLists.txt
+++ b/speechx/examples/decoder/CMakeLists.txt
@ -1,18 +0,0 @@
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-
-add_executable(offline_decoder_sliding_chunk_main ${CMAKE_CURRENT_SOURCE_DIR}/offline_decoder_sliding_chunk_main.cc)
-target_include_directories(offline_decoder_sliding_chunk_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(offline_decoder_sliding_chunk_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
-
-add_executable(offline_decoder_main ${CMAKE_CURRENT_SOURCE_DIR}/offline_decoder_main.cc)
-target_include_directories(offline_decoder_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(offline_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
-
-add_executable(offline_wfst_decoder_main ${CMAKE_CURRENT_SOURCE_DIR}/offline_wfst_decoder_main.cc)
-target_include_directories(offline_wfst_decoder_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(offline_wfst_decoder_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder ${DEPS})
-
-add_executable(decoder_test_main ${CMAKE_CURRENT_SOURCE_DIR}/decoder_test_main.cc)
-target_include_directories(decoder_test_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(decoder_test_main PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS})
-
--- a/speechx/examples/decoder/offline_decoder_main.cc
+++ b/speechx/examples/decoder/offline_decoder_main.cc
@ -1,121 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// todo refactor, repalce with gtest
-
-#include "base/flags.h"
-#include "base/log.h"
-#include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/audio/data_cache.h"
-#include "kaldi/util/table-types.h"
-#include "nnet/decodable.h"
-#include "nnet/paddle_nnet.h"
-
-DEFINE_string(feature_respecifier, "", "feature matrix rspecifier");
-DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
-DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
-DEFINE_string(dict_file, "vocab.txt", "vocabulary of lm");
-DEFINE_string(lm_path, "lm.klm", "language model");
-DEFINE_int32(chunk_size, 35, "feat chunk size");
-
-
-using kaldi::BaseFloat;
-using kaldi::Matrix;
-using std::vector;
-
-// test decoder by feeding speech feature, deprecated.
-int main(int argc, char* argv[]) {
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-
-    kaldi::SequentialBaseFloatMatrixReader feature_reader(
-        FLAGS_feature_respecifier);
-    std::string model_graph = FLAGS_model_path;
-    std::string model_params = FLAGS_param_path;
-    std::string dict_file = FLAGS_dict_file;
-    std::string lm_path = FLAGS_lm_path;
-    int32 chunk_size = FLAGS_chunk_size;
-    LOG(INFO) << "model path: " << model_graph;
-    LOG(INFO) << "model param: " << model_params;
-    LOG(INFO) << "dict path: " << dict_file;
-    LOG(INFO) << "lm path: " << lm_path;
-    LOG(INFO) << "chunk size (frame): " << chunk_size;
-
-    int32 num_done = 0, num_err = 0;
-
-    // frontend + nnet is decodable
-    ppspeech::ModelOptions model_opts;
-    model_opts.model_path = model_graph;
-    model_opts.params_path = model_params;
-    std::shared_ptr<ppspeech::PaddleNnet> nnet(
-        new ppspeech::PaddleNnet(model_opts));
-    std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache());
-    std::shared_ptr<ppspeech::Decodable> decodable(
-        new ppspeech::Decodable(nnet, raw_data));
-    LOG(INFO) << "Init decodeable.";
-
-    // init decoder
-    ppspeech::CTCBeamSearchOptions opts;
-    opts.dict_file = dict_file;
-    opts.lm_path = lm_path;
-    ppspeech::CTCBeamSearch decoder(opts);
-    LOG(INFO) << "Init decoder.";
-
-    decoder.InitDecoder();
-    for (; !feature_reader.Done(); feature_reader.Next()) {
-        string utt = feature_reader.Key();
-        const kaldi::Matrix<BaseFloat> feature = feature_reader.Value();
-        LOG(INFO) << "utt: " << utt;
-
-        // feat dim
-        raw_data->SetDim(feature.NumCols());
-        LOG(INFO) << "dim: " << raw_data->Dim();
-
-        int32 row_idx = 0;
-        int32 num_chunks = feature.NumRows() / chunk_size;
-        LOG(INFO) << "n chunks: " << num_chunks;
-        for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
-            // feat chunk
-            kaldi::Vector<kaldi::BaseFloat> feature_chunk(chunk_size *
-                                                          feature.NumCols());
-            for (int row_id = 0; row_id < chunk_size; ++row_id) {
-                kaldi::SubVector<kaldi::BaseFloat> feat_one_row(feature,
-                                                                row_idx);
-                kaldi::SubVector<kaldi::BaseFloat> f_chunk_tmp(
-                    feature_chunk.Data() + row_id * feature.NumCols(),
-                    feature.NumCols());
-                f_chunk_tmp.CopyFromVec(feat_one_row);
-                row_idx++;
-            }
-            // feed to raw cache
-            raw_data->Accept(feature_chunk);
-            if (chunk_idx == num_chunks - 1) {
-                raw_data->SetFinished();
-            }
-            // decode step
-            decoder.AdvanceDecode(decodable);
-        }
-
-        std::string result;
-        result = decoder.GetFinalBestPath();
-        KALDI_LOG << " the result of " << utt << " is " << result;
-        decodable->Reset();
-        decoder.Reset();
-        ++num_done;
-    }
-
-    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
-              << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-}
--- a/speechx/examples/decoder/run.sh
+++ b/speechx/examples/decoder/run.sh
@ -1,43 +0,0 @@
-#!/bin/bash
-set +x
-set -e
-
-. path.sh
-
-# 1. compile
-if [ ! -d ${SPEECHX_EXAMPLES} ]; then
-    pushd ${SPEECHX_ROOT} 
-    bash build.sh
-    popd
-fi
-
-
-# 2. download model
-if [ ! -d ../paddle_asr_model ]; then
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/paddle_asr_model.tar.gz
-    tar xzfv paddle_asr_model.tar.gz
-    mv ./paddle_asr_model ../
-    # produce wav scp
-    echo "utt1 " $PWD/../paddle_asr_model/BAC009S0764W0290.wav > ../paddle_asr_model/wav.scp
-fi
-
-model_dir=../paddle_asr_model
-feat_wspecifier=./feats.ark
-cmvn=./cmvn.ark
-
-
-export GLOG_logtostderr=1
-
-# 3. gen linear feat
-linear_spectrogram_main \
-    --wav_rspecifier=scp:$model_dir/wav.scp \
-    --feature_wspecifier=ark,t:$feat_wspecifier \
-    --cmvn_write_path=$cmvn
-
-# 4. run decoder
-offline_decoder_main \
-    --feature_respecifier=ark:$feat_wspecifier \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$model_dir/avg_1.jit.pdparams \
-    --dict_file=$model_dir/vocab.txt \
-    --lm_path=$model_dir/avg_1.jit.klm
--- a/speechx/examples/dev/CMakeLists.txt
+++ b/speechx/examples/dev/CMakeLists.txt
@ -0,0 +1,3 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+add_subdirectory(glog)
--- a/speechx/examples/dev/glog/CMakeLists.txt
+++ b/speechx/examples/dev/glog/CMakeLists.txt
--- a/speechx/examples/dev/glog/README.md
+++ b/speechx/examples/dev/glog/README.md
--- a/speechx/examples/dev/glog/glog_logtostderr_test.cc
+++ b/speechx/examples/dev/glog/glog_logtostderr_test.cc
--- a/speechx/examples/dev/glog/glog_test.cc
+++ b/speechx/examples/dev/glog/glog_test.cc
--- a/speechx/examples/dev/glog/path.sh
+++ b/speechx/examples/dev/glog/path.sh
@ -1,14 +1,15 @@
 # This contains the locations of binarys build required for running the examples.

-SPEECHX_ROOT=$PWD/../..
-SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+SPEECHX_ROOT=$PWD/../../../

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
 TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

-[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }

-export LC_AL=C
+SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }

-SPEECHX_BIN=$SPEECHX_EXAMPLES/nnet
+SPEECHX_BIN=$SPEECHX_EXAMPLES/dev/glog
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
+
+export LC_ALL=C  # select the C locale for every locale category
--- a/speechx/examples/dev/glog/run.sh
+++ b/speechx/examples/dev/glog/run.sh
--- a/speechx/examples/ds2_ol/CMakeLists.txt
+++ b/speechx/examples/ds2_ol/CMakeLists.txt
@ -0,0 +1,5 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+add_subdirectory(feat)
+add_subdirectory(nnet)
+add_subdirectory(decoder)
--- a/speechx/examples/ds2_ol/README.md
+++ b/speechx/examples/ds2_ol/README.md
@ -0,0 +1,11 @@
+# Deepspeech2 Streaming
+
+Please go to `aishell` to test it.
+
+* aishell
+Deepspeech2 streaming decoding on the aishell dataset.
+
+The directories below are for development and offline testing:
+* nnet
+* feat
+* decoder
--- a/speechx/examples/ds2_ol/aishell/local/split_data.sh
+++ b/speechx/examples/ds2_ol/aishell/local/split_data.sh
--- a/speechx/examples/ds2_ol/aishell/path.sh
+++ b/speechx/examples/ds2_ol/aishell/path.sh
@ -1,6 +1,6 @@
 # This contains the locations of binarys build required for running the examples.

-SPEECHX_ROOT=$PWD/../..
+SPEECHX_ROOT=$PWD/../../../
 SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@ -11,18 +11,20 @@ if [ ! -d ${SPEECHX_EXAMPLES} ]; then
    popd
 fi

-
-# 2. download model
-if [ ! -d ../paddle_asr_model ]; then
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/paddle_asr_model.tar.gz
-    tar xzfv paddle_asr_model.tar.gz
-    mv ./paddle_asr_model ../
-    # produce wav scp
-    echo "utt1 " $PWD/../paddle_asr_model/BAC009S0764W0290.wav > ../paddle_asr_model/wav.scp
-fi
-
+# input
 mkdir -p data
 data=$PWD/data
+
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
+vocb_dir=$ckpt_dir/data/lang_char/
+
+lm=$data/zh_giga.no_cna_cmn.prune01244.klm
+
+# output
+mkdir -p exp
+exp=$PWD/exp
+
 aishell_wav_scp=aishell_test.scp
 if [ ! -d $data/test ]; then
    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
@ -39,9 +41,15 @@ if [ ! -d $model_dir ]; then
    tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $model_dir
 fi

+if [ ! -f $lm ]; then
+    pushd $data
+    wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
+    popd
+fi
+
+
 # 3. make feature
 aishell_online_model=$model_dir/exp/deepspeech2_online/checkpoints
-lm_model_dir=../paddle_asr_model
 label_file=./aishell_result
 wer=./aishell_wer

@ -71,11 +79,11 @@ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --dict_file=$lm_model_dir/vocab.txt \
+    --dict_file=$vocb_dir/vocab.txt \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result

 cat $data/split${nj}/*/result > ${label_file}
-local/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}
+utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}

 # 4. decode with lm
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \
@ -84,12 +92,14 @@ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \
    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --dict_file=$lm_model_dir/vocab.txt \
-    --lm_path=$lm_model_dir/avg_1.jit.klm \
+    --dict_file=$vocb_dir/vocab.txt \
+    --lm_path=$lm \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_lm

+
 cat $data/split${nj}/*/result_lm > ${label_file}_lm
-local/compute-wer.py --char=1 --v=1 ${label_file}_lm $text > ${wer}_lm
+utils/compute-wer.py --char=1 --v=1 ${label_file}_lm $text > ${wer}_lm
+

 graph_dir=./aishell_graph
 if [ ! -d $ ]; then
@ -97,6 +107,7 @@ if [ ! -d $ ]; then
    unzip -d aishell_graph.zip
 fi

+
 # 5. test TLG decoder
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \
  offline_wfst_decoder_main \
@ -109,5 +120,6 @@ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \
    --acoustic_scale=1.2 \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_tlg

+
 cat $data/split${nj}/*/result_tlg > ${label_file}_tlg
-local/compute-wer.py --char=1 --v=1 ${label_file}_tlg $text > ${wer}_tlg
+utils/compute-wer.py --char=1 --v=1 ${label_file}_tlg $text > ${wer}_tlg
--- a/speechx/examples/ds2_ol/aishell/utils
+++ b/speechx/examples/ds2_ol/aishell/utils
@ -0,0 +1 @@
+../../../../utils/
--- a/speechx/examples/ds2_ol/decoder/.gitignore
+++ b/speechx/examples/ds2_ol/decoder/.gitignore
@ -0,0 +1,2 @@
+data
+exp
--- a/speechx/examples/ds2_ol/decoder/CMakeLists.txt
+++ b/speechx/examples/ds2_ol/decoder/CMakeLists.txt
@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+# Each example binary below is built from the same-named .cc file in this
+# directory and sees the speechx root and its bundled kaldi headers.
+
+# ctc-prefix-beam-search-decoder-ol
+set(bin_name ctc-prefix-beam-search-decoder-ol)
+add_executable(${bin_name}
+    ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name}
+    PRIVATE
+        ${SPEECHX_ROOT}
+        ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name}
+    PUBLIC
+        nnet decoder fst utils gflags glog
+        kaldi-base kaldi-matrix kaldi-util
+        ${DEPS})
+
+# wfst-decoder-ol (additionally links kaldi-decoder)
+set(bin_name wfst-decoder-ol)
+add_executable(${bin_name}
+    ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name}
+    PRIVATE
+        ${SPEECHX_ROOT}
+        ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name}
+    PUBLIC
+        nnet decoder fst utils gflags glog
+        kaldi-base kaldi-matrix kaldi-util kaldi-decoder
+        ${DEPS})
+
+# nnet-logprob-decoder-test
+set(bin_name nnet-logprob-decoder-test)
+add_executable(${bin_name}
+    ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name}
+    PRIVATE
+        ${SPEECHX_ROOT}
+        ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name}
+    PUBLIC
+        nnet decoder fst utils gflags glog
+        kaldi-base kaldi-matrix kaldi-util
+        ${DEPS})
+
--- a/speechx/examples/ds2_ol/decoder/README.md
+++ b/speechx/examples/ds2_ol/decoder/README.md
@ -0,0 +1,12 @@
+# ASR Decoder
+
+ASR decoder test binaries. We use these binaries to test the CTC beam search decoder and the WFST decoder.
+
+* decoder_test_main.cc 
+Feeds nnet output log-probabilities to the decoder, so only the decoder is tested.
+
+* offline_decoder_sliding_chunk_main.cc
+Feeds streaming audio features and decodes them in a streaming manner.
+
+* offline_wfst_decoder_main.cc
+Feeds streaming audio features and decodes them with WFST in a streaming manner.
--- a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
@ -34,10 +34,11 @@ DEFINE_int32(receptive_field_length,
 DEFINE_int32(downsampling_rate,
             4,
             "two CNN(kernel=5) module downsampling rate.");
+DEFINE_string(model_input_names,
+              "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
+              "model input names");
 DEFINE_string(model_output_names,
-              "save_infer_model/scale_0.tmp_1,save_infer_model/"
-              "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
-              "scale_3.tmp_1",
+              "softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
              "model output names");
 DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");

@ -50,9 +51,13 @@ int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

+    CHECK(FLAGS_result_wspecifier != "");
+    CHECK(FLAGS_feature_rspecifier != "");
+
    kaldi::SequentialBaseFloatMatrixReader feature_reader(
        FLAGS_feature_rspecifier);
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
+    
    std::string model_graph = FLAGS_model_path;
    std::string model_params = FLAGS_param_path;
    std::string dict_file = FLAGS_dict_file;
@ -73,6 +78,7 @@ int main(int argc, char* argv[]) {
    model_opts.model_path = model_graph;
    model_opts.params_path = model_params;
    model_opts.cache_shape = FLAGS_model_cache_names;
+    model_opts.input_names = FLAGS_model_input_names;
    model_opts.output_names = FLAGS_model_output_names;
    std::shared_ptr<ppspeech::PaddleNnet> nnet(
        new ppspeech::PaddleNnet(model_opts));
--- a/speechx/examples/ds2_ol/decoder/local/model.sh
+++ b/speechx/examples/ds2_ol/decoder/local/model.sh
--- a/speechx/examples/ds2_ol/decoder/nnet-logprob-decoder-test.cc
+++ b/speechx/examples/ds2_ol/decoder/nnet-logprob-decoder-test.cc
--- a/speechx/examples/ds2_ol/decoder/path.sh
+++ b/speechx/examples/ds2_ol/decoder/path.sh
@ -1,6 +1,6 @@
 # This contains the locations of the built binaries required for running the examples.

-SPEECHX_ROOT=$PWD/../..
+SPEECHX_ROOT=$PWD/../../../
 SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

 export LC_AL=C

-SPEECHX_BIN=$SPEECHX_EXAMPLES/decoder:$SPEECHX_EXAMPLES/feat
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/decoder/run.sh
+++ b/speechx/examples/ds2_ol/decoder/run.sh
@ -0,0 +1,79 @@
+#!/bin/bash
+set +x
+set -e
+
+. path.sh
+
+# 1. compile
+if [ ! -d ${SPEECHX_EXAMPLES} ]; then
+    pushd ${SPEECHX_ROOT} 
+    bash build.sh
+    popd
+fi
+
+# input
+mkdir -p data
+data=$PWD/data
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
+vocb_dir=$ckpt_dir/data/lang_char/
+
+lm=$data/zh_giga.no_cna_cmn.prune01244.klm
+
+# output
+exp_dir=./exp
+mkdir -p $exp_dir
+
+# 2. download model
+if [[ ! -f data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]]; then
+    mkdir -p data/model
+    pushd data/model
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    popd
+fi
+
+# produce wav scp
+if [ ! -f data/wav.scp ]; then
+    pushd data
+    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+    echo "utt1 " $PWD/zh.wav > wav.scp
+    popd 
+fi
+
+# download lm
+if [ ! -f $lm ]; then
+    pushd data
+    wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
+    popd
+fi
+
+
+feat_wspecifier=$exp_dir/feats.ark
+cmvn=$exp_dir/cmvn.ark
+
+export GLOG_logtostderr=1
+
+# dump json cmvn to kaldi
+cmvn-json2kaldi \
+    --json_file  $ckpt_dir/data/mean_std.json \
+    --cmvn_write_path $exp_dir/cmvn.ark \
+    --binary=false
+echo "convert json cmvn to kaldi ark."
+
+
+# generate linear feature as streaming
+linear-spectrogram-wo-db-norm-ol \
+    --wav_rspecifier=scp:$data/wav.scp \
+    --feature_wspecifier=ark,t:$feat_wspecifier \
+    --cmvn_file=$exp_dir/cmvn.ark
+echo "compute linear spectrogram feature."
+
+# run ctc beam search decoder as streaming
+ctc-prefix-beam-search-decoder-ol \
+    --result_wspecifier=ark,t:$exp_dir/result.txt \
+    --feature_rspecifier=ark:$feat_wspecifier \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
+    --dict_file=$vocb_dir/vocab.txt \
+    --lm_path=$lm
--- a/speechx/examples/ds2_ol/decoder/valgrind.sh
+++ b/speechx/examples/ds2_ol/decoder/valgrind.sh
--- a/speechx/examples/decoder/offline_wfst_decoder_main.cc
+++ b/speechx/examples/decoder/offline_wfst_decoder_main.cc
@ -28,6 +28,7 @@ DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
 DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
 DEFINE_string(word_symbol_table, "words.txt", "word symbol table");
 DEFINE_string(graph_path, "TLG", "decoder graph");
+
 DEFINE_double(acoustic_scale, 1.0, "acoustic scale");
 DEFINE_int32(max_active, 7500, "decoder graph");
 DEFINE_int32(receptive_field_length,
--- a/speechx/examples/ds2_ol/feat/.gitignore
+++ b/speechx/examples/ds2_ol/feat/.gitignore
@ -0,0 +1,2 @@
+exp
+data
--- a/speechx/examples/ds2_ol/feat/CMakeLists.txt
+++ b/speechx/examples/ds2_ol/feat/CMakeLists.txt
@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+# Each example binary is built from the same-named .cc file in this directory.
+# Link dependencies are PRIVATE: these are executables with no consumers, and
+# the explicit keyword avoids the legacy keyword-less signature.
+
+# linear-spectrogram-wo-db-norm-ol
+set(bin_name linear-spectrogram-wo-db-norm-ol)
+add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name} PRIVATE frontend kaldi-util kaldi-feat-common gflags glog)
+
+
+# cmvn-json2kaldi
+set(bin_name cmvn-json2kaldi)
+add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name} PRIVATE utils kaldi-util kaldi-matrix gflags glog)
--- a/speechx/examples/ds2_ol/feat/README.md
+++ b/speechx/examples/ds2_ol/feat/README.md
@ -0,0 +1,8 @@
+# Deepspeech2 Streaming Audio Feature
+
+ASR audio feature test binaries. We use these binaries to test linear/fbank/MFCC ASR features in a streaming manner.
+
+* linear_spectrogram_without_db_norm_main.cc
+
+compute linear spectrogram w/o db norm in streaming manner.
+
--- a/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+++ b/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+// Note: Do not print/log ondemand object.
+
 #include "base/flags.h"
 #include "base/log.h"
 #include "kaldi/matrix/kaldi-matrix.h"
@ -28,23 +30,30 @@ using namespace simdjson;
 int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
+    
+    LOG(INFO) << "cmvn josn path: " << FLAGS_json_file ;
+    padded_string json = padded_string::load(FLAGS_json_file);

    ondemand::parser parser;
-    padded_string json = padded_string::load(FLAGS_json_file);
-    ondemand::document val = parser.iterate(json);
-    ondemand::object doc = val;
-    kaldi::int32 frame_num = uint64_t(doc["frame_num"]);
-    auto mean_stat = doc["mean_stat"];
+    ondemand::document doc = parser.iterate(json);
+    ondemand::value val = doc;
+
+    ondemand::array mean_stat = val["mean_stat"];
    std::vector<kaldi::BaseFloat> mean_stat_vec;
    for (double x : mean_stat) {
        mean_stat_vec.push_back(x);
    }
-    auto var_stat = doc["var_stat"];
+    // LOG(INFO) << mean_stat; this line will cause simdjson::simdjson_error("Objects and arrays can only be iterated when they are first encountered")
+
+    ondemand::array  var_stat = val["var_stat"];
    std::vector<kaldi::BaseFloat> var_stat_vec;
    for (double x : var_stat) {
        var_stat_vec.push_back(x);
    }

+    kaldi::int32 frame_num = uint64_t(val["frame_num"]);
+    LOG(INFO) << "nframe: " << frame_num;
+   
    size_t mean_size = mean_stat_vec.size();
    kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
    for (size_t idx = 0; idx < mean_size; ++idx) {
@ -52,7 +61,10 @@ int main(int argc, char* argv[]) {
        cmvn_stats(1, idx) = var_stat_vec[idx];
    }
    cmvn_stats(0, mean_size) = frame_num;
+    LOG(INFO) << cmvn_stats;
+
    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
-    LOG(INFO) << "the json file have write into " << FLAGS_cmvn_write_path;
+    LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
+    LOG(INFO) << "Binary: " << FLAGS_binary;
    return 0;
 }
--- a/speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
+++ b/speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
@ -32,6 +32,7 @@ DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
 DEFINE_string(cmvn_file, "./cmvn.ark", "read cmvn");
 DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");

+
 int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
--- a/speechx/examples/ds2_ol/feat/path.sh
+++ b/speechx/examples/ds2_ol/feat/path.sh
@ -1,6 +1,6 @@
 # This contains the locations of the built binaries required for running the examples.

-SPEECHX_ROOT=$PWD/../..
+SPEECHX_ROOT=$PWD/../../../
 SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

 export LC_AL=C

-SPEECHX_BIN=$SPEECHX_EXAMPLES/feat
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/feat
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/feat/run.sh
+++ b/speechx/examples/ds2_ol/feat/run.sh
@ -0,0 +1,57 @@
+#!/bin/bash
+set +x  # keep command tracing off
+set -e  # abort on the first failing command
+
+. ./path.sh  # sets SPEECHX_ROOT / SPEECHX_EXAMPLES and adds the example binaries to PATH
+
+# 1. Compile: run build.sh in SPEECHX_ROOT when the examples build directory is missing.
+if [ ! -d ${SPEECHX_EXAMPLES} ]; then
+    pushd ${SPEECHX_ROOT} 
+    bash build.sh
+    popd
+fi
+
+# 2. Download and unpack the model checkpoint; skipped when the tarball already exists.
+if [ ! -e data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
+    mkdir -p data/model
+    pushd data/model
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    popd
+fi
+
+# Produce wav.scp: download a sample wav and list it as utterance "utt1" by absolute path.
+if [ ! -f data/wav.scp ]; then
+    mkdir -p data
+    pushd data
+    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+    echo "utt1 " $PWD/zh.wav > wav.scp
+    popd 
+fi
+
+
+# Input and output locations.
+data_dir=./data
+exp_dir=./exp
+model_dir=$data_dir/model/
+
+mkdir -p $exp_dir
+
+
+# 3. Run the feature pipeline; glog output goes to stderr.
+export GLOG_logtostderr=1
+# Convert the checkpoint's JSON CMVN stats into a text-format Kaldi ark.
+cmvn-json2kaldi \
+    --json_file  $model_dir/data/mean_std.json \
+    --cmvn_write_path $exp_dir/cmvn.ark \
+    --binary=false
+echo "convert json cmvn to kaldi ark."
+
+# Compute linear spectrogram features for wav.scp using the CMVN ark written above.
+linear-spectrogram-wo-db-norm-ol \
+    --wav_rspecifier=scp:$data_dir/wav.scp \
+    --feature_wspecifier=ark,t:$exp_dir/feats.ark \
+    --cmvn_file=$exp_dir/cmvn.ark
+echo "compute linear spectrogram feature."
+
+
--- a/speechx/examples/ds2_ol/feat/valgrind.sh
+++ b/speechx/examples/ds2_ol/feat/valgrind.sh
--- a/speechx/examples/ds2_ol/nnet/.gitignore
+++ b/speechx/examples/ds2_ol/nnet/.gitignore
@ -0,0 +1,2 @@
+data
+exp
--- a/speechx/examples/ds2_ol/nnet/CMakeLists.txt
+++ b/speechx/examples/ds2_ol/nnet/CMakeLists.txt
@ -0,0 +1,6 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+# ds2-model-ol-test: built from the same-named .cc file in this directory.
+set(bin_name ds2-model-ol-test)
+add_executable(${bin_name}
+    ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+target_include_directories(${bin_name}
+    PRIVATE
+        ${SPEECHX_ROOT}
+        ${SPEECHX_ROOT}/kaldi)
+target_link_libraries(${bin_name}
+    PUBLIC
+        nnet
+        gflags
+        glog
+        ${DEPS})
--- a/speechx/examples/ds2_ol/nnet/README.md
+++ b/speechx/examples/ds2_ol/nnet/README.md
@ -0,0 +1,3 @@
+# Deepspeech2 Streaming NNet Test
+
+Used for testing DS2 streaming nnet inference.
--- a/speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
+++ b/speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include <gflags/gflags.h>
+// deepspeech2 online model info
+
+#include "base/flags.h"
+#include "base/log.h"
 #include <algorithm>
 #include <fstream>
 #include <functional>
@ -25,16 +28,19 @@
 using std::cout;
 using std::endl;

-DEFINE_string(model_path, "avg_1.jit.pdmodel", "xxx.pdmodel");
-DEFINE_string(param_path, "avg_1.jit.pdiparams", "xxx.pdiparams");
+
+DEFINE_string(model_path, "", "xxx.pdmodel");
+DEFINE_string(param_path, "", "xxx.pdiparams");
+DEFINE_int32(chunk_size, 35, "feature chunk size, unit:frame");
+DEFINE_int32(feat_dim, 161, "feature dim");


 void produce_data(std::vector<std::vector<float>>* data);
 void model_forward_test();

 void produce_data(std::vector<std::vector<float>>* data) {
-    int chunk_size = 35;  // chunk_size in frame
-    int col_size = 161;   // feat dim
+    int chunk_size = FLAGS_chunk_size;  // chunk_size in frame
+    int col_size = FLAGS_feat_dim;   // feat dim
    cout << "chunk size: " << chunk_size << endl;
    cout << "feat dim: " << col_size << endl;

@ -57,6 +63,8 @@ void model_forward_test() {
    ;
    std::string model_graph = FLAGS_model_path;
    std::string model_params = FLAGS_param_path;
+    CHECK(model_graph != "");
+    CHECK(model_params != "");
    cout << "model path: " << model_graph << endl;
    cout << "model param path : " << model_params << endl;

@ -106,7 +114,7 @@ void model_forward_test() {
    // state_h
    std::unique_ptr<paddle_infer::Tensor> chunk_state_h_box =
        predictor->GetInputHandle(input_names[2]);
-    std::vector<int> chunk_state_h_box_shape = {3, 1, 1024};
+    std::vector<int> chunk_state_h_box_shape = {5, 1, 1024};
    chunk_state_h_box->Reshape(chunk_state_h_box_shape);
    int chunk_state_h_box_size =
        std::accumulate(chunk_state_h_box_shape.begin(),
@ -119,7 +127,7 @@ void model_forward_test() {
    // state_c
    std::unique_ptr<paddle_infer::Tensor> chunk_state_c_box =
        predictor->GetInputHandle(input_names[3]);
-    std::vector<int> chunk_state_c_box_shape = {3, 1, 1024};
+    std::vector<int> chunk_state_c_box_shape = {5, 1, 1024};
    chunk_state_c_box->Reshape(chunk_state_c_box_shape);
    int chunk_state_c_box_size =
        std::accumulate(chunk_state_c_box_shape.begin(),
@ -187,7 +195,9 @@ void model_forward_test() {
 }

 int main(int argc, char* argv[]) {
-    gflags::ParseCommandLineFlags(&argc, &argv, true);
+    gflags::ParseCommandLineFlags(&argc, &argv, false);
+    google::InitGoogleLogging(argv[0]);
+    
    model_forward_test();
    return 0;
 }
--- a/speechx/examples/ds2_ol/nnet/path.sh
+++ b/speechx/examples/ds2_ol/nnet/path.sh
@ -1,6 +1,6 @@
 # This contains the locations of the built binaries required for running the examples.

-SPEECHX_ROOT=$PWD/../..
+SPEECHX_ROOT=$PWD/../../../
 SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples

 SPEECHX_TOOLS=$SPEECHX_ROOT/tools
@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

 export LC_AL=C

-SPEECHX_BIN=$SPEECHX_EXAMPLES/glog
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/nnet
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/nnet/run.sh
+++ b/speechx/examples/ds2_ol/nnet/run.sh
@ -0,0 +1,38 @@
+#!/bin/bash
+set +x  # keep command tracing off
+set -e  # abort on the first failing command
+
+. path.sh  # sets SPEECHX_ROOT / SPEECHX_EXAMPLES and adds the example binaries to PATH
+
+# 1. Compile: run build.sh in SPEECHX_ROOT when the examples build directory is missing.
+if [ ! -d ${SPEECHX_EXAMPLES} ]; then
+    pushd ${SPEECHX_ROOT} 
+    bash build.sh
+    popd
+fi
+
+# 2. Download and unpack the model checkpoint; skipped when the tarball already exists.
+if [ ! -f data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
+    mkdir -p data/model
+    pushd data/model
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    popd
+fi
+
+# Produce wav.scp: download a sample wav and list it as utterance "utt1" by absolute path.
+if [ ! -f data/wav.scp ]; then
+    mkdir -p data
+    pushd data
+    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+    echo "utt1 " $PWD/zh.wav > wav.scp
+    popd 
+fi
+# Location of the exported inference model inside the unpacked checkpoint.
+ckpt_dir=./data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
+# Run the test binary on the exported model and parameter files.
+ds2-model-ol-test \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --param_path=$model_dir/avg_1.jit.pdiparams
+
--- a/speechx/examples/ds2_ol/nnet/valgrind.sh
+++ b/speechx/examples/ds2_ol/nnet/valgrind.sh
--- a/speechx/examples/feat/CMakeLists.txt
+++ b/speechx/examples/feat/CMakeLists.txt
@ -1,18 +0,0 @@
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-
-
-add_executable(mfcc-test ${CMAKE_CURRENT_SOURCE_DIR}/feature-mfcc-test.cc)
-target_include_directories(mfcc-test PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(mfcc-test kaldi-mfcc)
-
-add_executable(linear_spectrogram_main ${CMAKE_CURRENT_SOURCE_DIR}/linear_spectrogram_main.cc)
-target_include_directories(linear_spectrogram_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog)
-
-add_executable(linear_spectrogram_without_db_norm_main ${CMAKE_CURRENT_SOURCE_DIR}/linear_spectrogram_without_db_norm_main.cc)
-target_include_directories(linear_spectrogram_without_db_norm_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(linear_spectrogram_without_db_norm_main frontend kaldi-util kaldi-feat-common gflags glog)
-
-add_executable(cmvn_json2binary_main ${CMAKE_CURRENT_SOURCE_DIR}/cmvn_json2binary_main.cc)
-target_include_directories(cmvn_json2binary_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(cmvn_json2binary_main utils kaldi-util kaldi-matrix gflags glog)
--- a/speechx/examples/feat/feature-mfcc-test.cc
+++ b/speechx/examples/feat/feature-mfcc-test.cc
@ -1,719 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// feat/feature-mfcc-test.cc
-
-// Copyright 2009-2011  Karel Vesely;  Petr Motlicek
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#include <iostream>
-
-#include "base/kaldi-math.h"
-#include "feat/feature-mfcc.h"
-#include "feat/wave-reader.h"
-#include "matrix/kaldi-matrix-inl.h"
-
-using namespace kaldi;
-
-static void UnitTestReadWave() {
-    std::cout << "=== UnitTestReadWave() ===\n";
-
-    Vector<BaseFloat> v, v2;
-
-    std::cout << "<<<=== Reading waveform\n";
-
-    {
-        std::ifstream is("test_data/test.wav", std::ios_base::binary);
-        WaveData wave;
-        wave.Read(is);
-        const Matrix<BaseFloat> data(wave.Data());
-        KALDI_ASSERT(data.NumRows() == 1);
-        v.Resize(data.NumCols());
-        v.CopyFromVec(data.Row(0));
-    }
-
-    std::cout
-        << "<<<=== Reading Vector<BaseFloat> waveform, prepared by matlab\n";
-    std::ifstream input("test_data/test_matlab.ascii");
-    KALDI_ASSERT(input.good());
-    v2.Read(input, false);
-    input.close();
-
-    std::cout
-        << "<<<=== Comparing freshly read waveform to 'libsndfile' waveform\n";
-    KALDI_ASSERT(v.Dim() == v2.Dim());
-    for (int32 i = 0; i < v.Dim(); i++) {
-        KALDI_ASSERT(v(i) == v2(i));
-    }
-    std::cout << "<<<=== Comparing done\n";
-
-    // std::cout << "== The Waveform Samples == \n";
-    // std::cout << v;
-
-    std::cout << "Test passed :)\n\n";
-}
-
-
-/**
- */
-static void UnitTestSimple() {
-    std::cout << "=== UnitTestSimple() ===\n";
-
-    Vector<BaseFloat> v(100000);
-    Matrix<BaseFloat> m;
-
-    // init with noise
-    for (int32 i = 0; i < v.Dim(); i++) {
-        v(i) = (abs(i * 433024253) % 65535) - (65535 / 2);
-    }
-
-    std::cout << "<<<=== Just make sure it runs... Nothing is compared\n";
-    // the parametrization object
-    MfccOptions op;
-    // trying to have same opts as baseline.
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.preemph_coeff = 0.0;
-    op.frame_opts.window_type = "rectangular";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.mel_opts.low_freq = 0.0;
-    op.mel_opts.htk_mode = true;
-    op.htk_compat = true;
-
-    Mfcc mfcc(op);
-    // use default parameters
-
-    // compute mfccs.
-    mfcc.Compute(v, 1.0, &m);
-
-    // possibly dump
-    //   std::cout << "== Output features == \n" << m;
-    std::cout << "Test passed :)\n\n";
-}
-
-
-static void UnitTestHTKCompare1() {
-    std::cout << "=== UnitTestHTKCompare1() ===\n";
-
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        std::ifstream is("test_data/test.wav.fea_htk.1",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // use mfcc with default configuration...
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.preemph_coeff = 0.0;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.mel_opts.low_freq = 0.0;
-    op.mel_opts.htk_mode = true;
-    op.htk_compat = true;
-    op.use_energy = false;  // C0 not energy.
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
-
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (i_old != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // write the htk features for later inspection
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.1",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.1");
-}
-
-
-static void UnitTestHTKCompare2() {
-    std::cout << "=== UnitTestHTKCompare2() ===\n";
-
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        std::ifstream is("test_data/test.wav.fea_htk.2",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // use mfcc with default configuration...
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.preemph_coeff = 0.0;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.mel_opts.low_freq = 0.0;
-    op.mel_opts.htk_mode = true;
-    op.htk_compat = true;
-    op.use_energy = true;  // Use energy.
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
-
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (i_old != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // write the htk features for later inspection
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.2",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.2");
-}
-
-
-static void UnitTestHTKCompare3() {
-    std::cout << "=== UnitTestHTKCompare3() ===\n";
-
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        std::ifstream is("test_data/test.wav.fea_htk.3",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // use mfcc with default configuration...
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.preemph_coeff = 0.0;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.htk_compat = true;
-    op.use_energy = true;  // Use energy.
-    op.mel_opts.low_freq = 20.0;
-    // op.mel_opts.debug_mel = true;
-    op.mel_opts.htk_mode = true;
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
-
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (static_cast<int32>(i_old) != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // write the htk features for later inspection
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.3",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.3");
-}
-
-
-// Regression test: computes MFCC + delta features for test_data/test.wav and
-// compares them cell by cell with the HTK reference features stored in
-// test_data/test.wav.fea_htk.4. Any cell (outside the first and last 10
-// frames) that differs by more than 1.0 makes the test fail via KALDI_ERR.
-static void UnitTestHTKCompare4() {
-    std::cout << "=== UnitTestHTKCompare4() ===\n";
-
-    // load the single-channel test waveform
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        // note: this `is` shadows the wave stream declared above
-        std::ifstream is("test_data/test.wav.fea_htk.4",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // start from the default MfccOptions and override the fields below
-    // (preemph_coeff is left at its default in this variant)
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.mel_opts.low_freq = 0.0;
-    op.htk_compat = true;
-    op.use_energy = true;  // Use energy.
-    op.mel_opts.htk_mode = true;
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
-
-    // append delta features, using the default DeltaFeaturesOptions
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;  // last row whose contents were already printed
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (static_cast<int32>(i_old) != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // exercise WriteHtk on the computed features; the temporary file is
-    // deleted again by the unlink() at the end of this function
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0 (octal literal)
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.4",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.4");
-}
-
-
-// Regression test with VTLN warping: computes MFCC + delta features for
-// test_data/test.wav using a warp factor of 1.1 and compares them cell by cell
-// with the HTK reference features in test_data/test.wav.fea_htk.5. Any cell
-// (outside the first and last 10 frames) that differs by more than 1.0 makes
-// the test fail via KALDI_ERR.
-static void UnitTestHTKCompare5() {
-    std::cout << "=== UnitTestHTKCompare5() ===\n";
-
-    // load the single-channel test waveform
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        // note: this `is` shadows the wave stream declared above
-        std::ifstream is("test_data/test.wav.fea_htk.5",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // start from the default MfccOptions and override the fields below,
-    // including the VTLN cutoff frequencies
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.htk_compat = true;
-    op.use_energy = true;  // Use energy.
-    op.mel_opts.low_freq = 0.0;
-    op.mel_opts.vtln_low = 100.0;
-    op.mel_opts.vtln_high = 7500.0;
-    op.mel_opts.htk_mode = true;
-
-    BaseFloat vtln_warp =
-        1.1;  // our approach identical to htk for warp factor >1,
-    // differs slightly for higher mel bins if warp_factor <0.9
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);
-
-    // append delta features, using the default DeltaFeaturesOptions
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;  // last row whose contents were already printed
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (static_cast<int32>(i_old) != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // exercise WriteHtk on the computed features; the temporary file is
-    // deleted again by the unlink() at the end of this function
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0 (octal literal)
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.5",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.5");
-}
-
-// Regression test using C0 instead of energy: computes MFCC + delta features
-// for test_data/test.wav (24 mel bins between 125 Hz and 7800 Hz, pre-emphasis
-// 0.97) and compares them cell by cell with the HTK reference features in
-// test_data/test.wav.fea_htk.6. Any cell (outside the first and last 10
-// frames) that differs by more than 1.0 makes the test fail via KALDI_ERR.
-static void UnitTestHTKCompare6() {
-    std::cout << "=== UnitTestHTKCompare6() ===\n";
-
-
-    // load the single-channel test waveform
-    std::ifstream is("test_data/test.wav", std::ios_base::binary);
-    WaveData wave;
-    wave.Read(is);
-    KALDI_ASSERT(wave.Data().NumRows() == 1);
-    SubVector<BaseFloat> waveform(wave.Data(), 0);
-
-    // read the HTK features
-    Matrix<BaseFloat> htk_features;
-    {
-        // note: this `is` shadows the wave stream declared above
-        std::ifstream is("test_data/test.wav.fea_htk.6",
-                         std::ios::in | std::ios_base::binary);
-        bool ans = ReadHtk(is, &htk_features, 0);
-        KALDI_ASSERT(ans);
-    }
-
-    // start from the default MfccOptions and override the fields below
-    // (mel_opts.htk_mode is not set in this variant)
-    MfccOptions op;
-    op.frame_opts.dither = 0.0;
-    op.frame_opts.preemph_coeff = 0.97;
-    op.frame_opts.window_type = "hamming";
-    op.frame_opts.remove_dc_offset = false;
-    op.frame_opts.round_to_power_of_two = true;
-    op.mel_opts.num_bins = 24;
-    op.mel_opts.low_freq = 125.0;
-    op.mel_opts.high_freq = 7800.0;
-    op.htk_compat = true;
-    op.use_energy = false;  // C0 not energy.
-
-    Mfcc mfcc(op);
-
-    // calculate kaldi features
-    Matrix<BaseFloat> kaldi_raw_features;
-    mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
-
-    // append delta features, using the default DeltaFeaturesOptions
-    DeltaFeaturesOptions delta_opts;
-    Matrix<BaseFloat> kaldi_features;
-    ComputeDeltas(delta_opts, kaldi_raw_features, &kaldi_features);
-
-    // compare the results
-    bool passed = true;
-    int32 i_old = -1;  // last row whose contents were already printed
-    KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows());
-    KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols());
-    // Ignore ends-- we make slightly different choices than
-    // HTK about how to treat the deltas at the ends.
-    for (int32 i = 10; i + 10 < kaldi_features.NumRows(); i++) {
-        for (int32 j = 0; j < kaldi_features.NumCols(); j++) {
-            BaseFloat a = kaldi_features(i, j), b = htk_features(i, j);
-            if ((std::abs(b - a)) > 1.0) {  //<< TOLERANCE TO DIFFERENCES!!!!!
-                // print the non-matching data only once per-line
-                if (static_cast<int32>(i_old) != i) {
-                    std::cout << "\n\n\n[HTK-row: " << i << "] "
-                              << htk_features.Row(i) << "\n";
-                    std::cout << "[Kaldi-row: " << i << "] "
-                              << kaldi_features.Row(i) << "\n\n\n";
-                    i_old = i;
-                }
-                // print indices of non-matching cells
-                std::cout << "[" << i << ", " << j << "]";
-                passed = false;
-            }
-        }
-    }
-    if (!passed) KALDI_ERR << "Test failed";
-
-    // exercise WriteHtk on the computed features; the temporary file is
-    // deleted again by the unlink() at the end of this function
-    HtkHeader header = {
-        kaldi_features.NumRows(),
-        100000,  // 10ms
-        static_cast<int16>(sizeof(float) * kaldi_features.NumCols()),
-        021406  // MFCC_D_A_0 (octal literal)
-    };
-    {
-        std::ofstream os("tmp.test.wav.fea_kaldi.6",
-                         std::ios::out | std::ios::binary);
-        WriteHtk(os, kaldi_features, header);
-    }
-
-    std::cout << "Test passed :)\n\n";
-
-    unlink("tmp.test.wav.fea_kaldi.6");
-}
-
-// Checks MelBanks::VtlnWarpFreq over 100 random warp factors drawn as
-// 0.9 + RandUniform() * 0.2:
-//   * 5000 Hz maps to 5000 / warp_factor,
-//   * the band edges low_freq and high_freq map to themselves,
-//   * the warp is non-decreasing for a random pair freq2 <= freq3,
-//   * a warp factor of 1.0 leaves the frequency unchanged.
-void UnitTestVtln() {
-    const BaseFloat low_freq = 10, high_freq = 7800;
-    const BaseFloat vtln_low_cutoff = 20, vtln_high_cutoff = 7400;
-
-    // Shorthand: warp `f` by `factor` with the fixed cutoffs/band edges above.
-    auto warp = [&](BaseFloat factor, BaseFloat f) -> BaseFloat {
-        return MelBanks::VtlnWarpFreq(
-            vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq, factor, f);
-    };
-
-    for (size_t trial = 0; trial < 100; ++trial) {
-        const BaseFloat mid_freq = 5000;
-        const BaseFloat warp_factor = 0.9 + RandUniform() * 0.2;
-
-        AssertEqual(warp(warp_factor, mid_freq), mid_freq / warp_factor);
-        AssertEqual(warp(warp_factor, low_freq), low_freq);
-        AssertEqual(warp(warp_factor, high_freq), high_freq);
-
-        // Two random frequencies inside the band with freq3 >= freq2.
-        const BaseFloat freq2 =
-            low_freq + (high_freq - low_freq) * RandUniform();
-        const BaseFloat freq3 = freq2 + (high_freq - freq2) * RandUniform();
-
-        const BaseFloat warped2 = warp(warp_factor, freq2);
-        const BaseFloat warped3 = warp(warp_factor, freq3);
-        KALDI_ASSERT(warped3 >= warped2);  // increasing function.
-
-        // Identity warp must not move the frequency.
-        const BaseFloat unwarped3 = warp(1.0, freq3);
-        AssertEqual(unwarped3, freq3);
-    }
-}
-
-// Runs every feature test in this file once, in a fixed order, then prints a
-// success line. A failing test is expected to abort the sequence (the HTK
-// comparison tests report failure through KALDI_ERR).
-static void UnitTestFeat() {
-    UnitTestVtln();
-    UnitTestReadWave();
-    UnitTestSimple();
-    UnitTestHTKCompare1();
-    UnitTestHTKCompare2();
-    // NOTE(review): the remark that used to sit here said this test was
-    // commented out because it stopped comparing equal once the handling of
-    // the FFT bins was normalized (offset of 0.5 removed), apparently related
-    // to how frequency zero behaves. The call below is active, so that remark
-    // looks stale -- confirm whether the test is expected to pass.
-    UnitTestHTKCompare3();
-    UnitTestHTKCompare4();
-    UnitTestHTKCompare5();
-    UnitTestHTKCompare6();
-    std::cout << "Tests succeeded.\n";
-}
-
-
-// Test driver: runs the full feature test suite five times in a row.
-// Returns 0 when every round completes, or 1 (after printing the exception
-// message to stderr) as soon as a std::exception escapes a test.
-int main() {
-    const int kNumRounds = 5;
-    try {
-        int round = 0;
-        while (round < kNumRounds) {
-            UnitTestFeat();
-            ++round;
-        }
-        std::cout << "Tests succeeded.\n";
-    } catch (const std::exception &err) {
-        std::cerr << err.what();
-        return 1;
-    }
-    return 0;
-}
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@ -1,270 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// TODO: refactor, replace with gtest
-
-#include "base/flags.h"
-#include "base/log.h"
-#include "kaldi/feat/wave-reader.h"
-#include "kaldi/util/kaldi-io.h"
-#include "kaldi/util/table-types.h"
-
-#include "frontend/audio/audio_cache.h"
-#include "frontend/audio/data_cache.h"
-#include "frontend/audio/feature_cache.h"
-#include "frontend/audio/frontend_itf.h"
-#include "frontend/audio/linear_spectrogram.h"
-#include "frontend/audio/normalizer.h"
-
-DEFINE_string(wav_rspecifier, "", "test wav scp path");
-DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
-DEFINE_string(cmvn_write_path, "./cmvn.ark", "write cmvn");
-DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
-
-
-// Hard-coded CMVN statistics for the linear spectrogram features.
-// WriteMatrix() packs them into a 2 x (mean_.size() + 1) matrix: mean_ fills
-// row 0, variance_ fills row 1, and count_ goes into the last column of row 0.
-// NOTE(review): the magnitudes suggest these are accumulated sums and sums of
-// squares over count_ frames rather than normalized mean/variance -- confirm.
-std::vector<float> mean_{
-    -13730251.531853663, -12982852.199316509, -13673844.299583456,
-    -13089406.559646806, -12673095.524938712, -12823859.223276224,
-    -13590267.158903603, -14257618.467152044, -14374605.116185192,
-    -14490009.21822485,  -14849827.158924166, -15354435.470563512,
-    -15834149.206532761, -16172971.985514281, -16348740.496746974,
-    -16423536.699409386, -16556246.263649225, -16744088.772748645,
-    -16916184.08510357,  -17054034.840031497, -17165612.509455364,
-    -17255955.470915023, -17322572.527648456, -17408943.862033736,
-    -17521554.799865916, -17620623.254924215, -17699792.395918526,
-    -17723364.411134344, -17741483.4433254,   -17747426.888704527,
-    -17733315.928209435, -17748780.160905756, -17808336.883775543,
-    -17895918.671983004, -18009812.59173023,  -18098188.66548325,
-    -18195798.958462656, -18293617.62980999,  -18397432.92077201,
-    -18505834.787318766, -18585451.8100908,   -18652438.235649142,
-    -18700960.306275308, -18734944.58792185,  -18737426.313365128,
-    -18735347.165987637, -18738813.444170244, -18737086.848890636,
-    -18731576.2474336,   -18717405.44095871,  -18703089.25545657,
-    -18691014.546456724, -18692460.568905357, -18702119.628629155,
-    -18727710.621126678, -18761582.72034647,  -18806745.835547544,
-    -18850674.8692112,   -18884431.510951452, -18919999.992506847,
-    -18939303.799078144, -18952946.273760635, -18980289.22996379,
-    -19011610.17803294,  -19040948.61805145,  -19061021.429847397,
-    -19112055.53768819,  -19149667.414264943, -19201127.05091321,
-    -19270250.82564605,  -19334606.883057203, -19390513.336589377,
-    -19444176.259208687, -19502755.000038862, -19544333.014549147,
-    -19612668.183176614, -19681902.19006569,  -19771969.951249883,
-    -19873329.723376893, -19996752.59235844,  -20110031.131400537,
-    -20231658.612529557, -20319378.894054495, -20378534.45718066,
-    -20413332.089584175, -20438147.844177883, -20443710.248040095,
-    -20465457.02238927,  -20488610.969337028, -20516295.16424432,
-    -20541423.795738827, -20553192.874953747, -20573605.50701977,
-    -20577871.61936797,  -20571807.008916274, -20556242.38912231,
-    -20542199.30819195,  -20521239.063551214, -20519150.80004532,
-    -20527204.80248933,  -20536933.769257784, -20543470.522332076,
-    -20549700.089992985, -20551525.24958494,  -20554873.406493705,
-    -20564277.65794227,  -20572211.740052115, -20574305.69550465,
-    -20575494.450104576, -20567092.577932164, -20549302.929608088,
-    -20545445.11878376,  -20546625.326603737, -20549190.03499401,
-    -20554824.947828256, -20568341.378989458, -20577582.331383612,
-    -20577980.519402675, -20566603.03458152,  -20560131.592262644,
-    -20552166.469060015, -20549063.06763577,  -20544490.562339947,
-    -20539817.82346569,  -20528747.715731595, -20518026.24576161,
-    -20510977.844974525, -20506874.36087992,  -20506731.11977665,
-    -20510482.133420516, -20507760.92101862,  -20494644.834457114,
-    -20480107.89304893,  -20461312.091867123, -20442941.75080173,
-    -20426123.02834838,  -20424607.675283,    -20426810.369107097,
-    -20434024.50097819,  -20437404.75544205,  -20447688.63916367,
-    -20460893.335563846, -20482922.735127095, -20503610.119434915,
-    -20527062.76448319,  -20557830.035128627, -20593274.72068722,
-    -20632528.452965066, -20673637.471334763, -20733106.97143075,
-    -20842921.0447562,   -21054357.83621519,  -21416569.534189366,
-    -21978460.272811692, -22753170.052172784, -23671344.10563395,
-    -24613499.293358143, -25406477.12230188,  -25884377.82156489,
-    -26049040.62791664,  -26996879.104431007};
-std::vector<float> variance_{
-    213747175.10846674, 188395815.34302503, 212706429.10966414,
-    199109025.81461075, 189235901.23864496, 194901336.53253657,
-    217481594.29306737, 238689869.12327808, 243977501.24115244,
-    248479623.6431067,  259766741.47116545, 275516766.7790273,
-    291271202.3691234,  302693239.8220509,  308627358.3997694,
-    311143911.38788426, 315446105.07731867, 321705430.9341829,
-    327458907.4659941,  332245072.43223983, 336251717.5935284,
-    339694069.7639722,  342188204.4322228,  345587110.31313115,
-    349903086.2875232,  353660214.20643026, 356700344.5270885,
-    357665362.3529641,  358493352.05658793, 358857951.620328,
-    358375239.52774596, 358899733.6342954,  361051818.3511561,
-    364361716.05025816, 368750322.3771452,  372047800.6462831,
-    375655861.1349018,  379358519.1980013,  383327605.3935181,
-    387458599.282341,   390434692.3406868,  392994486.35057056,
-    394874418.04603153, 396230525.79763395, 396365592.0414835,
-    396334819.8242737,  396488353.19250053, 396438877.00744957,
-    396197980.4459586,  395590921.6672991,  395001107.62072515,
-    394528291.7318225,  394593110.424006,   395018405.59353715,
-    396110577.5415993,  397506704.0371068,  399400197.4657644,
-    401243568.2468382,  402687134.7805103,  404136047.2872507,
-    404883170.001883,   405522253.219517,   406660365.3626476,
-    407919346.0991902,  409045348.5384909,  409759588.7889818,
-    411974821.8564483,  413489718.78201455, 415535392.56684107,
-    418466481.97674364, 421104678.35678065, 423405392.5200779,
-    425550570.40798235, 427929423.9579701,  429585274.253478,
-    432368493.55181056, 435193587.13513297, 438886855.20476013,
-    443058876.8633751,  448181232.5093362,  452883835.6332396,
-    458056721.77926534, 461816531.22735566, 464363620.1970998,
-    465886343.5057493,  466928872.0651,     467180536.42647296,
-    468111848.70714295, 469138695.3071312,  470378429.6930793,
-    471517958.7132626,  472109050.4262365,  473087417.0177867,
-    473381322.04648733, 473220195.85483915, 472666071.8998819,
-    472124669.87879956, 471298571.411737,   471251033.2902761,
-    471672676.43128747, 472177147.2193172,  472572361.7711908,
-    472968783.7751127,  473156295.4164052,  473398034.82676554,
-    473897703.5203811,  474328271.33112127, 474452670.98002136,
-    474549003.99284613, 474252887.13567275, 473557462.909069,
-    473483385.85193115, 473609738.04855174, 473746944.82085115,
-    474016729.91696435, 474617321.94138587, 475045097.237122,
-    475125402.586558,   474664112.9824912,  474426247.5800283,
-    474104075.42796475, 473978219.7273978,  473773171.7798875,
-    473578534.69508696, 473102924.16904145, 472651240.5232615,
-    472374383.1810912,  472209479.6956096,  472202298.8921673,
-    472370090.76781124, 472220933.99374026, 471625467.37106377,
-    470994646.51883453, 470182428.9637543,  469348211.5939578,
-    468570387.4467277,  468540442.7225135,  468672018.90414184,
-    468994346.9533251,  469138757.58201426, 469553915.95710236,
-    470134523.38582784, 471082421.62055486, 471962316.51804745,
-    472939745.1708408,  474250621.5944825,  475773933.43199486,
-    477465399.71087736, 479218782.61382693, 481752299.7930922,
-    486608947.8984568,  496119403.2067917,  512730085.5704984,
-    539048915.2641417,  576285298.3548826,  621610270.2240586,
-    669308196.4436442,  710656993.5957186,  736344437.3725077,
-    745481288.0241544,  801121432.9925804};
-int count_ = 912592;
-
-// Serializes the hard-coded CMVN statistics to FLAGS_cmvn_write_path in text
-// (non-binary) mode. Layout of the 2 x (dim + 1) matrix: row 0 holds mean_
-// followed by count_ in the extra last column, row 1 holds variance_ (its
-// last column is never assigned here).
-void WriteMatrix() {
-    const size_t dim = mean_.size();
-    kaldi::Matrix<double> cmvn_stats(2, dim + 1);
-
-    for (size_t d = 0; d != dim; ++d) {
-        cmvn_stats(0, d) = mean_[d];
-        cmvn_stats(1, d) = variance_[d];
-    }
-    cmvn_stats(0, dim) = count_;
-
-    const bool binary = false;
-    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, binary);
-}
-
-// Extracts streaming linear-spectrogram features for every utterance in
-// --wav_rspecifier and writes one feature matrix per utterance to
-// --feature_wspecifier.
-//
-// Each waveform (channel 0 only) is fed to the feature pipeline in chunks of
-// --streaming_chunk seconds at an assumed 16 kHz sample rate; the per-chunk
-// outputs are then stitched into a single (frames x dim) matrix.
-//
-// Returns 0 if at least one utterance was written, 1 otherwise. Utterances
-// that yield no feature frames are skipped and counted as errors.
-int main(int argc, char* argv[]) {
-    gflags::ParseCommandLineFlags(&argc, &argv, false);
-    google::InitGoogleLogging(argv[0]);
-
-    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
-        FLAGS_wav_rspecifier);
-    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);
-    // Write the CMVN stats file before the CMVN stage below is constructed
-    // with the same path (presumably it reads the file on construction).
-    WriteMatrix();
-
-    int32 num_done = 0, num_err = 0;
-
-    // feature pipeline: wave cache --> decibel_normalizer --> hanning
-    // window -->linear_spectrogram --> global cmvn -> feat cache
-    std::unique_ptr<ppspeech::FrontendInterface> data_source(
-        new ppspeech::AudioCache());
-
-    ppspeech::DecibelNormalizerOptions db_norm_opt;
-    std::unique_ptr<ppspeech::FrontendInterface> db_norm(
-        new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source)));
-
-    ppspeech::LinearSpectrogramOptions opt;
-    opt.frame_opts.frame_length_ms = 20;
-    opt.frame_opts.frame_shift_ms = 10;
-    opt.streaming_chunk = FLAGS_streaming_chunk;
-    opt.frame_opts.dither = 0.0;
-    opt.frame_opts.remove_dc_offset = false;
-    opt.frame_opts.window_type = "hanning";
-    opt.frame_opts.preemph_coeff = 0.0;
-    LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
-    LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;
-
-    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
-        new ppspeech::LinearSpectrogram(opt, std::move(db_norm)));
-
-    std::unique_ptr<ppspeech::FrontendInterface> cmvn(new ppspeech::CMVN(
-        FLAGS_cmvn_write_path, std::move(linear_spectrogram)));
-
-    ppspeech::FeatureCache feature_cache(kint16max, std::move(cmvn));
-    LOG(INFO) << "feat dim: " << feature_cache.Dim();
-
-    int sample_rate = 16000;
-    float streaming_chunk = FLAGS_streaming_chunk;
-    int chunk_sample_size = streaming_chunk * sample_rate;
-    LOG(INFO) << "sr: " << sample_rate;
-    LOG(INFO) << "chunk size (s): " << streaming_chunk;
-    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;
-
-
-    for (; !wav_reader.Done(); wav_reader.Next()) {
-        std::string utt = wav_reader.Key();
-        const kaldi::WaveData& wave_data = wav_reader.Value();
-        LOG(INFO) << "process utt: " << utt;
-
-        int32 this_channel = 0;
-        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
-                                                    this_channel);
-        int tot_samples = waveform.Dim();
-        LOG(INFO) << "wav len (sample): " << tot_samples;
-
-        // Stream the waveform through the pipeline chunk by chunk and keep
-        // each chunk's flattened (frames * dim) output.
-        int sample_offset = 0;
-        std::vector<kaldi::Vector<BaseFloat>> feats;
-        int feature_rows = 0;
-        while (sample_offset < tot_samples) {
-            int cur_chunk_size =
-                std::min(chunk_sample_size, tot_samples - sample_offset);
-
-            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
-            for (int i = 0; i < cur_chunk_size; ++i) {
-                wav_chunk(i) = waveform(sample_offset + i);
-            }
-
-            kaldi::Vector<BaseFloat> chunk_feats;
-            feature_cache.Accept(wav_chunk);
-            // A short chunk can only be the last one: signal end of input.
-            // NOTE(review): when the waveform length is an exact multiple of
-            // the chunk size, SetFinished() is never called -- confirm that
-            // no trailing frames are left unflushed in that case.
-            if (cur_chunk_size < chunk_sample_size) {
-                feature_cache.SetFinished();
-            }
-            feature_cache.Read(&chunk_feats);
-            if (chunk_feats.Dim() == 0) break;
-
-            feats.push_back(chunk_feats);
-            sample_offset += cur_chunk_size;
-            feature_rows += chunk_feats.Dim() / feature_cache.Dim();
-        }
-
-        // Nothing was produced (e.g. empty or too-short audio). Building a
-        // 0 x dim kaldi::Matrix is rejected by Kaldi, so skip the utterance
-        // and count it as an error instead of aborting the whole run.
-        if (feature_rows == 0) {
-            LOG(WARNING) << "no features extracted for utt: " << utt
-                         << ", skipped";
-            feature_cache.Reset();
-            ++num_err;
-            continue;
-        }
-
-        // Unflatten the per-chunk vectors into one (frames x dim) matrix.
-        int cur_idx = 0;
-        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows,
-                                                 feature_cache.Dim());
-        for (const auto& feat : feats) {  // by reference: avoid a deep copy
-            int num_rows = feat.Dim() / feature_cache.Dim();
-            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
-                for (size_t col_idx = 0; col_idx < feature_cache.Dim();
-                     ++col_idx) {
-                    features(cur_idx, col_idx) =
-                        feat(row_idx * feature_cache.Dim() + col_idx);
-                }
-                ++cur_idx;
-            }
-        }
-        feat_writer.Write(utt, features);
-        // Clear pipeline state before the next utterance.
-        feature_cache.Reset();
-
-        if (num_done % 50 == 0 && num_done != 0)
-            KALDI_VLOG(2) << "Processed " << num_done << " utterances";
-        num_done++;
-    }
-    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
-              << " with errors.";
-    return (num_done != 0 ? 0 : 1);
-}
--- a/speechx/examples/feat/run.sh
+++ b/speechx/examples/feat/run.sh
@ -1,32 +0,0 @@
-#!/bin/bash
-# Builds speechx if needed, fetches the demo ASR model, and runs
-# linear_spectrogram_main on the bundled sample wav.
-set +x
-set -e
-
-. ./path.sh
-
-# 1. compile
-# Quoted on purpose: with an unset/empty variable an unquoted `[ ! -d ]`
-# evaluates to false and the build would be skipped silently.
-if [ ! -d "${SPEECHX_EXAMPLES}" ]; then
-    pushd "${SPEECHX_ROOT}"
-    bash build.sh
-    popd
-fi
-
-# 2. download model
-if [ ! -d ../paddle_asr_model ]; then
-    wget https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/paddle_asr_model.tar.gz
-    tar xzfv paddle_asr_model.tar.gz
-    mv ./paddle_asr_model ../
-    # produce wav scp
-    echo "utt1 " "$PWD/../paddle_asr_model/BAC009S0764W0290.wav" > ../paddle_asr_model/wav.scp
-fi
-
-model_dir=../paddle_asr_model
-feat_wspecifier=./feats.ark
-cmvn=./cmvn.ark
-
-# 3. run feat
-export GLOG_logtostderr=1
-linear_spectrogram_main \
-    --wav_rspecifier="scp:$model_dir/wav.scp" \
-    --feature_wspecifier="ark,t:$feat_wspecifier" \
-    --cmvn_write_path="$cmvn"
--- a/speechx/examples/ngram/.gitignore
+++ b/speechx/examples/ngram/.gitignore
@ -0,0 +1,2 @@
+data
+exp
--- a/speechx/examples/ngram/README.md
+++ b/speechx/examples/ngram/README.md
@ -0,0 +1,3 @@
+# NGram Train
+
+
--- a/speechx/examples/nnet/CMakeLists.txt
+++ b/speechx/examples/nnet/CMakeLists.txt
@ -1,5 +0,0 @@
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-
-# pp-model-test: standalone executable built from pp-model-test.cc.
-# It sees the speechx root and its bundled kaldi directory as include paths
-# and links against the nnet library, gflags, and the project-wide ${DEPS}.
-add_executable(pp-model-test
-    ${CMAKE_CURRENT_SOURCE_DIR}/pp-model-test.cc
-)
-target_include_directories(pp-model-test
-    PRIVATE
-        ${SPEECHX_ROOT}
-        ${SPEECHX_ROOT}/kaldi
-)
-target_link_libraries(pp-model-test
-    PUBLIC
-        nnet
-        gflags
-        ${DEPS}
-)
--- a/speechx/examples/nnet/run.sh
+++ b/speechx/examples/nnet/run.sh
@ -1,29 +0,0 @@
-#!/bin/bash
-# Builds speechx if needed, fetches the demo ASR model, and runs
-# pp-model-test against the downloaded model files.
-set +x
-set -e
-
-# Source the local path.sh explicitly; a bare `. path.sh` is looked up in
-# PATH first and may pick up an unrelated file.
-. ./path.sh
-
-# 1. compile
-# Quoted on purpose: with an unset/empty variable an unquoted `[ ! -d ]`
-# evaluates to false and the build would be skipped silently.
-if [ ! -d "${SPEECHX_EXAMPLES}" ]; then
-    pushd "${SPEECHX_ROOT}"
-    bash build.sh
-    popd
-fi
-
-# 2. download model
-if [ ! -d ../paddle_asr_model ]; then
-    wget https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/paddle_asr_model.tar.gz
-    tar xzfv paddle_asr_model.tar.gz
-    mv ./paddle_asr_model ../
-    # produce wav scp
-    echo "utt1 " "$PWD/../paddle_asr_model/BAC009S0764W0290.wav" > ../paddle_asr_model/wav.scp
-fi
-
-model_dir=../paddle_asr_model
-
-# 3. run the model test
-pp-model-test \
-    --model_path="$model_dir/avg_1.jit.pdmodel" \
-    --param_path="$model_dir/avg_1.jit.pdparams"
-
--- a/speechx/speechx/nnet/paddle_nnet.h
+++ b/speechx/speechx/nnet/paddle_nnet.h
@ -45,8 +45,7 @@ struct ModelOptions {
          thread_num(2),
          use_gpu(false),
          input_names(
-              "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_"
-              "box"),
+              "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"),
          output_names(
              "save_infer_model/scale_0.tmp_1,save_infer_model/"
              "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"