Merge pull request #1740 from zh794390558/fix

[speechx] fix nnet input and output name
3 years ago · fe8a14dd50
parent 08e0cf2b68 3561875dd0
commit fe8a14dd50
21 changed files with 964 additions and 869 deletions
--- a/.flake8
+++ b/.flake8
@ -12,6 +12,8 @@ exclude =
    .git,
    # python cache
    __pycache__,
+    # third party
+    utils/compute-wer.py,
    third_party/,
 # Provide a comma-separated list of glob patterns to include for checks.
 filename =
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@ -40,6 +40,7 @@ from paddlespeech.s2t.utils.utility import UpdateConfig

 __all__ = ['ASRExecutor']

+
@cli_register(
    name='paddlespeech.asr', description='Speech to text infer command.')
 class ASRExecutor(BaseExecutor):
@ -148,7 +149,7 @@ class ASRExecutor(BaseExecutor):
                os.path.dirname(os.path.abspath(self.cfg_path)))
        logger.info(self.cfg_path)
        logger.info(self.ckpt_path)
-        
+
        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)
@ -278,7 +279,8 @@ class ASRExecutor(BaseExecutor):
            self._outputs["result"] = result_transcripts[0]

        elif "conformer" in model_type or "transformer" in model_type:
-            logger.info(f"we will use the transformer like model : {model_type}")
+            logger.info(
+                f"we will use the transformer like model : {model_type}")
            try:
                result_transcripts = self.model.decode(
                    audio,
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@ -279,7 +279,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
            # TODO(Hui Zhang): if end_flag.sum() == running_size:
            if end_flag.cast(paddle.int64).sum() == running_size:
                break
-            
+
            # 2.1 Forward decoder step
            hyps_mask = subsequent_mask(i).unsqueeze(0).repeat(
                running_size, 1, 1).to(device)  # (B*N, i, i)
--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@ -180,7 +180,7 @@ class CTCDecoder(CTCDecoderBase):
        # init once
        if self._ext_scorer is not None:
            return
-        
+
        if language_model_path != '':
            logger.info("begin to initialize the external scorer "
                        "for decoding")
--- a/paddlespeech/server/README.md
+++ b/paddlespeech/server/README.md
@ -47,4 +47,4 @@ paddlespeech_server start --config_file conf/ws_conformer_application.yaml

 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
-```
+```
--- a/paddlespeech/server/README_cn.md
+++ b/paddlespeech/server/README_cn.md
@ -48,4 +48,4 @@ paddlespeech_server start --config_file conf/ws_conformer_application.yaml

 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input zh.wav
-```
+```
--- a/paddlespeech/server/engine/asr/online/ctc_search.py
+++ b/paddlespeech/server/engine/asr/online/ctc_search.py
@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import defaultdict
+
 import paddle
+
 from paddlespeech.cli.log import logger
 from paddlespeech.s2t.utils.utility import log_add

--- a/paddlespeech/t2s/exps/synthesize.py
+++ b/paddlespeech/t2s/exps/synthesize.py
@ -52,7 +52,7 @@ def evaluate(args):
    # acoustic model
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
-    
+
    am_inference = get_am_inference(
        am=args.am,
        am_config=am_config,
--- a/paddlespeech/vector/cluster/diarization.py
+++ b/paddlespeech/vector/cluster/diarization.py
@ -20,11 +20,11 @@ A few sklearn functions are modified in this script as per requirement.
 import argparse
 import copy
 import warnings
-from distutils.util import strtobool

 import numpy as np
 import scipy
 import sklearn
+from distutils.util import strtobool
 from scipy import linalg
 from scipy import sparse
 from scipy.sparse.csgraph import connected_components
--- a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
@ -34,10 +34,12 @@ DEFINE_int32(receptive_field_length,
 DEFINE_int32(downsampling_rate,
             4,
             "two CNN(kernel=5) module downsampling rate.");
+DEFINE_string(
+    model_input_names,
+    "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
+    "model input names");
 DEFINE_string(model_output_names,
-              "save_infer_model/scale_0.tmp_1,save_infer_model/"
-              "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
-              "scale_3.tmp_1",
+              "softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
              "model output names");
 DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");

@ -76,6 +78,7 @@ int main(int argc, char* argv[]) {
    model_opts.model_path = model_path;
    model_opts.params_path = model_params;
    model_opts.cache_shape = FLAGS_model_cache_names;
+    model_opts.input_names = FLAGS_model_input_names;
    model_opts.output_names = FLAGS_model_output_names;
    std::shared_ptr<ppspeech::PaddleNnet> nnet(
        new ppspeech::PaddleNnet(model_opts));
--- a/speechx/examples/ds2_ol/decoder/run.sh
+++ b/speechx/examples/ds2_ol/decoder/run.sh
@ -48,7 +48,6 @@ if [ ! -f $lm ]; then
    popd
 fi

-
 feat_wspecifier=$exp_dir/feats.ark
 cmvn=$exp_dir/cmvn.ark

@ -57,7 +56,7 @@ export GLOG_logtostderr=1
 # dump json cmvn to kaldi
 cmvn-json2kaldi \
    --json_file  $ckpt_dir/data/mean_std.json \
-    --cmvn_write_path $exp_dir/cmvn.ark \
+    --cmvn_write_path $cmvn \
    --binary=false
 echo "convert json cmvn to kaldi ark."

@ -66,7 +65,7 @@ echo "convert json cmvn to kaldi ark."
 linear-spectrogram-wo-db-norm-ol \
    --wav_rspecifier=scp:$data/wav.scp \
    --feature_wspecifier=ark,t:$feat_wspecifier \
-    --cmvn_file=$exp_dir/cmvn.ark
+    --cmvn_file=$cmvn
 echo "compute linear spectrogram feature."

 # run ctc beam search decoder as streaming
--- a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
@ -37,10 +37,12 @@ DEFINE_int32(receptive_field_length,
 DEFINE_int32(downsampling_rate,
             4,
             "two CNN(kernel=5) module downsampling rate.");
+DEFINE_string(
+    model_input_names,
+    "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
+    "model input names");
 DEFINE_string(model_output_names,
-              "save_infer_model/scale_0.tmp_1,save_infer_model/"
-              "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
-              "scale_3.tmp_1",
+              "softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0",
              "model output names");
 DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");

@ -79,6 +81,7 @@ int main(int argc, char* argv[]) {
    model_opts.model_path = model_graph;
    model_opts.params_path = model_params;
    model_opts.cache_shape = FLAGS_model_cache_names;
+    model_opts.input_names = FLAGS_model_input_names;
    model_opts.output_names = FLAGS_model_output_names;
    std::shared_ptr<ppspeech::PaddleNnet> nnet(
        new ppspeech::PaddleNnet(model_opts));
--- a/speechx/examples/ds2_ol/feat/CMakeLists.txt
+++ b/speechx/examples/ds2_ol/feat/CMakeLists.txt
@ -9,4 +9,4 @@ target_link_libraries(${bin_name} frontend kaldi-util kaldi-feat-common gflags g
 set(bin_name cmvn-json2kaldi)
 add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
 target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-target_link_libraries(${bin_name} utils kaldi-util kaldi-matrix gflags glog)
+target_link_libraries(${bin_name} utils kaldi-util kaldi-matrix gflags glog ${DEPS})
--- a/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+++ b/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
@ -14,18 +14,20 @@

 // Note: Do not print/log ondemand object.

+#include "base/common.h"
 #include "base/flags.h"
 #include "base/log.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/kaldi-io.h"
 #include "utils/file_utils.h"
-#include "utils/simdjson.h"
+// #include "boost/json.hpp"
+#include <boost/json/src.hpp>

 DEFINE_string(json_file, "", "cmvn json file");
 DEFINE_string(cmvn_write_path, "./cmvn.ark", "write cmvn");
 DEFINE_bool(binary, true, "write cmvn in binary (true) or text(false)");

-using namespace simdjson;
+using namespace boost::json;  // from <boost/json.hpp>

 int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
@ -33,49 +35,51 @@ int main(int argc, char* argv[]) {

    LOG(INFO) << "cmvn josn path: " << FLAGS_json_file;

-    try {
-        padded_string json = padded_string::load(FLAGS_json_file);
-
-        ondemand::parser parser;
-        ondemand::document doc = parser.iterate(json);
-        ondemand::value val = doc;
+    auto ifs = std::ifstream(FLAGS_json_file);
+    std::string json_str = ppspeech::ReadFile2String(FLAGS_json_file);
+    auto value = boost::json::parse(json_str);
+    if (!value.is_object()) {
+        LOG(ERROR) << "Input json file format error.";
+    }

-        ondemand::array mean_stat = val["mean_stat"];
-        std::vector<kaldi::BaseFloat> mean_stat_vec;
-        for (double x : mean_stat) {
-            mean_stat_vec.push_back(x);
+    for (auto obj : value.as_object()) {
+        if (obj.key() == "mean_stat") {
+            LOG(INFO) << "mean_stat:" << obj.value();
        }
-        // LOG(INFO) << mean_stat; this line will casue
-        // simdjson::simdjson_error("Objects and arrays can only be iterated
-        // when
-        // they are first encountered")
-
-        ondemand::array var_stat = val["var_stat"];
-        std::vector<kaldi::BaseFloat> var_stat_vec;
-        for (double x : var_stat) {
-            var_stat_vec.push_back(x);
+        if (obj.key() == "var_stat") {
+            LOG(INFO) << "var_stat: " << obj.value();
        }
-
-        kaldi::int32 frame_num = uint64_t(val["frame_num"]);
-        LOG(INFO) << "nframe: " << frame_num;
-
-        size_t mean_size = mean_stat_vec.size();
-        kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
-        for (size_t idx = 0; idx < mean_size; ++idx) {
-            cmvn_stats(0, idx) = mean_stat_vec[idx];
-            cmvn_stats(1, idx) = var_stat_vec[idx];
+        if (obj.key() == "frame_num") {
+            LOG(INFO) << "frame_num: " << obj.value();
        }
-        cmvn_stats(0, mean_size) = frame_num;
-        LOG(INFO) << cmvn_stats;
+    }
+
+    boost::json::array mean_stat = value.at("mean_stat").as_array();
+    std::vector<kaldi::BaseFloat> mean_stat_vec;
+    for (auto it = mean_stat.begin(); it != mean_stat.end(); it++) {
+        mean_stat_vec.push_back(it->as_double());
+    }

-        kaldi::WriteKaldiObject(
-            cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
-        LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
-        LOG(INFO) << "Binary: " << FLAGS_binary;
-    } catch (simdjson::simdjson_error& err) {
-        LOG(ERROR) << err.what();
+    boost::json::array var_stat = value.at("var_stat").as_array();
+    std::vector<kaldi::BaseFloat> var_stat_vec;
+    for (auto it = var_stat.begin(); it != var_stat.end(); it++) {
+        var_stat_vec.push_back(it->as_double());
    }

+    kaldi::int32 frame_num = uint64_t(value.at("frame_num").as_int64());
+    LOG(INFO) << "nframe: " << frame_num;
+
+    size_t mean_size = mean_stat_vec.size();
+    kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
+    for (size_t idx = 0; idx < mean_size; ++idx) {
+        cmvn_stats(0, idx) = mean_stat_vec[idx];
+        cmvn_stats(1, idx) = var_stat_vec[idx];
+    }
+    cmvn_stats(0, mean_size) = frame_num;
+    LOG(INFO) << cmvn_stats;

+    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
+    LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
+    LOG(INFO) << "Binary: " << FLAGS_binary;
    return 0;
 }
--- a/speechx/examples/ngram/zh/local/text_to_lexicon.py
+++ b/speechx/examples/ngram/zh/local/text_to_lexicon.py
@ -2,6 +2,7 @@
 import argparse
 from collections import Counter

+
 def main(args):
    counter = Counter()
    with open(args.text, 'r') as fin, open(args.lexicon, 'w') as fout:
@ -12,7 +13,7 @@ def main(args):
                words = text.split()
            else:
                words = line.split()
-            
+
            counter.update(words)

        for word in counter:
@ -20,21 +21,16 @@ def main(args):
            fout.write(f"{word}\t{val}\n")
            fout.flush()

+
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='text(line:utt1 中国 人) to lexicon（line:中国 中 国).')
    parser.add_argument(
-        '--has_key',
-        default=True,
-        help='text path, with utt or not')
+        '--has_key', default=True, help='text path, with utt or not')
    parser.add_argument(
-        '--text',
-        required=True,
-        help='text path. line: utt1 中国 人 or 中国 人')
+        '--text', required=True, help='text path. line: utt1 中国 人 or 中国 人')
    parser.add_argument(
-        '--lexicon',
-        required=True,
-        help='lexicon path. line:中国 中 国')
+        '--lexicon', required=True, help='lexicon path. line:中国 中 国')
    args = parser.parse_args()
    print(args)

--- a/speechx/examples/text_lm/local/mmseg.py
+++ b/speechx/examples/text_lm/local/mmseg.py
@ -1,305 +1,315 @@
 #!/usr/bin/env python3
-
 # modified from https://sites.google.com/site/homepageoffuyanwei/Home/remarksandexcellentdiscussion/page-2

-class Word:  
-    def __init__(self,text = '',freq = 0):  
-        self.text = text  
-        self.freq = freq  
-        self.length = len(text)  
-  
+
+class Word:
+    def __init__(self, text='', freq=0):
+        self.text = text
+        self.freq = freq
+        self.length = len(text)
+
+
 class Chunk:
-    def __init__(self,w1,w2 = None,w3 = None):  
-        self.words = []  
-        self.words.append(w1)  
-        if w2:  
-            self.words.append(w2)  
-        if w3:  
-            self.words.append(w3)  
-      
+    def __init__(self, w1, w2=None, w3=None):
+        self.words = []
+        self.words.append(w1)
+        if w2:
+            self.words.append(w2)
+        if w3:
+            self.words.append(w3)
+
    #计算chunk的总长度  
-    def totalWordLength(self):  
-        length = 0  
-        for word in self.words:  
-            length += len(word.text)  
-        return length  
-      
+    def totalWordLength(self):
+        length = 0
+        for word in self.words:
+            length += len(word.text)
+        return length
+
    #计算平均长度  
-    def averageWordLength(self):  
-        return float(self.totalWordLength()) / float(len(self.words))  
-      
+    def averageWordLength(self):
+        return float(self.totalWordLength()) / float(len(self.words))
+
    #计算标准差  
-    def standardDeviation(self):  
-        average = self.averageWordLength()  
-        sum = 0.0  
-        for word in self.words:  
-            tmp = (len(word.text) - average)  
-            sum += float(tmp) * float(tmp)  
-        return sum  
-      
+    def standardDeviation(self):
+        average = self.averageWordLength()
+        sum = 0.0
+        for word in self.words:
+            tmp = (len(word.text) - average)
+            sum += float(tmp) * float(tmp)
+        return sum
+
    #自由语素度  
    def wordFrequency(self):
-        sum = 0  
-        for word in self.words:  
-            sum += word.freq  
-        return sum  
-  
-class ComplexCompare:  
-      
+        sum = 0
+        for word in self.words:
+            sum += word.freq
+        return sum
+
+
+class ComplexCompare:
    def takeHightest(self, chunks, comparator):
-        i = 1  
-        for j in range(1, len(chunks)):  
-            rlt = comparator(chunks[j], chunks[0])  
-            if rlt > 0:  
-                i = 0  
-            if rlt >= 0:  
-                chunks[i], chunks[j] = chunks[j], chunks[i]  
-                i += 1  
+        i = 1
+        for j in range(1, len(chunks)):
+            rlt = comparator(chunks[j], chunks[0])
+            if rlt > 0:
+                i = 0
+            if rlt >= 0:
+                chunks[i], chunks[j] = chunks[j], chunks[i]
+                i += 1
        return chunks[0:i]
-      
+
    #以下四个函数是mmseg算法的四种过滤原则，核心算法  
    def mmFilter(self, chunks):
-        def comparator(a,b):  
-            return a.totalWordLength() - b.totalWordLength()  
-        return self.takeHightest(chunks, comparator)  
-      
-    def lawlFilter(self,chunks):  
-        def comparator(a,b):  
-            return a.averageWordLength() - b.averageWordLength()  
-        return self.takeHightest(chunks,comparator)  
-      
-    def svmlFilter(self,chunks):  
-        def comparator(a,b):  
-            return b.standardDeviation() - a.standardDeviation()  
-        return self.takeHightest(chunks, comparator)  
-      
-    def logFreqFilter(self,chunks):  
-        def comparator(a,b):  
-            return a.wordFrequency() - b.wordFrequency()  
-        return self.takeHightest(chunks, comparator)  
-   
-   
+        def comparator(a, b):
+            return a.totalWordLength() - b.totalWordLength()
+
+        return self.takeHightest(chunks, comparator)
+
+    def lawlFilter(self, chunks):
+        def comparator(a, b):
+            return a.averageWordLength() - b.averageWordLength()
+
+        return self.takeHightest(chunks, comparator)
+
+    def svmlFilter(self, chunks):
+        def comparator(a, b):
+            return b.standardDeviation() - a.standardDeviation()
+
+        return self.takeHightest(chunks, comparator)
+
+    def logFreqFilter(self, chunks):
+        def comparator(a, b):
+            return a.wordFrequency() - b.wordFrequency()
+
+        return self.takeHightest(chunks, comparator)
+
+
 #加载词组字典和字符字典
 dictWord = {}
 maxWordLength = 0
-      
-def loadDictChars(filepath):  
-    global maxWordLength  
+
+
+def loadDictChars(filepath):
+    global maxWordLength
    fsock = open(filepath)
    for line in fsock:
        freq, word = line.split()
        word = word.strip()
-        dictWord[word] = (len(word), int(freq))  
-        maxWordLength = len(word) if maxWordLength < len(word) else maxWordLength  
-    fsock.close()  
-      
-def loadDictWords(filepath):  
-    global maxWordLength  
-    fsock = open(filepath)  
-    for line in fsock.readlines():  
+        dictWord[word] = (len(word), int(freq))
+        maxWordLength = len(word) if maxWordLength < len(
+            word) else maxWordLength
+    fsock.close()
+
+
+def loadDictWords(filepath):
+    global maxWordLength
+    fsock = open(filepath)
+    for line in fsock.readlines():
        word = line.strip()
-        dictWord[word] = (len(word), 0)  
-        maxWordLength = len(word) if maxWordLength < len(word) else maxWordLength
-    fsock.close()  
-  
+        dictWord[word] = (len(word), 0)
+        maxWordLength = len(word) if maxWordLength < len(
+            word) else maxWordLength
+    fsock.close()
+
+
 #判断该词word是否在字典dictWord中      
-def getDictWord(word):  
-    result = dictWord.get(word)  
-    if result:  
-        return Word(word, result[1])  
-    return None  
-      
+def getDictWord(word):
+    result = dictWord.get(word)
+    if result:
+        return Word(word, result[1])
+    return None
+
+
 #开始加载字典  
-def run():  
-    from os.path import join, dirname  
-    loadDictChars(join(dirname(__file__), 'data', 'chars.dic'))  
-    loadDictWords(join(dirname(__file__), 'data', 'words.dic'))  
-  
-class Analysis:  
-      
-    def __init__(self, text):  
+def run():
+    from os.path import join, dirname
+    loadDictChars(join(dirname(__file__), 'data', 'chars.dic'))
+    loadDictWords(join(dirname(__file__), 'data', 'words.dic'))
+
+
+class Analysis:
+    def __init__(self, text):
        self.text = text
-        self.cacheSize = 3  
-        self.pos = 0  
-        self.textLength = len(self.text)  
-        self.cache = []  
-        self.cacheIndex = 0  
-        self.complexCompare = ComplexCompare()  
-          
+        self.cacheSize = 3
+        self.pos = 0
+        self.textLength = len(self.text)
+        self.cache = []
+        self.cacheIndex = 0
+        self.complexCompare = ComplexCompare()
+
        #简单小技巧，用到个缓存，不知道具体有没有用处  
-        for i in range(self.cacheSize):  
+        for i in range(self.cacheSize):
            self.cache.append([-1, Word()])
-          
+
        #控制字典只加载一次  
        if not dictWord:
            run()

+    def __iter__(self):
+        while True:
+            token = self.getNextToken()
+            if token is None:
+                raise StopIteration
+            yield token
+
+    def getNextChar(self):
+        return self.text[self.pos]

-    def __iter__(self):  
-        while True:  
-            token = self.getNextToken()  
-            if token == None:  
-                raise StopIteration  
-            yield token  
-              
-    def getNextChar(self):  
-        return self.text[self.pos]  
-          
    #判断该字符是否是中文字符（不包括中文标点）    
-    def isChineseChar(self,charater):  
-        return 0x4e00 <= ord(charater) < 0x9fa6  
-          
+    def isChineseChar(self, charater):
+        return 0x4e00 <= ord(charater) < 0x9fa6
+
    #判断是否是ASCII码  
-    def isASCIIChar(self, ch):  
-        import string  
-        if ch in string.whitespace:  
-            return False  
-        if ch in string.punctuation:  
-            return False  
+    def isASCIIChar(self, ch):
+        import string
+        if ch in string.whitespace:
+            return False
+        if ch in string.punctuation:
+            return False
        return ch in string.printable
-      
+
    #得到下一个切割结果  
-    def getNextToken(self):  
-        while self.pos < self.textLength:  
-            if self.isChineseChar(self.getNextChar()):  
-                token = self.getChineseWords()  
-            else :  
-                token = self.getASCIIWords()+'/'  
-            if len(token) > 0:  
+    def getNextToken(self):
+        while self.pos < self.textLength:
+            if self.isChineseChar(self.getNextChar()):
+                token = self.getChineseWords()
+            else:
+                token = self.getASCIIWords() + '/'
+            if len(token) > 0:
                return token
        return None
-      
+
    #切割出非中文词  
    def getASCIIWords(self):
        # Skip whitespace and punctuation preceding the word
        #跳过中英文标点和空格  
-        while self.pos < self.textLength:  
-            ch = self.getNextChar()  
-            if self.isASCIIChar(ch) or self.isChineseChar(ch):  
-                break  
-            self.pos += 1  
+        while self.pos < self.textLength:
+            ch = self.getNextChar()
+            if self.isASCIIChar(ch) or self.isChineseChar(ch):
+                break
+            self.pos += 1
        #得到英文单词的起始位置      
-        start = self.pos  
-          
+        start = self.pos
+
        #找出英文单词的结束位置  
-        while self.pos < self.textLength:  
-            ch = self.getNextChar()  
-            if not self.isASCIIChar(ch):  
-                break  
-            self.pos += 1  
-        end = self.pos  
-          
+        while self.pos < self.textLength:
+            ch = self.getNextChar()
+            if not self.isASCIIChar(ch):
+                break
+            self.pos += 1
+        end = self.pos
+
        # Skip whitespace and punctuation (Chinese or English) following the word
        #跳过中英文标点和空格  
-        while self.pos < self.textLength:  
-            ch = self.getNextChar()  
-            if self.isASCIIChar(ch) or self.isChineseChar(ch):  
-                break  
-            self.pos += 1  
-              
+        while self.pos < self.textLength:
+            ch = self.getNextChar()
+            if self.isASCIIChar(ch) or self.isChineseChar(ch):
+                break
+            self.pos += 1
+
        #返回英文单词  
-        return self.text[start:end]  
-      
+        return self.text[start:end]
+
    #切割出中文词，并且做处理，用上述4种方法  
-    def getChineseWords(self):  
-        chunks = self.createChunks()  
-        if len(chunks) > 1:  
-            chunks = self.complexCompare.mmFilter(chunks)  
-        if len(chunks) > 1:  
-            chunks = self.complexCompare.lawlFilter(chunks)  
-        if len(chunks) > 1:  
-            chunks = self.complexCompare.svmlFilter(chunks)  
-        if len(chunks) > 1:  
-            chunks = self.complexCompare.logFreqFilter(chunks)  
-        if len(chunks) == 0 :  
-            return ''  
+    def getChineseWords(self):
+        chunks = self.createChunks()
+        if len(chunks) > 1:
+            chunks = self.complexCompare.mmFilter(chunks)
+        if len(chunks) > 1:
+            chunks = self.complexCompare.lawlFilter(chunks)
+        if len(chunks) > 1:
+            chunks = self.complexCompare.svmlFilter(chunks)
+        if len(chunks) > 1:
+            chunks = self.complexCompare.logFreqFilter(chunks)
+        if len(chunks) == 0:
+            return ''

        #最后只有一种切割方法  
-        word = chunks[0].words  
-        token = ""  
-        length = 0  
-        for x in word:  
-            if x.length != -1:  
-                token += x.text + "/"  
-                length += len(x.text)  
-        self.pos += length  
-        return token  
-      
+        word = chunks[0].words
+        token = ""
+        length = 0
+        for x in word:
+            if x.length != -1:
+                token += x.text + "/"
+                length += len(x.text)
+        self.pos += length
+        return token
+
    #三重循环来枚举切割方法，这里也可以运用递归来实现  
-    def createChunks(self):  
-        chunks = []  
-        originalPos = self.pos  
-        words1 = self.getMatchChineseWords()  
-          
-        for word1 in words1:  
-            self.pos += len(word1.text)  
-            if self.pos < self.textLength:  
-                words2 = self.getMatchChineseWords()  
-                for word2 in words2:  
-                    self.pos += len(word2.text)  
-                    if self.pos < self.textLength:  
-                        words3 = self.getMatchChineseWords()  
-                        for word3 in words3:  
+    def createChunks(self):
+        chunks = []
+        originalPos = self.pos
+        words1 = self.getMatchChineseWords()
+
+        for word1 in words1:
+            self.pos += len(word1.text)
+            if self.pos < self.textLength:
+                words2 = self.getMatchChineseWords()
+                for word2 in words2:
+                    self.pos += len(word2.text)
+                    if self.pos < self.textLength:
+                        words3 = self.getMatchChineseWords()
+                        for word3 in words3:
                            # print(word3.length, word3.text)
-                            if word3.length == -1:  
-                                chunk = Chunk(word1,word2)  
+                            if word3.length == -1:
+                                chunk = Chunk(word1, word2)
                                # print("Ture")
-                            else :  
-                                chunk = Chunk(word1,word2,word3)  
-                            chunks.append(chunk)  
-                    elif self.pos == self.textLength:  
-                        chunks.append(Chunk(word1,word2))  
-                    self.pos -= len(word2.text)  
-            elif self.pos == self.textLength:  
-                chunks.append(Chunk(word1))  
-            self.pos -= len(word1.text)  
-                                  
-        self.pos = originalPos  
-        return chunks  
-      
+                            else:
+                                chunk = Chunk(word1, word2, word3)
+                            chunks.append(chunk)
+                    elif self.pos == self.textLength:
+                        chunks.append(Chunk(word1, word2))
+                    self.pos -= len(word2.text)
+            elif self.pos == self.textLength:
+                chunks.append(Chunk(word1))
+            self.pos -= len(word1.text)
+
+        self.pos = originalPos
+        return chunks
+
    #运用正向最大匹配算法结合字典来切割中文文本    
-    def getMatchChineseWords(self):  
+    def getMatchChineseWords(self):
        #use cache,check it   
-        for i in range(self.cacheSize):  
-            if self.cache[i][0] == self.pos:  
-                return self.cache[i][1]  
-              
-        originalPos = self.pos  
-        words = []  
-        index = 0  
-        while self.pos < self.textLength:  
-            if index >= maxWordLength :  
-                break  
-            if not self.isChineseChar(self.getNextChar()):  
-                break  
-            self.pos += 1  
-            index += 1  
-              
-            text = self.text[originalPos:self.pos]  
-            word = getDictWord(text)  
-            if word:  
-                words.append(word)  
-                  
-        self.pos = originalPos  
+        for i in range(self.cacheSize):
+            if self.cache[i][0] == self.pos:
+                return self.cache[i][1]
+
+        originalPos = self.pos
+        words = []
+        index = 0
+        while self.pos < self.textLength:
+            if index >= maxWordLength:
+                break
+            if not self.isChineseChar(self.getNextChar()):
+                break
+            self.pos += 1
+            index += 1
+
+            text = self.text[originalPos:self.pos]
+            word = getDictWord(text)
+            if word:
+                words.append(word)
+
+        self.pos = originalPos
        #没有词则放置个‘X’，将文本长度标记为-1  
-        if not words:  
-            word = Word()  
-            word.length = -1  
-            word.text = 'X'  
-            words.append(word)  
-          
-        self.cache[self.cacheIndex] = (self.pos,words)  
-        self.cacheIndex += 1  
-        if self.cacheIndex >= self.cacheSize:  
-            self.cacheIndex = 0  
-        return words  
-
-
-if __name__=="__main__":  
-
-    def cuttest(text):  
+        if not words:
+            word = Word()
+            word.length = -1
+            word.text = 'X'
+            words.append(word)
+
+        self.cache[self.cacheIndex] = (self.pos, words)
+        self.cacheIndex += 1
+        if self.cacheIndex >= self.cacheSize:
+            self.cacheIndex = 0
+        return words
+
+
+if __name__ == "__main__":
+
+    def cuttest(text):
        #cut =  Analysis(text)  
-        tmp=""
+        tmp = ""
        try:
            for word in iter(Analysis(text)):
                tmp += word
@ -310,71 +320,73 @@ if __name__=="__main__":
        print("================================")

    cuttest(u"研究生命来源")
-    cuttest(u"南京市长江大桥欢迎您")  
-    cuttest(u"请把手抬高一点儿")  
-    cuttest(u"长春市长春节致词。")  
-    cuttest(u"长春市长春药店。")  
-    cuttest(u"我的和服务必在明天做好。")  
-    cuttest(u"我发现有很多人喜欢他。")  
-    cuttest(u"我喜欢看电视剧大长今。")  
-    cuttest(u"半夜给拎起来陪看欧洲杯糊着两眼半晌没搞明白谁和谁踢。")  
-    cuttest(u"李智伟高高兴兴以及王晓薇出去玩，后来智伟和晓薇又单独去玩了。")  
-    cuttest(u"一次性交出去很多钱。 ")  
-    cuttest(u"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。")  
-    cuttest(u"我不喜欢日本和服。")  
-    cuttest(u"雷猴回归人间。")  
-    cuttest(u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作")  
-    cuttest(u"我需要廉租房")  
-    cuttest(u"永和服装饰品有限公司")  
-    cuttest(u"我爱北京天安门")  
-    cuttest(u"abc")  
-    cuttest(u"隐马尔可夫")  
-    cuttest(u"雷猴是个好网站")  
-    cuttest(u"“Microsoft”一词由“MICROcomputer（微型计算机）”和“SOFTware（软件）”两部分组成")  
-    cuttest(u"草泥马和欺实马是今年的流行词汇")  
-    cuttest(u"伊藤洋华堂总府店")  
-    cuttest(u"中国科学院计算技术研究所")  
-    cuttest(u"罗密欧与朱丽叶")  
-    cuttest(u"我购买了道具和服装")  
-    cuttest(u"PS: 我觉得开源有一个好处，就是能够敦促自己不断改进，避免敞帚自珍")  
-    cuttest(u"湖北省石首市")  
-    cuttest(u"总经理完成了这件事情")  
-    cuttest(u"电脑修好了")  
-    cuttest(u"做好了这件事情就一了百了了")  
-    cuttest(u"人们审美的观点是不同的")  
-    cuttest(u"我们买了一个美的空调")  
-    cuttest(u"线程初始化时我们要注意")  
-    cuttest(u"一个分子是由好多原子组织成的")  
-    cuttest(u"祝你马到功成")  
-    cuttest(u"他掉进了无底洞里")  
-    cuttest(u"中国的首都是北京")  
-    cuttest(u"孙君意")  
-    cuttest(u"外交部发言人马朝旭")  
-    cuttest(u"领导人会议和第四届东亚峰会")  
-    cuttest(u"在过去的这五年")  
-    cuttest(u"还需要很长的路要走")  
-    cuttest(u"60周年首都阅兵")  
-    cuttest(u"你好人们审美的观点是不同的")  
-    cuttest(u"买水果然后来世博园")  
-    cuttest(u"买水果然后去世博园")  
-    cuttest(u"但是后来我才知道你是对的")  
-    cuttest(u"存在即合理")  
-    cuttest(u"的的的的的在的的的的就以和和和")  
-    cuttest(u"I love你，不以为耻，反以为rong")  
-    cuttest(u" ")  
-    cuttest(u"")  
-    cuttest(u"hello你好人们审美的观点是不同的")  
-    cuttest(u"很好但主要是基于网页形式")  
-    cuttest(u"hello你好人们审美的观点是不同的")  
-    cuttest(u"为什么我不能拥有想要的生活")  
-    cuttest(u"后来我才")  
-    cuttest(u"此次来中国是为了")  
-    cuttest(u"使用了它就可以解决一些问题")  
-    cuttest(u",使用了它就可以解决一些问题")  
-    cuttest(u"其实使用了它就可以解决一些问题")  
-    cuttest(u"好人使用了它就可以解决一些问题")  
-    cuttest(u"是因为和国家")  
-    cuttest(u"老年搜索还支持")  
-    cuttest(u"干脆就把那部蒙人的闲法给废了拉倒！RT @laoshipukong : 27日，全国人大常委会第三次审议侵权责任法草案，删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 ")  
+    cuttest(u"南京市长江大桥欢迎您")
+    cuttest(u"请把手抬高一点儿")
+    cuttest(u"长春市长春节致词。")
+    cuttest(u"长春市长春药店。")
+    cuttest(u"我的和服务必在明天做好。")
+    cuttest(u"我发现有很多人喜欢他。")
+    cuttest(u"我喜欢看电视剧大长今。")
+    cuttest(u"半夜给拎起来陪看欧洲杯糊着两眼半晌没搞明白谁和谁踢。")
+    cuttest(u"李智伟高高兴兴以及王晓薇出去玩，后来智伟和晓薇又单独去玩了。")
+    cuttest(u"一次性交出去很多钱。 ")
+    cuttest(u"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。")
+    cuttest(u"我不喜欢日本和服。")
+    cuttest(u"雷猴回归人间。")
+    cuttest(u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作")
+    cuttest(u"我需要廉租房")
+    cuttest(u"永和服装饰品有限公司")
+    cuttest(u"我爱北京天安门")
+    cuttest(u"abc")
+    cuttest(u"隐马尔可夫")
+    cuttest(u"雷猴是个好网站")
+    cuttest(u"“Microsoft”一词由“MICROcomputer（微型计算机）”和“SOFTware（软件）”两部分组成")
+    cuttest(u"草泥马和欺实马是今年的流行词汇")
+    cuttest(u"伊藤洋华堂总府店")
+    cuttest(u"中国科学院计算技术研究所")
+    cuttest(u"罗密欧与朱丽叶")
+    cuttest(u"我购买了道具和服装")
+    cuttest(u"PS: 我觉得开源有一个好处，就是能够敦促自己不断改进，避免敞帚自珍")
+    cuttest(u"湖北省石首市")
+    cuttest(u"总经理完成了这件事情")
+    cuttest(u"电脑修好了")
+    cuttest(u"做好了这件事情就一了百了了")
+    cuttest(u"人们审美的观点是不同的")
+    cuttest(u"我们买了一个美的空调")
+    cuttest(u"线程初始化时我们要注意")
+    cuttest(u"一个分子是由好多原子组织成的")
+    cuttest(u"祝你马到功成")
+    cuttest(u"他掉进了无底洞里")
+    cuttest(u"中国的首都是北京")
+    cuttest(u"孙君意")
+    cuttest(u"外交部发言人马朝旭")
+    cuttest(u"领导人会议和第四届东亚峰会")
+    cuttest(u"在过去的这五年")
+    cuttest(u"还需要很长的路要走")
+    cuttest(u"60周年首都阅兵")
+    cuttest(u"你好人们审美的观点是不同的")
+    cuttest(u"买水果然后来世博园")
+    cuttest(u"买水果然后去世博园")
+    cuttest(u"但是后来我才知道你是对的")
+    cuttest(u"存在即合理")
+    cuttest(u"的的的的的在的的的的就以和和和")
+    cuttest(u"I love你，不以为耻，反以为rong")
+    cuttest(u" ")
+    cuttest(u"")
+    cuttest(u"hello你好人们审美的观点是不同的")
+    cuttest(u"很好但主要是基于网页形式")
+    cuttest(u"hello你好人们审美的观点是不同的")
+    cuttest(u"为什么我不能拥有想要的生活")
+    cuttest(u"后来我才")
+    cuttest(u"此次来中国是为了")
+    cuttest(u"使用了它就可以解决一些问题")
+    cuttest(u",使用了它就可以解决一些问题")
+    cuttest(u"其实使用了它就可以解决一些问题")
+    cuttest(u"好人使用了它就可以解决一些问题")
+    cuttest(u"是因为和国家")
+    cuttest(u"老年搜索还支持")
+    cuttest(
+        u"干脆就把那部蒙人的闲法给废了拉倒！RT @laoshipukong : 27日，全国人大常委会第三次审议侵权责任法草案，删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "
+    )
    cuttest("2022年12月30日是星期几？")
-    cuttest("二零二二年十二月三十日是星期几？")
+    cuttest("二零二二年十二月三十日是星期几？")
--- a/speechx/examples/wfst/README.md
+++ b/speechx/examples/wfst/README.md
@ -183,4 +183,4 @@ data/
        ├── lexiconp_disambig.txt
        ├── lexiconp.txt
        └── units.list
-```
+```
--- a/utils/DER.py
+++ b/utils/DER.py
@ -26,9 +26,9 @@ import argparse
 import os
 import re
 import subprocess
-from distutils.util import strtobool

 import numpy as np
+from distutils.util import strtobool

 FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
 SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")
--- a/utils/compute-wer.py
+++ b/utils/compute-wer.py
--- a/utils/format_rsl.py
+++ b/utils/format_rsl.py
@ -1,11 +1,21 @@
-import os
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import jsonlines


-def trans_hyp(origin_hyp,
-             trans_hyp = None, 
-             trans_hyp_sclite = None):
+def trans_hyp(origin_hyp, trans_hyp=None, trans_hyp_sclite=None):
    """
    Args:
        origin_hyp: The input json file which contains the model output
@ -17,19 +27,18 @@ def trans_hyp(origin_hyp,
    with open(origin_hyp, "r+", encoding="utf8") as f:
        for item in jsonlines.Reader(f):
            input_dict[item["utt"]] = item["hyps"][0]
-    if trans_hyp is not None:   
+    if trans_hyp is not None:
        with open(trans_hyp, "w+", encoding="utf8") as f:
            for key in input_dict.keys():
                f.write(key + " " + input_dict[key] + "\n")
-    if trans_hyp_sclite is not None: 
+    if trans_hyp_sclite is not None:
        with open(trans_hyp_sclite, "w+") as f:
            for key in input_dict.keys():
-                line = input_dict[key] + "(" + key + ".wav" +")" + "\n"
+                line = input_dict[key] + "(" + key + ".wav" + ")" + "\n"
                f.write(line)

-def trans_ref(origin_ref,
-                trans_ref = None, 
-                trans_ref_sclite = None):
+
+def trans_ref(origin_ref, trans_ref=None, trans_ref_sclite=None):
    """
    Args:
        origin_hyp: The input json file which contains the model output
@ -49,42 +58,48 @@ def trans_ref(origin_ref,
    if trans_ref_sclite is not None:
        with open(trans_ref_sclite, "w") as f:
            for key in input_dict.keys():
-                line = input_dict[key] + "(" + key + ".wav" +")" + "\n"
+                line = input_dict[key] + "(" + key + ".wav" + ")" + "\n"
                f.write(line)


-
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(prog='format hyp file for compute CER/WER', add_help=True)
+    parser = argparse.ArgumentParser(
+        prog='format hyp file for compute CER/WER', add_help=True)
    parser.add_argument(
-        '--origin_hyp',
-        type=str,
-        default = None,
-        help='origin hyp file')
+        '--origin_hyp', type=str, default=None, help='origin hyp file')
    parser.add_argument(
-        '--trans_hyp', type=str, default = None, help='hyp file for caculating CER/WER')
+        '--trans_hyp',
+        type=str,
+        default=None,
+        help='hyp file for caculating CER/WER')
    parser.add_argument(
-        '--trans_hyp_sclite', type=str,  default = None, help='hyp file for caculating CER/WER by sclite')
+        '--trans_hyp_sclite',
+        type=str,
+        default=None,
+        help='hyp file for caculating CER/WER by sclite')

    parser.add_argument(
-        '--origin_ref',
-        type=str,
-        default = None,
-        help='origin ref file')
+        '--origin_ref', type=str, default=None, help='origin ref file')
    parser.add_argument(
-        '--trans_ref', type=str, default = None, help='ref file for caculating CER/WER')
+        '--trans_ref',
+        type=str,
+        default=None,
+        help='ref file for caculating CER/WER')
    parser.add_argument(
-        '--trans_ref_sclite', type=str,  default = None, help='ref file for caculating CER/WER by sclite')
+        '--trans_ref_sclite',
+        type=str,
+        default=None,
+        help='ref file for caculating CER/WER by sclite')
    parser_args = parser.parse_args()

    if parser_args.origin_hyp is not None:
        trans_hyp(
-            origin_hyp = parser_args.origin_hyp,
-            trans_hyp = parser_args.trans_hyp,
-            trans_hyp_sclite = parser_args.trans_hyp_sclite, )
+            origin_hyp=parser_args.origin_hyp,
+            trans_hyp=parser_args.trans_hyp,
+            trans_hyp_sclite=parser_args.trans_hyp_sclite, )

    if parser_args.origin_ref is not None:
        trans_ref(
-            origin_ref = parser_args.origin_ref,
-            trans_ref = parser_args.trans_ref,
-            trans_ref_sclite = parser_args.trans_ref_sclite, )
+            origin_ref=parser_args.origin_ref,
+            trans_ref=parser_args.trans_ref,
+            trans_ref_sclite=parser_args.trans_ref_sclite, )
--- a/utils/fst/prepare_dict.py
+++ b/utils/fst/prepare_dict.py
@ -35,7 +35,7 @@ def main(args):
    # used to filter polyphone and invalid word
    lexicon_table = set()
    in_n = 0  # in lexicon word count
-    out_n = 0 # out lexicon word cout
+    out_n = 0  # out lexicon word cout
    with open(args.in_lexicon, 'r') as fin, \
            open(args.out_lexicon, 'w') as fout:
        for line in fin:
@ -82,7 +82,10 @@ def main(args):
                lexicon_table.add(word)
                out_n += 1

-    print(f"Filter lexicon by unit table: filter out {in_n - out_n}, {out_n}/{in_n}")
+    print(
+        f"Filter lexicon by unit table: filter out {in_n - out_n}, {out_n}/{in_n}"
+    )
+

 if __name__ == '__main__':
    parser = argparse.ArgumentParser(