pull/4021/head
co63oc 6 months ago
parent 201275e7d2
commit 6d330bd477

@@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
     # assuming trailing dimensions and type of all the Tensors
     # in sequences are same and fetching those from sequences[0]
     max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
     # trailing_dims = max_size[1:]
     trailing_dims = tuple(
         max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@@ -93,7 +93,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
         length = tensor.shape[0]
         # use index notation to prevent duplicate references to the tensor
         if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # TODO (Hui Zhang): set_value op not support int16
             # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
             # out_tensor[i, :length, ...] = tensor
@@ -102,7 +102,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             else:
                 out_tensor[i, length] = tensor
         else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # out_tensor[:length, i, ...] = tensor
             if length != 0:
                 out_tensor[:length, i] = tensor

@@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
     # assuming trailing dimensions and type of all the Tensors
     # in sequences are same and fetching those from sequences[0]
     max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
     # trailing_dims = max_size[1:]
     trailing_dims = tuple(
         max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@@ -98,7 +98,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}"
         )
         if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # TODO (Hui Zhang): set_value op not support int16
             # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
             # out_tensor[i, :length, ...] = tensor
@@ -107,7 +107,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             else:
                 out_tensor[i, length] = tensor
         else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # out_tensor[:length, i, ...] = tensor
             if length != 0:
                 out_tensor[:length, i] = tensor
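Context for both pad_sequence hunks above: the `if length != 0` guard exists because Paddle's set_value op rejects empty slices (`end == start`), per the TODO comments. A minimal usage sketch, assuming Paddle's dynamic-graph mode; the tensors and shapes are illustrative, not from the patch:

    import paddle

    # Pad two variable-length 1-D tensors into a [batch, max_len] tensor,
    # mirroring the batch_first branch of pad_sequence above.
    a = paddle.to_tensor([1, 2, 3])
    b = paddle.to_tensor([4, 5])
    out = paddle.zeros([2, 3], dtype=a.dtype)
    for i, t in enumerate([a, b]):
        length = t.shape[0]
        if length != 0:  # guard against set_value's `end == start` limitation
            out[i, :length] = t
    print(out.numpy())  # [[1 2 3], [4 5 0]]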

@@ -156,8 +156,8 @@ class Analysis:
         return self.text[self.pos]
     # Check whether the character is a Chinese character (excluding Chinese punctuation)
-    def isChineseChar(self, charater):
-        return 0x4e00 <= ord(charater) < 0x9fa6
+    def isChineseChar(self, character):
+        return 0x4e00 <= ord(character) < 0x9fa6
     # Check whether the character is ASCII
     def isASCIIChar(self, ch):
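Renamed parameter aside, isChineseChar tests whether a code point falls in the CJK Unified Ideographs block (U+4E00 up to, but not including, U+9FA6), which is why Chinese punctuation is excluded. A standalone illustration of the same check:

    # Same range test as isChineseChar above, shown outside the class.
    def is_chinese_char(character: str) -> bool:
        return 0x4e00 <= ord(character) < 0x9fa6

    assert is_chinese_char('中')        # U+4E2D: a CJK ideograph
    assert not is_chinese_char('A')     # ASCII
    assert not is_chinese_char('。')    # Chinese punctuation is excluded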

@@ -66,8 +66,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
-echo "Sevice ip: $server_ip" | tee ./log/test_result.log
-echo "Sevice port: $port" | tee -a ./log/test_result.log
+echo "Service ip: $server_ip" | tee ./log/test_result.log
+echo "Service port: $port" | tee -a ./log/test_result.log
 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@@ -190,7 +190,7 @@ echo "**************************************************************************
 echo "All tests completed." | tee -a ./log/test_result.log
-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat ./log/test_result.log

@@ -76,8 +76,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
-echo "Sevice ip: $server_ip" | tee $log/test_result.log
-echo "Sevice port: $port" | tee -a $log/test_result.log
+echo "Service ip: $server_ip" | tee $log/test_result.log
+echo "Service port: $port" | tee -a $log/test_result.log
 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@@ -307,7 +307,7 @@ echo "**************************************************************************
 echo "All tests completed." | tee -a $log/test_result.log
-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat $log/test_result.log
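Both test scripts scrape the host and port out of conf/application.yaml with grep/awk. For reference, a hedged Python equivalent, assuming PyYAML is installed and the file has top-level host: and port: keys (the awk version matches any line containing those words):

    import yaml

    # Read the service address the same way the grep/awk pipeline intends to.
    with open('./conf/application.yaml') as f:
        config = yaml.safe_load(f)
    print('Service ip: {}'.format(config['host']))
    print('Service port: {}'.format(config['port']))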

@@ -23,7 +23,7 @@ cd ..
 (
   [ ! -z "${LIBLBFGS}" ] && \
-    echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \
+    echo >&2 "LIBLBFGS variable is already defined. Undefining..." && \
     unset LIBLBFGS
   [ -f ./env.sh ] && . ./env.sh

@@ -68,7 +68,7 @@ make || exit
 cd ..
 (
   [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
+    echo >&2 "SRILM variable is already defined. Undefining..." && \
     unset SRILM
   [ -f ./env.sh ] && . ./env.sh

@@ -32,7 +32,7 @@ def main(args):
         # leaving `token`
         print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
         node += 1
-    # Fianl node
+    # Final node
     print('0')
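For context on the corrected comment: the script emits an FST in OpenFst's text format, where an arc line is `src dest ilabel olabel [weight]` and a line holding just a state id marks that state as final, which is what the trailing `print('0')` does. A toy sketch of the same output pattern (the states and labels here are hypothetical):

    # Emit two <eps>:<eps> arcs into state 2, then mark state 0 as final.
    for node in (1, 3):
        print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
    print('0')  # final node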

@@ -21,7 +21,7 @@ cp -r $src_lang $tgt_lang
 # eps2disambig.pl: replace epsilons on the input side with the special disambiguation symbol #0.
 # s2eps.pl: replaces <s> and </s> with <eps> (on both input and output sides), for the G.fst acceptor.
 # G.fst, the disambiguation symbol #0 only appears on the input side
-# do eps2disambig.pl and s2eps.pl maybe just for fallowing `fstrmepsilon`.
+# do eps2disambig.pl and s2eps.pl maybe just for following `fstrmepsilon`.
 cat $arpa_lm | \
    grep -v '<s> <s>' | \
    grep -v '</s> <s>' | \
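To make the corrected comment concrete: eps2disambig.pl rewrites input-side epsilons to the disambiguation symbol #0 so the following `fstrmepsilon` does not remove them. A hedged sketch of that per-arc rewrite on text-format FST lines, a simplification of the actual Perl script, not its full behavior:

    # Replace <eps> on the input side (third field of an arc line) with #0;
    # output labels and final-state lines are left untouched.
    def eps2disambig(line: str) -> str:
        fields = line.split()
        if len(fields) >= 4 and fields[2] == '<eps>':
            fields[2] = '#0'
        return ' '.join(fields)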

@@ -3,7 +3,7 @@
 '''
 Merge training configs into a single inference config.
 The single inference config is for CLI, which only takes a single config to do inferencing.
-The trainig configs includes: model config, preprocess config, decode config, vocab file and cmvn file.
+The training configs includes: model config, preprocess config, decode config, vocab file and cmvn file.
 Process:
 # step 1: prepare dir
@@ -11,7 +11,7 @@
 cp -r exp conf data release_dir
 cd release_dir
-# step 2: get "model.yaml" which conatains all configuration info.
+# step 2: get "model.yaml" which contains all configuration info.
 # if does not contain preprocess.yaml file. e.g ds2:
 python generate_infer_yaml.py --cfg_pth conf/deepspeech2_online.yaml --dcd_pth conf/tuning/chunk_decode.yaml --vb_pth data/lang_char/vocab.txt --cmvn_pth data/mean_std.json --save_pth model.yaml --pre_pth null
 # if contains preprocess.yaml file. e.g u2:
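A hedged sketch of the merge step the docstring describes; merge_configs is a hypothetical helper, not the actual generate_infer_yaml.py logic, and it assumes PyYAML plus flat, later-wins override semantics:

    import yaml

    def merge_configs(*paths):
        # Later files override earlier ones at the top level.
        merged = {}
        for path in paths:
            with open(path) as f:
                merged.update(yaml.safe_load(f) or {})
        return merged

    config = merge_configs('conf/deepspeech2_online.yaml',
                           'conf/tuning/chunk_decode.yaml')
    with open('model.yaml', 'w') as f:
        yaml.safe_dump(config, f)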

@@ -37,7 +37,7 @@ fi
 # the text should be properly pre-processed, e.g:
 # cleand, normalized and possibly word-segmented
-# get rid off irrelavent symbols
+# get rid off irrelevant symbols
 grep -v '<eps>' $symbol_table \
     | grep -v '#0' \
     | grep -v '<unk>' | grep -v '<UNK>' \
@@ -51,7 +51,7 @@ grep -v '<eps>' $symbol_table \
 #
 # TL;DR reason:
 # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option
-# spcifies a *valid* set of vocabulary, whereas *valid but unseen*
+# specifies a *valid* set of vocabulary, whereas *valid but unseen*
 # words are discarded in final arpa.
 # So the trick is,
 # we explicitly add kaldi's vocab(one word per line) to training text,
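The trick the comment describes can be sketched as plain file concatenation (the file names here are hypothetical): prepending the vocabulary, one word per line, to the training text guarantees every valid word is seen at least once, so it survives kenlm's --limit_vocab_file filtering as a unigram in the final arpa:

    # Prepend the vocab to the training text before running kenlm training.
    with open('text.with_vocab', 'w') as out:
        for path in ('vocab.txt', 'train_text.txt'):
            with open(path) as f:
                out.writelines(f)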

@@ -1288,7 +1288,7 @@ def normalize_corpus(corpus,
 def char_token(s: Text) -> List[Text]:
-    """chinese charactor
+    """chinese character
     Args:
         s (Text): "我爱中国“
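Given the docstring's example, a hedged sketch of what char_token presumably does: split a Chinese string into single-character tokens.

    from typing import List, Text

    def char_token(s: Text) -> List[Text]:
        """Split a string into single-character tokens."""
        return list(s)

    assert char_token('我爱中国') == ['我', '爱', '中', '国']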
