From 6d330bd4777c6ceb9ee5f964a2350f978941ceca Mon Sep 17 00:00:00 2001
From: co63oc <co63oc@users.noreply.github.com>
Date: Thu, 20 Mar 2025 08:23:04 +0800
Subject: [PATCH] Fix typos in comments, docstrings, log messages and a parameter name

---
 paddlespeech/audio/utils/tensor_utils.py          | 6 +++---
 paddlespeech/s2t/utils/tensor_utils.py            | 6 +++---
 runtime/examples/text_lm/local/mmseg.py           | 4 ++--
 tests/unit/server/offline/test_server_client.sh   | 6 +++---
 tests/unit/server/online/tts/check_server/test.sh | 6 +++---
 tools/extras/install_liblbfgs.sh                  | 2 +-
 tools/extras/install_srilm.sh                     | 2 +-
 utils/fst/ctc_token_fst.py                        | 2 +-
 utils/fst/make_tlg.sh                             | 2 +-
 utils/generate_infer_yaml.py                      | 4 ++--
 utils/train_arpa_with_kenlm.sh                    | 4 ++--
 utils/zh_tn.py                                    | 2 +-
 12 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/paddlespeech/audio/utils/tensor_utils.py b/paddlespeech/audio/utils/tensor_utils.py
index b246a6459..b67b2dd81 100644
--- a/paddlespeech/audio/utils/tensor_utils.py
+++ b/paddlespeech/audio/utils/tensor_utils.py
@@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
     # assuming trailing dimensions and type of all the Tensors
     # in sequences are same and fetching those from sequences[0]
     max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
     # trailing_dims = max_size[1:]
     trailing_dims = tuple(
         max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@@ -93,7 +93,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
         length = tensor.shape[0]
         # use index notation to prevent duplicate references to the tensor
         if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # TODO (Hui Zhang): set_value op not support int16
             # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
             # out_tensor[i, :length, ...] = tensor
@@ -102,7 +102,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             else:
                 out_tensor[i, length] = tensor
         else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # out_tensor[:length, i, ...] = tensor
             if length != 0:
                 out_tensor[:length, i] = tensor
diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py
index 0d91b9cfb..15f4abdda 100644
--- a/paddlespeech/s2t/utils/tensor_utils.py
+++ b/paddlespeech/s2t/utils/tensor_utils.py
@@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
     # assuming trailing dimensions and type of all the Tensors
     # in sequences are same and fetching those from sequences[0]
     max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
     # trailing_dims = max_size[1:]
     trailing_dims = tuple(
         max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@@ -98,7 +98,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}"
         )
         if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # TODO (Hui Zhang): set_value op not support int16
             # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
             # out_tensor[i, :length, ...] = tensor
@@ -107,7 +107,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
             else:
                 out_tensor[i, length] = tensor
         else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
             # out_tensor[:length, i, ...] = tensor
             if length != 0:
                 out_tensor[:length, i] = tensor
diff --git a/runtime/examples/text_lm/local/mmseg.py b/runtime/examples/text_lm/local/mmseg.py
index 74295cd3c..d5bff6df3 100755
--- a/runtime/examples/text_lm/local/mmseg.py
+++ b/runtime/examples/text_lm/local/mmseg.py
@@ -156,8 +156,8 @@ class Analysis:
         return self.text[self.pos]
 
     #判断该字符是否是中文字符（不包括中文标点）    
-    def isChineseChar(self, charater):
-        return 0x4e00 <= ord(charater) < 0x9fa6
+    def isChineseChar(self, character):
+        return 0x4e00 <= ord(character) < 0x9fa6
 
     #判断是否是ASCII码  
     def isASCIIChar(self, ch):
diff --git a/tests/unit/server/offline/test_server_client.sh b/tests/unit/server/offline/test_server_client.sh
index 6418c82fd..26fb100a3 100644
--- a/tests/unit/server/offline/test_server_client.sh
+++ b/tests/unit/server/offline/test_server_client.sh
@@ -66,8 +66,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
 
-echo "Sevice ip: $server_ip" | tee ./log/test_result.log
-echo "Sevice port: $port" | tee -a ./log/test_result.log
+echo "Service ip: $server_ip" | tee ./log/test_result.log
+echo "Service port: $port" | tee -a ./log/test_result.log
 
 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@@ -190,7 +190,7 @@ echo "**************************************************************************
 
 echo "All tests completed."  | tee -a ./log/test_result.log
 
-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat ./log/test_result.log
 
diff --git a/tests/unit/server/online/tts/check_server/test.sh b/tests/unit/server/online/tts/check_server/test.sh
index c62c54c76..998a07b3f 100644
--- a/tests/unit/server/online/tts/check_server/test.sh
+++ b/tests/unit/server/online/tts/check_server/test.sh
@@ -76,8 +76,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
 
-echo "Sevice ip: $server_ip" | tee $log/test_result.log
-echo "Sevice port: $port" | tee -a $log/test_result.log
+echo "Service ip: $server_ip" | tee $log/test_result.log
+echo "Service port: $port" | tee -a $log/test_result.log
 
 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@@ -307,7 +307,7 @@ echo "**************************************************************************
 echo "All tests completed."  | tee -a $log/test_result.log
 
 
-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat $log/test_result.log
 
diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh
index 8d6ae4ab7..1fa727d1f 100755
--- a/tools/extras/install_liblbfgs.sh
+++ b/tools/extras/install_liblbfgs.sh
@@ -23,7 +23,7 @@ cd ..
 
 (
   [ ! -z "${LIBLBFGS}" ] && \
-    echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \
+    echo >&2 "LIBLBFGS variable is already defined. Undefining..." && \
     unset LIBLBFGS
 
   [ -f ./env.sh ] && . ./env.sh
diff --git a/tools/extras/install_srilm.sh b/tools/extras/install_srilm.sh
index f359e70ce..fdbcf5d97 100755
--- a/tools/extras/install_srilm.sh
+++ b/tools/extras/install_srilm.sh
@@ -68,7 +68,7 @@ make || exit
 cd ..
 (
   [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
+    echo >&2 "SRILM variable is already defined. Undefining..." && \
     unset SRILM
 
   [ -f ./env.sh ] && . ./env.sh
diff --git a/utils/fst/ctc_token_fst.py b/utils/fst/ctc_token_fst.py
index f63e9cdac..85974f27f 100755
--- a/utils/fst/ctc_token_fst.py
+++ b/utils/fst/ctc_token_fst.py
@@ -32,7 +32,7 @@ def main(args):
                 # leaving `token`
                 print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
             node += 1
-    # Fianl node
+    # Final node
     print('0')
 
 
diff --git a/utils/fst/make_tlg.sh b/utils/fst/make_tlg.sh
index c68387af9..944b8b1f3 100755
--- a/utils/fst/make_tlg.sh
+++ b/utils/fst/make_tlg.sh
@@ -21,7 +21,7 @@ cp -r $src_lang $tgt_lang
 # eps2disambig.pl: replace epsilons on the input side with the special disambiguation symbol #0. 
 # s2eps.pl: replaces <s> and </s> with <eps> (on both input and output sides), for the G.fst acceptor.
 # G.fst, the disambiguation symbol #0 only appears on the input side
-# do eps2disambig.pl and s2eps.pl maybe just for fallowing `fstrmepsilon`.
+# do eps2disambig.pl and s2eps.pl maybe just for following `fstrmepsilon`.
 cat $arpa_lm | \
    grep -v '<s> <s>' | \
    grep -v '</s> <s>' | \
diff --git a/utils/generate_infer_yaml.py b/utils/generate_infer_yaml.py
index ca8d6b60d..bd45a1bbd 100755
--- a/utils/generate_infer_yaml.py
+++ b/utils/generate_infer_yaml.py
@@ -3,7 +3,7 @@
 '''
     Merge training configs into a single inference config.
     The single inference config is for CLI, which only takes a single config to do inferencing.
-    The trainig configs includes: model config, preprocess config, decode config, vocab file and cmvn file.
+    The training configs include: model config, preprocess config, decode config, vocab file and cmvn file.
 
     Process:
     # step 1: prepare dir
@@ -11,7 +11,7 @@
     cp -r exp conf data release_dir
     cd release_dir 
  
-    # step 2: get "model.yaml" which conatains all configuration info.
+    # step 2: get "model.yaml" which contains all configuration info.
     # if does not contain preprocess.yaml file. e.g ds2:
     python generate_infer_yaml.py --cfg_pth conf/deepspeech2_online.yaml --dcd_pth conf/tuning/chunk_decode.yaml --vb_pth data/lang_char/vocab.txt --cmvn_pth data/mean_std.json --save_pth model.yaml --pre_pth null        
     # if contains preprocess.yaml file. e.g  u2:
diff --git a/utils/train_arpa_with_kenlm.sh b/utils/train_arpa_with_kenlm.sh
index 8af646ceb..b435239af 100755
--- a/utils/train_arpa_with_kenlm.sh
+++ b/utils/train_arpa_with_kenlm.sh
@@ -37,7 +37,7 @@ fi
 # the text should be properly pre-processed, e.g:
 #   cleand, normalized and possibly word-segmented
 
-# get rid off irrelavent symbols
+# get rid of irrelevant symbols
 grep -v '<eps>' $symbol_table \
   | grep -v '#0' \
   | grep -v '<unk>' | grep -v '<UNK>' \
@@ -51,7 +51,7 @@ grep -v '<eps>' $symbol_table \
 # 
 # TL;DR reason:
 # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option 
-# spcifies a *valid* set of vocabulary, whereas *valid but unseen* 
+# specifies a *valid* set of vocabulary, whereas *valid but unseen* 
 # words are discarded in final arpa.
 # So the trick is, 
 # we explicitly add kaldi's vocab(one word per line) to training text, 
diff --git a/utils/zh_tn.py b/utils/zh_tn.py
index 6fee626bd..4bb684a1e 100755
--- a/utils/zh_tn.py
+++ b/utils/zh_tn.py
@@ -1288,7 +1288,7 @@ def normalize_corpus(corpus,
 
 
 def char_token(s: Text) -> List[Text]:
-    """chinese charactor
+    """chinese character
     Args:
         s (Text): "我爱中国“