From 6d330bd4777c6ceb9ee5f964a2350f978941ceca Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 20 Mar 2025 08:23:04 +0800 Subject: [PATCH] Fix typos in comments, docstrings, log messages and a parameter name --- paddlespeech/audio/utils/tensor_utils.py | 6 +++--- paddlespeech/s2t/utils/tensor_utils.py | 6 +++--- runtime/examples/text_lm/local/mmseg.py | 4 ++-- tests/unit/server/offline/test_server_client.sh | 6 +++--- tests/unit/server/online/tts/check_server/test.sh | 6 +++--- tools/extras/install_liblbfgs.sh | 2 +- tools/extras/install_srilm.sh | 2 +- utils/fst/ctc_token_fst.py | 2 +- utils/fst/make_tlg.sh | 2 +- utils/generate_infer_yaml.py | 4 ++-- utils/train_arpa_with_kenlm.sh | 4 ++-- utils/zh_tn.py | 2 +- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/paddlespeech/audio/utils/tensor_utils.py b/paddlespeech/audio/utils/tensor_utils.py index b246a6459..b67b2dd81 100644 --- a/paddlespeech/audio/utils/tensor_utils.py +++ b/paddlespeech/audio/utils/tensor_utils.py @@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] max_size = paddle.shape(sequences[0]) - # (TODO Hui Zhang): slice not supprot `end==start` + # (TODO Hui Zhang): slice not support `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple( max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else () @@ -93,7 +93,7 @@ def pad_sequence(sequences: List[paddle.Tensor], length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor if batch_first: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # TODO (Hui Zhang): set_value op not support int16 # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] 
= tensor @@ -102,7 +102,7 @@ def pad_sequence(sequences: List[paddle.Tensor], else: out_tensor[i, length] = tensor else: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # out_tensor[:length, i, ...] = tensor if length != 0: out_tensor[:length, i] = tensor diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py index 0d91b9cfb..15f4abdda 100644 --- a/paddlespeech/s2t/utils/tensor_utils.py +++ b/paddlespeech/s2t/utils/tensor_utils.py @@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] max_size = paddle.shape(sequences[0]) - # (TODO Hui Zhang): slice not supprot `end==start` + # (TODO Hui Zhang): slice not support `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple( max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else () @@ -98,7 +98,7 @@ def pad_sequence(sequences: List[paddle.Tensor], f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}" ) if batch_first: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # TODO (Hui Zhang): set_value op not support int16 # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] = tensor @@ -107,7 +107,7 @@ def pad_sequence(sequences: List[paddle.Tensor], else: out_tensor[i, length] = tensor else: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # out_tensor[:length, i, ...] 
= tensor if length != 0: out_tensor[:length, i] = tensor diff --git a/runtime/examples/text_lm/local/mmseg.py b/runtime/examples/text_lm/local/mmseg.py index 74295cd3c..d5bff6df3 100755 --- a/runtime/examples/text_lm/local/mmseg.py +++ b/runtime/examples/text_lm/local/mmseg.py @@ -156,8 +156,8 @@ class Analysis: return self.text[self.pos] #判断该字符是否是中文字符(不包括中文标点) - def isChineseChar(self, charater): - return 0x4e00 <= ord(charater) < 0x9fa6 + def isChineseChar(self, character): + return 0x4e00 <= ord(character) < 0x9fa6 #判断是否是ASCII码 def isASCIIChar(self, ch): diff --git a/tests/unit/server/offline/test_server_client.sh b/tests/unit/server/offline/test_server_client.sh index 6418c82fd..26fb100a3 100644 --- a/tests/unit/server/offline/test_server_client.sh +++ b/tests/unit/server/offline/test_server_client.sh @@ -66,8 +66,8 @@ config_file=./conf/application.yaml server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}') port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}') -echo "Sevice ip: $server_ip" | tee ./log/test_result.log -echo "Sevice port: $port" | tee -a ./log/test_result.log +echo "Service ip: $server_ip" | tee ./log/test_result.log +echo "Service port: $port" | tee -a ./log/test_result.log # whether a process is listening on $port pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'` @@ -190,7 +190,7 @@ echo "************************************************************************** echo "All tests completed." 
| tee -a ./log/test_result.log -# sohw all the test results +# show all the test results echo "***************** Here are all the test results ********************" cat ./log/test_result.log diff --git a/tests/unit/server/online/tts/check_server/test.sh b/tests/unit/server/online/tts/check_server/test.sh index c62c54c76..998a07b3f 100644 --- a/tests/unit/server/online/tts/check_server/test.sh +++ b/tests/unit/server/online/tts/check_server/test.sh @@ -76,8 +76,8 @@ config_file=./conf/application.yaml server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}') port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}') -echo "Sevice ip: $server_ip" | tee $log/test_result.log -echo "Sevice port: $port" | tee -a $log/test_result.log +echo "Service ip: $server_ip" | tee $log/test_result.log +echo "Service port: $port" | tee -a $log/test_result.log # whether a process is listening on $port pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'` @@ -307,7 +307,7 @@ echo "************************************************************************** echo "All tests completed." | tee -a $log/test_result.log -# sohw all the test results +# show all the test results echo "***************** Here are all the test results ********************" cat $log/test_result.log diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh index 8d6ae4ab7..1fa727d1f 100755 --- a/tools/extras/install_liblbfgs.sh +++ b/tools/extras/install_liblbfgs.sh @@ -23,7 +23,7 @@ cd .. ( [ ! -z "${LIBLBFGS}" ] && \ - echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \ + echo >&2 "LIBLBFGS variable is already defined. Undefining..." && \ unset LIBLBFGS [ -f ./env.sh ] && . ./env.sh diff --git a/tools/extras/install_srilm.sh b/tools/extras/install_srilm.sh index f359e70ce..fdbcf5d97 100755 --- a/tools/extras/install_srilm.sh +++ b/tools/extras/install_srilm.sh @@ -68,7 +68,7 @@ make || exit cd .. ( [ ! 
-z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ + echo >&2 "SRILM variable is already defined. Undefining..." && \ unset SRILM [ -f ./env.sh ] && . ./env.sh diff --git a/utils/fst/ctc_token_fst.py b/utils/fst/ctc_token_fst.py index f63e9cdac..85974f27f 100755 --- a/utils/fst/ctc_token_fst.py +++ b/utils/fst/ctc_token_fst.py @@ -32,7 +32,7 @@ def main(args): # leaving `token` print('{} {} {} {}'.format(node, 2, '', '')) node += 1 - # Fianl node + # Final node print('0') diff --git a/utils/fst/make_tlg.sh b/utils/fst/make_tlg.sh index c68387af9..944b8b1f3 100755 --- a/utils/fst/make_tlg.sh +++ b/utils/fst/make_tlg.sh @@ -21,7 +21,7 @@ cp -r $src_lang $tgt_lang # eps2disambig.pl: replace epsilons on the input side with the special disambiguation symbol #0. # s2eps.pl: replaces and with (on both input and output sides), for the G.fst acceptor. # G.fst, the disambiguation symbol #0 only appears on the input side -# do eps2disambig.pl and s2eps.pl maybe just for fallowing `fstrmepsilon`. +# do eps2disambig.pl and s2eps.pl maybe just for following `fstrmepsilon`. cat $arpa_lm | \ grep -v ' ' | \ grep -v ' ' | \ diff --git a/utils/generate_infer_yaml.py b/utils/generate_infer_yaml.py index ca8d6b60d..bd45a1bbd 100755 --- a/utils/generate_infer_yaml.py +++ b/utils/generate_infer_yaml.py @@ -3,7 +3,7 @@ ''' Merge training configs into a single inference config. The single inference config is for CLI, which only takes a single config to do inferencing. - The trainig configs includes: model config, preprocess config, decode config, vocab file and cmvn file. + The training configs includes: model config, preprocess config, decode config, vocab file and cmvn file. Process: # step 1: prepare dir @@ -11,7 +11,7 @@ cp -r exp conf data release_dir cd release_dir - # step 2: get "model.yaml" which conatains all configuration info. + # step 2: get "model.yaml" which contains all configuration info. # if does not contain preprocess.yaml file. 
e.g ds2: python generate_infer_yaml.py --cfg_pth conf/deepspeech2_online.yaml --dcd_pth conf/tuning/chunk_decode.yaml --vb_pth data/lang_char/vocab.txt --cmvn_pth data/mean_std.json --save_pth model.yaml --pre_pth null # if contains preprocess.yaml file. e.g u2: diff --git a/utils/train_arpa_with_kenlm.sh b/utils/train_arpa_with_kenlm.sh index 8af646ceb..b435239af 100755 --- a/utils/train_arpa_with_kenlm.sh +++ b/utils/train_arpa_with_kenlm.sh @@ -37,7 +37,7 @@ fi # the text should be properly pre-processed, e.g: # cleand, normalized and possibly word-segmented -# get rid off irrelavent symbols +# get rid off irrelevant symbols grep -v '' $symbol_table \ | grep -v '#0' \ | grep -v '' | grep -v '' \ @@ -51,7 +51,7 @@ grep -v '' $symbol_table \ # # TL;DR reason: # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option -# spcifies a *valid* set of vocabulary, whereas *valid but unseen* +# specifies a *valid* set of vocabulary, whereas *valid but unseen* # words are discarded in final arpa. # So the trick is, # we explicitly add kaldi's vocab(one word per line) to training text, diff --git a/utils/zh_tn.py b/utils/zh_tn.py index 6fee626bd..4bb684a1e 100755 --- a/utils/zh_tn.py +++ b/utils/zh_tn.py @@ -1288,7 +1288,7 @@ def normalize_corpus(corpus, def char_token(s: Text) -> List[Text]: - """chinese charactor + """chinese character Args: s (Text): "我爱中国“