optional tokenizer and fix some doc. (#3042)

3 years ago · 9bf5471613
parent b07f87b42e
commit 9bf5471613
6 changed files with 4 additions and 3 deletions
--- a/examples/librispeech/asr3/local/data.sh
+++ b/examples/librispeech/asr3/local/data.sh
--- a/examples/librispeech/asr3/local/test.sh
+++ b/examples/librispeech/asr3/local/test.sh
--- a/examples/librispeech/asr3/local/test_wav.sh
+++ b/examples/librispeech/asr3/local/test_wav.sh
--- a/examples/librispeech/asr3/local/train.sh
+++ b/examples/librispeech/asr3/local/train.sh
--- a/examples/librispeech/asr3/run.sh
+++ b/examples/librispeech/asr3/run.sh
@@ -6,7 +6,7 @@ set -e

 gpus=0
 stage=0
-stop_stage=0
+stop_stage=4
 conf_path=conf/wav2vec2ASR.yaml
 ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
--- a/paddlespeech/s2t/exps/wav2vec2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/wav2vec2/bin/test_wav.py
@@ -34,8 +34,9 @@ class Wav2vec2Infer():
        self.args = args
        self.config = config
        self.audio_file = args.audio_file
+        self.tokenizer = config.get("tokenizer", None)

-        if self.config.tokenizer:
+        if self.tokenizer:
            self.text_feature = AutoTokenizer.from_pretrained(
                self.config.tokenizer)
        else:
@@ -72,7 +73,7 @@ class Wav2vec2Infer():
                text_feature=self.text_feature,
                decoding_method=decode_config.decoding_method,
                beam_size=decode_config.beam_size,
-                tokenizer=self.config.tokenizer, )
+                tokenizer=self.tokenizer, )
            rsl = result_transcripts[0]
            utt = Path(self.audio_file).name
            logger.info(f"hyp: {utt} {rsl}")