From 4d8aee052859dd8900e35030a0442b72e43c04fe Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 21 Apr 2022 03:36:18 +0000 Subject: [PATCH] update wfst graph; --- speechx/examples/ds2_ol/aishell/README.md | 13 +++++++++++-- speechx/examples/ds2_ol/aishell/run.sh | 9 +++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/speechx/examples/ds2_ol/aishell/README.md b/speechx/examples/ds2_ol/aishell/README.md index f4a81516..115bf85f 100644 --- a/speechx/examples/ds2_ol/aishell/README.md +++ b/speechx/examples/ds2_ol/aishell/README.md @@ -8,7 +8,7 @@ Mandarin -> 16.14 % N=104612 C=88190 S=16110 D=312 I=465 Other -> 0.00 % N=0 C=0 S=0 D=0 I=0 ``` -## CTC Prefix Beam Search w LM +## CTC Prefix Beam Search w/ LM LM: zh_giga.no_cna_cmn.prune01244.klm ``` @@ -19,9 +19,18 @@ Other -> 0.00 % N=0 C=0 S=0 D=0 I=0 ## CTC WFST -LM: aishell train +LM: [aishell train](http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/aishell/aishell_graph.zip) +--acoustic_scale=1.2 ``` Overall -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1819 Mandarin -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1818 Other -> 0.00 % N=0 C=0 S=0 D=0 I=1 ``` + +LM: [wenetspeech](http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/aishell/wenetspeech_graph.zip) +--acoustic_scale=1.5 +``` +Overall -> 10.93 % N=104765 C=93410 S=9780 D=1575 I=95 +Mandarin -> 10.93 % N=104762 C=93410 S=9779 D=1573 I=95 +Other -> 100.00 % N=3 C=0 S=1 D=2 I=0 +``` \ No newline at end of file diff --git a/speechx/examples/ds2_ol/aishell/run.sh b/speechx/examples/ds2_ol/aishell/run.sh index 779123d5..9a63ff4d 100755 --- a/speechx/examples/ds2_ol/aishell/run.sh +++ b/speechx/examples/ds2_ol/aishell/run.sh @@ -87,7 +87,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ctc-prefix-beam-search-decoder-ol \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --model_path=$model_dir/avg_1.jit.pdmodel \ - --params_path=$model_dir/avg_1.jit.pdiparams \ + --param_path=$model_dir/avg_1.jit.pdiparams \ 
--model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --dict_file=$vocb_dir/vocab.txt \ --result_wspecifier=ark,t:$data/split${nj}/JOB/result @@ -102,7 +102,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ctc-prefix-beam-search-decoder-ol \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --model_path=$model_dir/avg_1.jit.pdmodel \ - --params_path=$model_dir/avg_1.jit.pdiparams \ + --param_path=$model_dir/avg_1.jit.pdiparams \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --dict_file=$vocb_dir/vocab.txt \ --lm_path=$lm \ @@ -119,17 +119,18 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then pushd $wfst wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip unzip aishell_graph.zip + mv aishell_graph/* . popd fi - graph_dir=$wfst/aishell_graph + graph_dir=$wfst/ # TLG decoder utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wfst.log \ wfst-decoder-ol \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --model_path=$model_dir/avg_1.jit.pdmodel \ - --params_path=$model_dir/avg_1.jit.pdiparams \ + --param_path=$model_dir/avg_1.jit.pdiparams \ --word_symbol_table=$graph_dir/words.txt \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --graph_path=$graph_dir/TLG.fst --max_active=7500 \