diff --git a/speechx/examples/README.md b/speechx/examples/README.md
index 705ca2006..35174a0d7 100644
--- a/speechx/examples/README.md
+++ b/speechx/examples/README.md
@@ -1,17 +1,25 @@
-# Examples
+# Examples for SpeechX
+
+* dev - for speechx developers; used for testing.
+* ngram - used to build an n-gram ARPA LM.
+* ds2_ol - DeepSpeech2 online (streaming) test on the `aishell-1` test dataset.
+  The entry point is `ds2_ol/aishell/run.sh`.
 
-* glog - glog usage
-* feat - mfcc, linear
-* nnet - ds2 nn
-* decoder - online decoder to work as offline
 
 ## How to run
 
 `run.sh` is the entry point.
 
-Example to play `decoder`:
+For example, to run `ds2_ol`:
 
 ```
-pushd decoder
+pushd ds2_ol/aishell
 bash run.sh
 ```
+
+## Display the Model with [Netron](https://github.com/lutzroeder/netron)
+
+```
+pip install netron
+netron exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel --port 8022 --host 10.21.55.20
+```
diff --git a/speechx/examples/ds2_ol/aishell/.gitignore b/speechx/examples/ds2_ol/aishell/.gitignore
new file mode 100644
index 000000000..bbd86a25b
--- /dev/null
+++ b/speechx/examples/ds2_ol/aishell/.gitignore
@@ -0,0 +1,2 @@
+data
+exp
diff --git a/speechx/examples/ds2_ol/aishell/path.sh b/speechx/examples/ds2_ol/aishell/path.sh
index b74ffbec1..8e26e6e7e 100644
--- a/speechx/examples/ds2_ol/aishell/path.sh
+++ b/speechx/examples/ds2_ol/aishell/path.sh
@@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
 
 export LC_AL=C
 
-SPEECHX_BIN=$SPEECHX_EXAMPLES/decoder:$SPEECHX_EXAMPLES/feat
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat
 export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
diff --git a/speechx/examples/ds2_ol/aishell/run.sh b/speechx/examples/ds2_ol/aishell/run.sh
index a002ebb8c..7d65c8c2d 100755
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@@ -4,6 +4,9 @@ set -e
 
 . path.sh
 
+nj=40
+
+
 # 1. compile
 if [ ! -d ${SPEECHX_EXAMPLES} ]; then
     pushd ${SPEECHX_ROOT}
@@ -19,52 +22,51 @@ ckpt_dir=$data/model
 model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
 vocb_dir=$ckpt_dir/data/lang_char/
 
-lm=$data/zh_giga.no_cna_cmn.prune01244.klm
-
 # output
 mkdir -p exp
 exp=$PWD/exp
 
 aishell_wav_scp=aishell_test.scp
 if [ ! -d $data/test ]; then
+    pushd $data
     wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
-    unzip -d $data aishell_test.zip
+    unzip aishell_test.zip
+    popd
+
     realpath $data/test/*/*.wav > $data/wavlist
     awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
     paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
 fi
 
-model_dir=$PWD/aishell_ds2_online_model
-if [ ! -d $model_dir ]; then
-    mkdir -p $model_dir
-    wget -P $model_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
-    tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $model_dir
+
+if [ ! -d $ckpt_dir ]; then
+    mkdir -p $ckpt_dir
+    wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
 fi
 
+lm=$data/zh_giga.no_cna_cmn.prune01244.klm
 if [ ! -f $lm ]; then
     pushd $data
     wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
     popd
 fi
 
-
 # 3. make feature
-aishell_online_model=$model_dir/exp/deepspeech2_online/checkpoints
 label_file=./aishell_result
 wer=./aishell_wer
 
-nj=40
 export GLOG_logtostderr=1
 
-#./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
-
-data=$PWD/data
 # 3. gen linear feat
 cmvn=$PWD/cmvn.ark
-cmvn_json2binary_main --json_file=$model_dir/data/mean_std.json --cmvn_write_path=$cmvn
+cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
+
+
+./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
 
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat_log \
-linear_spectrogram_without_db_norm_main \
+linear-spectrogram-wo-db-norm-ol \
     --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
     --cmvn_file=$cmvn \
@@ -74,10 +76,10 @@ text=$data/test/text
 
 # 4. recognizer
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
-  offline_decoder_sliding_chunk_main \
+  ctc-prefix-beam-search-decoder-ol \
     --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
     --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
     --dict_file=$vocb_dir/vocab.txt \
     --result_wspecifier=ark,t:$data/split${nj}/JOB/result
@@ -87,10 +89,10 @@ utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}
 
 # 4. decode with lm
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \
-  offline_decoder_sliding_chunk_main \
+  ctc-prefix-beam-search-decoder-ol \
     --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
     --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
     --dict_file=$vocb_dir/vocab.txt \
     --lm_path=$lm \
@@ -110,10 +112,10 @@ fi
 
 # 5. test TLG decoder
 utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \
-  offline_wfst_decoder_main \
+  wfst-decoder-ol \
     --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$aishell_online_model/avg_1.jit.pdmodel \
-    --param_path=$aishell_online_model/avg_1.jit.pdiparams \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
     --word_symbol_table=$graph_dir/words.txt \
     --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
     --graph_path=$graph_dir/TLG.fst --max_active=7500 \
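For reference, the renamed binaries driven by `run.sh` above can also be invoked by hand for a single job, without `utils/run.pl`. The sketch below is illustrative only: it assumes `path.sh` has been sourced and that `run.sh` has already downloaded the test set and checkpoint and written `cmvn.ark`; the output paths under `exp/` are invented for the example, while the flags mirror the script.

```bash
# Minimal single-job sketch of the streaming pipeline (assumes run.sh has
# already prepared data/, the checkpoint and cmvn.ark; output paths are examples).
. path.sh

data=$PWD/data
exp=$PWD/exp
ckpt_dir=$data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints
vocb_dir=$ckpt_dir/data/lang_char

# linear spectrogram feature without dB normalization (online front-end)
linear-spectrogram-wo-db-norm-ol \
    --wav_rspecifier=scp:$data/aishell_test.scp \
    --feature_wspecifier=ark,scp:$exp/feat.ark,$exp/feat.scp \
    --cmvn_file=$PWD/cmvn.ark

# CTC prefix beam search over the DeepSpeech2 online model outputs
ctc-prefix-beam-search-decoder-ol \
    --feature_rspecifier=scp:$exp/feat.scp \
    --model_path=$model_dir/avg_1.jit.pdmodel \
    --param_path=$model_dir/avg_1.jit.pdiparams \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
    --dict_file=$vocb_dir/vocab.txt \
    --result_wspecifier=ark,t:$exp/result
```

Passing `--lm_path=$lm` in addition, as the `log_lm` job above does, enables rescoring with the downloaded KenLM model.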
diff --git a/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc b/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
index 2457febe9..b83856642 100644
--- a/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+++ b/speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
@@ -32,41 +32,50 @@ int main(int argc, char* argv[]) {
     google::InitGoogleLogging(argv[0]);
 
     LOG(INFO) << "cmvn josn path: " << FLAGS_json_file;
 
-    padded_string json = padded_string::load(FLAGS_json_file);
-    ondemand::parser parser;
-    ondemand::document doc = parser.iterate(json);
-    ondemand::value val = doc;
+    try {
+        padded_string json = padded_string::load(FLAGS_json_file);
 
-    ondemand::array mean_stat = val["mean_stat"];
-    std::vector<kaldi::BaseFloat> mean_stat_vec;
-    for (double x : mean_stat) {
-        mean_stat_vec.push_back(x);
-    }
-    // LOG(INFO) << mean_stat; this line will casue
-    // simdjson::simdjson_error("Objects and arrays can only be iterated when
-    // they are first encountered")
+        ondemand::parser parser;
+        ondemand::document doc = parser.iterate(json);
+        ondemand::value val = doc;
 
-    ondemand::array var_stat = val["var_stat"];
-    std::vector<kaldi::BaseFloat> var_stat_vec;
-    for (double x : var_stat) {
-        var_stat_vec.push_back(x);
-    }
+        ondemand::array mean_stat = val["mean_stat"];
+        std::vector<kaldi::BaseFloat> mean_stat_vec;
+        for (double x : mean_stat) {
+            mean_stat_vec.push_back(x);
+        }
+        // LOG(INFO) << mean_stat; this line will cause
+        // simdjson::simdjson_error("Objects and arrays can only be iterated
+        // when
+        // they are first encountered")
+
+        ondemand::array var_stat = val["var_stat"];
+        std::vector<kaldi::BaseFloat> var_stat_vec;
+        for (double x : var_stat) {
+            var_stat_vec.push_back(x);
+        }
 
-    kaldi::int32 frame_num = uint64_t(val["frame_num"]);
-    LOG(INFO) << "nframe: " << frame_num;
+        kaldi::int32 frame_num = uint64_t(val["frame_num"]);
+        LOG(INFO) << "nframe: " << frame_num;
 
-    size_t mean_size = mean_stat_vec.size();
-    kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
-    for (size_t idx = 0; idx < mean_size; ++idx) {
-        cmvn_stats(0, idx) = mean_stat_vec[idx];
-        cmvn_stats(1, idx) = var_stat_vec[idx];
+        size_t mean_size = mean_stat_vec.size();
+        kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
+        for (size_t idx = 0; idx < mean_size; ++idx) {
+            cmvn_stats(0, idx) = mean_stat_vec[idx];
+            cmvn_stats(1, idx) = var_stat_vec[idx];
+        }
+        cmvn_stats(0, mean_size) = frame_num;
+        LOG(INFO) << cmvn_stats;
+
+        kaldi::WriteKaldiObject(
+            cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
+        LOG(INFO) << "cmvn stats have been written to: " << FLAGS_cmvn_write_path;
+        LOG(INFO) << "Binary: " << FLAGS_binary;
+    } catch (simdjson::simdjson_error& err) {
+        LOG(ERROR) << err.what();
     }
-    cmvn_stats(0, mean_size) = frame_num;
-    LOG(INFO) << cmvn_stats;
 
-    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
-    LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
-    LOG(INFO) << "Binary: " << FLAGS_binary;
+
     return 0;
 }
\ No newline at end of file
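The converter above is also handy on its own when checking CMVN statistics. Below is a minimal sketch, assuming the checkpoint JSON has already been downloaded by `run.sh`; the output path is illustrative. With the new `try`/`catch`, a malformed JSON file is reported through the logged `simdjson_error` message instead of terminating the tool with an uncaught exception.

```bash
# Hedged standalone use of cmvn-json2kaldi; paths are examples only.
. path.sh

ckpt_dir=$PWD/data/model
cmvn-json2kaldi \
    --json_file=$ckpt_dir/data/mean_std.json \
    --cmvn_write_path=$PWD/cmvn.txt \
    --binary=false   # write a text-format Kaldi matrix that can be inspected directly
```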
diff --git a/speechx/speechx/utils/simdjson.h b/speechx/speechx/utils/simdjson.h
index 50f7e7793..28a9239b1 100644
--- a/speechx/speechx/utils/simdjson.h
+++ b/speechx/speechx/utils/simdjson.h
@@ -2412,7 +2412,7 @@ enum error_code {
     CAPACITY,    ///< This parser can't support a document that big
     MEMALLOC,    ///< Error allocating memory, most likely out of memory
     TAPE_ERROR,  ///< Something went wrong while writing to the tape (stage 2),
-                 ///this is a generic error
+                 /// this is a generic error
     DEPTH_ERROR,  ///< Your document exceeds the user-specified depth limitation
     STRING_ERROR,  ///< Problem while parsing a string
     T_ATOM_ERROR,  ///< Problem while parsing an atom starting with the letter
@@ -2438,9 +2438,9 @@ enum error_code {
     UNEXPECTED_ERROR,  ///< indicative of a bug in simdjson
     PARSER_IN_USE,     ///< parser is already in use.
     OUT_OF_ORDER_ITERATION,  ///< tried to iterate an array or object out of
-                             ///order
+                             /// order
     INSUFFICIENT_PADDING,  ///< The JSON doesn't have enough padding for
-                           ///simdjson to safely parse it.
+                           /// simdjson to safely parse it.
     INCOMPLETE_ARRAY_OR_OBJECT,  ///< The document ends early.
     SCALAR_DOCUMENT_AS_VALUE,    ///< A scalar document is treated as a value.
     OUT_OF_BOUNDS,  ///< Attempted to access location outside of document.