ds2 ol aishell egs

pull/1707/head
Hui Zhang 3 years ago
parent c7b987c55d
commit 1e4207904e

@ -1,17 +1,25 @@
# Examples # Examples for SpeechX
* dev - for speechx developer, using for test.
* ngram - using to build NGram ARPA lm.
* ds2_ol - ds2 streaming test under `aishell-1` test dataset.
The entrypoint is `ds2_ol/aishell/run.sh`
* glog - glog usage
* feat - mfcc, linear
* nnet - ds2 nn
* decoder - online decoder to work as offline
## How to run ## How to run
`run.sh` is the entry point. `run.sh` is the entry point.
Example to play `decoder`: Example to play `ds2_ol`:
``` ```
pushd decoder pushd ds2_ol/aishell
bash run.sh bash run.sh
``` ```
## Display Model with [Netron](https://github.com/lutzroeder/netron)
```
pip install netron
netron exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel --port 8022 --host 10.21.55.20
```

@ -10,5 +10,5 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
export LC_AL=C export LC_AL=C
SPEECHX_BIN=$SPEECHX_EXAMPLES/decoder:$SPEECHX_EXAMPLES/feat SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN

@ -4,6 +4,9 @@ set -e
. path.sh . path.sh
nj=40
# 1. compile # 1. compile
if [ ! -d ${SPEECHX_EXAMPLES} ]; then if [ ! -d ${SPEECHX_EXAMPLES} ]; then
pushd ${SPEECHX_ROOT} pushd ${SPEECHX_ROOT}
@ -19,52 +22,51 @@ ckpt_dir=$data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/ model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
vocb_dir=$ckpt_dir/data/lang_char/ vocb_dir=$ckpt_dir/data/lang_char/
lm=$data/zh_giga.no_cna_cmn.prune01244.klm
# output # output
mkdir -p exp mkdir -p exp
exp=$PWD/exp exp=$PWD/exp
aishell_wav_scp=aishell_test.scp aishell_wav_scp=aishell_test.scp
if [ ! -d $data/test ]; then if [ ! -d $data/test ]; then
pushd $data
wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
unzip -d $data aishell_test.zip unzip aishell_test.zip
popd
realpath $data/test/*/*.wav > $data/wavlist realpath $data/test/*/*.wav > $data/wavlist
awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
fi fi
model_dir=$PWD/aishell_ds2_online_model
if [ ! -d $model_dir ]; then if [ ! -d $ckpt_dir ]; then
mkdir -p $model_dir mkdir -p $ckpt_dir
wget -P $model_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $model_dir tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
fi fi
lm=$data/zh_giga.no_cna_cmn.prune01244.klm
if [ ! -f $lm ]; then if [ ! -f $lm ]; then
pushd $data pushd $data
wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
popd popd
fi fi
# 3. make feature # 3. make feature
aishell_online_model=$model_dir/exp/deepspeech2_online/checkpoints
label_file=./aishell_result label_file=./aishell_result
wer=./aishell_wer wer=./aishell_wer
nj=40
export GLOG_logtostderr=1 export GLOG_logtostderr=1
#./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
data=$PWD/data
# 3. gen linear feat # 3. gen linear feat
cmvn=$PWD/cmvn.ark cmvn=$PWD/cmvn.ark
cmvn_json2binary_main --json_file=$model_dir/data/mean_std.json --cmvn_write_path=$cmvn cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat_log \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat_log \
linear_spectrogram_without_db_norm_main \ linear-spectrogram-wo-db-norm-ol \
--wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \ --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
--feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \ --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
--cmvn_file=$cmvn \ --cmvn_file=$cmvn \
@ -74,10 +76,10 @@ text=$data/test/text
# 4. recognizer # 4. recognizer
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
offline_decoder_sliding_chunk_main \ ctc-prefix-beam-search-decoder-ol \
--feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
--model_path=$aishell_online_model/avg_1.jit.pdmodel \ --model_path=$model_dir/avg_1.jit.pdmodel \
--param_path=$aishell_online_model/avg_1.jit.pdiparams \ --param_path=$model_dir/avg_1.jit.pdiparams \
--model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
--dict_file=$vocb_dir/vocab.txt \ --dict_file=$vocb_dir/vocab.txt \
--result_wspecifier=ark,t:$data/split${nj}/JOB/result --result_wspecifier=ark,t:$data/split${nj}/JOB/result
@ -87,10 +89,10 @@ utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}
# 4. decode with lm # 4. decode with lm
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_lm \
offline_decoder_sliding_chunk_main \ ctc-prefix-beam-search-decoder-ol \
--feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
--model_path=$aishell_online_model/avg_1.jit.pdmodel \ --model_path=$model_dir/avg_1.jit.pdmodel \
--param_path=$aishell_online_model/avg_1.jit.pdiparams \ --param_path=$model_dir/avg_1.jit.pdiparams \
--model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
--dict_file=$vocb_dir/vocab.txt \ --dict_file=$vocb_dir/vocab.txt \
--lm_path=$lm \ --lm_path=$lm \
@ -110,10 +112,10 @@ fi
# 5. test TLG decoder # 5. test TLG decoder
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log_tlg \
offline_wfst_decoder_main \ wfst-decoder-ol \
--feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
--model_path=$aishell_online_model/avg_1.jit.pdmodel \ --model_path=$model_dir/avg_1.jit.pdmodel \
--param_path=$aishell_online_model/avg_1.jit.pdiparams \ --param_path=$model_dir/avg_1.jit.pdiparams \
--word_symbol_table=$graph_dir/words.txt \ --word_symbol_table=$graph_dir/words.txt \
--model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
--graph_path=$graph_dir/TLG.fst --max_active=7500 \ --graph_path=$graph_dir/TLG.fst --max_active=7500 \

@ -32,41 +32,50 @@ int main(int argc, char* argv[]) {
google::InitGoogleLogging(argv[0]); google::InitGoogleLogging(argv[0]);
LOG(INFO) << "cmvn josn path: " << FLAGS_json_file; LOG(INFO) << "cmvn josn path: " << FLAGS_json_file;
padded_string json = padded_string::load(FLAGS_json_file);
ondemand::parser parser; try {
ondemand::document doc = parser.iterate(json); padded_string json = padded_string::load(FLAGS_json_file);
ondemand::value val = doc;
ondemand::array mean_stat = val["mean_stat"]; ondemand::parser parser;
std::vector<kaldi::BaseFloat> mean_stat_vec; ondemand::document doc = parser.iterate(json);
for (double x : mean_stat) { ondemand::value val = doc;
mean_stat_vec.push_back(x);
}
// LOG(INFO) << mean_stat; this line will casue
// simdjson::simdjson_error("Objects and arrays can only be iterated when
// they are first encountered")
ondemand::array var_stat = val["var_stat"]; ondemand::array mean_stat = val["mean_stat"];
std::vector<kaldi::BaseFloat> var_stat_vec; std::vector<kaldi::BaseFloat> mean_stat_vec;
for (double x : var_stat) { for (double x : mean_stat) {
var_stat_vec.push_back(x); mean_stat_vec.push_back(x);
} }
// LOG(INFO) << mean_stat; this line will casue
// simdjson::simdjson_error("Objects and arrays can only be iterated
// when
// they are first encountered")
ondemand::array var_stat = val["var_stat"];
std::vector<kaldi::BaseFloat> var_stat_vec;
for (double x : var_stat) {
var_stat_vec.push_back(x);
}
kaldi::int32 frame_num = uint64_t(val["frame_num"]); kaldi::int32 frame_num = uint64_t(val["frame_num"]);
LOG(INFO) << "nframe: " << frame_num; LOG(INFO) << "nframe: " << frame_num;
size_t mean_size = mean_stat_vec.size(); size_t mean_size = mean_stat_vec.size();
kaldi::Matrix<double> cmvn_stats(2, mean_size + 1); kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
for (size_t idx = 0; idx < mean_size; ++idx) { for (size_t idx = 0; idx < mean_size; ++idx) {
cmvn_stats(0, idx) = mean_stat_vec[idx]; cmvn_stats(0, idx) = mean_stat_vec[idx];
cmvn_stats(1, idx) = var_stat_vec[idx]; cmvn_stats(1, idx) = var_stat_vec[idx];
}
cmvn_stats(0, mean_size) = frame_num;
LOG(INFO) << cmvn_stats;
kaldi::WriteKaldiObject(
cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
LOG(INFO) << "Binary: " << FLAGS_binary;
} catch (simdjson::simdjson_error& err) {
LOG(ERR) << err.what();
} }
cmvn_stats(0, mean_size) = frame_num;
LOG(INFO) << cmvn_stats;
kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
LOG(INFO) << "Binary: " << FLAGS_binary;
return 0; return 0;
} }

@ -2412,7 +2412,7 @@ enum error_code {
CAPACITY, ///< This parser can't support a document that big CAPACITY, ///< This parser can't support a document that big
MEMALLOC, ///< Error allocating memory, most likely out of memory MEMALLOC, ///< Error allocating memory, most likely out of memory
TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2),
///this is a generic error /// this is a generic error
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
STRING_ERROR, ///< Problem while parsing a string STRING_ERROR, ///< Problem while parsing a string
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter
@ -2438,9 +2438,9 @@ enum error_code {
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use. PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of
///order /// order
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for
///simdjson to safely parse it. /// simdjson to safely parse it.
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
OUT_OF_BOUNDS, ///< Attempted to access location outside of document. OUT_OF_BOUNDS, ///< Attempted to access location outside of document.

Loading…
Cancel
Save