diff --git a/speechx/examples/u2pp_ol/wenetspeech/.gitignore b/speechx/examples/u2pp_ol/wenetspeech/.gitignore
new file mode 100644
index 00000000..02c0cc21
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/.gitignore
@@ -0,0 +1,3 @@
+data
+utils
+exp
diff --git a/speechx/examples/u2pp_ol/wenetspeech/README.md b/speechx/examples/u2pp_ol/wenetspeech/README.md
new file mode 100644
index 00000000..a9a4578f
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/README.md
@@ -0,0 +1,28 @@
+# u2/u2pp Streaming ASR
+
+## Testing with Aishell Test Data
+
+### Download wav and model
+
+```
+./run.sh --stop_stage 0
+```
+
+### compute feature
+
+```
+./run.sh --stage 1 --stop_stage 1
+```
+
+### decoding using feature
+
+```
+./run.sh --stage 2 --stop_stage 2
+```
+
+### decoding using wav
+
+
+```
+./run.sh --stage 3 --stop_stage 3
+```
\ No newline at end of file
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/aishell_train_lms.sh b/speechx/examples/u2pp_ol/wenetspeech/local/aishell_train_lms.sh
new file mode 100755
index 00000000..544a1f59
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/aishell_train_lms.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+#
+# Train a 3-gram language model with SRILM.
+# Inputs : data/local/lm/text (utt word0 ... wordn), data/local/dict/lexicon.txt
+# Outputs: data/local/lm/{text.no_oov,word.counts,unigram.counts,wordlist,
+#          heldout,train,lm.arpa}; perplexity on the held-out set is printed.
+#
+# To be run from one directory above this script.
+. ./path.sh
+
+nj=40
+text=data/local/lm/text
+lexicon=data/local/dict/lexicon.txt
+
+# Both inputs must exist before any work is done.
+for f in "$text" "$lexicon"; do
+  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
+done
+
+# Check SRILM tools
+if ! command -v ngram-count > /dev/null; then
+    echo "srilm tools are not found, please download it and install it from: "
+    echo "http://www.speech.sri.com/projects/srilm/download.html"
+    echo "Then add the tools to your PATH"
+    exit 1
+fi
+
+# This script takes no arguments.  It assumes you have already run
+# aishell_data_prep.sh.
+# It takes as input the files
+# data/local/lm/text
+# data/local/dict/lexicon.txt
+dir=data/local/lm
+mkdir -p $dir
+
+cleantext=$dir/text.no_oov
+
+# oov to <SPOKEN_NOISE>
+# lexicon line: word char0 ... charn
+# text line: utt word0 ... wordn -> line: <SPOKEN_NOISE> word0 ... wordn
+# NOTE(review): the angle-bracket tokens and the awk '<'/'>' comparisons in
+# this script were missing from the reviewed copy and are reconstructed; confirm.
+text_dir=$(dirname $text)
+split_name=$(basename $text)
+./local/split_data.sh $text_dir $text $split_name $nj
+
+utils/run.pl JOB=1:$nj $text_dir/split${nj}/JOB/${split_name}.no_oov.log \
+   cat ${text_dir}/split${nj}/JOB/${split_name} \| awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
+  \> ${text_dir}/split${nj}/JOB/${split_name}.no_oov || exit 1;
+cat ${text_dir}/split${nj}/*/${split_name}.no_oov > $cleantext
+
+# compute word counts, sort in descending order
+# line: count word
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort --parallel=$(nproc) | uniq -c | \
+  sort --parallel=$(nproc) -nr > $dir/word.counts || exit 1;
+
+# Get counts from acoustic training transcripts, and add  one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+  sort --parallel=$(nproc) | uniq -c | sort --parallel=$(nproc) -nr > $dir/unigram.counts || exit 1;
+
+# word with <s> </s>
+cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
+
+# hold out to compute ppl
+heldout_sent=10000 # Don't change this if you want result to be comparable with kaldi_lm results
+
+mkdir -p $dir
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  head -$heldout_sent > $dir/heldout
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  tail -n +$heldout_sent > $dir/train
+
+ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
+  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
+ngram -lm $dir/lm.arpa -ppl $dir/heldout
\ No newline at end of file
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh b/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh
new file mode 100755
index 00000000..c17cdbe6
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/decode.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Decode the per-job fbank features with CTC prefix beam search, merge the
+# per-job results and score them against the reference transcript.
+set -e
+
+. path.sh
+
+data=data
+exp=exp
+nj=20
+mkdir -p $exp
+ckpt_dir=./data/model
+model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
+# Reference transcript and output names; each may be overridden from the
+# environment. Defaults keep the paths below from collapsing to "$exp/".
+# NOTE(review): assumes aishell_test.zip provides test/text -- confirm.
+text=${text:-$data/test/text}
+label_file=${label_file:-aishell_result}
+wer=${wer:-aishell_wer}
+
+utils/run.pl JOB=1:$nj $data/split${nj}/JOB/decoder.fbank.wolm.log \
+ctc_prefix_beam_search_decoder_main \
+    --model_path=$model_dir/export.jit \
+    --vocab_path=$model_dir/unit.txt \
+    --nnet_decoder_chunk=16 \
+    --receptive_field_length=7 \
+    --subsampling_rate=4 \
+    --feature_rspecifier=scp:$data/split${nj}/JOB/fbank.scp \
+    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_decode.ark
+
+cat $data/split${nj}/*/result_decode.ark > $exp/${label_file}
+utils/compute-wer.py --char=1 --v=1 $text $exp/${label_file} > $exp/${wer}
+tail -n 7 $exp/${wer}
\ No newline at end of file
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/feat.sh b/speechx/examples/u2pp_ol/wenetspeech/local/feat.sh
new file mode 100755
index 00000000..4341cec8
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/feat.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# Convert the model's JSON CMVN stats to a Kaldi ark, split the Aishell test
+# wav list into $nj jobs, and compute fbank features for every job.
+set -e
+
+. path.sh
+
+data=data
+exp=exp
+nj=20
+mkdir -p $exp
+ckpt_dir=./data/model
+model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
+aishell_wav_scp=aishell_test.scp
+
+cmvn_json2kaldi_main \
+    --json_file  $model_dir/mean_std.json \
+    --cmvn_write_path $exp/cmvn.ark \
+    --binary=false
+
+echo "convert json cmvn to kaldi ark."
+
+./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
+
+utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \
+compute_fbank_main \
+    --num_bins 80 \
+    --cmvn_file=$exp/cmvn.ark \
+    --streaming_chunk=36 \
+    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
+    --feature_wspecifier=ark,scp:$data/split${nj}/JOB/fbank.ark,$data/split${nj}/JOB/fbank.scp
+
+echo "compute fbank feature."
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh b/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh
new file mode 100755
index 00000000..4419201c
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/nnet.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Run u2_nnet_main over $exp/fbank.ark and write encoder_outs.ark and
+# logprobs.ark (text arks) under $exp.
+# NOTE(review): none of the scripts in this example appears to write
+# $exp/fbank.ark (feat.sh writes per-job arks) -- confirm the expected input.
+set -x
+set -e
+
+. path.sh
+
+data=data
+exp=exp
+mkdir -p $exp
+ckpt_dir=./data/model
+model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
+
+u2_nnet_main \
+    --model_path=$model_dir/export.jit \
+    --feature_rspecifier=ark,t:$exp/fbank.ark \
+    --nnet_decoder_chunk=16 \
+    --receptive_field_length=7 \
+    --subsampling_rate=4 \
+    --acoustic_scale=1.0 \
+    --nnet_encoder_outs_wspecifier=ark,t:$exp/encoder_outs.ark \
+    --nnet_prob_wspecifier=ark,t:$exp/logprobs.ark
+echo "u2 nnet decode."
+
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh b/speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh
new file mode 100755
index 00000000..29b50537
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Recognize straight from wav: split the Aishell test list into $nj jobs, run
+# u2_recognizer_main on each, then merge the results and score them.
+set -e
+
+. path.sh
+
+data=data
+exp=exp
+nj=20
+mkdir -p $exp
+ckpt_dir=./data/model
+model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
+aishell_wav_scp=aishell_test.scp
+# Reference transcript and output names; each may be overridden from the
+# environment. NOTE(review): assumes aishell_test.zip provides test/text.
+text=${text:-$data/test/text}
+label_file=${label_file:-aishell_result}
+wer=${wer:-aishell_wer}
+
+./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
+
+utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer.log \
+u2_recognizer_main \
+    --use_fbank=true \
+    --num_bins=80 \
+    --cmvn_file=$exp/cmvn.ark \
+    --model_path=$model_dir/export.jit \
+    --vocab_path=$model_dir/unit.txt \
+    --nnet_decoder_chunk=16 \
+    --receptive_field_length=7 \
+    --subsampling_rate=4 \
+    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
+    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer.ark
+
+
+cat $data/split${nj}/*/result_recognizer.ark > $exp/${label_file}_recognizer
+utils/compute-wer.py --char=1 --v=1 $text $exp/${label_file}_recognizer > $exp/${wer}.recognizer
+echo "recognizer test have finished!!!"
+echo "please checkout in ${exp}/${wer}.recognizer"
+tail -n 7 $exp/${wer}.recognizer
\ No newline at end of file
diff --git a/speechx/examples/u2pp_ol/wenetspeech/local/split_data.sh b/speechx/examples/u2pp_ol/wenetspeech/local/split_data.sh
new file mode 100755
index 00000000..faa5c42d
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/local/split_data.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Usage: split_data.sh <data-dir> <scp> <split-name> <num-split>
+# Splits <scp> into <num-split> parts with utils/split_scp.pl, writing part n
+# to <data-dir>/split<num-split>/<n>/<split-name>.
+
+set -eo pipefail
+
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 <data-dir> <scp> <split-name> <num-split>" >&2
+  exit 1
+fi
+
+data=$1
+scp=$2
+split_name=$3
+numsplit=$4
+
+if [[ ! $numsplit -gt 0 ]]; then
+  echo "$0: Invalid num-split argument";
+  exit 1;
+fi
+
+# Word splitting of these two lists is intentional: each entry becomes one
+# argument to mkdir / split_scp.pl.
+directories=$(for n in $(seq $numsplit); do echo $data/split${numsplit}/$n; done)
+scp_splits=$(for n in $(seq $numsplit); do echo $data/split${numsplit}/$n/${split_name}; done)
+
+# if this mkdir fails due to argument-list being too long, iterate.
+if ! mkdir -p $directories >&/dev/null; then
+  for n in $(seq $numsplit); do
+    mkdir -p $data/split${numsplit}/$n
+  done
+fi
+
+echo "utils/split_scp.pl $scp $scp_splits"
+utils/split_scp.pl $scp $scp_splits
diff --git a/speechx/examples/u2pp_ol/wenetspeech/path.sh b/speechx/examples/u2pp_ol/wenetspeech/path.sh
new file mode 100644
index 00000000..7f32fbce
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/path.sh
@@ -0,0 +1,25 @@
+# Locations of the built binaries required for running the examples.
+# Meant to be sourced from the example directory (paths are relative to $PWD).
+
+unset GREP_OPTIONS
+
+SPEECHX_ROOT=$PWD/../../../
+SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx
+
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+
+# Warn only: run.sh builds the project when this directory is missing.
+[ -d $SPEECHX_BUILD ] || { echo "Error: 'build/speechx' directory not found. please ensure that the project build successfully"; }
+
+# LC_ALL=C gives byte-wise, locale-independent sorting; keep Python I/O UTF-8
+# so scripts printing non-ASCII text do not fail under the C locale.
+export LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+
+export PATH=$PATH:$TOOLS_BIN:$SPEECHX_BUILD/nnet:$SPEECHX_BUILD/decoder:$SPEECHX_BUILD/frontend/audio
+
+# get_lib() may return a single path string; joining a string would put ':'
+# between its characters, so only join when a list/tuple comes back.
+PADDLE_LIB_PATH=$(python -c "import paddle; lib = paddle.sysconfig.get_lib(); print(lib if isinstance(lib, str) else ':'.join(lib), end='')")
+export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH
diff --git a/speechx/examples/u2pp_ol/wenetspeech/run.sh b/speechx/examples/u2pp_ol/wenetspeech/run.sh
new file mode 100755
index 00000000..12e3af95
--- /dev/null
+++ b/speechx/examples/u2pp_ol/wenetspeech/run.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Usage: ./run.sh [--stage N] [--stop_stage N]
+#   stage 0: download model and test data   stage 1: compute fbank features
+#   stage 2: decode from features           stage 3: recognize from wav
+set +x
+set -e
+
+. path.sh
+
+nj=40
+stage=0
+stop_stage=5
+
+. utils/parse_options.sh
+
+# input
+data=data
+exp=exp
+mkdir -p $exp $data
+# Must match the name local/feat.sh and local/recognizer.sh read from $data.
+aishell_wav_scp=aishell_test.scp
+
+
+# 1. compile
+if [ ! -d ${SPEECHX_BUILD} ]; then
+    pushd ${SPEECHX_ROOT}
+    bash build.sh
+    popd
+fi
+
+
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
+
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then
+    # download model
+    if [ ! -f $ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz ]; then
+        mkdir -p $ckpt_dir
+        pushd $ckpt_dir
+
+        wget -c https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz
+        tar xzfv asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz
+
+        popd
+    fi
+
+    # test wav scp
+    if [ ! -f $data/wav.scp ]; then
+        mkdir -p $data
+        pushd $data
+        wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+        echo "utt1 " $PWD/zh.wav > wav.scp
+        popd
+    fi
+
+    # aishell wav scp
+    if [ ! -d $data/test ]; then
+        pushd $data
+        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
+        unzip aishell_test.zip
+        popd
+    fi
+    # Built separately so an interrupted earlier run still gets its list.
+    if [ ! -f $data/$aishell_wav_scp ]; then
+        realpath $data/test/*/*.wav > $data/wavlist
+        awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
+        paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
+    fi
+fi
+
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    ./local/feat.sh
+fi
+
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    ./local/decode.sh
+fi
+
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    ./local/recognizer.sh
+fi
\ No newline at end of file