parent 56a0a02452
commit 72c9e973a2
@@ -0,0 +1,3 @@
data
utils
exp
@@ -0,0 +1,28 @@
# u2/u2pp Streaming ASR

## Testing with Aishell Test Data

### Download wav and model

```
./run.sh --stop_stage 0
```

### Compute features

```
./run.sh --stage 1 --stop_stage 1
```

### Decode using features

```
./run.sh --stage 2 --stop_stage 2
```

### Decode using wav

```
./run.sh --stage 3 --stop_stage 3
```
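
To run the whole pipeline in one go (download, feature extraction, decoding from features, and decoding directly from wav), the stages can be chained in a single call:

```
./run.sh --stage 0 --stop_stage 3
```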
@@ -0,0 +1,71 @@
#!/bin/bash

# To be run from one directory above this script.
. ./path.sh

nj=40
text=data/local/lm/text
lexicon=data/local/dict/lexicon.txt

for f in "$text" "$lexicon"; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

# Check SRILM tools
if ! which ngram-count > /dev/null; then
  echo "SRILM tools not found. Please download and install them from:"
  echo "http://www.speech.sri.com/projects/srilm/download.html"
  echo "Then add the tools to your PATH."
  exit 1
fi

# This script takes no arguments. It assumes you have already run
# aishell_data_prep.sh.
# It takes as input the files
#   data/local/lm/text
#   data/local/dict/lexicon.txt
dir=data/local/lm
mkdir -p $dir

cleantext=$dir/text.no_oov

# Map OOV words to <SPOKEN_NOISE>.
# lexicon line: word char0 ... charn
# text line: utt word0 ... wordn -> line: <SPOKEN_NOISE> word0 ... wordn
text_dir=$(dirname $text)
split_name=$(basename $text)
./local/split_data.sh $text_dir $text $split_name $nj
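
# The OOV mapping below runs in parallel over the $nj splits via utils/run.pl:
# each job rewrites its split of the text, replacing any word not found in the
# lexicon with <SPOKEN_NOISE>; the per-split outputs are then merged into $cleantext.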

utils/run.pl JOB=1:$nj $text_dir/split${nj}/JOB/${split_name}.no_oov.log \
  cat ${text_dir}/split${nj}/JOB/${split_name} \| awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
  {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
  \> ${text_dir}/split${nj}/JOB/${split_name}.no_oov || exit 1;
cat ${text_dir}/split${nj}/*/${split_name}.no_oov > $cleantext

# compute word counts, sorted in descending order
# line: count word
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort --parallel=`nproc` | uniq -c | \
  sort --parallel=`nproc` -nr > $dir/word.counts || exit 1;

# Get counts from acoustic training transcripts, and add one count
# for each word in the lexicon (but not silence, we don't want it
# in the LM -- we'll add it optionally later).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
  sort --parallel=`nproc` | uniq -c | sort --parallel=`nproc` -nr > $dir/unigram.counts || exit 1;

# word list with <s> and </s>
cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist

# hold out a portion to compute perplexity
heldout_sent=10000 # Don't change this if you want results to be comparable with kaldi_lm results

mkdir -p $dir
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  head -$heldout_sent > $dir/heldout
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  tail -n +$heldout_sent > $dir/train
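
# Train an interpolated, Kneser-Ney-discounted trigram LM restricted to the
# $dir/wordlist vocabulary (OOVs mapped to <UNK>), then report its perplexity
# on the held-out set.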

ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
ngram -lm $dir/lm.arpa -ppl $dir/heldout
@@ -0,0 +1,25 @@
#!/bin/bash
set -e

. path.sh

data=data
exp=exp
nj=20
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
# NOTE: the following three paths are assumed defaults; the original script
# uses them without defining them. Adjust to your setup.
label_file=aishell_result     # assumed output name
wer=aishell.wer               # assumed output name
text=$data/test/text          # assumed reference-transcript path
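
# Chunk-wise CTC prefix beam search over the precomputed fbank features, run in
# parallel over the $nj data splits, using 16-frame decoder chunks, a receptive
# field of 7 frames, and 4x subsampling.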

utils/run.pl JOB=1:$nj $data/split${nj}/JOB/decoder.fbank.wolm.log \
  ctc_prefix_beam_search_decoder_main \
    --model_path=$model_dir/export.jit \
    --vocab_path=$model_dir/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --feature_rspecifier=scp:$data/split${nj}/JOB/fbank.scp \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_decode.ark

cat $data/split${nj}/*/result_decode.ark > $exp/${label_file}
utils/compute-wer.py --char=1 --v=1 $text $exp/${label_file} > $exp/${wer}
tail -n 7 $exp/${wer}
@@ -0,0 +1,31 @@
#!/bin/bash
set -e

. path.sh

data=data
exp=exp
nj=20
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
aishell_wav_scp=aishell_test.scp
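
# Convert the model's JSON CMVN statistics to a Kaldi-format ark so the
# feature pipeline below can apply mean/variance normalization.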

cmvn_json2kaldi_main \
  --json_file $model_dir/mean_std.json \
  --cmvn_write_path $exp/cmvn.ark \
  --binary=false

echo "convert json cmvn to kaldi ark."

./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
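
# Extract 80-dimensional CMVN-normalized fbank features for each split in
# parallel; features are written as ark/scp pairs under $data/split${nj}/<job>/.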

utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \
  compute_fbank_main \
    --num_bins 80 \
    --cmvn_file=$exp/cmvn.ark \
    --streaming_chunk=36 \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --feature_wspecifier=ark,scp:$data/split${nj}/JOB/fbank.ark,$data/split${nj}/JOB/fbank.scp

echo "compute fbank feature."
@@ -0,0 +1,23 @@
#!/bin/bash
set -x
set -e

. path.sh

data=data
exp=exp
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
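
# Run the U2 nnet forward pass alone on precomputed fbank features and dump the
# encoder outputs and CTC log-probabilities, which is useful for inspecting the
# acoustic model without any decoder attached.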

u2_nnet_main \
  --model_path=$model_dir/export.jit \
  --feature_rspecifier=ark,t:$exp/fbank.ark \
  --nnet_decoder_chunk=16 \
  --receptive_field_length=7 \
  --subsampling_rate=4 \
  --acoustic_scale=1.0 \
  --nnet_encoder_outs_wspecifier=ark,t:$exp/encoder_outs.ark \
  --nnet_prob_wspecifier=ark,t:$exp/logprobs.ark
echo "u2 nnet decode."
@@ -0,0 +1,34 @@
#!/bin/bash
set -e

. path.sh

data=data
exp=exp
nj=20
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
aishell_wav_scp=aishell_test.scp
# NOTE: the following three paths are assumed defaults; the original script
# uses them without defining them. Adjust to your setup.
label_file=aishell_result     # assumed output name
wer=aishell.wer               # assumed output name
text=$data/test/text          # assumed reference-transcript path

./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
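
# Full streaming recognition directly from raw wav: the binary computes fbank
# features internally (--use_fbank/--cmvn_file) and decodes with the exported
# model, parallelized over the $nj splits.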

utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer.log \
  u2_recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --cmvn_file=$exp/cmvn.ark \
    --model_path=$model_dir/export.jit \
    --vocab_path=$model_dir/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer.ark

cat $data/split${nj}/*/result_recognizer.ark > $exp/${label_file}_recognizer
utils/compute-wer.py --char=1 --v=1 $text $exp/${label_file}_recognizer > $exp/${wer}.recognizer
echo "recognizer test has finished."
echo "please check ${exp}/${wer}.recognizer"
tail -n 7 $exp/${wer}.recognizer
@@ -0,0 +1,30 @@
#!/usr/bin/env bash

set -eo pipefail

data=$1
scp=$2
split_name=$3
numsplit=$4

# Split $scp into $numsplit pieces, saved as $data/split${numsplit}/<n>/${split_name}.
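# Example usage (as invoked by local/feat.sh and local/recognizer.sh):
#   ./local/split_data.sh data data/aishell_test.scp aishell_test.scp 20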

if [[ ! $numsplit -gt 0 ]]; then
  echo "$0: Invalid num-split argument";
  exit 1;
fi

directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n; done)
scp_splits=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n/${split_name}; done)

# if this mkdir fails due to the argument list being too long, iterate.
if ! mkdir -p $directories >&/dev/null; then
  for n in `seq $numsplit`; do
    mkdir -p $data/split${numsplit}/$n
  done
fi

echo "utils/split_scp.pl $scp $scp_splits"
utils/split_scp.pl $scp $scp_splits
@@ -0,0 +1,18 @@
# This sets up the locations of the built binaries required for running the examples.

unset GREP_OPTIONS

SPEECHX_ROOT=$PWD/../../../
SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx

SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

[ -d $SPEECHX_BUILD ] || { echo "Error: 'build/speechx' directory not found. Please ensure the project has been built successfully."; }

export LC_ALL=C

export PATH=$PATH:$TOOLS_BIN:$SPEECHX_BUILD/nnet:$SPEECHX_BUILD/decoder:$SPEECHX_BUILD/frontend/audio
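
# The directories above expose the binaries used by the local scripts, e.g.
# cmvn_json2kaldi_main, compute_fbank_main, ctc_prefix_beam_search_decoder_main,
# u2_nnet_main, and u2_recognizer_main.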

# paddle.sysconfig.get_lib() returns the directory holding Paddle's shared libraries.
PADDLE_LIB_PATH=$(python -c "import paddle; print(paddle.sysconfig.get_lib(), end='')")
export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH
@@ -0,0 +1,76 @@
#!/bin/bash
set +x
set -e

. path.sh

nj=40
stage=0
stop_stage=5

. utils/parse_options.sh

# input
data=data
exp=exp
aishell_wav_scp=aishell_test.scp  # scp name expected by local/feat.sh and local/recognizer.sh
mkdir -p $exp $data


# 1. compile
if [ ! -d ${SPEECHX_BUILD} ]; then
  pushd ${SPEECHX_ROOT}
  bash build.sh
  popd
fi


ckpt_dir=$data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  # download model
  if [ ! -f $ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz ]; then
    mkdir -p $ckpt_dir
    pushd $ckpt_dir

    wget -c https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz
    tar xzfv asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz

    popd
  fi

  # test wav scp
  if [ ! -f data/wav.scp ]; then
    mkdir -p $data
    pushd $data
    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 $PWD/zh.wav" > wav.scp
    popd
  fi

  # aishell wav scp
  if [ ! -d $data/test ]; then
    pushd $data
    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
    unzip aishell_test.zip
    popd

    realpath $data/test/*/*.wav > $data/wavlist
    awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
    paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
  fi
fi
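
# Stage 0 leaves behind: the exported u2pp model under $model_dir, a single-utterance
# data/wav.scp, and data/$aishell_wav_scp listing the aishell test wavs.
# Stages 1-3 below compute features, decode from features, and decode directly from wav.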


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  ./local/feat.sh
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  ./local/decode.sh
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
  ./local/recognizer.sh
fi