From dc8efca27248d3bc5b9792ec00979ea6ce642756 Mon Sep 17 00:00:00 2001
From: Yang Zhou
Date: Tue, 19 Apr 2022 20:14:02 +0800
Subject: [PATCH] add test script

---
 .../ds2_ol/aishell/websocket_client.sh        | 33 +++++++
 .../ds2_ol/aishell/websocket_server.sh        | 61 ++++++++++++
 .../ds2_ol/decoder/recognizer_test_main.cc    | 96 +++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 speechx/examples/ds2_ol/aishell/websocket_client.sh
 create mode 100644 speechx/examples/ds2_ol/aishell/websocket_server.sh
 create mode 100644 speechx/examples/ds2_ol/decoder/recognizer_test_main.cc

diff --git a/speechx/examples/ds2_ol/aishell/websocket_client.sh b/speechx/examples/ds2_ol/aishell/websocket_client.sh
new file mode 100644
index 00000000..3c6b4e91
--- /dev/null
+++ b/speechx/examples/ds2_ol/aishell/websocket_client.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set +x
+set -e
+
+. path.sh
+
+# 1. compile
+if [ ! -d "${SPEECHX_EXAMPLES}" ]; then
+    pushd "${SPEECHX_ROOT}"
+    bash build.sh
+    popd
+fi
+
+# 2. prepare the test wav list (downloaded on first run)
+mkdir -p data
+data=$PWD/data
+aishell_wav_scp=aishell_test.scp
+if [ ! -d "$data/test" ]; then
+    pushd "$data"
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
+    unzip aishell_test.zip
+    popd
+
+    realpath "$data"/test/*/*.wav > "$data/wavlist"
+    awk -F '/' '{ print $(NF) }' "$data/wavlist" | awk -F '.' '{ print $1 }' > "$data/utt_id"
+    paste "$data/utt_id" "$data/wavlist" > "$data/$aishell_wav_scp"
+fi
+
+export GLOG_logtostderr=1
+
+# 3. run the websocket client over the wav list
+websocket_client_main \
+    --wav_rspecifier="scp:$data/$aishell_wav_scp" --streaming_chunk=0.36
diff --git a/speechx/examples/ds2_ol/aishell/websocket_server.sh b/speechx/examples/ds2_ol/aishell/websocket_server.sh
new file mode 100644
index 00000000..ea619d54
--- /dev/null
+++ b/speechx/examples/ds2_ol/aishell/websocket_server.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set +x
+set -e
+
+. path.sh
+
+# 1. compile
+if [ ! -d "${SPEECHX_EXAMPLES}" ]; then
+    pushd "${SPEECHX_ROOT}"
+    bash build.sh
+    popd
+fi
+
+# 2. prepare the test data and the model checkpoint (downloaded on first run)
+mkdir -p data
+data=$PWD/data
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints
+
+aishell_wav_scp=aishell_test.scp
+if [ ! -d "$data/test" ]; then
+    pushd "$data"
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
+    unzip aishell_test.zip
+    popd
+
+    realpath "$data"/test/*/*.wav > "$data/wavlist"
+    awk -F '/' '{ print $(NF) }' "$data/wavlist" | awk -F '.' '{ print $1 }' > "$data/utt_id"
+    paste "$data/utt_id" "$data/wavlist" > "$data/$aishell_wav_scp"
+fi
+
+if [ ! -d "$ckpt_dir" ]; then
+    mkdir -p "$ckpt_dir"
+    wget -P "$ckpt_dir" -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+    tar xzfv "$ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz" -C "$ckpt_dir"
+fi
+
+export GLOG_logtostderr=1
+
+# 3. gen cmvn
+cmvn=$PWD/cmvn.ark
+cmvn-json2kaldi --json_file="$ckpt_dir/data/mean_std.json" --cmvn_write_path="$cmvn"
+
+# 4. download the decoding graph
+graph_dir=./aishell_graph
+if [ ! -d "$graph_dir" ]; then
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
+    unzip aishell_graph.zip
+fi
+
+# 5. test websocket server
+websocket_server_main \
+    --cmvn_file="$cmvn" \
+    --model_path="$model_dir/avg_1.jit.pdmodel" \
+    --streaming_chunk=0.1 \
+    --convert2PCM32=true \
+    --params_path="$model_dir/avg_1.jit.pdiparams" \
+    --word_symbol_table="$graph_dir/words.txt" \
+    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+    --graph_path="$graph_dir/TLG.fst" --max_active=7500 \
+    --acoustic_scale=1.2
diff --git a/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc b/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
new file mode 100644
index 00000000..198a8ec2
--- /dev/null
+++ b/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <string>
+
+#include "decoder/recognizer.h"
+#include "decoder/param.h"
+#include "kaldi/feat/wave-reader.h"
+#include "kaldi/util/table-types.h"
+
+DEFINE_string(wav_rspecifier, "", "test wav rspecifier");
+DEFINE_string(result_wspecifier, "", "test result wspecifier");
+
+// Feeds every wav read from --wav_rspecifier to the recognizer chunk by chunk
+// and writes each non-empty final result to --result_wspecifier.
+int main(int argc, char* argv[]) {
+    gflags::ParseCommandLineFlags(&argc, &argv, false);
+    google::InitGoogleLogging(argv[0]);
+
+    ppspeech::RecognizerResource resource = ppspeech::InitRecognizerResoure();
+    ppspeech::Recognizer recognizer(resource);
+
+    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
+        FLAGS_wav_rspecifier);
+    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
+    const int sample_rate = 16000;
+    const float streaming_chunk = FLAGS_streaming_chunk;
+    const int chunk_sample_size = streaming_chunk * sample_rate;
+    LOG(INFO) << "sr: " << sample_rate;
+    LOG(INFO) << "chunk size (s): " << streaming_chunk;
+    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;
+    if (chunk_sample_size <= 0) {
+        // a non-positive chunk would never advance sample_offset below.
+        LOG(ERROR) << "streaming_chunk must be positive";
+        return 1;
+    }
+
+    int32 num_done = 0, num_err = 0;
+
+    for (; !wav_reader.Done(); wav_reader.Next()) {
+        const std::string utt = wav_reader.Key();
+        const kaldi::WaveData& wave_data = wav_reader.Value();
+
+        // only the first channel is decoded.
+        const int32 this_channel = 0;
+        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
+                                                    this_channel);
+        const int tot_samples = waveform.Dim();
+        LOG(INFO) << "wav len (sample): " << tot_samples;
+
+        int sample_offset = 0;
+        while (sample_offset < tot_samples) {
+            const int cur_chunk_size =
+                std::min(chunk_sample_size, tot_samples - sample_offset);
+
+            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
+            for (int i = 0; i < cur_chunk_size; ++i) {
+                wav_chunk(i) = waveform(sample_offset + i);
+            }
+
+            recognizer.Accept(wav_chunk);
+            sample_offset += cur_chunk_size;
+            // mark the end of the utterance on the last chunk, even when the
+            // wav length is an exact multiple of the chunk size.
+            if (sample_offset >= tot_samples) {
+                recognizer.SetFinished();
+            }
+            recognizer.Decode();
+        }
+        const std::string result = recognizer.GetFinalResult();
+        recognizer.Reset();
+        if (result.empty()) {
+            // the TokenWriter can not write empty string.
+            ++num_err;
+            KALDI_LOG << " the result of " << utt << " is empty";
+            continue;
+        }
+        KALDI_LOG << " the result of " << utt << " is " << result;
+        result_writer.Write(utt, result);
+        ++num_done;
+    }
+    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
+              << " with errors.";
+    return 0;
+}