pull/3173/head
YangZhou 2 years ago
parent 85a1744ecc
commit 707c72c3ea

@ -87,9 +87,9 @@ void CTCPrefixBeamSearch::AdvanceDecode(
VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_; VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_;
} }
VLOG(1) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost VLOG(2) << "AdvanceDecode feat + forward cost: " << feat_nnet_cost
<< " sec."; << " sec.";
VLOG(1) << "AdvanceDecode search cost: " << search_cost << " sec."; VLOG(2) << "AdvanceDecode search cost: " << search_cost << " sec.";
} }
static bool PrefixScoreCompare( static bool PrefixScoreCompare(

@ -46,7 +46,6 @@ void NnetProducer::Acceptlikelihood(
bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) { bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {
bool flag = cache_.pop(nnet_prob); bool flag = cache_.pop(nnet_prob);
VLOG(1) << "nnet cache_ size: " << cache_.size();
return flag; return flag;
} }

@ -124,7 +124,15 @@ U2Nnet::U2Nnet(const U2Nnet& other) {
offset_ = other.offset_; offset_ = other.offset_;
// copy model ptr // copy model ptr
model_ = other.model_->Clone(); // model_ = other.model_->Clone();
// hack, fix later
#ifdef WITH_GPU
dev_ = phi::GPUPlace();
#else
dev_ = phi::CPUPlace();
#endif
paddle::jit::Layer model = paddle::jit::Load(other.opts_.model_path, dev_);
model_ = std::make_shared<paddle::jit::Layer>(std::move(model));
ctc_activation_ = model_->Function("ctc_activation"); ctc_activation_ = model_->Function("ctc_activation");
subsampling_rate_ = model_->Attribute<int>("subsampling_rate"); subsampling_rate_ = model_->Attribute<int>("subsampling_rate");
right_context_ = model_->Attribute<int>("right_context"); right_context_ = model_->Attribute<int>("right_context");
@ -166,6 +174,7 @@ void U2Nnet::Reset() {
std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32)); std::move(paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32));
encoder_outs_.clear(); encoder_outs_.clear();
VLOG(1) << "FeedForward cost: " << cost_time_ << " sec. ";
VLOG(3) << "u2nnet reset"; VLOG(3) << "u2nnet reset";
} }
@ -185,8 +194,10 @@ void U2Nnet::FeedForward(const std::vector<BaseFloat>& features,
std::vector<kaldi::BaseFloat> ctc_probs; std::vector<kaldi::BaseFloat> ctc_probs;
ForwardEncoderChunkImpl( ForwardEncoderChunkImpl(
features, feature_dim, &out->logprobs, &out->vocab_dim); features, feature_dim, &out->logprobs, &out->vocab_dim);
VLOG(1) << "FeedForward cost: " << timer.Elapsed() << " sec. " float forward_chunk_time = timer.Elapsed();
VLOG(1) << "FeedForward cost: " << forward_chunk_time << " sec. "
<< features.size() / feature_dim << " frames."; << features.size() / feature_dim << " frames.";
cost_time_ += forward_chunk_time;
} }

@ -113,8 +113,8 @@ class U2Nnet : public U2NnetBase {
void EncoderOuts( void EncoderOuts(
std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const; std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const;
ModelOptions opts_; // hack, fix later
private: private:
ModelOptions opts_;
phi::Place dev_; phi::Place dev_;
std::shared_ptr<paddle::jit::Layer> model_{nullptr}; std::shared_ptr<paddle::jit::Layer> model_{nullptr};
@ -127,6 +127,7 @@ class U2Nnet : public U2NnetBase {
paddle::jit::Function forward_encoder_chunk_; paddle::jit::Function forward_encoder_chunk_;
paddle::jit::Function forward_attention_decoder_; paddle::jit::Function forward_attention_decoder_;
paddle::jit::Function ctc_activation_; paddle::jit::Function ctc_activation_;
float cost_time_ = 0.0;
}; };
} // namespace ppspeech } // namespace ppspeech

@ -88,7 +88,8 @@ int main(int argc, char* argv[]) {
kaldi::Timer timer; kaldi::Timer timer;
recognizer_ptr->AttentionRescoring(); recognizer_ptr->AttentionRescoring();
tot_attention_rescore_time += timer.Elapsed(); float rescore_time = timer.Elapsed();
tot_attention_rescore_time += rescore_time;
std::string result = recognizer_ptr->GetFinalResult(); std::string result = recognizer_ptr->GetFinalResult();
if (result.empty()) { if (result.empty()) {
@ -101,7 +102,7 @@ int main(int argc, char* argv[]) {
tot_decode_time += local_timer.Elapsed(); tot_decode_time += local_timer.Elapsed();
LOG(INFO) << utt << " " << result; LOG(INFO) << utt << " " << result;
LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
<< " cost: " << local_timer.Elapsed(); << " cost: " << local_timer.Elapsed() << " rescore:" << rescore_time;
result_writer.Write(utt, result); result_writer.Write(utt, result);

@ -67,7 +67,7 @@ Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
#### RTF
```
I0417 08:07:15.300631 75784 recognizer_main.cc:113] total wav duration is: 36108.9 sec
I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:10247.7 sec
I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:908.228 sec
I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.283
```

@ -0,0 +1,36 @@
#!/bin/bash
# Run the ONNX (fastdeploy) recognizer over the AIShell test set in $nj
# parallel jobs, merge the per-shard results, and score them with
# character-level WER.  Outputs land under $exp.
set -e

# Defaults; overridable via --data/--exp/--nj thanks to parse_options below.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p "$exp"
ckpt_dir=./data/model
model_dir=$ckpt_dir/onnx_model/
aishell_wav_scp=aishell_test.scp
text=$data/test/text

# Shard the wav scp into $nj pieces, one per parallel job.
./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

# Decode each shard; run.pl substitutes JOB with the shard index.
utils/run.pl JOB=1:$nj "$data/split${nj}/JOB/recognizer.fd.log" \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=$model_dir \
    --word_symbol_table=$model_dir/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --with_onnx_model=true \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/recognizer.fd.rsl.ark

# Merge shard transcripts and compute character error rate.
cat "$data"/split${nj}/*/recognizer.fd.rsl.ark > "$exp/aishell.recognizer.fd.rsl"
utils/compute-wer.py --char=1 --v=1 "$text" "$exp/aishell.recognizer.fd.rsl" > "$exp/aishell.recognizer.fd.err"

echo "recognizer fd test have finished!!!"
echo "please checkout in $exp/aishell.recognizer.fd.err"
tail -n 7 "$exp/aishell.recognizer.fd.err"

@ -0,0 +1,51 @@
#!/bin/bash
# Run the WFST (TLG-graph) recognizer over the AIShell test set in $nj
# parallel jobs, downloading a prebuilt n-gram TLG graph if absent, then
# merge per-shard results and score with character-level WER.
set -e

# Defaults; overridable via --data/--exp/--nj thanks to parse_options below.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p "$exp"
ckpt_dir=./data/model
# NOTE(review): model_dir points at onnx_model but --with_onnx_model is not
# passed below (unlike the fd sibling script) — confirm the intended backend.
model_dir=$ckpt_dir/onnx_model/
aishell_wav_scp=aishell_test.scp
text=$data/test/text

# Shard the wav scp into $nj pieces, one per parallel job.
./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

lang_dir=./data/lang_test/
graph=$lang_dir/TLG.fst
word_table=$lang_dir/words.txt

if [ ! -f "$graph" ]; then
    # download ngram, if you want to make graph by yourself, please refer local/run_build_tlg.sh
    mkdir -p "$lang_dir"
    pushd "$lang_dir"
    wget -c https://paddlespeech.bj.bcebos.com/speechx/examples/ngram/zh/tlg.zip
    unzip tlg.zip
    popd
fi

# Decode each shard; run.pl substitutes JOB with the shard index.
utils/run.pl JOB=1:$nj "$data/split${nj}/JOB/recognizer_wfst_fd.log" \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=$model_dir \
    --graph_path=$lang_dir/TLG.fst \
    --word_symbol_table=$word_table \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
    --rescoring_weight=0.0 \
    --acoustic_scale=2 \
    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer_wfst_fd.ark

# Merge shard transcripts and compute character error rate.
cat "$data"/split${nj}/*/result_recognizer_wfst_fd.ark > "$exp/aishell_recognizer_wfst_fd"
utils/compute-wer.py --char=1 --v=1 "$text" "$exp/aishell_recognizer_wfst_fd" > "$exp/aishell.recognizer_wfst_fd.err"

echo "recognizer wfst fd test have finished!!!"
echo "please checkout in $exp/aishell.recognizer_wfst_fd.err"
tail -n 7 "$exp/aishell.recognizer_wfst_fd.err"
Loading…
Cancel
Save