From 300caf188ba17e57a8620385f5569df22c215ea4 Mon Sep 17 00:00:00 2001
From: Yang Zhou <goat.zhou@qq.com>
Date: Wed, 16 Mar 2022 16:24:19 +0800
Subject: [PATCH] add valgrind

---
 speechx/TODO.md                               |  3 +++
 speechx/examples/decoder/run.sh               |  2 +-
 speechx/examples/decoder/valgrind.sh          | 20 ++++++++++++++
 speechx/examples/feat/run.sh                  |  2 +-
 speechx/examples/feat/valgrind.sh             | 20 ++++++++++++++
 speechx/examples/nnet/run.sh                  |  1 -
 speechx/examples/nnet/valgrind.sh             | 18 +++++++++++++
 speechx/examples/path.sh                      |  5 +++-
 .../decoder/ctc_beam_search_decoder.cc        |  6 ++---
 speechx/tools/setup_valgrind.sh               | 26 +++++++++++++++++++
 10 files changed, 96 insertions(+), 7 deletions(-)
 create mode 100644 speechx/TODO.md
 create mode 100644 speechx/examples/decoder/valgrind.sh
 create mode 100644 speechx/examples/feat/valgrind.sh
 create mode 100644 speechx/examples/nnet/valgrind.sh
 create mode 100755 speechx/tools/setup_valgrind.sh

diff --git a/speechx/TODO.md b/speechx/TODO.md
new file mode 100644
index 00000000..d65adaa2
--- /dev/null
+++ b/speechx/TODO.md
@@ -0,0 +1,3 @@
+# TODO
+
+* DecibelNormalizer: there is a small difference between the offline and online dB normalization. The online dB norm reads the features chunk by chunk, so the number of samples it sees differs from the offline dB norm. In normalizer.cc:73, samples.size() is therefore different, which causes the results to differ.
diff --git a/speechx/examples/decoder/run.sh b/speechx/examples/decoder/run.sh
index ffb05b77..4251cc04 100644
--- a/speechx/examples/decoder/run.sh
+++ b/speechx/examples/decoder/run.sh
@@ -28,5 +28,5 @@ cmvn=./cmvn.ark
 linear_spectrogram_main --wav_rspecifier=scp:$model_dir/wav.scp --feature_wspecifier=ark:$feat_wspecifier --cmvn_write_path=$cmvn
 
 # 4. run decoder
-../../build/examples/decoder/offline_decoder_main --feature_respecifier=ark:$feat_wspecifier --model_path=$model_dir/avg_1.jit.pdmodel --param_path=$model_dir/avg_1.jit.pdparams --dict_file=$model_dir/vocab.txt --lm_path=$model_dir/avg_1.jit.klm
+offline_decoder_main --feature_respecifier=ark:$feat_wspecifier --model_path=$model_dir/avg_1.jit.pdmodel --param_path=$model_dir/avg_1.jit.pdparams --dict_file=$model_dir/vocab.txt --lm_path=$model_dir/avg_1.jit.klm
 
diff --git a/speechx/examples/decoder/valgrind.sh b/speechx/examples/decoder/valgrind.sh
new file mode 100644
index 00000000..3e90299e
--- /dev/null
+++ b/speechx/examples/decoder/valgrind.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Runs offline_decoder_main under valgrind memcheck. Run ./run.sh first; this script reuses its inputs.
+# valgrind is expected under ../../tools/valgrind/install (see speechx/tools/setup_valgrind.sh).
+set +x
+set -e
+
+if [ ! -d ../../tools/valgrind/install ]; then
+  echo "please install valgrind in the speechx tools dir."
+  exit 1
+fi
+# path.sh appends the speechx example binaries and the valgrind install to PATH.
+. ../path.sh
+
+model_dir=../paddle_asr_model
+feat_wspecifier=./feats.ark
+
+# memcheck flags: track where uninitialised values come from and report every leak kind.
+valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all offline_decoder_main --feature_respecifier=ark:$feat_wspecifier --model_path=$model_dir/avg_1.jit.pdmodel --param_path=$model_dir/avg_1.jit.pdparams --dict_file=$model_dir/vocab.txt --lm_path=$model_dir/avg_1.jit.klm
+
diff --git a/speechx/examples/feat/run.sh b/speechx/examples/feat/run.sh
index 276fefa3..4116288a 100644
--- a/speechx/examples/feat/run.sh
+++ b/speechx/examples/feat/run.sh
@@ -25,4 +25,4 @@ feat_wspecifier=./feats.ark
 cmvn=./cmvn.ark
 
 # 3. run feat
-linear_spectrogram_main --wav_rspecifier=scp:$model_dir/wav.scp --feature_wspecifier=ark,t:$feat_wspecifier --cmvn_write_path=$cmvn
+linear_spectrogram_main --wav_rspecifier=scp:$model_dir/wav.scp --feature_wspecifier=ark,t:$feat_wspecifier --cmvn_write_path=$cmvn
diff --git a/speechx/examples/feat/valgrind.sh b/speechx/examples/feat/valgrind.sh
new file mode 100644
index 00000000..5a6a538a
--- /dev/null
+++ b/speechx/examples/feat/valgrind.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Runs linear_spectrogram_main under valgrind memcheck. Run ./run.sh first; this script reuses its inputs.
+# valgrind is expected under ../../tools/valgrind/install (see speechx/tools/setup_valgrind.sh).
+set +x
+set -e
+
+if [ ! -d ../../tools/valgrind/install ]; then
+  echo "please install valgrind in the speechx tools dir."
+  exit 1
+fi
+# path.sh appends the speechx example binaries and the valgrind install to PATH.
+. ../path.sh
+
+model_dir=../paddle_asr_model
+feat_wspecifier=./feats.ark
+cmvn=./cmvn.ark
+# memcheck flags: track where uninitialised values come from and report every leak kind.
+valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all linear_spectrogram_main --wav_rspecifier=scp:$model_dir/wav.scp --feature_wspecifier=ark,t:$feat_wspecifier --cmvn_write_path=$cmvn
+
diff --git a/speechx/examples/nnet/run.sh b/speechx/examples/nnet/run.sh
index a2ef0beb..6615082a 100644
--- a/speechx/examples/nnet/run.sh
+++ b/speechx/examples/nnet/run.sh
@@ -22,7 +22,6 @@ fi
 
 model_dir=../paddle_asr_model
 
-
 # 4. run decoder
 pp-model-test --model_path=$model_dir/avg_1.jit.pdmodel --param_path=$model_dir/avg_1.jit.pdparams
 
diff --git a/speechx/examples/nnet/valgrind.sh b/speechx/examples/nnet/valgrind.sh
new file mode 100644
index 00000000..d692db50
--- /dev/null
+++ b/speechx/examples/nnet/valgrind.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Runs pp-model-test under valgrind memcheck. Run ./run.sh first; this script reuses its inputs.
+# valgrind is expected under ../../tools/valgrind/install (see speechx/tools/setup_valgrind.sh).
+set +x
+set -e
+
+if [ ! -d ../../tools/valgrind/install ]; then
+  echo "please install valgrind in the speechx tools dir."
+  exit 1
+fi
+# path.sh appends the speechx example binaries and the valgrind install to PATH.
+. ../path.sh
+
+model_dir=../paddle_asr_model
+# memcheck flags: track where uninitialised values come from and report every leak kind.
+valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all pp-model-test --model_path=$model_dir/avg_1.jit.pdmodel --param_path=$model_dir/avg_1.jit.pdparams
+
diff --git a/speechx/examples/path.sh b/speechx/examples/path.sh
index c29bc5af..1f62ed7e 100644
--- a/speechx/examples/path.sh
+++ b/speechx/examples/path.sh
@@ -4,8 +4,11 @@ SPEECHX_ROOT=$PWD/../..
 SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
 SPEECHX_BIN=$SPEECHX_EXAMPLES/nnet:$SPEECHX_EXAMPLES/decoder:$SPEECHX_EXAMPLES/feat
 
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+
 [ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }
 
 export LC_AL=C
 
-export PATH=$PATH:$SPEECHX_BIN
+export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder.cc b/speechx/speechx/decoder/ctc_beam_search_decoder.cc
index 7bbb9506..582f2c95 100644
--- a/speechx/speechx/decoder/ctc_beam_search_decoder.cc
+++ b/speechx/speechx/decoder/ctc_beam_search_decoder.cc
@@ -206,9 +206,9 @@ int32 CTCBeamSearch::SearchOneChar(
     size_t beam_size = opts_.beam_size;
     const auto& c = log_prob_idx.first;
     const auto& log_prob_c = log_prob_idx.second;
-    size_t prefixes__len = std::min(prefixes_.size(), beam_size);
+    size_t prefixes_len = std::min(prefixes_.size(), beam_size);
 
-    for (size_t i = 0; i < prefixes__len; ++i) {
+    for (size_t i = 0; i < prefixes_len; ++i) {
         auto prefix = prefixes_[i];
         if (full_beam && log_prob_c + prefix->score < min_cutoff) {
             break;
@@ -311,4 +311,4 @@ void CTCBeamSearch::LMRescore() {
     }
 }
 
-}  // namespace ppspeech
\ No newline at end of file
+}  // namespace ppspeech
diff --git a/speechx/tools/setup_valgrind.sh b/speechx/tools/setup_valgrind.sh
new file mode 100755
index 00000000..9e9f0716
--- /dev/null
+++ b/speechx/tools/setup_valgrind.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Download, build and install valgrind into $PWD/valgrind/install.
+# Run it from speechx/tools: the examples' valgrind.sh scripts look for ../../tools/valgrind/install.
+VALGRIND_VERSION=3.18.1
+set -e
+
+# Derive every versioned name from VALGRIND_VERSION so a version bump is a one-line change.
+tarball=valgrind-${VALGRIND_VERSION}.tar.bz2
+srcdir=valgrind-${VALGRIND_VERSION}
+url=https://sourceware.org/pub/valgrind/${tarball}
+if [ -f "$tarball" ]; then
+  echo "use the $tarball have downloaded."
+else
+  wget -t3 "$url"  # TLS verification stays on: this tarball is compiled and executed locally
+fi
+
+tar xjfv "$tarball"
+# Drop any tree left by an earlier run; otherwise mv would nest the new source inside ./valgrind.
+rm -rf valgrind
+mv "$srcdir" valgrind
+prefix=$PWD/valgrind/install
+cd ./valgrind/
+  ./configure --prefix="$prefix"
+  make
+  make install
+cd -