diff --git a/demos/audio_searching/src/encode.py b/demos/audio_searching/src/encode.py index cf5f29a4..35805784 100644 --- a/demos/audio_searching/src/encode.py +++ b/demos/audio_searching/src/encode.py @@ -11,11 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os - -import librosa import numpy as np -from config import DEFAULT_TABLE from logs import LOGGER from paddlespeech.cli import VectorExecutor diff --git a/speechx/examples/CMakeLists.txt b/speechx/examples/CMakeLists.txt index ef0a72b8..7f1543c2 100644 --- a/speechx/examples/CMakeLists.txt +++ b/speechx/examples/CMakeLists.txt @@ -3,3 +3,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) add_subdirectory(feat) add_subdirectory(nnet) add_subdirectory(decoder) + +add_subdirectory(glog) \ No newline at end of file diff --git a/speechx/examples/README.md b/speechx/examples/README.md index 941c4272..705ca200 100644 --- a/speechx/examples/README.md +++ b/speechx/examples/README.md @@ -1,8 +1,9 @@ # Examples -* decoder - online decoder to work as offline +* glog - glog usage * feat - mfcc, linear * nnet - ds2 nn +* decoder - online decoder to work as offline ## How to run diff --git a/speechx/examples/decoder/offline_decoder_main.cc b/speechx/examples/decoder/offline_decoder_main.cc index 44127c73..eccd7c09 100644 --- a/speechx/examples/decoder/offline_decoder_main.cc +++ b/speechx/examples/decoder/offline_decoder_main.cc @@ -63,6 +63,7 @@ int main(int argc, char* argv[]) { int32 chunk_size = 35; decoder.InitDecoder(); + LOG(INFO) << "chunk size: " << chunk_size; for (; !feature_reader.Done(); feature_reader.Next()) { string utt = feature_reader.Key(); diff --git a/speechx/examples/decoder/run.sh b/speechx/examples/decoder/run.sh index fc5e9182..1e5a678c 100755 --- a/speechx/examples/decoder/run.sh +++ b/speechx/examples/decoder/run.sh @@ -25,6 +25,9 @@ model_dir=../paddle_asr_model feat_wspecifier=./feats.ark cmvn=./cmvn.ark + +export GLOG_logtostderr=1 + # 3. run feat linear_spectrogram_main \ --wav_rspecifier=scp:$model_dir/wav.scp \ @@ -37,4 +40,4 @@ offline_decoder_main \ --model_path=$model_dir/avg_1.jit.pdmodel \ --param_path=$model_dir/avg_1.jit.pdparams \ --dict_file=$model_dir/vocab.txt \ - --lm_path=$model_dir/avg_1.jit.klm \ No newline at end of file + --lm_path=$model_dir/avg_1.jit.klm diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc index 9ed4d6f9..a27db56f 100644 --- a/speechx/examples/feat/linear_spectrogram_main.cc +++ b/speechx/examples/feat/linear_spectrogram_main.cc @@ -25,6 +25,8 @@ #include "kaldi/util/kaldi-io.h" #include "kaldi/util/table-types.h" +#include + DEFINE_string(wav_rspecifier, "", "test wav scp path"); DEFINE_string(feature_wspecifier, "", "output feats wspecifier"); DEFINE_string(cmvn_write_path, "./cmvn.ark", "write cmvn"); @@ -172,6 +174,9 @@ int main(int argc, char* argv[]) { ppspeech::LinearSpectrogramOptions opt; opt.frame_opts.frame_length_ms = 20; opt.frame_opts.frame_shift_ms = 10; + LOG(INFO) << "frame length (ms):" << opt.frame_opts.frame_length_ms; + LOG(INFO) << "frame shift (ms):" << opt.frame_opts.frame_shift_ms; + ppspeech::DecibelNormalizerOptions db_norm_opt; std::unique_ptr base_feature_extractor( new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source))); @@ -190,6 +195,11 @@ int main(int argc, char* argv[]) { int sample_rate = 16000; int chunk_sample_size = streaming_chunk * sample_rate; + LOG(INFO) << "sr:" << sample_rate; + LOG(INFO) << "chunk size (s):" << streaming_chunk; + LOG(INFO) << "chunk size (sample):" << chunk_sample_size; + + for (; !wav_reader.Done(); wav_reader.Next()) { std::string utt = wav_reader.Key(); const kaldi::WaveData& wave_data = wav_reader.Value(); diff --git a/speechx/examples/feat/run.sh b/speechx/examples/feat/run.sh index bd21bd7f..29c49d32 100755 --- a/speechx/examples/feat/run.sh +++ b/speechx/examples/feat/run.sh @@ -25,6 +25,7 @@ feat_wspecifier=./feats.ark cmvn=./cmvn.ark # 3. run feat +export GLOG_logtostderr=1 linear_spectrogram_main \ --wav_rspecifier=scp:$model_dir/wav.scp \ --feature_wspecifier=ark,t:$feat_wspecifier \