add aishell eg & add json parser & add write cmvn binary

pull/1676/head
Yang Zhou 3 years ago
parent e75b906e11
commit ec33f8d73b

@ -2,7 +2,9 @@
data=$1 data=$1
feat_scp=$2 feat_scp=$2
numsplit=$3 split_feat_name=$3
numsplit=$4
if ! [ "$numsplit" -gt 0 ]; then if ! [ "$numsplit" -gt 0 ]; then
echo "Invalid num-split argument"; echo "Invalid num-split argument";
@ -10,7 +12,7 @@ if ! [ "$numsplit" -gt 0 ]; then
fi fi
directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n; done) directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n; done)
feat_split_scp=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n/feats.scp; done) feat_split_scp=$(for n in `seq $numsplit`; do echo $data/split${numsplit}/$n/${split_feat_name}; done)
echo $feat_split_scp echo $feat_split_scp
# if this mkdir fails due to argument-list being too long, iterate. # if this mkdir fails due to argument-list being too long, iterate.
if ! mkdir -p $directories >&/dev/null; then if ! mkdir -p $directories >&/dev/null; then

@ -22,54 +22,60 @@ if [ ! -d ../paddle_asr_model ]; then
fi fi
mkdir -p data mkdir -p data
if [ ! -d ./test ]; then data=$PWD/data
aishell_wav_scp=aishell_test.scp
if [ ! -d $data/test ]; then
wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
unzip aishell_test.zip unzip -d $data aishell_test.zip
realpath ./test/*/*.wav > wavlist realpath $data/test/*/*.wav > $data/wavlist
awk -F '/' '{ print $(NF) }' wavlist | awk -F '.' '{ print $1 }' > utt_id awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
paste utt_id wavlist > aishell_test.scp paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
fi fi
if [ ! -d aishell_ds2_online_model ]; then model_dir=$PWD/aishell_ds2_online_model
mkdir -p aishell_ds2_online_model if [ ! -d $model_dir ]; then
wget -P ./aishell_ds2_online_model -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/aishell_ds2_online_cer8.00_release.tar.gz mkdir -p $model_dir
tar xzfv ./aishell_ds2_online_model/aishell_ds2_online_cer8.00_release.tar.gz -C ./aishell_ds2_online_model wget -P $model_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $model_dir
fi fi
# 3. make feature # 3. make feature
aishell_wav_scp=./aishell_test.scp aishell_online_model=$model_dir/exp/deepspeech2_online/checkpoints
aishell_online_model=./aishell_ds2_online_model/exp/deepspeech2_online/checkpoints lm_model_dir=../paddle_asr_model
model_dir=../paddle_asr_model
feat_ark=./feats.ark
feat_scp=./aishell_feat.scp
cmvn=./cmvn.ark
label_file=./aishell_result label_file=./aishell_result
wer=./aishell_wer wer=./aishell_wer
nj=40
export GLOG_logtostderr=1 export GLOG_logtostderr=1
./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
data=$PWD/data
# 3. gen linear feat # 3. gen linear feat
linear_spectrogram_main \ cmvn=$PWD/cmvn.ark
--wav_rspecifier=scp:$aishell_wav_scp \ cmvn_json2binary_main --json_file=$model_dir/data/mean_std.json --cmvn_write_path=$cmvn
--feature_wspecifier=ark,scp:$feat_ark,$feat_scp \
--cmvn_write_path=$cmvn \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat_log \
--streaming_chunk=10 linear_spectrogram_without_db_norm_main \
--wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
--feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
--cmvn_file=$cmvn \
--streaming_chunk=0.36
nj=10 text=$data/test/text
data=./data
text=./test/text
# recognizer
./local/split_data.sh data aishell_feat.scp $nj
# 4. recognizer
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
offline_decoder_sliding_chunk_main \ offline_decoder_sliding_chunk_main \
--feature_rspecifier=scp:$data/split${nj}/JOB/feats.scp \ --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
--model_path=$aishell_online_model/avg_1.jit.pdmodel \ --model_path=$aishell_online_model/avg_1.jit.pdmodel \
--param_path=$aishell_online_model/avg_1.jit.pdiparams \ --param_path=$aishell_online_model/avg_1.jit.pdiparams \
--dict_file=$model_dir/vocab.txt \ --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
--lm_path=$model_dir/avg_1.jit.klm \ --dict_file=$lm_model_dir/vocab.txt \
--lm_path=$lm_model_dir/avg_1.jit.klm \
--result_wspecifier=ark,t:$data/split${nj}/JOB/result --result_wspecifier=ark,t:$data/split${nj}/JOB/result
cat $data/split${nj}/*/result > $label_file cat $data/split${nj}/*/result > $label_file
local/compute-wer.py --char=1 --v=1 $label_file $text > $wer local/compute-wer.py --char=1 --v=1 $label_file $text > $wer
tail $wer

@ -34,6 +34,12 @@ DEFINE_int32(receptive_field_length,
DEFINE_int32(downsampling_rate, DEFINE_int32(downsampling_rate,
4, 4,
"two CNN(kernel=5) module downsampling rate."); "two CNN(kernel=5) module downsampling rate.");
// Comma-separated list of model output names. main() copies this flag into
// ppspeech::ModelOptions::output_names before constructing PaddleNnet.
DEFINE_string(model_output_names,
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
"scale_3.tmp_1",
"model output names");
// Despite the "names" suffix, main() assigns this flag to
// ppspeech::ModelOptions::cache_shape; the default is the value that was
// previously hard-coded there.
// NOTE(review): presumably each comma-separated entry is a dash-separated
// tensor shape -- confirm against PaddleNnet.
DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names");
using kaldi::BaseFloat; using kaldi::BaseFloat;
using kaldi::Matrix; using kaldi::Matrix;
@ -68,7 +74,8 @@ int main(int argc, char* argv[]) {
ppspeech::ModelOptions model_opts; ppspeech::ModelOptions model_opts;
model_opts.model_path = model_graph; model_opts.model_path = model_graph;
model_opts.params_path = model_params; model_opts.params_path = model_params;
model_opts.cache_shape = "5-1-1024,5-1-1024"; model_opts.cache_shape = FLAGS_model_cache_names;
model_opts.output_names = FLAGS_model_output_names;
std::shared_ptr<ppspeech::PaddleNnet> nnet( std::shared_ptr<ppspeech::PaddleNnet> nnet(
new ppspeech::PaddleNnet(model_opts)); new ppspeech::PaddleNnet(model_opts));
std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache()); std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache());

@ -7,4 +7,12 @@ target_link_libraries(mfcc-test kaldi-mfcc)
add_executable(linear_spectrogram_main ${CMAKE_CURRENT_SOURCE_DIR}/linear_spectrogram_main.cc) add_executable(linear_spectrogram_main ${CMAKE_CURRENT_SOURCE_DIR}/linear_spectrogram_main.cc)
target_include_directories(linear_spectrogram_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) target_include_directories(linear_spectrogram_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog) target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog)
# Executable built from linear_spectrogram_without_db_norm_main.cc. It uses the
# same include directories and link libraries as linear_spectrogram_main.
add_executable(linear_spectrogram_without_db_norm_main ${CMAKE_CURRENT_SOURCE_DIR}/linear_spectrogram_without_db_norm_main.cc)
target_include_directories(linear_spectrogram_without_db_norm_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(linear_spectrogram_without_db_norm_main frontend kaldi-util kaldi-feat-common gflags glog)
# Executable built from cmvn_json2binary_main.cc. Unlike the spectrogram
# binaries it links against utils and kaldi-matrix rather than frontend and
# kaldi-feat-common.
add_executable(cmvn_json2binary_main ${CMAKE_CURRENT_SOURCE_DIR}/cmvn_json2binary_main.cc)
target_include_directories(cmvn_json2binary_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(cmvn_json2binary_main utils kaldi-util kaldi-matrix gflags glog)

@ -182,6 +182,7 @@ int main(int argc, char* argv[]) {
ppspeech::LinearSpectrogramOptions opt; ppspeech::LinearSpectrogramOptions opt;
opt.frame_opts.frame_length_ms = 20; opt.frame_opts.frame_length_ms = 20;
opt.frame_opts.frame_shift_ms = 10; opt.frame_opts.frame_shift_ms = 10;
opt.streaming_chunk = FLAGS_streaming_chunk;
opt.frame_opts.dither = 0.0; opt.frame_opts.dither = 0.0;
opt.frame_opts.remove_dc_offset = false; opt.frame_opts.remove_dc_offset = false;
opt.frame_opts.window_type = "hanning"; opt.frame_opts.window_type = "hanning";
@ -257,6 +258,7 @@ int main(int argc, char* argv[]) {
} }
} }
feat_writer.Write(utt, features); feat_writer.Write(utt, features);
feature_cache.Reset();
if (num_done % 50 == 0 && num_done != 0) if (num_done % 50 == 0 && num_done != 0)
KALDI_VLOG(2) << "Processed " << num_done << " utterances"; KALDI_VLOG(2) << "Processed " << num_done << " utterances";

@ -21,15 +21,20 @@ using kaldi::BaseFloat;
using kaldi::VectorBase; using kaldi::VectorBase;
using kaldi::Vector; using kaldi::Vector;
AudioCache::AudioCache(int buffer_size) AudioCache::AudioCache(int buffer_size, bool convert2PCM32)
: finished_(false), : finished_(false),
capacity_(buffer_size), capacity_(buffer_size),
size_(0), size_(0),
offset_(0), offset_(0),
timeout_(1) { timeout_(1),
convert2PCM32_(convert2PCM32) {
ring_buffer_.resize(capacity_); ring_buffer_.resize(capacity_);
} }
// Scales a single sample by 1 / 2^15, i.e. divides it by 32768.
// NOTE(review): presumably this maps 16-bit-PCM-range values to roughly
// [-1, 1) -- confirm against the callers that enable convert2PCM32.
BaseFloat AudioCache::Convert2PCM32(BaseFloat val) {
    const double full_scale = std::pow(2.0, 15);
    const double inv_full_scale = 1. / full_scale;
    return val * inv_full_scale;
}
void AudioCache::Accept(const VectorBase<BaseFloat>& waves) { void AudioCache::Accept(const VectorBase<BaseFloat>& waves) {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
while (size_ + waves.Dim() > ring_buffer_.size()) { while (size_ + waves.Dim() > ring_buffer_.size()) {
@ -38,6 +43,8 @@ void AudioCache::Accept(const VectorBase<BaseFloat>& waves) {
for (size_t idx = 0; idx < waves.Dim(); ++idx) { for (size_t idx = 0; idx < waves.Dim(); ++idx) {
int32 buffer_idx = (idx + offset_) % ring_buffer_.size(); int32 buffer_idx = (idx + offset_) % ring_buffer_.size();
ring_buffer_[buffer_idx] = waves(idx); ring_buffer_[buffer_idx] = waves(idx);
if (convert2PCM32_)
ring_buffer_[buffer_idx] = Convert2PCM32(waves(idx));
} }
size_ += waves.Dim(); size_ += waves.Dim();
} }

@ -23,7 +23,8 @@ namespace ppspeech {
// waves cache // waves cache
class AudioCache : public FrontendInterface { class AudioCache : public FrontendInterface {
public: public:
explicit AudioCache(int buffer_size = 100*kint16max); explicit AudioCache(int buffer_size = 1000 * kint16max,
bool convert2PCM32 = false);
virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves); virtual void Accept(const kaldi::VectorBase<BaseFloat>& waves);
@ -46,6 +47,8 @@ class AudioCache : public FrontendInterface {
} }
private: private:
kaldi::BaseFloat Convert2PCM32(kaldi::BaseFloat val);
std::vector<kaldi::BaseFloat> ring_buffer_; std::vector<kaldi::BaseFloat> ring_buffer_;
size_t offset_; // offset in ring_buffer_ size_t offset_; // offset in ring_buffer_
size_t size_; // samples in ring_buffer_ now size_t size_; // samples in ring_buffer_ now
@ -54,6 +57,7 @@ class AudioCache : public FrontendInterface {
mutable std::mutex mutex_; mutable std::mutex mutex_;
std::condition_variable ready_feed_condition_; std::condition_variable ready_feed_condition_;
kaldi::int32 timeout_; // millisecond kaldi::int32 timeout_; // millisecond
bool convert2PCM32_;
DISALLOW_COPY_AND_ASSIGN(AudioCache); DISALLOW_COPY_AND_ASSIGN(AudioCache);
}; };

@ -46,7 +46,10 @@ class LinearSpectrogram : public FrontendInterface {
virtual size_t Dim() const { return dim_; } virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); } virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); } virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() { base_extractor_->Reset(); } virtual void Reset() {
base_extractor_->Reset();
reminded_wav_.Resize(0);
}
private: private:
bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves, bool Compute(const kaldi::Vector<kaldi::BaseFloat>& waves,

@ -1,4 +1,5 @@
add_library(utils add_library(utils
file_utils.cc file_utils.cc
simdjson.cpp
) )

@ -31,4 +31,14 @@ bool ReadFileToVector(const std::string& filename,
return true; return true;
} }
}
// Reads the entire contents of the file at `path` into a string.
//
// Returns the file contents, or an empty string when the file cannot be
// opened; in that case the failing path is reported on stderr.
std::string ReadFile2String(const std::string& path) {
    std::ifstream input_file(path);
    if (!input_file.is_open()) {
        // Name the offending path so the failure can be diagnosed, and stop
        // here instead of reading from a stream that never opened.
        std::cerr << "please input a valid file: " << path << std::endl;
        return std::string();
    }
    return std::string(std::istreambuf_iterator<char>(input_file),
                       std::istreambuf_iterator<char>());
}
}

@ -18,4 +18,7 @@ namespace ppspeech {
bool ReadFileToVector(const std::string& filename, bool ReadFileToVector(const std::string& filename,
std::vector<std::string>* data); std::vector<std::string>* data);
std::string ReadFile2String(const std::string& path);
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save