diff --git a/speechx/.gitignore b/speechx/.gitignore index e0c618470..9a93805c0 100644 --- a/speechx/.gitignore +++ b/speechx/.gitignore @@ -1 +1,2 @@ tools/valgrind* +*log diff --git a/speechx/speechx/nnet/nnet_itf.h b/speechx/speechx/nnet/nnet_itf.h index 12fe3c272..b98f5ebd0 100644 --- a/speechx/speechx/nnet/nnet_itf.h +++ b/speechx/speechx/nnet/nnet_itf.h @@ -21,15 +21,15 @@ namespace ppspeech { -struct NnetOut{ - // nnet out, maybe logprob or prob - kaldi::Vector logprobs; - int32 vocab_dim; +struct NnetOut { + // nnet out, maybe logprob or prob + kaldi::Vector logprobs; + int32 vocab_dim; - // nnet state. Only using in Attention model. - std::vector> encoder_outs; + // nnet state. Only using in Attention model. + std::vector> encoder_outs; - NnetOut() : logprobs({}), vocab_dim(-1), encoder_outs({}) {} + NnetOut() : logprobs({}), vocab_dim(-1), encoder_outs({}) {} }; diff --git a/speechx/speechx/nnet/u2_nnet.cc b/speechx/speechx/nnet/u2_nnet.cc index 26d7da8f9..ddb815d20 100644 --- a/speechx/speechx/nnet/u2_nnet.cc +++ b/speechx/speechx/nnet/u2_nnet.cc @@ -313,10 +313,8 @@ void U2Nnet::ForwardEncoderChunkImpl( // call. std::vector inputs = { feats, offset, /*required_cache_size, */ att_cache_, cnn_cache_}; - VLOG(3) << "inputs size: " << inputs.size(); CHECK(inputs.size() == 4); std::vector outputs = forward_encoder_chunk_(inputs); - VLOG(3) << "outputs size: " << outputs.size(); CHECK(outputs.size() == 3); #ifdef USE_GPU @@ -351,10 +349,12 @@ void U2Nnet::ForwardEncoderChunkImpl( // current offset in decoder frame // not used in nnet offset_ += chunk_out.shape()[1]; + VLOG(2) << "encoder out chunk size: " << chunk_out.shape()[1] << " total: " << offset_ ; + // collects encoder outs. 
- VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size(); encoder_outs_.push_back(chunk_out); + VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size(); #ifdef TEST_DEBUG { diff --git a/speechx/speechx/nnet/u2_nnet.h b/speechx/speechx/nnet/u2_nnet.h index 874429599..775a078a5 100644 --- a/speechx/speechx/nnet/u2_nnet.h +++ b/speechx/speechx/nnet/u2_nnet.h @@ -137,7 +137,9 @@ class U2Nnet : public U2NnetBase { // debug void FeedEncoderOuts(paddle::Tensor& encoder_out); - const std::vector& EncoderOuts() const {return encoder_outs_; } + const std::vector& EncoderOuts() const { + return encoder_outs_; + } private: U2ModelOptions opts_; diff --git a/speechx/speechx/nnet/u2_nnet_main.cc b/speechx/speechx/nnet/u2_nnet_main.cc index 1a1a5e02d..b602ac4db 100644 --- a/speechx/speechx/nnet/u2_nnet_main.cc +++ b/speechx/speechx/nnet/u2_nnet_main.cc @@ -95,29 +95,29 @@ int main(int argc, char* argv[]) { // kaldi::kCopyData); // } - int32 num_chunks = (feature.NumRows() - chunk_size) / chunk_stride + 1; int32 frame_idx = 0; std::vector> prob_vec; int32 ori_feature_len = feature.NumRows(); + int32 num_chunks = feature.NumRows() / chunk_stride + 1; + LOG(INFO) << "num_chunks: " << num_chunks; for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { - kaldi::Vector feature_chunk(chunk_size * - feat_dim); - - int32 feature_chunk_size = 0; + int32 this_chunk_size = 0; if (ori_feature_len > chunk_idx * chunk_stride) { - feature_chunk_size = std::min( + this_chunk_size = std::min( ori_feature_len - chunk_idx * chunk_stride, chunk_size); } - if (feature_chunk_size < receptive_field_length) { + if (this_chunk_size < receptive_field_length) { LOG(WARNING) << "utt: " << utt << " skip last " - << feature_chunk_size << " frames, expect is " + << this_chunk_size << " frames, expect is " << receptive_field_length; break; } + kaldi::Vector feature_chunk(this_chunk_size * + feat_dim); int32 start = chunk_idx * chunk_stride; - for (int row_id = 0; row_id < chunk_size; ++row_id) { + 
for (int row_id = 0; row_id < this_chunk_size; ++row_id) { kaldi::SubVector feat_row(feature, start); kaldi::SubVector feature_chunk_row( feature_chunk.Data() + row_id * feat_dim, feat_dim);
diff --git a/speechx/tools/clang-format.sh b/speechx/tools/clang-format.sh
new file mode 100755
index 000000000..30f636ff4
--- /dev/null
+++ b/speechx/tools/clang-format.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Run clang-format in place on speechx sources and headers, skipping kaldi paths.
+# The -name tests are grouped with \( \) so that -not -path applies to all of
+# them: find's implicit -and binds tighter than -o, so without the grouping the
+# exclusion only filtered the '*.h' matches. '*.cc' is included because the
+# speechx sources in this tree use the .cc extension, which '*.c' never matches.
+find speechx \( -name '*.c' -o -name '*.cc' -o -name '*.h' \) -not -path "*kaldi*" | xargs -I{} clang-format -i {}