fix u2 nnet out frames num

pull/2524/head
Hui Zhang 3 years ago
parent cd1ced4ea0
commit a75abc1828

@@ -1 +1,2 @@
 tools/valgrind*
+*log

@@ -21,15 +21,15 @@
 namespace ppspeech {

-struct NnetOut{
+struct NnetOut {
     // nnet out, maybe logprob or prob
     kaldi::Vector<kaldi::BaseFloat> logprobs;
     int32 vocab_dim;

     // nnet state. Only using in Attention model.
     std::vector<std::vector<kaldi::BaseFloat>> encoder_outs;

     NnetOut() : logprobs({}), vocab_dim(-1), encoder_outs({}) {}
 };

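For reference, NnetOut packs the per-frame outputs into a single flat vector, so the number of nnet output frames is implied by logprobs.Dim() / vocab_dim. Below is only an illustrative sketch of that layout (the helper name is made up and is not part of this patch), assuming logprobs is stored row-major as [num_frames, vocab_dim]:

    // Illustrative sketch (not in the patch): recover the frame count from a
    // NnetOut, assuming a row-major [num_frames, vocab_dim] layout.
    int32 NumNnetOutFrames(const ppspeech::NnetOut& out) {
        if (out.vocab_dim <= 0) return 0;
        return out.logprobs.Dim() / out.vocab_dim;
    }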
@@ -313,10 +313,8 @@ void U2Nnet::ForwardEncoderChunkImpl(
     // call.
     std::vector<paddle::Tensor> inputs = {
         feats, offset, /*required_cache_size, */ att_cache_, cnn_cache_};
-    VLOG(3) << "inputs size: " << inputs.size();
     CHECK(inputs.size() == 4);
     std::vector<paddle::Tensor> outputs = forward_encoder_chunk_(inputs);
-    VLOG(3) << "outputs size: " << outputs.size();
     CHECK(outputs.size() == 3);
 #ifdef USE_GPU
@@ -351,10 +349,12 @@ void U2Nnet::ForwardEncoderChunkImpl(
     // current offset in decoder frame
     // not used in nnet
     offset_ += chunk_out.shape()[1];
+    VLOG(2) << "encoder out chunk size: " << chunk_out.shape()[1]
+            << " total: " << offset_;

     // collects encoder outs.
-    VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size();
     encoder_outs_.push_back(chunk_out);
+    VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size();

 #ifdef TEST_DEBUG
     {

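The two u2_nnet.cc hunks above tie the running offset_ to the encoder output: each forward call advances offset_ by the chunk's time dimension and caches the chunk tensor in encoder_outs_. A minimal sketch of that bookkeeping, assuming the usual [batch, time, dim] tensor layout (not part of the patch):

    // Illustrative sketch (not in the patch): after all chunks have been fed,
    // offset_ should equal the sum of the per-chunk time dimensions.
    int total_encoder_frames = 0;
    for (const paddle::Tensor& chunk : encoder_outs_) {
        total_encoder_frames += chunk.shape()[1];  // dim 1 == chunk time steps
    }
    CHECK(total_encoder_frames == offset_);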
@@ -137,7 +137,9 @@ class U2Nnet : public U2NnetBase {
     // debug
     void FeedEncoderOuts(paddle::Tensor& encoder_out);

-    const std::vector<paddle::Tensor>& EncoderOuts() const {return encoder_outs_; }
+    const std::vector<paddle::Tensor>& EncoderOuts() const {
+        return encoder_outs_;
+    }

   private:
     U2ModelOptions opts_;

@@ -95,29 +95,29 @@ int main(int argc, char* argv[]) {
         // kaldi::kCopyData);
         // }
-        int32 num_chunks = (feature.NumRows() - chunk_size) / chunk_stride + 1;
         int32 frame_idx = 0;
         std::vector<kaldi::Vector<kaldi::BaseFloat>> prob_vec;
         int32 ori_feature_len = feature.NumRows();
+        int32 num_chunks = feature.NumRows() / chunk_stride + 1;
+        LOG(INFO) << "num_chunks: " << num_chunks;

         for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
-            kaldi::Vector<kaldi::BaseFloat> feature_chunk(chunk_size *
-                                                          feat_dim);
-            int32 feature_chunk_size = 0;
+            int32 this_chunk_size = 0;
             if (ori_feature_len > chunk_idx * chunk_stride) {
-                feature_chunk_size = std::min(
+                this_chunk_size = std::min(
                     ori_feature_len - chunk_idx * chunk_stride, chunk_size);
             }
-            if (feature_chunk_size < receptive_field_length) {
+            if (this_chunk_size < receptive_field_length) {
                 LOG(WARNING) << "utt: " << utt << " skip last "
-                             << feature_chunk_size << " frames, expect is "
+                             << this_chunk_size << " frames, expect is "
                              << receptive_field_length;
                 break;
             }
+            kaldi::Vector<kaldi::BaseFloat> feature_chunk(this_chunk_size *
+                                                          feat_dim);

             int32 start = chunk_idx * chunk_stride;
-            for (int row_id = 0; row_id < chunk_size; ++row_id) {
+            for (int row_id = 0; row_id < this_chunk_size; ++row_id) {
                 kaldi::SubVector<kaldi::BaseFloat> feat_row(feature, start);
                 kaldi::SubVector<kaldi::BaseFloat> feature_chunk_row(
                     feature_chunk.Data() + row_id * feat_dim, feat_dim);

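The driver change above is the actual frame-count fix: the old num_chunks = (NumRows() - chunk_size) / chunk_stride + 1 silently dropped any tail shorter than a full chunk, while the new num_chunks = NumRows() / chunk_stride + 1 together with the per-chunk clamp on this_chunk_size feeds the tail as a shorter final chunk, skipping it only when it falls below receptive_field_length. A standalone sketch of the arithmetic with made-up sizes (90 frames, chunk_size 35, chunk_stride 32; not part of the patch):

    #include <algorithm>
    #include <cstdio>

    // Illustrative sketch (not in the patch): old vs. new chunk accounting.
    int main() {
        int ori_feature_len = 90, chunk_size = 35, chunk_stride = 32;
        int old_chunks = (ori_feature_len - chunk_size) / chunk_stride + 1;  // 2
        int new_chunks = ori_feature_len / chunk_stride + 1;                 // 3
        // Old: the last chunk starts at 32 and covers frames [32, 67), so
        // frames 67..89 never reach the nnet.
        // New: a third chunk starts at 64 and is clamped to the remaining frames.
        int last_start = (new_chunks - 1) * chunk_stride;                     // 64
        int tail = std::min(ori_feature_len - last_start, chunk_size);        // 26
        std::printf("old=%d new=%d tail=%d\n", old_chunks, new_chunks, tail);
        return 0;
    }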
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+find speechx -name '*.c' -o -name '*.h' -not -path "*kaldi*" | xargs -I{} clang-format -i {}