[Fix] soundclassifier

pull/3931/head
megemini 10 months ago
parent 67ae7c8dd2
commit 85cbf6853a

@@ -98,6 +98,8 @@ for epoch in range(1, epochs + 1):
         # Need a padding when lengths of waveforms differ in a batch.
         feats = feature_extractor(waveforms)
         feats = paddle.transpose(feats, [0, 2, 1])
+        if feats.dim() == 3:
+            feats = feats.unsqueeze(1)
         logits = model(feats)
         loss = criterion(logits, labels)
         loss.backward()

@@ -498,6 +498,8 @@
 " waveforms, labels = batch\n",
 " feats = feature_extractor(waveforms)\n",
 " feats = paddle.transpose(feats, [0, 2, 1]) # [B, N, T] -> [B, T, N]\n",
+" if feats.dim() == 3:\n",
+" feats = feats.unsqueeze(1)\n",
 " logits = model(feats)\n",
 "\n",
 " loss = criterion(logits, labels)\n",
@@ -541,7 +543,9 @@
 " waveforms, labels = batch\n",
 " feats = feature_extractor(waveforms)\n",
 " feats = paddle.transpose(feats, [0, 2, 1])\n",
-" \n",
+" if feats.dim() == 3:\n",
+" feats = feats.unsqueeze(1)\n",
+"\n",
 " logits = model(feats)\n",
 "\n",
 " preds = paddle.argmax(logits, axis=1)\n",
@@ -576,6 +580,8 @@
 "feats = feature_extractor(paddle.to_tensor(paddle.to_tensor(waveform).unsqueeze(0)))\n",
 "feats = paddle.transpose(feats, [0, 2, 1]) # [B, N, T] -> [B, T, N]\n",
 "print(feats.shape)\n",
+"if feats.dim() == 3:\n",
+" feats = feats.unsqueeze(1)\n",
 "\n",
 "logits = model(feats)\n",
 "probs = nn.functional.softmax(logits, axis=1).numpy()\n",

@@ -109,6 +109,8 @@ if __name__ == "__main__":
         num_samples = 0
         for batch_idx, batch in enumerate(train_loader):
             feats, labels, length = batch  # feats-->(N, length, n_mels)
+            if feats.dim() == 3:
+                feats = feats.unsqueeze(1)
             logits = model(feats)
@@ -170,6 +172,9 @@ if __name__ == "__main__":
         with logger.processing('Evaluation on validation dataset'):
             for batch_idx, batch in enumerate(dev_loader):
                 feats, labels, length = batch
+                if feats.dim() == 3:
+                    feats = feats.unsqueeze(1)
                 logits = model(feats)
                 preds = paddle.argmax(logits, axis=1)

@@ -38,8 +38,9 @@ if __name__ == '__main__':
         model,
         input_spec=[
             paddle.static.InputSpec(
-                shape=[None, None, 64], dtype=paddle.float32)
-        ])
+                shape=[None, 1, None, 64], dtype=paddle.float32)
+        ],
+        full_graph=True)
     # Save in static graph model.
     paddle.jit.save(model, os.path.join(args.output_dir, "inference"))

@@ -62,6 +62,9 @@ if __name__ == '__main__':
     model.eval()
     feat = extract_features(predicting_conf['audio_file'], **feat_conf)
+    if feat.dim() == 3:
+        feat = feat.unsqueeze(1)
     logits = model(feat)
     probs = F.softmax(logits, axis=1).numpy()

@@ -89,6 +89,8 @@ if __name__ == "__main__":
             waveforms
         )  # Need a padding when lengths of waveforms differ in a batch.
         feats = paddle.transpose(feats, [0, 2, 1])  # To [N, length, n_mels]
+        if feats.dim() == 3:
+            feats = feats.unsqueeze(1)
         logits = model(feats)
@@ -150,6 +152,8 @@ if __name__ == "__main__":
             waveforms, labels = batch
             feats = feature_extractor(waveforms)
             feats = paddle.transpose(feats, [0, 2, 1])
+            if feats.dim() == 3:
+                feats = feats.unsqueeze(1)
             logits = model(feats)

@@ -28,7 +28,9 @@ class SoundClassifier(nn.Layer):
     def forward(self, x):
         # x: (batch_size, num_frames, num_melbins) -> (batch_size, 1, num_frames, num_melbins)
-        x = x.unsqueeze(1)
+        if x.dim() == 3:
+            x = x.unsqueeze(1)
         x = self.backbone(x)
         x = self.dropout(x)
         logits = self.fc(x)

Loading…
Cancel
Save