Fix some bugs and complete recog.py

pull/930/head
huangyuxin 3 years ago
parent c0295aa131
commit e4a9328c40

@ -29,6 +29,7 @@ from deepspeech.exps import dynamic_import_tester
from deepspeech.io.reader import LoadInputsAndTargets from deepspeech.io.reader import LoadInputsAndTargets
from deepspeech.models.asr_interface import ASRInterface from deepspeech.models.asr_interface import ASRInterface
from deepspeech.utils.log import Log from deepspeech.utils.log import Log
from deepspeech.models.lm.transformer import TransformerLM
# from espnet.asr.asr_utils import get_model_conf # from espnet.asr.asr_utils import get_model_conf
# from espnet.asr.asr_utils import torch_load # from espnet.asr.asr_utils import torch_load
# from espnet.nets.lm_interface import dynamic_import_lm # from espnet.nets.lm_interface import dynamic_import_lm
@ -83,12 +84,18 @@ def recog_v2(args):
) )
if args.rnnlm: if args.rnnlm:
lm_args = get_model_conf(args.rnnlm, args.rnnlm_conf) lm_path = args.rnnlm
# NOTE: for a compatibility with less than 0.5.0 version models lm = TransformerLM(
lm_model_module = getattr(lm_args, "model_module", "default") n_vocab=5002,
lm_class = dynamic_import_lm(lm_model_module, lm_args.backend) pos_enc=None,
lm = lm_class(len(char_list), lm_args) embed_unit=128,
torch_load(args.rnnlm, lm) att_unit=512,
head=8,
unit=2048,
layer=16,
dropout_rate=0.5, )
model_dict = paddle.load(lm_path)
lm.set_state_dict(model_dict)
lm.eval() lm.eval()
else: else:
lm = None lm = None

@ -23,9 +23,9 @@ import paddle.nn.functional as F
from deepspeech.modules.mask import subsequent_mask from deepspeech.modules.mask import subsequent_mask
from deepspeech.modules.encoder import TransformerEncoder from deepspeech.modules.encoder import TransformerEncoder
from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface
from deepspeech.models.lm_interface import from deepspeech.models.lm_interface import LMInterface
#LMInterface
import logging
class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface): class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
def __init__( def __init__(
self, self,
@ -84,6 +84,8 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
), "Tie Weights: True need embedding and final dimensions to match" ), "Tie Weights: True need embedding and final dimensions to match"
self.decoder.weight = self.embed.weight self.decoder.weight = self.embed.weight
def _target_mask(self, ys_in_pad): def _target_mask(self, ys_in_pad):
ys_mask = ys_in_pad != 0 ys_mask = ys_in_pad != 0
m = subsequent_mask(ys_mask.size(-1)).unsqueeze(0) m = subsequent_mask(ys_mask.size(-1)).unsqueeze(0)
@ -151,7 +153,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
emb, self._target_mask(y), cache=state emb, self._target_mask(y), cache=state
) )
h = self.decoder(h[:, -1]) h = self.decoder(h[:, -1])
logp = h.log_softmax(axis=-1).squeeze(0) logp = F.log_softmax(h).squeeze(0)
return logp, cache return logp, cache
# batch beam search API (see BatchScorerInterface) # batch beam search API (see BatchScorerInterface)
@ -194,7 +196,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
emb, self._target_mask(ys), cache=batch_state emb, self._target_mask(ys), cache=batch_state
) )
h = self.decoder(h[:, -1]) h = self.decoder(h[:, -1])
logp = h.log_softmax(axi=-1) logp = F.log_softmax(h)
# transpose state of [layer, batch] into [batch, layer] # transpose state of [layer, batch] into [batch, layer]
state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)] state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)]
@ -231,14 +233,14 @@ if __name__ == "__main__":
#Test the score #Test the score
input2 = np.array([5]) input2 = np.array([5])
input2 = paddle.to_tensor(input2) input2 = paddle.to_tensor(input2)
state = (None, None, 0) state = None
output, state = tlm.score(input2, state, None) output, state = tlm.score(input2, state, None)
input3 = np.array([10]) input3 = np.array([5,10])
input3 = paddle.to_tensor(input3) input3 = paddle.to_tensor(input3)
output, state = tlm.score(input3, state, None) output, state = tlm.score(input3, state, None)
input4 = np.array([0]) input4 = np.array([5,10,0])
input4 = paddle.to_tensor(input4) input4 = paddle.to_tensor(input4)
output, state = tlm.score(input4, state, None) output, state = tlm.score(input4, state, None)
print("output", output) print("output", output)

@ -399,7 +399,7 @@ class TransformerEncoder(BaseEncoder):
#TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor #TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor
xs, pos_emb, masks = self.embed(xs, masks.astype(xs.dtype), offset=0) xs, pos_emb, masks = self.embed(xs, masks.astype(xs.dtype), offset=0)
else: else:
xs = self.embed(xs) xs , pos_emb, masks= self.embed(xs, masks.astype(xs.dtype), offset=0)
#TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor #TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
masks = masks.astype(paddle.bool) masks = masks.astype(paddle.bool)

Loading…
Cancel
Save