@@ -23,9 +23,9 @@ import paddle.nn.functional as F
 from deepspeech.modules.mask import subsequent_mask
 from deepspeech.modules.encoder import TransformerEncoder
 from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface
-from deepspeech.models.lm_interface import
-#LMInterface
+from deepspeech.models.lm_interface import LMInterface
+import logging
 
 class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
     def __init__(
         self,
@@ -84,6 +84,8 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
             ), "Tie Weights: True need embedding and final dimensions to match"
             self.decoder.weight = self.embed.weight
 
     def _target_mask(self, ys_in_pad):
         ys_mask = ys_in_pad != 0
         m = subsequent_mask(ys_mask.size(-1)).unsqueeze(0)
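For intuition, `_target_mask` above ANDs a padding mask (pad id 0) with a causal `subsequent_mask`. A framework-free sketch of the same computation; `subsequent_mask_np` is an illustrative stand-in, not the `deepspeech.modules.mask` implementation:

```python
import numpy as np

def subsequent_mask_np(size: int) -> np.ndarray:
    # Causal mask: position i may attend to positions j <= i.
    return np.tril(np.ones((size, size), dtype=bool))

ys_in_pad = np.array([[5, 10, 0]])        # batch of one; 0 is the pad id
ys_mask = ys_in_pad != 0                  # padding mask: [[ True  True False]]
m = subsequent_mask_np(ys_in_pad.shape[-1])[None, :, :]  # (1, T, T)
print(ys_mask[:, None, :] & m)            # (batch, T, T) combined target mask
```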
@@ -151,7 +153,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
             emb, self._target_mask(y), cache=state
         )
         h = self.decoder(h[:, -1])
-        logp = h.log_softmax(axis=-1).squeeze(0)
+        logp = F.log_softmax(h).squeeze(0)
         return logp, cache
 
     # batch beam search API (see BatchScorerInterface)
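The removed spelling called `log_softmax` as a tensor method, and its `batch_score` twin in the next hunk even carries an `axi=-1` typo that would fail at runtime. The functional replacement relies on `paddle.nn.functional.log_softmax` defaulting to `axis=-1`, which a quick check confirms:

```python
import paddle
import paddle.nn.functional as F

h = paddle.randn([1, 8])               # (batch, vocab) final-step logits
logp = F.log_softmax(h).squeeze(0)     # log-probs over the last axis
print(float(paddle.exp(logp).sum()))   # ~1.0: the distribution is normalized
```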
@@ -194,7 +196,7 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
             emb, self._target_mask(ys), cache=batch_state
         )
         h = self.decoder(h[:, -1])
-        logp = h.log_softmax(axi=-1)
+        logp = F.log_softmax(h)
 
         # transpose state of [layer, batch] into [batch, layer]
         state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)]
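The transpose noted in the comment is plain list re-indexing from `states[layer][batch]` to `state_list[batch][layer]`; a minimal sketch with dummy caches standing in for the real per-layer tensors:

```python
n_layers, n_batch = 2, 3
# states[i][b]: cache of layer i for hypothesis b (dummy strings here)
states = [[f"layer{i}/hyp{b}" for b in range(n_batch)] for i in range(n_layers)]
# regroup so each hypothesis carries its own per-layer cache
state_list = [[states[i][b] for i in range(n_layers)] for b in range(n_batch)]
print(state_list[0])   # ['layer0/hyp0', 'layer1/hyp0']
```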
@@ -231,14 +233,14 @@ if __name__ == "__main__":
     #Test the score
     input2 = np.array([5])
     input2 = paddle.to_tensor(input2)
-    state = (None, None, 0)
+    state = None
     output, state = tlm.score(input2, state, None)
 
-    input3 = np.array([10])
+    input3 = np.array([5,10])
     input3 = paddle.to_tensor(input3)
     output, state = tlm.score(input3, state, None)
 
-    input4 = np.array([0])
+    input4 = np.array([5,10,0])
     input4 = paddle.to_tensor(input4)
     output, state = tlm.score(input4, state, None)
     print("output", output)
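Taken together, the fixed test drives `score` the way beam search does: the state starts as `None`, the prefix grows by one token per step, and the cache returned by each call is fed back into the next. A hedged end-to-end sketch; the constructor hyperparameters here are assumptions, not taken from this diff:

```python
import numpy as np
import paddle

# hypothetical hyperparameters; the real __main__ block builds the model
# with its own settings
tlm = TransformerLM(n_vocab=5000, embed_unit=128, att_unit=256,
                    head=2, unit=1024, layer=4)
tlm.eval()

state = None
for prefix in ([5], [5, 10], [5, 10, 0]):       # prefix grows one token per step
    y = paddle.to_tensor(np.array(prefix))
    output, state = tlm.score(y, state, None)   # feed the cache back in
print("output", output)                         # next-token log-probs
```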