|
|
|
@ -15,25 +15,23 @@
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
import paddle
|
|
|
|
|
from paddle import nn
|
|
|
|
|
import paddle.nn.functional as F
|
|
|
|
|
|
|
|
|
|
from paddle import nn
|
|
|
|
|
from paddle.fluid.layers import fc
|
|
|
|
|
from paddle.nn import GRU
|
|
|
|
|
from paddle.nn import LayerList
|
|
|
|
|
from paddle.nn import LayerNorm
|
|
|
|
|
from paddle.nn import Linear
|
|
|
|
|
from paddle.nn import LSTM
|
|
|
|
|
from yacs.config import CfgNode
|
|
|
|
|
|
|
|
|
|
from deepspeech.models.ds2_online.conv import ConvStack
|
|
|
|
|
from deepspeech.modules.ctc import CTCDecoder
|
|
|
|
|
from deepspeech.models.ds2_online.rnn import RNNStack
|
|
|
|
|
from deepspeech.modules.ctc import CTCDecoder
|
|
|
|
|
from deepspeech.utils import layer_tools
|
|
|
|
|
from deepspeech.utils.checkpoint import Checkpoint
|
|
|
|
|
from deepspeech.utils.log import Log
|
|
|
|
|
|
|
|
|
|
from paddle.nn import LSTM, GRU, Linear
|
|
|
|
|
from paddle.nn import LayerNorm
|
|
|
|
|
from paddle.nn import LayerList
|
|
|
|
|
|
|
|
|
|
from paddle.fluid.layers import fc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger, namespaced by this module's import path via the
# project's Log wrapper (see deepspeech.utils.log).
logger = Log(__name__).getlog()
|
|
|
|
|
|
|
|
|
|
# Public API of this module.
# NOTE(review): 'DeepSpeech2InferModeOnline' looks like a typo for
# 'DeepSpeech2InferModelOnline' ("Mode" vs "Model") — confirm against the
# actual class definitions before relying on `from ... import *`.
__all__ = ['DeepSpeech2ModelOnline', 'DeepSpeech2InferModeOnline']
|
|
|
|
@ -68,20 +66,39 @@ class CRNNEncoder(nn.Layer):
|
|
|
|
|
layernorm_size = rnn_size
|
|
|
|
|
|
|
|
|
|
if use_gru == True:
|
|
|
|
|
self.rnn.append(GRU(input_size=i_size, hidden_size=rnn_size, num_layers=1, direction = rnn_direction))
|
|
|
|
|
self.rnn.append(
|
|
|
|
|
GRU(input_size=i_size,
|
|
|
|
|
hidden_size=rnn_size,
|
|
|
|
|
num_layers=1,
|
|
|
|
|
direction=rnn_direction))
|
|
|
|
|
self.layernorm_list.append(LayerNorm(layernorm_size))
|
|
|
|
|
for i in range(1, num_rnn_layers):
|
|
|
|
|
self.rnn.append(GRU(input_size=layernorm_size, hidden_size=rnn_size, num_layers=1, direction = rnn_direction))
|
|
|
|
|
self.rnn.append(
|
|
|
|
|
GRU(input_size=layernorm_size,
|
|
|
|
|
hidden_size=rnn_size,
|
|
|
|
|
num_layers=1,
|
|
|
|
|
direction=rnn_direction))
|
|
|
|
|
self.layernorm_list.append(LayerNorm(layernorm_size))
|
|
|
|
|
else:
|
|
|
|
|
self.rnn.append(LSTM(input_size=i_size, hidden_size=rnn_size, num_layers=1, direction = rnn_direction))
|
|
|
|
|
self.rnn.append(
|
|
|
|
|
LSTM(
|
|
|
|
|
input_size=i_size,
|
|
|
|
|
hidden_size=rnn_size,
|
|
|
|
|
num_layers=1,
|
|
|
|
|
direction=rnn_direction))
|
|
|
|
|
self.layernorm_list.append(LayerNorm(layernorm_size))
|
|
|
|
|
for i in range(1, num_rnn_layers):
|
|
|
|
|
self.rnn.append(LSTM(input_size=layernorm_size, hidden_size=rnn_size, num_layers=1, direction = rnn_direction))
|
|
|
|
|
self.rnn.append(
|
|
|
|
|
LSTM(
|
|
|
|
|
input_size=layernorm_size,
|
|
|
|
|
hidden_size=rnn_size,
|
|
|
|
|
num_layers=1,
|
|
|
|
|
direction=rnn_direction))
|
|
|
|
|
self.layernorm_list.append(LayerNorm(layernorm_size))
|
|
|
|
|
fc_input_size = layernorm_size
|
|
|
|
|
for i in range(self.num_fc_layers):
|
|
|
|
|
self.fc_layers_list.append(nn.Linear(fc_input_size, fc_layers_size_list[i]))
|
|
|
|
|
self.fc_layers_list.append(
|
|
|
|
|
nn.Linear(fc_input_size, fc_layers_size_list[i]))
|
|
|
|
|
fc_input_size = fc_layers_size_list[i]
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|