|
|
|
@ -322,7 +322,15 @@ class DeepSpeech2Model(object):
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def adapt_instance(instance):
|
|
|
|
|
padded_audio, text, audio_len = instance
|
|
|
|
|
if len(instance) < 2 or len(instance) > 3:
|
|
|
|
|
raise ValueError("Size of instance should be 2 or 3.")
|
|
|
|
|
padded_audio = instance[0]
|
|
|
|
|
text = instance[1]
|
|
|
|
|
# no padding part
|
|
|
|
|
if len(instance) == 2:
|
|
|
|
|
audio_len = padded_audio.shape[1]
|
|
|
|
|
else:
|
|
|
|
|
audio_len = instance[2]
|
|
|
|
|
adapted_instance = [padded_audio, text]
|
|
|
|
|
# Stride size for conv0 is (3, 2)
|
|
|
|
|
# Stride size for conv1 to convN is (1, 2)
|
|
|
|
|