|
|
|
@ -15,9 +15,7 @@
|
|
|
|
|
Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition
|
|
|
|
|
(https://arxiv.org/pdf/2012.05481.pdf)
|
|
|
|
|
"""
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
from typing import Dict
|
|
|
|
|
from typing import List
|
|
|
|
|
from typing import Optional
|
|
|
|
@ -36,18 +34,14 @@ from deepspeech.modules.decoder import TransformerDecoder
|
|
|
|
|
from deepspeech.modules.encoder import ConformerEncoder
|
|
|
|
|
from deepspeech.modules.encoder import TransformerEncoder
|
|
|
|
|
from deepspeech.modules.loss import LabelSmoothingLoss
|
|
|
|
|
from deepspeech.modules.mask import make_pad_mask
|
|
|
|
|
from deepspeech.modules.mask import mask_finished_preds
|
|
|
|
|
from deepspeech.modules.mask import mask_finished_scores
|
|
|
|
|
from deepspeech.modules.mask import subsequent_mask
|
|
|
|
|
from deepspeech.utils import checkpoint
|
|
|
|
|
from deepspeech.utils import layer_tools
|
|
|
|
|
from deepspeech.utils.ctc_utils import remove_duplicates_and_blank
|
|
|
|
|
from deepspeech.utils.log import Log
|
|
|
|
|
from deepspeech.utils.tensor_utils import add_sos_eos
|
|
|
|
|
from deepspeech.utils.tensor_utils import pad_sequence
|
|
|
|
|
from deepspeech.utils.tensor_utils import th_accuracy
|
|
|
|
|
from deepspeech.utils.utility import log_add
|
|
|
|
|
|
|
|
|
|
# Names exported by a wildcard import (`from <this module> import *`);
# both classes are expected to be defined further down in this module.
__all__ = ["U2STModel", "U2STInferModel"]
|
|
|
|
|
|
|
|
|
|