Support dy2st

pull/2193/head
0x45f 2 years ago
parent e21cceea51
commit 4e7106d9e2

@@ -471,6 +471,165 @@ class U2Tester(U2Trainer):
         infer_model, input_spec = self.load_inferspec()
         assert isinstance(input_spec, list), type(input_spec)
         infer_model.eval()
-        static_model = paddle.jit.to_static(infer_model, input_spec=input_spec)
-        logger.info(f"Export code: {static_model.forward.code}")
-        paddle.jit.save(static_model, self.args.export_path)
+        # static_model = paddle.jit.to_static(infer_model, input_spec=input_spec)
+        # logger.info(f"Export code: {static_model.forward.code}")
+        # paddle.jit.save(static_model, self.args.export_path)
+        # # to check outputs
+        # def flatten(out):
+        #     if isinstance(out, paddle.Tensor):
+        #         return [out]
+        #     flatten_out = []
+        #     for var in out:
+        #         if isinstance(var, (list, tuple)):
+        #             flatten_out.extend(flatten(var))
+        #         else:
+        #             flatten_out.append(var)
+        #     return flatten_out
+        # ######################### infer_model.forward_attention_decoder ########################
+        # a = paddle.full(shape=[10, 8], fill_value=10, dtype='int64')
+        # b = paddle.full(shape=[10], fill_value=8, dtype='int64')
+        # # c = paddle.rand(shape=[1, 20, 512], dtype='float32')
+        # c = paddle.full(shape=[1, 20, 512], fill_value=1, dtype='float32')
+        # out1 = infer_model.forward_attention_decoder(a, b, c)
+        # print(out1)
+        # input_spec = [paddle.static.InputSpec(shape=[None, None], dtype='int64'),
+        #               paddle.static.InputSpec(shape=[None], dtype='int64'),
+        #               paddle.static.InputSpec(shape=[1, None, 512], dtype='float32')]
+        # static_model = paddle.jit.to_static(infer_model.forward_attention_decoder, input_spec=input_spec)
+        # paddle.jit.save(static_model, self.args.export_path)
+        # static_model = paddle.jit.load(self.args.export_path)
+        # out2 = static_model(a, b, c)
+        # # print(out2)
+        # out1 = flatten(out1)
+        # out2 = flatten(out2)
+        # for i in range(len(out1)):
+        #     print(np.equal(out1[i].numpy(), out2[i].numpy()).all())
+        # ######################### infer_model.forward_encoder_chunk ########################
+        # xs = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([80], dtype='int32')
+        # required_cache_size = -16
+        # att_cache = paddle.randn(shape=[12, 8, 80, 128], dtype='float32')
+        # cnn_cache = paddle.randn(shape=[12, 1, 512, 14], dtype='float32')
+        # # out1 = infer_model.forward_encoder_chunk(xs, offset, required_cache_size, att_cache, cnn_cache)
+        # # print(out1)
+        # zero_out1 = infer_model.forward_encoder_chunk(xs, offset, required_cache_size, att_cache=paddle.zeros([0, 0, 0, 0]), cnn_cache=paddle.zeros([0, 0, 0, 0]))
+        # # print(zero_out1)
+        # input_spec = [
+        #     paddle.static.InputSpec(shape=[1, None, 80], dtype='float32'),
+        #     paddle.static.InputSpec(shape=[1], dtype='int32'),
+        #     -16,
+        #     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32'),
+        #     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32')]
+        # static_model = paddle.jit.to_static(infer_model.forward_encoder_chunk, input_spec=input_spec)
+        # paddle.jit.save(static_model, self.args.export_path)
+        # static_model = paddle.jit.load(self.args.export_path)
+        # # out2 = static_model(xs, offset, att_cache, cnn_cache)
+        # # print(out2)
+        # zero_out2 = static_model(xs, offset, paddle.zeros([0, 0, 0, 0]), paddle.zeros([0, 0, 0, 0]))
+        # # out1 = flatten(out1)
+        # # out2 = flatten(out2)
+        # # for i in range(len(out1)):
+        # #     print(np.equal(out1[i].numpy(), out2[i].numpy()).all())
+        # zero_out1 = flatten(zero_out1)
+        # zero_out2 = flatten(zero_out2)
+        # for i in range(len(zero_out1)):
+        #     print(np.equal(zero_out1[i].numpy(), zero_out2[i].numpy()).all())
+        # ######################### infer_model.forward_encoder_chunk zero Tensor online ########################
+        # xs1 = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([0], dtype='int32')
+        # required_cache_size = -16
+        # att_cache = paddle.zeros([0, 0, 0, 0])
+        # cnn_cache = paddle.zeros([0, 0, 0, 0])
+        # xs, att_cache, cnn_cache = infer_model.forward_encoder_chunk(xs1, offset, required_cache_size, att_cache, cnn_cache)
+        # xs2 = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([16], dtype='int32')
+        # out1 = infer_model.forward_encoder_chunk(xs2, offset, required_cache_size, att_cache, cnn_cache)
+        # # print(out1)
+        # input_spec = [
+        #     paddle.static.InputSpec(shape=[1, None, 80], dtype='float32'),
+        #     paddle.static.InputSpec(shape=[1], dtype='int32'),
+        #     -16,
+        #     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32'),
+        #     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32')]
+        # static_model = paddle.jit.to_static(infer_model.forward_encoder_chunk, input_spec=input_spec)
+        # paddle.jit.save(static_model, self.args.export_path)
+        # static_model = paddle.jit.load(self.args.export_path)
+        # offset = paddle.to_tensor([0], dtype='int32')
+        # att_cache = paddle.zeros([0, 0, 0, 0])
+        # cnn_cache = paddle.zeros([0, 0, 0, 0])
+        # xs, att_cache, cnn_cache = static_model(xs1, offset, att_cache, cnn_cache)
+        # xs = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([16], dtype='int32')
+        # out2 = static_model(xs2, offset, att_cache, cnn_cache)
+        # # print(out2)
+        # out1 = flatten(out1)
+        # out2 = flatten(out2)
+        # for i in range(len(out1)):
+        #     print(np.equal(out1[i].numpy(), out2[i].numpy()).all())
+        ###################### save/load combine ########################
+        paddle.jit.save(infer_model, '/workspace/conformer/PaddleSpeech-conformer/conformer/conformer', combine_params=True)
+        # xs1 = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([0], dtype='int32')
+        # required_cache_size = -16
+        # att_cache = paddle.zeros([0, 0, 0, 0])
+        # cnn_cache = paddle.zeros([0, 0, 0, 0])
+        # xs, att_cache, cnn_cache = infer_model.forward_encoder_chunk(xs1, offset, required_cache_size, att_cache, cnn_cache)
+        # xs2 = paddle.rand(shape=[1, 67, 80], dtype='float32')
+        # offset = paddle.to_tensor([16], dtype='int32')
+        # out1 = infer_model.forward_encoder_chunk(xs2, offset, required_cache_size, att_cache, cnn_cache)
+        # # print(out1)
+        # from paddle.jit.layer import Layer
+        # layer = Layer()
+        # layer.load('/workspace/conformer/PaddleSpeech-conformer/conformer/conformer', paddle.CUDAPlace(0))
+        # offset = paddle.to_tensor([0], dtype='int32')
+        # att_cache = paddle.zeros([0, 0, 0, 0])
+        # cnn_cache = paddle.zeros([0, 0, 0, 0])
+        # xs, att_cache, cnn_cache = layer.forward_encoder_chunk(xs1, offset, att_cache, cnn_cache)
+        # offset = paddle.to_tensor([16], dtype='int32')
+        # out2 = layer.forward_encoder_chunk(xs2, offset, att_cache, cnn_cache)
+        # # print(out2)
+        # out1 = flatten(out1)
+        # out2 = flatten(out2)
+        # for i in range(len(out1)):
+        #     print(np.equal(out1[i].numpy(), out2[i].numpy()).all())
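
The commented experiments above condense into one round-trip sanity check: run the dynamic-graph method, load the combined-params export through paddle.jit.layer.Layer (the same API the streaming handler below uses), and compare flattened outputs. A minimal sketch, assuming the export at the hard-coded prefix already exists and infer_model is in scope as in this method:

    import numpy as np
    import paddle
    from paddle.jit.layer import Layer

    # Prefix mirrors the hard-coded path used in this commit.
    EXPORT_PREFIX = '/workspace/conformer/PaddleSpeech-conformer/conformer/conformer'

    def flatten(out):
        # Recursively flatten nested tuples/lists of tensors for comparison.
        if isinstance(out, paddle.Tensor):
            return [out]
        flat = []
        for var in out:
            flat.extend(flatten(var) if isinstance(var, (list, tuple)) else [var])
        return flat

    layer = Layer()
    layer.load(EXPORT_PREFIX, paddle.CUDAPlace(0))

    xs = paddle.rand(shape=[1, 67, 80], dtype='float32')
    offset = paddle.to_tensor([0], dtype='int32')
    zero_cache = paddle.zeros([0, 0, 0, 0])  # empty caches: first chunk

    # The dynamic call still takes required_cache_size; the static program has
    # -16 baked in from its input_spec, so the exported entry point omits it.
    dy_out = infer_model.forward_encoder_chunk(xs, offset, -16, zero_cache, zero_cache)
    st_out = layer.forward_encoder_chunk(xs, offset, zero_cache, zero_cache)
    for a, b in zip(flatten(dy_out), flatten(st_out)):
        # allclose rather than equal: kernels can differ slightly between graphs.
        print(np.allclose(a.numpy(), b.numpy(), atol=1e-5))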

@@ -59,6 +59,20 @@ __all__ = ["U2Model", "U2InferModel"]
 logger = Log(__name__).getlog()

+# input_spec1 = [paddle.static.InputSpec(shape=[None, None], dtype='int64'),
+#                paddle.static.InputSpec(shape=[None], dtype='int64'),
+#                paddle.static.InputSpec(shape=[1, None, 512], dtype='float32')]
+# input_spec2 = [
+#     paddle.static.InputSpec(shape=[1, None, 80], dtype='float32'),
+#     paddle.static.InputSpec(shape=[1], dtype='int32'),
+#     -16,
+#     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32'),
+#     paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32')]
+# input_spec3 = [paddle.static.InputSpec(shape=[1, 1, 1], dtype='int64'),
+#                paddle.static.InputSpec(shape=[1], dtype='int64')]
+
 class U2BaseModel(ASRInterface, nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
@@ -599,7 +613,12 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """
         return self.eos

-    @jit.to_static
+    @jit.to_static(input_spec=[
+        paddle.static.InputSpec(shape=[1, None, 80], dtype='float32'),
+        paddle.static.InputSpec(shape=[1], dtype='int32'),
+        -16,
+        paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32'),
+        paddle.static.InputSpec(shape=[None, None, None, None], dtype='float32')])
     def forward_encoder_chunk(
             self,
             xs: paddle.Tensor,
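
In the decorator form above, InputSpec dims of None mark dynamic axes (batch stays 1, the time axis varies), while the bare Python -16 is captured as a constant rather than a tensor input, which is why the static calls elsewhere in this commit invoke forward_encoder_chunk with only four tensors. A toy sketch of the same pattern on a made-up layer (names and paths are illustrative, not from the commit):

    import paddle
    from paddle import nn
    from paddle.jit import to_static

    class TinyEncoder(nn.Layer):  # hypothetical stand-in for the U2 encoder
        def __init__(self):
            super().__init__()
            self.proj = nn.Linear(80, 80)

        # None marks a dynamic time axis, so one program serves any length.
        @to_static(input_spec=[
            paddle.static.InputSpec(shape=[1, None, 80], dtype='float32')])
        def forward(self, xs):
            return self.proj(xs)

    model = TinyEncoder()
    model.eval()
    paddle.jit.save(model, '/tmp/tiny_encoder')  # illustrative output prefix
    loaded = paddle.jit.load('/tmp/tiny_encoder')
    print(loaded(paddle.rand([1, 33, 80])).shape)  # any T works: [1, 33, 80]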
@@ -655,7 +674,10 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """
         return self.ctc.log_softmax(xs)

-    @jit.to_static
+    @jit.to_static(input_spec=[
+        paddle.static.InputSpec(shape=[None, None], dtype='int64'),
+        paddle.static.InputSpec(shape=[None], dtype='int64'),
+        paddle.static.InputSpec(shape=[1, None, 512], dtype='float32')])
     def forward_attention_decoder(
             self,
             hyps: paddle.Tensor,
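
The three specs above line up with the smoke test commented out in the tester diff: a padded batch of hypothesis token ids (any count × any length, int64), one length per hypothesis, and a single encoder output with feature width 512. Shapes that satisfy them, with layer assumed to be an already-loaded paddle.jit.layer.Layer:

    import paddle

    # Values mirror the commented forward_attention_decoder test above.
    hyps = paddle.full(shape=[10, 8], fill_value=10, dtype='int64')   # 10 hyps, length 8
    hyps_lens = paddle.full(shape=[10], fill_value=8, dtype='int64')  # per-hypothesis lengths
    encoder_out = paddle.full(shape=[1, 20, 512], fill_value=1, dtype='float32')
    out = layer.forward_attention_decoder(hyps, hyps_lens, encoder_out)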
@@ -918,6 +940,9 @@ class U2InferModel(U2Model):
     def __init__(self, configs: dict):
         super().__init__(configs)

+    @jit.to_static(input_spec=[
+        paddle.static.InputSpec(shape=[1, 1, 1], dtype='int64'),
+        paddle.static.InputSpec(shape=[1], dtype='int64')])
     def forward(self,
                 feats,
                 feats_lengths,
@@ -933,9 +958,10 @@ class U2InferModel(U2Model):
         Returns:
             List[List[int]]: best path result
         """
-        return self.ctc_greedy_search(
-            feats,
-            feats_lengths,
-            decoding_chunk_size=decoding_chunk_size,
-            num_decoding_left_chunks=num_decoding_left_chunks,
-            simulate_streaming=simulate_streaming)
+        # return self.ctc_greedy_search(
+        #     feats,
+        #     feats_lengths,
+        #     decoding_chunk_size=decoding_chunk_size,
+        #     num_decoding_left_chunks=num_decoding_left_chunks,
+        #     simulate_streaming=simulate_streaming)
+        return feats, feats_lengths
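
With ctc_greedy_search commented out, forward degenerates to an identity over (feats, feats_lengths); the [1, 1, 1] int64 spec it receives above appears to be a placeholder so the export still has a valid forward entry point while the real work lives in the decorated chunk encoder and attention decoder. A sketch of the resulting behavior (not the commit's own test code):

    import paddle

    feats = paddle.zeros([1, 1, 1], dtype='int64')
    feats_lengths = paddle.to_tensor([1], dtype='int64')
    out_feats, out_lens = infer_model(feats, feats_lengths)  # echoes inputs unchanged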

@@ -80,6 +80,10 @@ class PaddleASRConnectionHanddler:
         self.init_decoder()
         self.reset()

+        from paddle.jit.layer import Layer
+        self.jit_layer = Layer()
+        self.jit_layer.load('/workspace/conformer/PaddleSpeech-conformer/conformer/conformer', paddle.CUDAPlace(1))
+
     def init_decoder(self):
         if "deepspeech2" in self.model_type:
             assert self.continuous_decoding is False, "ds2 model not support endpoint"
@@ -474,9 +478,16 @@ class PaddleASRConnectionHanddler:
             # cur chunk
             chunk_xs = self.cached_feat[:, cur:end, :]
             # forward chunk
-            (y, self.att_cache, self.cnn_cache) = self.model.encoder.forward_chunk(
-                chunk_xs, self.offset, required_cache_size,
-                self.att_cache, self.cnn_cache)
+            # (y, self.att_cache, self.cnn_cache) = self.model.encoder.forward_chunk(
+            #     chunk_xs, self.offset, required_cache_size,
+            #     self.att_cache, self.cnn_cache)
+            (y, self.att_cache, self.cnn_cache) = self.jit_layer.forward_encoder_chunk(
+                chunk_xs,
+                paddle.to_tensor([self.offset], dtype='int32'),
+                self.att_cache,
+                self.cnn_cache)
             outputs.append(y)

         # update the global offset, in decoding frame unit
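
The swap above changes the calling convention in two ways: self.offset (a Python int on the dynamic path) must be wrapped into a one-element int32 tensor, and required_cache_size disappears because the exported program has -16 baked in from its input_spec. A sketch of the chunk-loop state handling, with jit_layer and feature_chunks as assumed stand-ins for the handler's attributes:

    import paddle

    att_cache = paddle.zeros([0, 0, 0, 0])  # zero-size caches before the first chunk
    cnn_cache = paddle.zeros([0, 0, 0, 0])
    offset = 0
    for chunk_xs in feature_chunks:  # each chunk: [1, T, 80] float32 features
        y, att_cache, cnn_cache = jit_layer.forward_encoder_chunk(
            chunk_xs,
            paddle.to_tensor([offset], dtype='int32'),  # offset as int32 tensor
            att_cache,
            cnn_cache)
        offset += y.shape[1]  # advance the offset in decoding-frame units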
