fix the asr online client bug, return None, test=doc

4 years ago · 48fa84bee9
parent babac27a79
commit 48fa84bee9
7 changed files with 40 additions and 18 deletions
--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@ -317,8 +317,6 @@ class BaseEncoder(nn.Layer):
        outputs = []
        offset = 0
        # Feed forward overlap input step by step
        print(f"context: {context}")
        print(f"stride: {stride}")
        for cur in range(0, num_frames - context + 1, stride):
            end = min(cur + decoding_window, num_frames)
            chunk_xs = xs[:, cur:end, :]
--- a/paddlespeech/server/README.md
+++ b/paddlespeech/server/README.md
@ -35,3 +35,16 @@
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```
 ## Online ASR Server
 ### Launch online ASR server
 ```
 paddlespeech_server start --config_file conf/ws_conformer_application.yaml
 ```
 ### Access online asr server
 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
 ```
--- a/paddlespeech/server/README_cn.md
+++ b/paddlespeech/server/README_cn.md
@ -35,3 +35,17 @@
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```
 ## 流式ASR
 ### 启动流式语音识别服务
 ```
 paddlespeech_server start --config_file conf/ws_conformer_application.yaml
 ```
 ### 访问流式语音识别服务
 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input zh.wav
 ```
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@ -277,11 +277,12 @@ class ASRClientExecutor(BaseExecutor):
                lang=lang,
                audio_format=audio_format)
            time_end = time.time()
-            logger.info(res.json())
+            logger.info(res)
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to speech recognition.")
            logger.error(e)
            return False
    @stats_wrapper
@ -299,9 +300,10 @@ class ASRClientExecutor(BaseExecutor):
        logging.info("asr websocket client start")
        handler = ASRAudioHandler(server_ip, port)
        loop = asyncio.get_event_loop()
-        loop.run_until_complete(handler.run(input))
+        res = loop.run_until_complete(handler.run(input))
        logging.info("asr websocket client finished")
        return res['asr_results']
@cli_client_register(
    name='paddlespeech_client.cls', description='visit cls service')
--- a/paddlespeech/server/engine/asr/online/asr_engine.py
+++ b/paddlespeech/server/engine/asr/online/asr_engine.py
@ -473,7 +473,7 @@ class PaddleASRConnectionHanddler:
        ctc_probs = self.model.ctc.log_softmax(ys)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)
-        self.searcher.search(None, ctc_probs, self.cached_feat.place)
+        self.searcher.search(ctc_probs, self.cached_feat.place)
        self.hyps = self.searcher.get_one_best_hyps()
        assert self.cached_feat.shape[0] == 1
@ -823,7 +823,7 @@ class ASRServerExecutor(ASRExecutor):
        ctc_probs = self.model.ctc.log_softmax(
            encoder_out)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)
-        self.searcher.search(xs, ctc_probs, xs.place)
+        self.searcher.search(ctc_probs, xs.place)
        # update the one best result
        self.hyps = self.searcher.get_one_best_hyps()
--- a/paddlespeech/server/engine/asr/online/ctc_search.py
+++ b/paddlespeech/server/engine/asr/online/ctc_search.py
@ -24,19 +24,18 @@ class CTCPrefixBeamSearch:
        """Implement the ctc prefix beam search
        Args:
-            config (_type_): _description_
+            config (yacs.config.CfgNode): _description_
        """
        self.config = config
        self.reset()
-    def search(self, xs, ctc_probs, device, blank_id=0):
+    def search(self, ctc_probs, device, blank_id=0):
        """ctc prefix beam search method decode a chunk feature
        Args:
            xs (paddle.Tensor): feature data
            ctc_probs (paddle.Tensor): the ctc probability of all the tokens
-            encoder_out (paddle.Tensor): _description_
+            device (paddle.fluid.core_avx.Place): the feature host device, such as CUDAPlace(0).
            encoder_mask (_type_): _description_
            blank_id (int, optional): the blank id in the vocab. Defaults to 0.
        Returns:
@ -45,7 +44,6 @@ class CTCPrefixBeamSearch:
        # decode 
        logger.info("start to ctc prefix search")
        # device = xs.place
        batch_size = 1
        beam_size = self.config.beam_size
        maxlen = ctc_probs.shape[0]
--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@ -34,10 +34,9 @@ class ASRAudioHandler:
    def read_wave(self, wavfile_path: str):
        samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
        x_len = len(samples)
        # chunk_stride = 40 * 16  #40ms, sample_rate = 16kHz
        chunk_size = 80 * 16  #80ms, sample_rate = 16kHz
-        if x_len % chunk_size != 0:
+        chunk_size = 85 * 16  #80ms, sample_rate = 16kHz
        if x_len % chunk_size!= 0:
            padding_len_x = chunk_size - x_len % chunk_size
        else:
            padding_len_x = 0
@ -48,7 +47,6 @@ class ASRAudioHandler:
        assert (x_len + padding_len_x) % chunk_size == 0
        num_chunk = (x_len + padding_len_x) / chunk_size
        num_chunk = int(num_chunk)
        for i in range(0, num_chunk):
            start = i * chunk_size
            end = start + chunk_size
@ -82,7 +80,6 @@ class ASRAudioHandler:
                msg = json.loads(msg)
                logging.info("receive msg={}".format(msg))
            result = msg
            # finished 
            audio_info = json.dumps(
                {
@ -98,8 +95,8 @@ class ASRAudioHandler:
            # decode the bytes to str
            msg = json.loads(msg)
-            logging.info("receive msg={}".format(msg))
+            logging.info("final receive msg={}".format(msg))
-
+            result = msg
            return result