diff --git a/demos/streaming_asr_server/websocket_client.py b/demos/streaming_asr_server/websocket_client.py
index 3cadd72a..3451b8d0 100644
--- a/demos/streaming_asr_server/websocket_client.py
+++ b/demos/streaming_asr_server/websocket_client.py
@@ -37,7 +37,7 @@ def main(args):
     if args.wavfile and os.path.exists(args.wavfile):
         logger.info(f"start to process the wavscp: {args.wavfile}")
         result = loop.run_until_complete(handler.run(args.wavfile))
-        # result = result["result"]
+        result = result["result"]
         logger.info(f"asr websocket client finished : {result}")

     # support to process batch audios from wav.scp
diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py
index 8afb0f5c..3111badf 100644
--- a/paddlespeech/cli/vector/infer.py
+++ b/paddlespeech/cli/vector/infer.py
@@ -285,8 +285,10 @@ class VectorExecutor(BaseExecutor):
                 Defaults to None.
             ckpt_path (Optional[os.PathLike], optional): the pretrained model path, which is stored in the disk.
                 Defaults to None.
+            task (str, optional): the model task type
         """
         # stage 0: avoid to init the mode again
+        self.task = task
         if hasattr(self, "model"):
             logger.info("Model has been initialized")
             return
@@ -435,6 +437,7 @@ class VectorExecutor(BaseExecutor):
         if self.sample_rate != 16000 and self.sample_rate != 8000:
             logger.error(
                 "invalid sample rate, please input --sr 8000 or --sr 16000")
+            logger.error(f"The model sample rate: {self.sample_rate}, the external sample rate is: {sample_rate}")
             return False

         if isinstance(audio_file, (str, os.PathLike)):
diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py
index 32f78942..cd1cd51a 100644
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -602,7 +602,11 @@ class VectorClientExecutor(BaseExecutor):
             default=None,
             help='sentence to be process by text server.')
         self.parser.add_argument(
-            '--task', type=str, default="spk", help="The vector service task")
+            '--task',
+            type=str,
+            default="spk",
+            choices=["spk", "score"],
+            help="The vector service task")
         self.parser.add_argument(
             "--enroll", type=str, default=None, help="The enroll audio")
         self.parser.add_argument(
diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py
index 866c2229..2fd8dec6 100644
--- a/paddlespeech/server/engine/vector/python/vector_engine.py
+++ b/paddlespeech/server/engine/vector/python/vector_engine.py
@@ -99,8 +99,8 @@ class PaddleVectorConnectionHandler:
         """extract the audio embedding

         Args:
-            audio (_type_): _description_
-            sample_rate (int, optional): _description_. Defaults to 16000.
+            audio (str): the audio data
+            sample_rate (int, optional): the audio sample rate. Defaults to 16000.
         """
         # we can not reuse the cache io.BytesIO(audio) data,
         # because the soundfile will change the io.BytesIO(audio) to the end
diff --git a/paddlespeech/server/restful/request.py b/paddlespeech/server/restful/request.py
index 4e88280a..b7a32481 100644
--- a/paddlespeech/server/restful/request.py
+++ b/paddlespeech/server/restful/request.py
@@ -115,7 +115,7 @@ class VectorScoreRequest(BaseModel):
     {
         "enroll_audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
         "test_audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
-        "task": "spk",
+        "task": "score",
        "audio_format": "wav",
         "sample_rate": 16000,
     }
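
For reference, below is a minimal client-side sketch of the "score" task that this patch enables, built only from the VectorScoreRequest example fields shown above. The endpoint URL, port, and wav file names are assumptions for illustration and are not part of this patch; adjust them to match the deployed vector server configuration.

    # Sketch: send a speaker-verification score request whose body matches
    # the VectorScoreRequest docstring example. URL/port/file names are
    # assumptions, not defined by this patch.
    import base64
    import json

    import requests


    def read_wav_base64(path: str) -> str:
        # Base64-encode the raw wav bytes, as shown in the example request body.
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf8")


    payload = {
        "enroll_audio": read_wav_base64("enroll.wav"),  # hypothetical file
        "test_audio": read_wav_base64("test.wav"),      # hypothetical file
        "task": "score",
        "audio_format": "wav",
        "sample_rate": 16000,
    }

    # Assumed route and port; the real values depend on the server config.
    url = "http://127.0.0.1:8090/paddlespeech/vector/score"
    response = requests.post(url, data=json.dumps(payload))
    print(response.json())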