diff --git a/paddlespeech/server/README.md b/paddlespeech/server/README.md new file mode 100644 index 00000000..4ce9605d --- /dev/null +++ b/paddlespeech/server/README.md @@ -0,0 +1,33 @@ +# PaddleSpeech Server Command Line + +([简体中文](./README_cn.md)|English) + + The simplest way to use PaddleSpeech Server, including both the server and the client. + + ## PaddleSpeech Server + ### Help + ```bash + paddlespeech_server help + ``` + ### Start the server + First set the service-related configuration parameters, similar to `./conf/application.yaml`. + Then start the service: + ```bash + paddlespeech_server start --config_file ./conf/application.yaml + ``` + + ## PaddleSpeech Client + ### Help + ```bash + paddlespeech_client help + ``` + ### Access speech recognition services + ```bash + paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./tests/16_audio.wav + ``` + + ### Access text to speech services + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "你好,欢迎使用百度飞桨深度学习框架!" --output output.wav + ``` + diff --git a/paddlespeech/server/README_cn.md b/paddlespeech/server/README_cn.md new file mode 100644 index 00000000..2dfd9474 --- /dev/null +++ b/paddlespeech/server/README_cn.md @@ -0,0 +1,32 @@ +# PaddleSpeech Server 命令行工具 + +(简体中文|[English](./README.md)) + +它提供了最简便的方式调用 PaddleSpeech 语音服务,用一行命令就可以轻松启动服务和调用服务。 + + ## 服务端命令行使用 + ### 帮助 + ```bash + paddlespeech_server help + ``` + ### 启动服务 + 首先设置服务相关配置文件,类似于 `./conf/application.yaml`,同时设置服务配置中的语音任务模型相关配置,类似于 `./conf/tts/tts.yaml`。 + 然后启动服务: + ```bash + paddlespeech_server start --config_file ./conf/application.yaml + ``` + + ## 客户端命令行使用 + ### 帮助 + ```bash + paddlespeech_client help + ``` + ### 访问语音识别服务 + ```bash + paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav + ``` + + ### 访问语音合成服务 + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "你好,欢迎使用百度飞桨深度学习框架!" 
--output output.wav + ``` diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index ff8e34fa..0e030da9 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -24,6 +24,7 @@ import numpy as np import requests import soundfile +from ..executor import BaseExecutor from ..util import cli_client_register from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.util import wav2base64 @@ -33,7 +34,7 @@ __all__ = ['TTSClientExecutor', 'ASRClientExecutor'] @cli_client_register( name='paddlespeech_client.tts', description='visit tts service') -class TTSClientExecutor(): +class TTSClientExecutor(BaseExecutor): def __init__(self): super().__init__() self.parser = argparse.ArgumentParser() @@ -42,7 +43,7 @@ class TTSClientExecutor(): self.parser.add_argument( '--port', type=int, default=8090, help='server port') self.parser.add_argument( - '--text', + '--input', type=str, default="你好,欢迎使用语音合成服务", help='A sentence to be synthesized') @@ -60,20 +61,20 @@ class TTSClientExecutor(): self.parser.add_argument( '--output', type=str, - default="./out.wav", + default="./output.wav", help='Synthesized audio file') # Request and response def tts_client(self, args): """ Request and response Args: - text: A sentence to be synthesized + input: A sentence to be synthesized outfile: Synthetic audio file """ url = 'http://' + args.server_ip + ":" + str( args.port) + '/paddlespeech/tts' request = { - "text": args.text, + "text": args.input, "spk_id": args.spk_id, "speed": args.speed, "volume": args.volume, @@ -119,7 +120,7 @@ class TTSClientExecutor(): @cli_client_register( name='paddlespeech_client.asr', description='visit asr service') -class ASRClientExecutor(): +class ASRClientExecutor(BaseExecutor): def __init__(self): super().__init__() self.parser = argparse.ArgumentParser() @@ -128,29 +129,34 @@ class ASRClientExecutor(): 
self.parser.add_argument( '--port', type=int, default=8090, help='server port') self.parser.add_argument( - '--audio_file', + '--input', type=str, default="./paddlespeech/server/tests/16_audio.wav", help='Audio file to be recognized') self.parser.add_argument( '--sample_rate', type=int, default=16000, help='audio sample rate') + self.parser.add_argument( + '--lang', type=str, default="zh_cn", help='language') + self.parser.add_argument( + '--audio_format', type=str, default="wav", help='audio format') def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) url = 'http://' + args.server_ip + ":" + str( args.port) + '/paddlespeech/asr' - audio = wav2base64(args.audio_file) + audio = wav2base64(args.input) data = { "audio": audio, - "audio_format": "wav", + "audio_format": args.audio_format, "sample_rate": args.sample_rate, - "lang": "zh_cn", + "lang": args.lang, } time_start = time.time() try: r = requests.post(url=url, data=json.dumps(data)) # ending Timestamp time_end = time.time() + print(r.json()) print('time cost', time_end - time_start, 's') except: print("Failed to speech recognition.") diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 029ba612..367375fc 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -17,6 +17,7 @@ from typing import List import uvicorn from fastapi import FastAPI +from ..executor import BaseExecutor from ..util import cli_server_register from paddlespeech.server.engine.engine_factory import EngineFactory from paddlespeech.server.restful.api import setup_router @@ -29,8 +30,8 @@ app = FastAPI( @cli_server_register( - name='paddlespeech_server.server', description='Start the service') -class ServerExecutor(): + name='paddlespeech_server.start', description='Start the service') +class ServerExecutor(BaseExecutor): def __init__(self): super().__init__() self.parser = 
argparse.ArgumentParser() diff --git a/paddlespeech/server/executor.py b/paddlespeech/server/executor.py new file mode 100644 index 00000000..192e1f17 --- /dev/null +++ b/paddlespeech/server/executor.py @@ -0,0 +1,38 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +from abc import ABC +from abc import abstractmethod +from typing import List + +class BaseExecutor(ABC): + """ + An abstract executor of paddlespeech server tasks. + """ + + def __init__(self): + self.parser = argparse.ArgumentParser() + + @abstractmethod + def execute(self, argv: List[str]) -> bool: + """ + Command line entry. This method can only be accessed by a command line such as `paddlespeech_client asr`. + + Args: + argv (List[str]): Arguments from command line. + + Returns: + bool: Result of the command execution. `True` for a success and `False` for a failure. + """ + pass