diff --git a/demos/speech_server/16_audio.wav b/demos/speech_server/16_audio.wav new file mode 100644 index 00000000..3cfa5074 Binary files /dev/null and b/demos/speech_server/16_audio.wav differ diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md new file mode 100644 index 00000000..b1d6b71f --- /dev/null +++ b/demos/speech_server/README.md @@ -0,0 +1,217 @@ +([简体中文](./README_cn.md)|English) + +# Speech Server + +## Introduction +This demo is an implementation of starting the voice service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python. + + +## Usage +### 1. Installation +see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +You can choose one way from easy, meduim and hard to install paddlespeech. + +### 2. Prepare config File +The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. They are all under the `conf` folder. + +### 3. Server Usage +- Command Line (Recommended) + + ```bash + # start the service + paddlespeech_server start --config_file ./conf/application.yaml + ``` + + Usage: + + ```bash + paddlespeech_server start --help + ``` + Arguments: + - `config_file`: yaml file of the app, defalut: ./conf/application.yaml + - `log_file`: log file. Default: ./log/paddlespeech.log + + Output: + ```bash + [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384] + INFO: Waiting for application startup. + [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/application.yaml", + log_file="./log/paddlespeech.log") + ``` + + Output: + ```bash + INFO: Started server process [529] + [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529] + INFO: Waiting for application startup. + [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + + +### 4. ASR Client Usage +- Command Line (Recommended) + ``` + paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./16_audio.wav + ``` + + Usage: + + ```bash + paddlespeech_client asr --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Audio file to be recognized. + - `sample_rate`: Audio ampling rate, default: 16000. + - `lang`: Language. Default: "zh_cn". + - `audio_format`: Audio format. Default: "wav". + + Output: + ```bash + [2022-02-23 11:19:45,646] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '广州医生跑北马中断比赛就心跳骤停者'}} + [2022-02-23 11:19:45,646] [ INFO] - time cost 0.659491 s. 
+ ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor + + asrclient_executor = ASRClientExecutor() + asrclient_executor( + input="./16_audio.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + ``` + + Output: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '广州医生跑北马中断比赛就心跳骤停者'}} + time cost 0.802639 s. + + ``` + +### 5. TTS Client Usage +- Command Line (Recommended) + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav + ``` + Usage: + + ```bash + paddlespeech_client tts --help + ``` + Arguments: + - `server_ip`: server ip. Default: 127.0.0.1 + - `port`: server port. Default: 8090 + - `input`(required): Input text to generate. + - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0 + - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0 + - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0 + - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0 + - `output`: Output wave filepath. Default: `output.wav`. + + Output: + ```bash + [2022-02-23 15:20:37,875] [ INFO] - {'description': 'success.'} + [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. + [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. + [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. + [2022-02-23 15:20:37,875] [ INFO] - RTF: 0.096346 + + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor + + ttsclient_executor = TTSClientExecutor() + ttsclient_executor( + input="您好,欢迎使用百度飞桨语音合成服务。", + server_ip="127.0.0.1", + port=8090, + spk_id=0, + speed=1.0, + volume=1.0, + sample_rate=0, + output="./output.wav") + ``` + + Output: + ```bash + {'description': 'success.'} + Save synthesized audio successfully on ./output.wav. + Audio duration: 3.612500 s. + Response time: 0.388317 s. 
+ RTF: 0.107493 + + ``` + + +## Pretrained Models +### ASR model +Here is a list of [ASR pretrained models](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README.md#4pretrained-models) released by PaddleSpeech, both command line and python interfaces are available: + +| Model | Language | Sample Rate +| :--- | :---: | :---: | +| conformer_wenetspeech| zh| 16000 +| transformer_librispeech| en| 16000 + +### TTS model +Here is a list of [TTS pretrained models](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README.md#4-pretrained-models) released by PaddleSpeech, both command line and python interfaces are available: + +- Acoustic model + | Model | Language + | :--- | :---: | + | speedyspeech_csmsc| zh + | fastspeech2_csmsc| zh + | fastspeech2_aishell3| zh + | fastspeech2_ljspeech| en + | fastspeech2_vctk| en + +- Vocoder + | Model | Language + | :--- | :---: | + | pwgan_csmsc| zh + | pwgan_aishell3| zh + | pwgan_ljspeech| en + | pwgan_vctk| en + | mb_melgan_csmsc| zh + +Here is a list of **TTS pretrained static models** released by PaddleSpeech, both command line and python interfaces are available: +- Acoustic model + | Model | Language + | :--- | :---: | + | speedyspeech_csmsc| zh + | fastspeech2_csmsc| zh + +- Vocoder + | Model | Language + | :--- | :---: | + | pwgan_csmsc| zh + | mb_melgan_csmsc| zh + | hifigan_csmsc| zh \ No newline at end of file diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md new file mode 100644 index 00000000..f97a85d3 --- /dev/null +++ b/demos/speech_server/README_cn.md @@ -0,0 +1,216 @@ +([简体中文](./README_cn.md)|English) + +# 语音服务 + +## 介绍 +这个demo是一个启动语音服务和访问服务的实现。 它可以通过使用`paddlespeech_server` 和 `paddlespeech_client`的单个命令或 python 的几行代码来实现。 + + +## 使用方法 +### 1. 安装 +请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). + +你可以从 easy,medium,hard 三中方式中选择一种方式安装 PaddleSpeech。 + +### 2. 准备配置文件 +配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。 + +### 3. 服务端使用方法 +- 命令行 (推荐使用) + + ```bash + # 启动服务 + paddlespeech_server start --config_file ./conf/application.yaml + ``` + + 使用方法: + + ```bash + paddlespeech_server start --help + ``` + 参数: + - `config_file`: 服务的配置文件,默认: ./conf/application.yaml + - `log_file`: log 文件. 默认:./log/paddlespeech.log + + 输出: + ```bash + [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384] + INFO: Waiting for application startup. + [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete. + INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_server import ServerExecutor + + server_executor = ServerExecutor() + server_executor( + config_file="./conf/application.yaml", + log_file="./log/paddlespeech.log") + ``` + + 输出: + ```bash + INFO: Started server process [529] + [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529] + INFO: Waiting for application startup. + [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup. + INFO: Application startup complete. + [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete. 
+ INFO: Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit) + + ``` + +### 4. ASR客户端使用方法 +- 命令行 (推荐使用) + ``` + paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./paddlespeech/server/tests/16_audio.wav + ``` + + 使用帮助: + + ```bash + paddlespeech_client asr --help + ``` + + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 用于识别的音频文件。 + - `sample_rate`: 音频采样率,默认值:16000。 + - `lang`: 模型语言,默认值:zh_cn。 + - `audio_format`: 音频格式,默认值:wav。 + + 输出: + + ```bash + [2022-02-23 11:19:45,646] [ INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '广州医生跑北马中断比赛就心跳骤停者'}} + [2022-02-23 11:19:45,646] [ INFO] - time cost 0.659491 s. + + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor + + asrclient_executor = ASRClientExecutor() + asrclient_executor( + input="./16_audio.wav", + server_ip="127.0.0.1", + port=8090, + sample_rate=16000, + lang="zh_cn", + audio_format="wav") + ``` + + 输出: + ```bash + {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'transcription': '广州医生跑北马中断比赛就心跳骤停者'}} + time cost 0.802639 s. + + ``` + +### 5. TTS客户端使用方法 + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav + ``` + 使用帮助: + + ```bash + paddlespeech_client tts --help + ``` + + 参数: + - `server_ip`: 服务端ip地址,默认: 127.0.0.1。 + - `port`: 服务端口,默认: 8090。 + - `input`(必须输入): 待合成的文本。 + - `spk_id`: 说话人 id,用于多说话人语音合成,默认值: 0。 + - `speed`: 音频速度,该值应设置在 0 到 3 之间。 默认值:1.0 + - `volume`: 音频音量,该值应设置在 0 到 3 之间。 默认值: 1.0 + - `sample_rate`: 采样率,可选 [0, 8000, 16000],默认与模型相同。 默认值:0 + - `output`: 输出音频的路径, 默认值:output.wav。 + + 输出: + ```bash + [2022-02-23 15:20:37,875] [ INFO] - {'description': 'success.'} + [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. + [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. + [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. + [2022-02-23 15:20:37,875] [ INFO] - RTF: 0.096346 + ``` + +- Python API + ```python + from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor + + ttsclient_executor = TTSClientExecutor() + ttsclient_executor( + input="您好,欢迎使用百度飞桨语音合成服务。", + server_ip="127.0.0.1", + port=8090, + spk_id=0, + speed=1.0, + volume=1.0, + sample_rate=0, + output="./output.wav") + ``` + + 输出: + ```bash + {'description': 'success.'} + Save synthesized audio successfully on ./output.wav. + Audio duration: 3.612500 s. + Response time: 0.388317 s. 
+ RTF: 0.107493 + + ``` + +## Pretrained Models +### ASR model +下面是PaddleSpeech发布的[ASR预训练模型](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README.md#4pretrained-models)列表,命令行和python接口均可用: + +| Model | Language | Sample Rate +| :--- | :---: | :---: | +| conformer_wenetspeech| zh| 16000 +| transformer_librispeech| en| 16000 + +### TTS model +下面是PaddleSpeech发布的 [TTS预训练模型](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README.md#4-pretrained-models) 列表,命令行和python接口均可用: + +- Acoustic model + | Model | Language + | :--- | :---: | + | speedyspeech_csmsc| zh + | fastspeech2_csmsc| zh + | fastspeech2_aishell3| zh + | fastspeech2_ljspeech| en + | fastspeech2_vctk| en + +- Vocoder + | Model | Language + | :--- | :---: | + | pwgan_csmsc| zh + | pwgan_aishell3| zh + | pwgan_ljspeech| en + | pwgan_vctk| en + | mb_melgan_csmsc| zh + +下面是PaddleSpeech发布的 **TTS预训练静态模型** 列表,命令行和python接口均可用: +- Acoustic model + | Model | Language + | :--- | :---: | + | speedyspeech_csmsc| zh + | fastspeech2_csmsc| zh + +- Vocoder + | Model | Language + | :--- | :---: | + | pwgan_csmsc| zh + | mb_melgan_csmsc| zh + | hifigan_csmsc| zh \ No newline at end of file diff --git a/demos/speech_server/asr_client.sh b/demos/speech_server/asr_client.sh new file mode 100644 index 00000000..1132263f --- /dev/null +++ b/demos/speech_server/asr_client.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./16_audio.wav diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml new file mode 100644 index 00000000..c8d71f2f --- /dev/null +++ b/demos/speech_server/conf/application.yaml @@ -0,0 +1,17 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################## +# SERVER SETTING # +################################################################## +host: '0.0.0.0' +port: 8090 + +################################################################## +# CONFIG FILE # +################################################################## +# add engine type (Options: asr, tts) and config file here. + +engine_backend: + asr: 'conf/asr/asr.yaml' + tts: 'conf/tts/tts.yaml' + diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml new file mode 100644 index 00000000..4c3b0a67 --- /dev/null +++ b/demos/speech_server/conf/asr/asr.yaml @@ -0,0 +1,7 @@ +model: 'conformer_wenetspeech' +lang: 'zh' +sample_rate: 16000 +cfg_path: +ckpt_path: +decode_method: 'attention_rescoring' +force_yes: False diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml new file mode 100644 index 00000000..cb4829c8 --- /dev/null +++ b/demos/speech_server/conf/tts/tts.yaml @@ -0,0 +1,32 @@ +# This is the parameter configuration file for TTS server. 
+ +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', +# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', +# 'fastspeech2_vctk'] +################################################################## +am: 'fastspeech2_csmsc' +am_config: +am_ckpt: +am_stat: +phones_dict: +tones_dict: +speaker_dict: +spk_id: 0 + +################################################################## +# VOCODER SETTING # +# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', +# 'pwgan_vctk', 'mb_melgan_csmsc'] +################################################################## +voc: 'pwgan_csmsc' +voc_config: +voc_ckpt: +voc_stat: + +################################################################## +# OTHERS # +################################################################## +lang: 'zh' +device: 'gpu:2' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml new file mode 100644 index 00000000..c268c6a3 --- /dev/null +++ b/demos/speech_server/conf/tts/tts_pd.yaml @@ -0,0 +1,41 @@ +# This is the parameter configuration file for TTS server. +# These are the static models that support paddle inference. + +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] +################################################################## +am: 'fastspeech2_csmsc' +am_model: # the pdmodel file of am static model +am_params: # the pdiparams file of am static model +am_sample_rate: 24000 +phones_dict: +tones_dict: +speaker_dict: +spk_id: 0 + +am_predictor_conf: + use_gpu: True + enable_mkldnn: True + switch_ir_optim: True + + +################################################################## +# VOCODER SETTING # +# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] +################################################################## +voc: 'pwgan_csmsc' +voc_model: # the pdmodel file of vocoder static model +voc_params: # the pdiparams file of vocoder static model +voc_sample_rate: 24000 + +voc_predictor_conf: + use_gpu: True + enable_mkldnn: True + switch_ir_optim: True + +################################################################## +# OTHERS # +################################################################## +lang: 'zh' +device: paddle.get_device() diff --git a/demos/speech_server/server.sh b/demos/speech_server/server.sh new file mode 100644 index 00000000..d9367ec0 --- /dev/null +++ b/demos/speech_server/server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +paddlespeech_server start --config_file ./conf/application.yaml \ No newline at end of file diff --git a/demos/speech_server/tts_client.sh b/demos/speech_server/tts_client.sh new file mode 100644 index 00000000..a756dfd3 --- /dev/null +++ b/demos/speech_server/tts_client.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 0e030da9..1b06fe9d 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -26,6 +26,8 @@ import soundfile from ..executor import BaseExecutor from ..util import cli_client_register +from ..util import stats_wrapper +from paddlespeech.cli.log import logger from paddlespeech.server.utils.audio_process import wav2pcm from 
paddlespeech.server.utils.util import wav2base64 @@ -36,8 +38,9 @@ __all__ = ['TTSClientExecutor', 'ASRClientExecutor'] name='paddlespeech_client.tts', description='visit tts service') class TTSClientExecutor(BaseExecutor): def __init__(self): - super().__init__() - self.parser = argparse.ArgumentParser() + super(TTSClientExecutor, self).__init__() + self.parser = argparse.ArgumentParser( + prog='paddlespeech_client.tts', add_help=True) self.parser.add_argument( '--server_ip', type=str, default='127.0.0.1', help='server ip') self.parser.add_argument( @@ -46,17 +49,24 @@ class TTSClientExecutor(BaseExecutor): '--input', type=str, default="你好,欢迎使用语音合成服务", - help='A sentence to be synthesized') + help='A sentence to be synthesized.') self.parser.add_argument( '--spk_id', type=int, default=0, help='Speaker id') self.parser.add_argument( - '--speed', type=float, default=1.0, help='Audio speed') + '--speed', + type=float, + default=1.0, + help='Audio speed, the value should be set between 0 and 3') self.parser.add_argument( - '--volume', type=float, default=1.0, help='Audio volume') + '--volume', + type=float, + default=1.0, + help='Audio volume, the value should be set between 0 and 3') self.parser.add_argument( '--sample_rate', type=int, default=0, + choices=[0, 8000, 16000], help='Sampling rate, the default is the same as the model') self.parser.add_argument( '--output', @@ -64,36 +74,14 @@ class TTSClientExecutor(BaseExecutor): default="./output.wav", help='Synthesized audio file') - # Request and response - def tts_client(self, args): - """ Request and response - Args: - input: A sentence to be synthesized - outfile: Synthetic audio file - """ - url = 'http://' + args.server_ip + ":" + str( - args.port) + '/paddlespeech/tts' - request = { - "text": args.input, - "spk_id": args.spk_id, - "speed": args.speed, - "volume": args.volume, - "sample_rate": args.sample_rate, - "save_path": args.output - } - - response = requests.post(url, json.dumps(request)) - response_dict = response.json() - print(response_dict["message"]) + def postprocess(self, response_dict: dict, outfile: str) -> float: wav_base64 = response_dict["result"]["audio"] - audio_data_byte = base64.b64decode(wav_base64) # from byte samples, sample_rate = soundfile.read( io.BytesIO(audio_data_byte), dtype='float32') # transform audio - outfile = args.output if outfile.endswith(".wav"): soundfile.write(outfile, samples, sample_rate) elif outfile.endswith(".pcm"): @@ -102,18 +90,79 @@ class TTSClientExecutor(BaseExecutor): wav2pcm(temp_wav, outfile, data_type=np.int16) os.system("rm %s" % (temp_wav)) else: - print("The format for saving audio only supports wav or pcm") + logger.error("The format for saving audio only supports wav or pcm") - return len(samples), sample_rate + duration = len(samples) / sample_rate + return duration def execute(self, argv: List[str]) -> bool: args = self.parser.parse_args(argv) - st = time.time() try: - samples_length, sample_rate = self.tts_client(args) + url = 'http://' + args.server_ip + ":" + str( + args.port) + '/paddlespeech/tts' + request = { + "text": args.input, + "spk_id": args.spk_id, + "speed": args.speed, + "volume": args.volume, + "sample_rate": args.sample_rate, + "save_path": args.output + } + st = time.time() + response = requests.post(url, json.dumps(request)) + time_consume = time.time() - st + + response_dict = response.json() + duration = self.postprocess(response_dict, args.output) + + logger.info(response_dict["message"]) + logger.info("Save synthesized audio successfully on %s." 
% + (args.output)) + logger.info("Audio duration: %f s." % (duration)) + logger.info("Response time: %f s." % (time_consume)) + logger.info("RTF: %f " % (time_consume / duration)) + + return True + except: + logger.error("Failed to synthesized audio.") + return False + + @stats_wrapper + def __call__(self, + input: str, + server_ip: str="127.0.0.1", + port: int=8090, + spk_id: int=0, + speed: float=1.0, + volume: float=1.0, + sample_rate: int=0, + output: str="./output.wav"): + """ + Python API to call an executor. + """ + + url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/tts' + request = { + "text": input, + "spk_id": spk_id, + "speed": speed, + "volume": volume, + "sample_rate": sample_rate, + "save_path": output + } + + try: + st = time.time() + response = requests.post(url, json.dumps(request)) time_consume = time.time() - st - print("Save synthesized audio successfully on %s." % (args.output)) - print("Inference time: %f s." % (time_consume)) + response_dict = response.json() + duration = self.postprocess(response_dict, output) + + print(response_dict["message"]) + print("Save synthesized audio successfully on %s." % (output)) + print("Audio duration: %f s." % (duration)) + print("Response time: %f s." % (time_consume)) + print("RTF: %f " % (time_consume / duration)) except: print("Failed to synthesized audio.") @@ -122,8 +171,9 @@ class TTSClientExecutor(BaseExecutor): name='paddlespeech_client.asr', description='visit asr service') class ASRClientExecutor(BaseExecutor): def __init__(self): - super().__init__() - self.parser = argparse.ArgumentParser() + super(ASRClientExecutor, self).__init__() + self.parser = argparse.ArgumentParser( + prog='paddlespeech_client.asr', add_help=True) self.parser.add_argument( '--server_ip', type=str, default='127.0.0.1', help='server ip') self.parser.add_argument( @@ -152,11 +202,43 @@ class ASRClientExecutor(BaseExecutor): "lang": args.lang, } time_start = time.time() + try: + r = requests.post(url=url, data=json.dumps(data)) + # ending Timestamp + time_end = time.time() + logger.info(r.json()) + logger.info("time cost %f s." % (time_end - time_start)) + return True + except: + logger.error("Failed to speech recognition.") + return False + + @stats_wrapper + def __call__(self, + input: str, + server_ip: str="127.0.0.1", + port: int=8090, + sample_rate: int=16000, + lang: str="zh_cn", + audio_format: str="wav"): + """ + Python API to call an executor. + """ + + url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/asr' + audio = wav2base64(input) + data = { + "audio": audio, + "audio_format": audio_format, + "sample_rate": sample_rate, + "lang": lang, + } + time_start = time.time() try: r = requests.post(url=url, data=json.dumps(data)) # ending Timestamp time_end = time.time() print(r.json()) - print('time cost', time_end - time_start, 's') + print("time cost %f s." 
% (time_end - time_start)) except: print("Failed to speech recognition.") diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 367375fc..80e65cb4 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -19,6 +19,7 @@ from fastapi import FastAPI from ..executor import BaseExecutor from ..util import cli_server_register +from ..util import stats_wrapper from paddlespeech.server.engine.engine_factory import EngineFactory from paddlespeech.server.restful.api import setup_router from paddlespeech.server.utils.config import get_config @@ -33,8 +34,9 @@ app = FastAPI( name='paddlespeech_server.start', description='Start the service') class ServerExecutor(BaseExecutor): def __init__(self): - super().__init__() - self.parser = argparse.ArgumentParser() + super(ServerExecutor, self).__init__() + self.parser = argparse.ArgumentParser( + prog='paddlespeech_server.start', add_help=True) self.parser.add_argument( "--config_file", action="store", @@ -77,3 +79,14 @@ class ServerExecutor(BaseExecutor): if self.init(config): uvicorn.run(app, host=config.host, port=config.port, debug=True) + + @stats_wrapper + def __call__(self, + config_file: str="./conf/application.yaml", + log_file: str="./log/paddlespeech.log"): + """ + Python API to call an executor. + """ + config = get_config(config_file) + if self.init(config): + uvicorn.run(app, host=config.host, port=config.port, debug=True) diff --git a/paddlespeech/server/executor.py b/paddlespeech/server/executor.py index 192e1f17..fa2d01a9 100644 --- a/paddlespeech/server/executor.py +++ b/paddlespeech/server/executor.py @@ -16,6 +16,7 @@ from abc import ABC from abc import abstractmethod from typing import List + class BaseExecutor(ABC): """ An abstract executor of paddlespeech server tasks. @@ -36,3 +37,10 @@ class BaseExecutor(ABC): int: Result of the command execution. `True` for a success and `False` for a failure. """ pass + + @abstractmethod + def __call__(self, *arg, **kwargs): + """ + Python API to call an executor. + """ + pass
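
As a usage note on the protocol exercised by the new `__call__` APIs in this patch: both client executors simply POST JSON to `/paddlespeech/asr` or `/paddlespeech/tts` on the configured host and port. The sketch below is a minimal, non-authoritative illustration of hitting those endpoints with `requests` directly, without the CLI. The field names are copied from the request dictionaries in this diff, the host/port are assumed from `conf/application.yaml`, and treating the audio payload as the base64-encoded bytes of the wav file is an assumption about what `wav2base64` produces.

```python
# Minimal sketch of calling the REST endpoints directly (not the CLI).
# Field names mirror the request dicts in paddlespeech_client.py above;
# host/port and the base64 encoding of the wav are assumptions.
import base64
import io
import json

import requests
import soundfile

SERVER = "http://127.0.0.1:8090"  # assumed from conf/application.yaml

# --- ASR: send a base64-encoded wav, read back the transcription ---
with open("./16_audio.wav", "rb") as f:
    # assumed equivalent of wav2base64(): base64 of the raw file bytes
    audio_b64 = base64.b64encode(f.read()).decode("utf8")

asr_payload = {
    "audio": audio_b64,
    "audio_format": "wav",
    "sample_rate": 16000,
    "lang": "zh_cn",
}
asr_resp = requests.post(SERVER + "/paddlespeech/asr",
                         data=json.dumps(asr_payload)).json()
print(asr_resp["result"]["transcription"])

# --- TTS: send text, decode the base64 wav returned in the response ---
tts_payload = {
    "text": "您好,欢迎使用百度飞桨语音合成服务。",
    "spk_id": 0,
    "speed": 1.0,
    "volume": 1.0,
    "sample_rate": 0,
    "save_path": "./output.wav",
}
tts_resp = requests.post(SERVER + "/paddlespeech/tts",
                         data=json.dumps(tts_payload)).json()
wav_bytes = base64.b64decode(tts_resp["result"]["audio"])
samples, sr = soundfile.read(io.BytesIO(wav_bytes), dtype="float32")
soundfile.write("./output.wav", samples, sr)
```

The response-time and RTF bookkeeping shown in the README output is omitted here; the `TTSClientExecutor.execute` and `__call__` methods in this patch show how those figures are computed from the request latency and the decoded audio duration.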