From 6bd011d7ce55bed0baf041e170122ff35ae0ddc1 Mon Sep 17 00:00:00 2001 From: WilliamZhang06 <97937340+WilliamZhang06@users.noreply.github.com> Date: Tue, 25 Jan 2022 10:12:10 +0800 Subject: [PATCH 1/4] added engine framework (#1383) --- speechserving/setup.py | 13 ++++ speechserving/speechserving/__init__.py | 13 ++++ speechserving/speechserving/bin/main.py | 16 +++-- .../speechserving/bin/paddlespeech-client.py | 16 +++-- .../speechserving/bin/paddlespeech-server.py | 16 +++-- .../speechserving/conf/application.yaml | 8 +++ .../speechserving/engine/__init__.py | 13 ++++ .../engine/asr/python/asr_engine.py | 17 +++-- .../speechserving/engine/base_engine.py | 1 + speechserving/speechserving/main.py | 42 +++++++++--- .../speechserving/restful/__init__.py | 13 ++++ speechserving/speechserving/restful/api.py | 20 ++---- .../speechserving/restful/asr_api.py | 63 ++++++++++++++++++ .../speechserving/restful/request.py | 10 +-- .../speechserving/restful/response.py | 7 +- .../{controller/api.py => restful/tts_api.py} | 37 +++-------- speechserving/speechserving/utils/__init__.py | 0 speechserving/speechserving/utils/errors.py | 0 speechserving/speechserving/utils/log.py | 59 +++++++++++++++++ speechserving/tests/http_client.py | 66 +++++++++++++++++++ 20 files changed, 345 insertions(+), 85 deletions(-) create mode 100644 speechserving/speechserving/restful/asr_api.py rename speechserving/speechserving/{controller/api.py => restful/tts_api.py} (58%) create mode 100644 speechserving/speechserving/utils/__init__.py create mode 100644 speechserving/speechserving/utils/errors.py create mode 100644 speechserving/speechserving/utils/log.py create mode 100644 speechserving/tests/http_client.py diff --git a/speechserving/setup.py b/speechserving/setup.py index e69de29b..97043fd7 100644 --- a/speechserving/setup.py +++ b/speechserving/setup.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/speechserving/speechserving/__init__.py b/speechserving/speechserving/__init__.py index e69de29b..97043fd7 100644 --- a/speechserving/speechserving/__init__.py +++ b/speechserving/speechserving/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/speechserving/speechserving/bin/main.py b/speechserving/speechserving/bin/main.py index ffc2228f..c5167dd4 100644 --- a/speechserving/speechserving/bin/main.py +++ b/speechserving/speechserving/bin/main.py @@ -14,7 +14,6 @@ import argparse - def init(args): """ 系统初始化 """ @@ -27,13 +26,18 @@ def main(args): app.run(host='0.0.0.0', port=conf.port) - if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--config_file", action="store", - help="yaml file of the app", default="./conf/application.yaml") - parser.add_argument("--log_file", action="store", - help="log file", default="./log/paddlespeech.log") + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default="./conf/application.yaml") + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") args = parser.parse_args() main(args) diff --git a/speechserving/speechserving/bin/paddlespeech-client.py b/speechserving/speechserving/bin/paddlespeech-client.py index ffc2228f..c5167dd4 100644 --- a/speechserving/speechserving/bin/paddlespeech-client.py +++ b/speechserving/speechserving/bin/paddlespeech-client.py @@ -14,7 +14,6 @@ import argparse - def init(args): """ 系统初始化 """ @@ -27,13 +26,18 @@ def main(args): app.run(host='0.0.0.0', port=conf.port) - if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--config_file", action="store", - help="yaml file of the app", default="./conf/application.yaml") - parser.add_argument("--log_file", action="store", - help="log file", default="./log/paddlespeech.log") + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default="./conf/application.yaml") + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") args = parser.parse_args() main(args) diff --git a/speechserving/speechserving/bin/paddlespeech-server.py 
b/speechserving/speechserving/bin/paddlespeech-server.py index ffc2228f..c5167dd4 100644 --- a/speechserving/speechserving/bin/paddlespeech-server.py +++ b/speechserving/speechserving/bin/paddlespeech-server.py @@ -14,7 +14,6 @@ import argparse - def init(args): """ 系统初始化 """ @@ -27,13 +26,18 @@ def main(args): app.run(host='0.0.0.0', port=conf.port) - if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--config_file", action="store", - help="yaml file of the app", default="./conf/application.yaml") - parser.add_argument("--log_file", action="store", - help="log file", default="./log/paddlespeech.log") + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default="./conf/application.yaml") + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") args = parser.parse_args() main(args) diff --git a/speechserving/speechserving/conf/application.yaml b/speechserving/speechserving/conf/application.yaml index e69de29b..358a76dc 100644 --- a/speechserving/speechserving/conf/application.yaml +++ b/speechserving/speechserving/conf/application.yaml @@ -0,0 +1,8 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################## +# SERVER SETTING # +################################################################## +host: '0.0.0.0' +port: 8090 + diff --git a/speechserving/speechserving/engine/__init__.py b/speechserving/speechserving/engine/__init__.py index e69de29b..97043fd7 100644 --- a/speechserving/speechserving/engine/__init__.py +++ b/speechserving/speechserving/engine/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/speechserving/speechserving/engine/asr/python/asr_engine.py b/speechserving/speechserving/engine/asr/python/asr_engine.py index 1ea3cd2a..6bd8b616 100644 --- a/speechserving/speechserving/engine/asr/python/asr_engine.py +++ b/speechserving/speechserving/engine/asr/python/asr_engine.py @@ -11,15 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from engine import BaseEngine +from engine.base_engine import BaseEngine + +from utils.log import logger __all__ = ['ASREngine'] -class ASREngine(BaseEngine): - def __init__(self, name): +class ASREngine(BaseEngine): + def __init__(self, name=None): super(ASREngine, self).__init__() - + self.executor = name + self.input = None + self.output = None def init(self): pass @@ -28,8 +32,8 @@ class ASREngine(BaseEngine): pass def run(self): - pass - + logger.info("start run asr engine") + return "hello world" if __name__ == "__main__": @@ -39,4 +43,3 @@ if __name__ == "__main__": print(class1 is class2) print(id(class1)) print(id(class2)) - diff --git a/speechserving/speechserving/engine/base_engine.py b/speechserving/speechserving/engine/base_engine.py index 1f9f6fa0..36048dcc 100644 --- a/speechserving/speechserving/engine/base_engine.py +++ b/speechserving/speechserving/engine/base_engine.py @@ -18,6 +18,7 @@ from typing import Union from pattern_singleton import Singleton + class BaseEngine(metaclass=Singleton): """ An base engine class diff --git 
a/speechserving/speechserving/main.py b/speechserving/speechserving/main.py index 91046984..8374b291 100644 --- a/speechserving/speechserving/main.py +++ b/speechserving/speechserving/main.py @@ -13,31 +13,55 @@ # limitations under the License. import argparse -import asr_api as api_run -import tts_api as api_run +import uvicorn +import yaml +from engine.asr.python.asr_engine import ASREngine +from fastapi import FastAPI +from restful.api import router as api_router +from utils.log import logger + +app = FastAPI( + title="PaddleSpeech Serving API", description="Api", version="0.0.1") def init(args): """ 系统初始化 """ + app.include_router(api_router) + + # engine single + ASR_ENGINE = ASREngine("asr") + + # todo others + + return True def main(args): """主程序入口""" - if init(args): - api_run.run() - app.run(host='0.0.0.0', port=conf.port) + #TODO configuration + from yacs.config import CfgNode + with open(args.config_file, 'rt') as f: + config = CfgNode(yaml.safe_load(f)) + if init(args): + uvicorn.run(app, host=config.host, port=config.port, debug=True) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--config_file", action="store", - help="yaml file of the app", default="./conf/application.yaml") - parser.add_argument("--log_file", action="store", - help="log file", default="./log/paddlespeech.log") + parser.add_argument( + "--config_file", + action="store", + help="yaml file of the app", + default="./conf/application.yaml") + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") args = parser.parse_args() main(args) diff --git a/speechserving/speechserving/restful/__init__.py b/speechserving/speechserving/restful/__init__.py index e69de29b..97043fd7 100644 --- a/speechserving/speechserving/restful/__init__.py +++ b/speechserving/speechserving/restful/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/speechserving/speechserving/restful/api.py b/speechserving/speechserving/restful/api.py index 6324fac2..dc309fab 100644 --- a/speechserving/speechserving/restful/api.py +++ b/speechserving/speechserving/restful/api.py @@ -13,19 +13,9 @@ # limitations under the License. from fastapi import APIRouter -router = APIRouter() - - -router.include_router(auth_router) -router.include_router(user_router) -router.include_router(profile_router) -router.include_router(comment_router) -router.include_router(article_router) -router.include_router(tag_router) - +from .asr_api import router as asr_router +from .tts_api import router as tts_router - - -def init_app(app): - - app.include_router(router) +router = APIRouter() +router.include_router(asr_router) +router.include_router(tts_router) diff --git a/speechserving/speechserving/restful/asr_api.py b/speechserving/speechserving/restful/asr_api.py new file mode 100644 index 00000000..eb335e09 --- /dev/null +++ b/speechserving/speechserving/restful/asr_api.py @@ -0,0 +1,63 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from fastapi import APIRouter +import base64 + + +from engine.asr.python.asr_engine import ASREngine +from .response import ASRResponse +from .request import ASRRequest + +router = APIRouter() + + +@router.get('/paddlespeech/asr/help') +def help(): + """help + + Returns: + json: [description] + """ + return {'hello': 'world'} + + +@router.post("/paddlespeech/asr", response_model=ASRResponse) +def asr(request_body: ASRRequest): + """asr api + + Args: + request_body (ASRRequest): [description] + + Returns: + json: [description] + """ + # single + asr_engine = ASREngine() + + asr_engine.init() + asr_results = asr_engine.run() + asr_engine.postprocess() + + json_body = { + "success": True, + "code": 0, + "message": { + "description": "success" + }, + "result": { + "transcription": asr_results + } + } + + return json_body diff --git a/speechserving/speechserving/restful/request.py b/speechserving/speechserving/restful/request.py index 4721decd..df47038f 100644 --- a/speechserving/speechserving/restful/request.py +++ b/speechserving/speechserving/restful/request.py @@ -11,14 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional from typing import List +from typing import Optional from pydantic import BaseModel - __all__ = ['ASRRequest, TTSRequest'] + #****************************************************************************************/ #************************************ ASR request ***************************************/ #****************************************************************************************/ @@ -29,8 +29,8 @@ class ASRRequest(BaseModel): "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...", "audio_format": "wav", "sample_rate": 16000, - "lang ": "zh_cn", - "ptt ":false + "lang": "zh_cn", + "ptt":false } """ audio: str @@ -53,4 +53,4 @@ class TTSRequest(BaseModel): "lang ": "zh_cn", "ptt ":false } - """ \ No newline at end of file + """ diff --git a/speechserving/speechserving/restful/response.py b/speechserving/speechserving/restful/response.py index fdb07a84..de61c99c 100644 --- a/speechserving/speechserving/restful/response.py +++ b/speechserving/speechserving/restful/response.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional from typing import List +from typing import Optional from pydantic import BaseModel @@ -22,12 +22,14 @@ __all__ = ['ASRResponse'] class Message(BaseModel): description: str + #****************************************************************************************/ #************************************ ASR response **************************************/ #****************************************************************************************/ class AsrResult(BaseModel): transcription: str + class ASRResponse(BaseModel): """ response example @@ -36,7 +38,7 @@ class ASRResponse(BaseModel): "code": 0, "message": { "description": "success" - } + }, "result": { "transcription": "你好,飞桨" } @@ -47,6 +49,7 @@ class ASRResponse(BaseModel): message: Message result: AsrResult + #****************************************************************************************/ #************************************ TTS response **************************************/ #****************************************************************************************/ diff --git a/speechserving/speechserving/controller/api.py b/speechserving/speechserving/restful/tts_api.py similarity index 58% rename from speechserving/speechserving/controller/api.py rename to speechserving/speechserving/restful/tts_api.py index 188dba30..05121718 100644 --- a/speechserving/speechserving/controller/api.py +++ b/speechserving/speechserving/restful/tts_api.py @@ -13,38 +13,17 @@ # limitations under the License. 
from fastapi import APIRouter -router = APIRouter() - - -router.include_router(auth_router) -router.include_router(user_router) -router.include_router(profile_router) -router.include_router(comment_router) -router.include_router(article_router) -router.include_router(tag_router) - - - - -def init_app(app): - - asr,tts - +router = APIRouter() - - - if asr - backend - dyload(asr) - asr.register_router(router) - if tts - backend - dyload(asr) +@router.get('/paddlespeech/tts/help') +def help(): + """help + Returns: + json: [description] + """ + return {'hello': 'world'} - asr.register_router(router) - app.include_router(router) - diff --git a/speechserving/speechserving/utils/__init__.py b/speechserving/speechserving/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/speechserving/speechserving/utils/errors.py b/speechserving/speechserving/utils/errors.py new file mode 100644 index 00000000..e69de29b diff --git a/speechserving/speechserving/utils/log.py b/speechserving/speechserving/utils/log.py new file mode 100644 index 00000000..8644064c --- /dev/null +++ b/speechserving/speechserving/utils/log.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import functools +import logging + +__all__ = [ + 'logger', +] + + +class Logger(object): + def __init__(self, name: str=None): + name = 'PaddleSpeech' if not name else name + self.logger = logging.getLogger(name) + + log_config = { + 'DEBUG': 10, + 'INFO': 20, + 'TRAIN': 21, + 'EVAL': 22, + 'WARNING': 30, + 'ERROR': 40, + 'CRITICAL': 50, + 'EXCEPTION': 100, + } + for key, level in log_config.items(): + logging.addLevelName(level, key) + if key == 'EXCEPTION': + self.__dict__[key.lower()] = self.logger.exception + else: + self.__dict__[key.lower()] = functools.partial(self.__call__, + level) + + self.format = logging.Formatter( + fmt='[%(asctime)-15s] [%(levelname)8s] - %(message)s') + + self.handler = logging.StreamHandler() + self.handler.setFormatter(self.format) + + self.logger.addHandler(self.handler) + self.logger.setLevel(logging.DEBUG) + self.logger.propagate = False + + def __call__(self, log_level: str, msg: str): + self.logger.log(log_level, msg) + + +logger = Logger() diff --git a/speechserving/tests/http_client.py b/speechserving/tests/http_client.py new file mode 100644 index 00000000..3787d764 --- /dev/null +++ b/speechserving/tests/http_client.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the +import requests +import json +import time +import base64 + +import argparse + +def readwav2base64(wav_file): + """ + read wave file and covert to base64 string + """ + with open(wav_file, 'rb') as f: + base64_bytes = base64.b64encode(f.read()) + base64_string = base64_bytes.decode('utf-8') + return base64_string + + +def main(args): + """ + main func + """ + url = "http://127.0.0.1:8090/paddlespeech/asr" + + # start Timestamp + time_start=time.time() + + # test_audio_dir = "test_data/16_audio.wav" + # audio = readwav2base64(test_audio_dir) + + data = { + "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf", + "audio_format": "wav", + "sample_rate": 16000, + "lang": "zh_cn", + } + + r = requests.post(url=url, data=json.dumps(data)) + + # ending Timestamp + time_end=time.time() + print('time cost',time_end - time_start, 's') + + print(r.json()) + + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model_type", action="store", + help="model type: u2, dp2", default="dp2") + args = parser.parse_args() + + main(args) From 777a0262775b2a6d7c1914746838b294c43ab38d Mon Sep 17 00:00:00 2001 From: lym0302 Date: Tue, 25 Jan 2022 20:32:36 +0800 Subject: [PATCH 2/4] add tts server, test=tts --- speechserving/speechserving/conf/tts/tts.yaml | 38 +++++ .../engine/tts/python/tts_engine.py | 143 ++++++++++++++++++ speechserving/speechserving/main.py | 42 +++-- speechserving/speechserving/restful/api.py | 20 +-- .../speechserving/restful/request.py | 30 ++-- .../speechserving/restful/response.py | 42 ++++- 6 files changed, 279 insertions(+), 36 deletions(-) create mode 100644 speechserving/speechserving/conf/tts/tts.yaml create mode 100644 speechserving/speechserving/engine/tts/python/tts_engine.py diff --git a/speechserving/speechserving/conf/tts/tts.yaml b/speechserving/speechserving/conf/tts/tts.yaml new file mode 100644 index 00000000..8e08d51c --- /dev/null +++ 
b/speechserving/speechserving/conf/tts/tts.yaml @@ -0,0 +1,38 @@ +# This is the parameter configuration file for TTS server. + +################################################################## +# TTS SERVER SETTING # +################################################################## +host: '0.0.0.0' +port: 8692 + +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', +# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', +# 'fastspeech2_vctk'] +################################################################## +am: 'fastspeech2_csmsc' +am_config: +am_ckpt: +am_stat: +phones_dict: +tones_dict: +speaker_dict: +spk_id: 0 + +################################################################## +# VOCODER SETTING # +# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', +# 'pwgan_vctk', 'mb_melgan_csmsc'] +################################################################## +voc: 'pwgan_csmsc' +voc_config: +voc_ckpt: +voc_stat: + +################################################################## +# OTHERS # +################################################################## +lang: 'zh' +device: paddle.get_device() \ No newline at end of file diff --git a/speechserving/speechserving/engine/tts/python/tts_engine.py b/speechserving/speechserving/engine/tts/python/tts_engine.py new file mode 100644 index 00000000..d790aa31 --- /dev/null +++ b/speechserving/speechserving/engine/tts/python/tts_engine.py @@ -0,0 +1,143 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import base64 + +import librosa +import numpy as np +import soundfile as sf +import yaml +from engine.base_engine import BaseEngine + +from paddlespeech.cli.log import logger +from paddlespeech.cli.tts.infer import TTSExecutor + +__all__ = ['TTSEngine'] + + +class TTSServerExecutor(TTSExecutor): + def __init__(self): + super().__init__() + + self.parser = argparse.ArgumentParser( + prog='paddlespeech.tts', add_help=True) + self.parser.add_argument( + '--conf', + type=str, + default='./conf/tts/tts.yaml', + help='Configuration parameters.') + + +class TTSEngine(BaseEngine): + """TTS server engine + + Args: + metaclass: Defaults to Singleton. 
+ """ + + def __init__(self, name=None): + """Initialize TTS server engine + """ + super(TTSEngine, self).__init__() + self.executor = TTSServerExecutor() + + config_path = self.executor.parser.parse_args().conf + with open(config_path, 'rt') as f: + self.conf_dict = yaml.safe_load(f) + + self.executor._init_from_path( + am=self.conf_dict["am"], + am_config=self.conf_dict["am_config"], + am_ckpt=self.conf_dict["am_ckpt"], + am_stat=self.conf_dict["am_stat"], + phones_dict=self.conf_dict["phones_dict"], + tones_dict=self.conf_dict["tones_dict"], + speaker_dict=self.conf_dict["speaker_dict"], + voc=self.conf_dict["voc"], + voc_config=self.conf_dict["voc_config"], + voc_ckpt=self.conf_dict["voc_ckpt"], + voc_stat=self.conf_dict["voc_stat"], + lang=self.conf_dict["lang"]) + + logger.info("Initialize TTS server engine successfully.") + + def postprocess(self, + wav, + original_fs: int, + target_fs: int=16000, + volume: float=1.0, + speed: float=1.0, + audio_path: str=None, + audio_format: str="wav"): + """Post-processing operations, including speech, volume, sample rate, save audio file + + Args: + wav (numpy(float)): Synthesized audio sample points + original_fs (int): original audio sample rate + target_fs (int): target audio sample rate + volume (float): target volume + speed (float): target speed + """ + + # transform sample_rate + if target_fs == 0 or target_fs > original_fs: + target_fs = original_fs + wav_tar_fs = wav + else: + wav_tar_fs = librosa.resample( + np.squeeze(wav), original_fs, target_fs) + + # transform volume + wav_vol = wav_tar_fs * volume + + # transform speed + # TODO + target_wav = wav_vol.reshape(-1, 1) + + # save audio + if audio_path is not None: + sf.write(audio_path, target_wav, target_fs) + logger.info('Wave file has been generated: {}'.format(audio_path)) + + # wav to base64 + base64_bytes = base64.b64encode(target_wav) + base64_string = base64_bytes.decode('utf-8') + wav_base64 = base64_string + + return target_fs, wav_base64 + + def 
run(self, + sentence: str, + spk_id: int=0, + speed: float=1.0, + volume: float=1.0, + sample_rate: int=0, + save_path: str=None, + audio_format: str="wav"): + + lang = self.conf_dict["lang"] + + self.executor.infer( + text=sentence, lang=lang, am=self.conf_dict["am"], spk_id=spk_id) + + target_sample_rate, wav_base64 = self.postprocess( + wav=self.executor._outputs['wav'].numpy(), + original_fs=self.executor.am_config.fs, + target_fs=sample_rate, + volume=volume, + speed=speed, + audio_path=save_path, + audio_format=audio_format) + + return lang, target_sample_rate, wav_base64 diff --git a/speechserving/speechserving/main.py b/speechserving/speechserving/main.py index 91046984..864c543d 100644 --- a/speechserving/speechserving/main.py +++ b/speechserving/speechserving/main.py @@ -13,31 +13,55 @@ # limitations under the License. import argparse -import asr_api as api_run -import tts_api as api_run +import uvicorn +import yaml +from engine.tts.python.tts_engine import TTSEngine +from fastapi import FastAPI +from restful.api import router as api_router +from paddlespeech.cli.log import logger + +app = FastAPI( + title="PaddleSpeech Serving API", description="Api", version="0.0.1") def init(args): """ 系统初始化 """ + app.include_router(api_router) + + # engine single + TTS_ENGINE = TTSEngine() + + # todo others + + return True def main(args): """主程序入口""" - if init(args): - api_run.run() - app.run(host='0.0.0.0', port=conf.port) + #TODO configuration + from yacs.config import CfgNode + with open(args.config_file, 'rt') as f: + config = CfgNode(yaml.safe_load(f)) + if init(args): + uvicorn.run(app, host=config.host, port=config.port, debug=True) if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--config_file", action="store", - help="yaml file of the app", default="./conf/application.yaml") - parser.add_argument("--log_file", action="store", - help="log file", default="./log/paddlespeech.log") + parser.add_argument( + "--config_file", + 
action="store", + help="yaml file of the app", + default="./server.yaml") + parser.add_argument( + "--log_file", + action="store", + help="log file", + default="./log/paddlespeech.log") args = parser.parse_args() main(args) diff --git a/speechserving/speechserving/restful/api.py b/speechserving/speechserving/restful/api.py index 6324fac2..c6250ce7 100644 --- a/speechserving/speechserving/restful/api.py +++ b/speechserving/speechserving/restful/api.py @@ -13,19 +13,9 @@ # limitations under the License. from fastapi import APIRouter -router = APIRouter() - - -router.include_router(auth_router) -router.include_router(user_router) -router.include_router(profile_router) -router.include_router(comment_router) -router.include_router(article_router) -router.include_router(tag_router) - +from .tts_api import router as tts_router +#from .asr_api import router as asr_router - - -def init_app(app): - - app.include_router(router) +router = APIRouter() +#router.include_router(asr_router) +router.include_router(tts_router) diff --git a/speechserving/speechserving/restful/request.py b/speechserving/speechserving/restful/request.py index 4721decd..f1fa4bcb 100644 --- a/speechserving/speechserving/restful/request.py +++ b/speechserving/speechserving/restful/request.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional from typing import List +from typing import Optional from pydantic import BaseModel +__all__ = ['ASRRequest', 'TTSRequest'] -__all__ = ['ASRRequest, TTSRequest'] #****************************************************************************************/ #************************************ ASR request ***************************************/ @@ -44,13 +44,25 @@ class ASRRequest(BaseModel): #************************************ TTS request ***************************************/ #****************************************************************************************/ class TTSRequest(BaseModel): - """ + """TTS request + request body example { - "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...", - "audio_format": "wav", - "sample_rate": 16000, - "lang ": "zh_cn", - "ptt ":false + "text": "你好,欢迎使用百度飞桨语音合成服务。", + "spk_id": 0, + "speed": 1.0, + "volume": 1.0, + "sample_rate": 0, + "tts_audio_path": "./tts.wav", + "audio_format": "wav" } - """ \ No newline at end of file + + """ + + text: str + spk_id: int = 0 + speed: float = 1.0 + volume: float = 1.0 + sample_rate: int = 0 + save_path: str = None + audio_format: str = "wav" diff --git a/speechserving/speechserving/restful/response.py b/speechserving/speechserving/restful/response.py index fdb07a84..684a37f9 100644 --- a/speechserving/speechserving/restful/response.py +++ b/speechserving/speechserving/restful/response.py @@ -11,23 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class TTSResult(BaseModel):
    """Payload of a successful TTS call (the ``result`` member)."""

    # Language reported by the engine.
    lang: str = "zh"
    # Sample rate of the returned audio.
    sample_rate: int
    # Request parameters echoed back to the caller.
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    # Echo of the requested save path; None when the request gave none.
    save_path: Optional[str] = None
    # Synthesized audio, base64 encoded.
    audio: str


class TTSResponse(BaseModel):
    """Envelope returned by the TTS endpoint on success.

    response example
    {
        "success": true,
        "code": 200,
        "message": {
            "description": "success"
        },
        "result": {
            "lang": "zh",
            "spk_id": 0,
            "sample_rate": 24000,
            "speed": 1.0,
            "volume": 1.0,
            "audio": "LTI1OTIuNjI1OTUwMzQsOTk2OS41NDk4...",
            "save_path": "./tts.wav"
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: TTSResult
@router.post("/paddlespeech/tts", response_model=TTSResponse)
def tts(request_body: TTSRequest):
    """Synthesize speech for the text in the request.

    Args:
        request_body (TTSRequest): validated request parameters.

    Returns:
        dict: success envelope matching ``TTSResponse`` when synthesis
            succeeds.
        Response: the JSON failure envelope built by ``failed_response`` when
            a parameter is out of range or the engine fails.
    """
    # json to dict
    item_dict = request_body.dict()
    sentence = item_dict['text']
    spk_id = item_dict['spk_id']
    speed = item_dict['speed']
    volume = item_dict['volume']
    sample_rate = item_dict['sample_rate']
    save_path = item_dict['save_path']
    audio_format = item_dict['audio_format']

    # Check parameters
    if speed <= 0 or speed > 3 or volume <= 0 or volume > 3 or \
            sample_rate not in [0, 16000, 8000] or \
            audio_format not in ["pcm", "wav"]:
        return failed_response(ErrorCode.SERVER_PARAM_ERR)

    # single
    tts_engine = TTSEngine()

    # run
    try:
        lang, target_sample_rate, wav_base64 = tts_engine.run(
            sentence, spk_id, speed, volume, sample_rate, save_path,
            audio_format)
    except ServerBaseException as e:
        # Return immediately: falling through would reference lang /
        # target_sample_rate / wav_base64, which were never assigned.
        return failed_response(e.error_code, e.msg)
    except Exception:
        # Anything unexpected from the engine: log it and report an unknown
        # error. KeyboardInterrupt / SystemExit are deliberately not caught.
        traceback.print_exc()
        return failed_response(ErrorCode.SERVER_UNKOWN_ERR)

    json_body = {
        "success": True,
        "code": 200,
        "message": {
            "description": "success."
        },
        "result": {
            "lang": lang,
            "spk_id": spk_id,
            "speed": speed,
            "volume": volume,
            "sample_rate": target_sample_rate,
            "save_path": save_path,
            "audio": wav_base64
        }
    }

    return json_body
# Default human-readable message for every ErrorCode member.
ErrorMsg = {
    ErrorCode.SERVER_OK: "success.",
    ErrorCode.SERVER_PARAM_ERR: "Input parameters are not valid.",
    ErrorCode.SERVER_TASK_NOT_EXIST: "Task is not exist.",
    ErrorCode.SERVER_INTERNAL_ERR: "Internal error.",
    ErrorCode.SERVER_NETWORK_ERR: "Network exception.",
    ErrorCode.SERVER_UNKOWN_ERR: "Unknown error occurred."
}


def failed_response(code, msg=""):
    """Build the JSON response for a failed interface call.

    Args:
        code (int): error code number; also the lookup key for the default
            message.
        msg (str, optional): failure information. When empty, the message
            registered for ``code`` in ``ErrorMsg`` is used, or
            "Unknown error occurred." if the code has none. Defaults to "".

    Returns:
        Response (json): body with ``success`` set to false, the integer
            ``code`` and the message under ``message.global``.
    """
    description = msg or ErrorMsg.get(code, "Unknown error occurred.")
    payload = json.dumps({
        "success": False,
        "code": int(code),
        "message": {
            "global": description
        },
    })

    return Response(content=payload, media_type="application/json")
class ServerBaseException(Exception):
    """Base exception for server errors, carrying an error code and message.

    Args:
        error_code: error code of the failure; also the lookup key into
            ``ErrorMsg`` when no message is supplied.
        msg (str, optional): failure description. When empty or None the
            message registered for ``error_code`` in ``ErrorMsg`` is used
            (empty string if the code has none).

    Attributes:
        error_code: the code passed in.
        msg (str): the resolved message.
    """

    def __init__(self, error_code, msg=None):
        # Function-scope import: only needed for the active-exception check.
        import sys

        msg = msg if msg else ErrorMsg.get(error_code, "")
        super().__init__(error_code, msg)
        self.error_code = error_code
        self.msg = msg
        # Dump the traceback of the exception being wrapped, if there is one.
        # With no exception in flight print_exc() would only emit a
        # meaningless "NoneType: None" line, so skip it in that case.
        if sys.exc_info()[0] is not None:
            traceback.print_exc()