parent
d94ab22e92
commit
3535079434
@ -0,0 +1,69 @@
|
|||||||
|
([简体中文](./README_cn.md)|English)
|
||||||
|
# ACS (Audio Content Search)
|
||||||
|
|
||||||
|
## Introduction
|
||||||
|
ACS, or Audio Content Search, refers to the problem of locating the time stamps of key words in the text that is automatically transcribed from spoken language (speech-to-text).
|
||||||
|
|
||||||
|
This demo is an implementation that gets the time stamps of key words in the text transcribed from a specific audio file. It can be done with a single command or a few lines of Python using `PaddleSpeech`.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
### 1. Installation
|
||||||
|
See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
|
||||||
|
|
||||||
|
You can choose either the medium or the hard way to install paddlespeech.
|
||||||
|
|
||||||
|
### 2. Prepare Input File
|
||||||
|
The input of this demo should be a WAV file (`.wav`), and its sample rate must be the same as the model's.
|
||||||
|
|
||||||
|
Here are sample files for this demo that can be downloaded:
|
||||||
|
```bash
|
||||||
|
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Usage
|
||||||
|
- Command Line(Recommended)
|
||||||
|
```bash
|
||||||
|
# Chinese
|
||||||
|
paddlespeech_client acs --server_ip 127.0.0.1 --port 8490 --input ./zh.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
```bash
|
||||||
|
paddlespeech_client acs --help
|
||||||
|
```
|
||||||
|
Arguments:
|
||||||
|
- `input`(required): Audio file to recognize.
|
||||||
|
- `server_ip`: the server ip.
|
||||||
|
- `port`: the server port.
|
||||||
|
- `lang`: the language type of the model. Default: `zh`.
|
||||||
|
- `sample_rate`: Sample rate of the model. Default: `16000`.
|
||||||
|
- `audio_format`: The audio format.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
```bash
|
||||||
|
[2022-05-15 15:00:58,185] [ INFO] - acs http client start
|
||||||
|
[2022-05-15 15:00:58,185] [ INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
|
||||||
|
[2022-05-15 15:01:03,220] [ INFO] - acs http client finished
|
||||||
|
[2022-05-15 15:01:03,221] [ INFO] - ACS result: {'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
|
||||||
|
[2022-05-15 15:01:03,221] [ INFO] - Response time 5.036084 s.
|
||||||
|
```
|
||||||
|
|
||||||
|
- Python API
|
||||||
|
```python
|
||||||
|
from paddlespeech.server.bin.paddlespeech_client import ACSClientExecutor
|
||||||
|
|
||||||
|
acs_executor = ACSClientExecutor()
|
||||||
|
res = acs_executor(
|
||||||
|
input='./zh.wav',
|
||||||
|
server_ip="127.0.0.1",
|
||||||
|
port=8490,)
|
||||||
|
print(res)
|
||||||
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
```bash
|
||||||
|
[2022-05-15 15:08:13,955] [ INFO] - acs http client start
|
||||||
|
[2022-05-15 15:08:13,956] [ INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
|
||||||
|
[2022-05-15 15:08:19,026] [ INFO] - acs http client finished
|
||||||
|
{'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
|
||||||
|
```
|
@ -0,0 +1,6 @@
|
|||||||
|
# Start the audio content search (ACS) server.

# Restrict the GPUs visible to the server process. CUDA reads the plural
# CUDA_VISIBLE_DEVICES; the singular spelling is silently ignored.
export CUDA_VISIBLE_DEVICES=0,1,2,3
#nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_application.yaml &> streaming_asr.log &

# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
paddlespeech_server start --config_file conf/acs_application.yaml
|
||||||
|
|
@ -0,0 +1,101 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import base64
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from paddlespeech.cli.log import logger
|
||||||
|
from paddlespeech.server.engine.engine_pool import get_engine_pool
|
||||||
|
from paddlespeech.server.restful.request import ASRRequest
|
||||||
|
from paddlespeech.server.restful.response import ACSResponse
|
||||||
|
from paddlespeech.server.restful.response import ErrorResponse
|
||||||
|
from paddlespeech.server.utils.errors import ErrorCode
|
||||||
|
from paddlespeech.server.utils.errors import failed_response
|
||||||
|
from paddlespeech.server.utils.exception import ServerBaseException
|
||||||
|
|
||||||
|
# Router on which the audio content search (ACS) endpoints below are registered.
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get('/paddlespeech/asr/search/help')
def help():
    """Describe the audio content search (acs) endpoint.

    Returns:
        json: a static payload holding the service description, the
            expected input and an example of the output
    """
    # One example hit: the matched word with its begin / end values.
    example_hit = {'w': '你', 'bg': 0.0, 'ed': 1.2}
    example_output = {"asr_result": "你好", "acs_result": [example_hit]}
    result = {
        "description": "acs server",
        "input": "base64 string of wavfile",
        "output": example_output,
    }
    return {
        "success": "True",
        "code": 200,
        "message": {
            "global": "success"
        },
        "result": result,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/paddlespeech/asr/search",
    response_model=Union[ACSResponse, ErrorResponse])
def acs(request_body: ASRRequest):
    """acs api

    Args:
        request_body (ASRRequest): the acs request, we reuse the http
            ASRRequest; its ``audio`` field holds the base64 encoded audio

    Returns:
        json: the transcription and the acs result on success, otherwise
            the payload built by ``failed_response``
    """
    try:
        # 1. get the audio data via base64 decoding
        audio_data = base64.b64decode(request_body.audio)

        # 2. get the acs engine from the engine pool
        engine_pool = get_engine_pool()
        acs_engine = engine_pool['acs']

        # 3. run the engine on the decoded audio; it returns the acs result
        #    together with the asr transcription
        acs_result, asr_result = acs_engine.run(audio_data)

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "transcription": asr_result,
                "acs": acs_result
            }
        }

    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except Exception as e:
        # Catch Exception rather than BaseException so that KeyboardInterrupt
        # and SystemExit keep propagating instead of being turned into an
        # "unknown error" response.
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        logger.error(e)

    return response
|
Loading…
Reference in new issue