Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleSpeech into new_api
commit
ac680aa783
@ -1,3 +1,3 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
paddlespeech_server start --config_file ./conf/application.yaml
|
paddlespeech_server start --config_file ./conf/application.yaml &> server.log &
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
|
||||||
|
wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav
|
||||||
|
|
||||||
|
# sid extract
|
||||||
|
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav
|
||||||
|
|
||||||
|
# sid score
|
||||||
|
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task score --enroll ./85236145389.wav --test ./123456789.wav
|
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
|
||||||
|
paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭
|
@ -1,9 +1,8 @@
|
|||||||
export CUDA_VISIBLE_DEVICE=0,1,2,3
|
#export CUDA_VISIBLE_DEVICE=0,1,2,3
|
||||||
export CUDA_VISIBLE_DEVICE=0,1,2,3
|
|
||||||
|
|
||||||
# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
|
# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
|
||||||
paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &
|
paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &
|
||||||
|
|
||||||
# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
|
# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
|
||||||
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log &
|
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log &
|
||||||
|
|
||||||
|
@ -0,0 +1,103 @@
|
|||||||
|
# This is the parameter configuration file for streaming tts server.
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# SERVER SETTING #
|
||||||
|
#################################################################################
|
||||||
|
host: 0.0.0.0
|
||||||
|
port: 8192
|
||||||
|
|
||||||
|
# The task format in the engine_list is: <speech task>_<engine type>
|
||||||
|
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
|
||||||
|
# protocol choices = ['websocket', 'http']
|
||||||
|
protocol: 'websocket'
|
||||||
|
engine_list: ['tts_online-onnx']
|
||||||
|
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# ENGINE CONFIG #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
################################### TTS #########################################
|
||||||
|
################### speech task: tts; engine_type: online #######################
|
||||||
|
tts_online:
|
||||||
|
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
|
||||||
|
# fastspeech2_cnndecoder_csmsc supports streaming am inference.
|
||||||
|
am: 'fastspeech2_csmsc'
|
||||||
|
am_config:
|
||||||
|
am_ckpt:
|
||||||
|
am_stat:
|
||||||
|
phones_dict:
|
||||||
|
tones_dict:
|
||||||
|
speaker_dict:
|
||||||
|
spk_id: 0
|
||||||
|
|
||||||
|
# voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
|
||||||
|
# Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
|
||||||
|
voc: 'mb_melgan_csmsc'
|
||||||
|
voc_config:
|
||||||
|
voc_ckpt:
|
||||||
|
voc_stat:
|
||||||
|
|
||||||
|
# others
|
||||||
|
lang: 'zh'
|
||||||
|
device: 'cpu' # set 'gpu:id' or 'cpu'
|
||||||
|
# am_block and am_pad are only used by the fastspeech2_cnndecoder_onnx model for streaming am inference,
|
||||||
|
# when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
|
||||||
|
am_block: 72
|
||||||
|
am_pad: 12
|
||||||
|
# voc_pad and voc_block are used by the voc model for streaming voc inference,
|
||||||
|
# when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
|
||||||
|
# when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
|
||||||
|
voc_block: 36
|
||||||
|
voc_pad: 14
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# ENGINE CONFIG #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
################################### TTS #########################################
|
||||||
|
################### speech task: tts; engine_type: online-onnx #######################
|
||||||
|
tts_online-onnx:
|
||||||
|
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
|
||||||
|
# fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference.
|
||||||
|
am: 'fastspeech2_cnndecoder_csmsc_onnx'
|
||||||
|
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
|
||||||
|
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
|
||||||
|
am_ckpt: # list
|
||||||
|
am_stat:
|
||||||
|
phones_dict:
|
||||||
|
tones_dict:
|
||||||
|
speaker_dict:
|
||||||
|
spk_id: 0
|
||||||
|
am_sample_rate: 24000
|
||||||
|
am_sess_conf:
|
||||||
|
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||||
|
use_trt: False
|
||||||
|
cpu_threads: 4
|
||||||
|
|
||||||
|
# voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
|
||||||
|
# Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
|
||||||
|
voc: 'hifigan_csmsc_onnx'
|
||||||
|
voc_ckpt:
|
||||||
|
voc_sample_rate: 24000
|
||||||
|
voc_sess_conf:
|
||||||
|
device: "cpu" # set 'gpu:id' or 'cpu'
|
||||||
|
use_trt: False
|
||||||
|
cpu_threads: 4
|
||||||
|
|
||||||
|
# others
|
||||||
|
lang: 'zh'
|
||||||
|
# am_block and am_pad are only used by the fastspeech2_cnndecoder_onnx model for streaming am inference,
|
||||||
|
# when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
|
||||||
|
am_block: 72
|
||||||
|
am_pad: 12
|
||||||
|
# voc_pad and voc_block are used by the voc model for streaming voc inference,
|
||||||
|
# when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
|
||||||
|
# when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
|
||||||
|
voc_block: 36
|
||||||
|
voc_pad: 14
|
||||||
|
# voc_upsample should be the same as n_shift in the voc config.
|
||||||
|
voc_upsample: 300
|
||||||
|
|
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# http server
|
||||||
|
paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log &
|
||||||
|
|
||||||
|
|
||||||
|
# websocket server
|
||||||
|
paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log &
|
||||||
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# start server
|
|
||||||
paddlespeech_server start --config_file ./conf/tts_online_application.yaml
|
|
@ -0,0 +1,77 @@
|
|||||||
|
FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu16.04
|
||||||
|
|
||||||
|
RUN echo "deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial main restricted \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates main restricted \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial universe \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates universe \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial multiverse \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates multiverse \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-backports main restricted universe multiverse \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security main restricted \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security universe \n\
|
||||||
|
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security multiverse" > /etc/apt/sources.list
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y inetutils-ping wget vim curl cmake git sox libsndfile1 libpng12-dev \
|
||||||
|
libpng-dev swig libzip-dev openssl bc libflac* libgdk-pixbuf2.0-dev libpango1.0-dev libcairo2-dev \
|
||||||
|
libgtk2.0-dev pkg-config zip unzip zlib1g-dev libreadline-dev libbz2-dev liblapack-dev libjpeg-turbo8-dev \
|
||||||
|
sudo lrzsz libsqlite3-dev libx11-dev libsm6 apt-utils libopencv-dev libavcodec-dev libavformat-dev \
|
||||||
|
libswscale-dev locales liblzma-dev python-lzma m4 libxext-dev strace libibverbs-dev libpcre3 libpcre3-dev \
|
||||||
|
build-essential libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev xz-utils \
|
||||||
|
libfreetype6-dev libxslt1-dev libxml2-dev libgeos-3.5.0 libgeos-dev && apt-get install -y --allow-downgrades \
|
||||||
|
--allow-change-held-packages libnccl2 libnccl-dev && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata \
|
||||||
|
&& /bin/cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && dpkg-reconfigure -f noninteractive tzdata && \
|
||||||
|
cd /usr/lib/x86_64-linux-gnu && ln -s libcudnn.so.8 libcudnn.so && \
|
||||||
|
cd /usr/local/cuda-11.2/targets/x86_64-linux/lib && ln -s libcublas.so.11.4.1.1043 libcublas.so && \
|
||||||
|
ln -s libcusolver.so.11.1.0.152 libcusolver.so && ln -s libcusparse.so.11 libcusparse.so && \
|
||||||
|
ln -s libcufft.so.10.4.1.152 libcufft.so
|
||||||
|
|
||||||
|
RUN echo "set meta-flag on" >> /etc/inputrc && echo "set convert-meta off" >> /etc/inputrc && \
|
||||||
|
locale-gen en_US.UTF-8 && /sbin/ldconfig -v && groupadd -g 10001 paddle && \
|
||||||
|
useradd -m -s /bin/bash -N -u 10001 paddle -g paddle && chmod g+w /etc/passwd && \
|
||||||
|
echo "paddle ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
|
||||||
|
|
||||||
|
ENV LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 LANGUAGE=en_US.UTF-8 TZ=Asia/Shanghai
|
||||||
|
|
||||||
|
# official download site: https://www.python.org/ftp/python/3.7.13/Python-3.7.13.tgz
|
||||||
|
RUN wget https://cdn.npmmirror.com/binaries/python/3.7.13/Python-3.7.13.tgz && tar xvf Python-3.7.13.tgz && \
|
||||||
|
cd Python-3.7.13 && ./configure --prefix=/home/paddle/python3.7 && make -j8 && make install && \
|
||||||
|
rm -rf ../Python-3.7.13 ../Python-3.7.13.tgz && chown -R paddle:paddle /home/paddle/python3.7
|
||||||
|
|
||||||
|
RUN cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/gmp/gmp-6.1.0.tar.bz2 && tar xvf gmp-6.1.0.tar.bz2 && \
|
||||||
|
cd gmp-6.1.0 && ./configure --prefix=/usr/local && make -j8 && make install && \
|
||||||
|
rm -rf ../gmp-6.1.0.tar.bz2 ../gmp-6.1.0 && cd /tmp && \
|
||||||
|
wget https://www.mpfr.org/mpfr-3.1.4/mpfr-3.1.4.tar.bz2 && tar xvf mpfr-3.1.4.tar.bz2 && cd mpfr-3.1.4 && \
|
||||||
|
./configure --prefix=/usr/local && make -j8 && make install && rm -rf ../mpfr-3.1.4.tar.bz2 ../mpfr-3.1.4 && \
|
||||||
|
cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/mpc/mpc-1.0.3.tar.gz && tar xvf mpc-1.0.3.tar.gz && \
|
||||||
|
cd mpc-1.0.3 && ./configure --prefix=/usr/local && make -j8 && make install && \
|
||||||
|
rm -rf ../mpc-1.0.3.tar.gz ../mpc-1.0.3 && cd /tmp && \
|
||||||
|
wget http://www.mirrorservice.org/sites/sourceware.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 && \
|
||||||
|
tar xvf isl-0.18.tar.bz2 && cd isl-0.18 && ./configure --prefix=/usr/local && make -j8 && make install \
|
||||||
|
&& rm -rf ../isl-0.18.tar.bz2 ../isl-0.18 && cd /tmp && \
|
||||||
|
wget http://mirrors.ustc.edu.cn/gnu/gcc/gcc-8.2.0/gcc-8.2.0.tar.gz --no-check-certificate && \
|
||||||
|
tar xvf gcc-8.2.0.tar.gz && cd gcc-8.2.0 && unset LIBRARY_PATH && ./configure --prefix=/home/paddle/gcc82 \
|
||||||
|
--enable-threads=posix --disable-checking --disable-multilib --enable-languages=c,c++ --with-gmp=/usr/local \
|
||||||
|
--with-mpfr=/usr/local --with-mpc=/usr/local --with-isl=/usr/local && make -j8 && make install && \
|
||||||
|
rm -rf ../gcc-8.2.0.tar.gz ../gcc-8.2.0 && chown -R paddle:paddle /home/paddle/gcc82
|
||||||
|
|
||||||
|
WORKDIR /home/paddle
|
||||||
|
ENV PATH=/home/paddle/python3.7/bin:/home/paddle/gcc82/bin:${PATH} \
|
||||||
|
LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda-11.2/targets/x86_64-linux/lib:${LD_LIBRARY_PATH}
|
||||||
|
|
||||||
|
RUN mkdir -p ~/.pip && echo "[global]" > ~/.pip/pip.conf && \
|
||||||
|
echo "index-url=https://mirror.baidu.com/pypi/simple" >> ~/.pip/pip.conf && \
|
||||||
|
echo "trusted-host=mirror.baidu.com" >> ~/.pip/pip.conf && \
|
||||||
|
python3 -m pip install --upgrade pip && \
|
||||||
|
pip install paddlepaddle-gpu==2.3.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
|
||||||
|
rm -rf ~/.cache/pip
|
||||||
|
|
||||||
|
RUN git clone https://github.com/PaddlePaddle/PaddleSpeech.git && cd PaddleSpeech && \
|
||||||
|
pip3 install pytest-runner paddleaudio -i https://pypi.tuna.tsinghua.edu.cn/simple && \
|
||||||
|
pip3 install -e .[develop] -i https://pypi.tuna.tsinghua.edu.cn/simple && \
|
||||||
|
pip3 install importlib-metadata==4.2.0 urllib3==1.25.10 -i https://pypi.tuna.tsinghua.edu.cn/simple && \
|
||||||
|
rm -rf ~/.cache/pip && \
|
||||||
|
sudo cp -f /home/paddle/gcc82/lib64/libstdc++.so.6.0.25 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 && \
|
||||||
|
chown -R paddle:paddle /home/paddle/PaddleSpeech
|
||||||
|
|
||||||
|
USER paddle
|
||||||
|
CMD ['bash']
|
@ -1,20 +1,3 @@
|
|||||||
# Callcenter 8k sample rate
|
# Callcenter 8k sample rate
|
||||||
|
|
||||||
Data distribution:
|
This recipe only provides the model/data config for 8k ASR; users need to prepare the data and generate the manifest metafile themselves. See the Aishell or LibriSpeech recipes for examples.
|
||||||
|
|
||||||
```
|
|
||||||
676048 utts
|
|
||||||
491.4004722221223 h
|
|
||||||
4357792.0 text
|
|
||||||
2.4633630739178654 text/sec
|
|
||||||
2.6167397877068495 sec/utt
|
|
||||||
```
|
|
||||||
|
|
||||||
train/dev/test partition:
|
|
||||||
|
|
||||||
```
|
|
||||||
33802 manifest.dev
|
|
||||||
67606 manifest.test
|
|
||||||
574640 manifest.train
|
|
||||||
676048 total
|
|
||||||
```
|
|
||||||
|
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
Loading…
Reference in new issue