[TTS][Paddle-Lite]add a TTS demo for ARM Linux (#2991)

pull/3008/head
老虎会游泳 2 years ago committed by GitHub
parent 59cabdc967
commit cac96ac9e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,4 @@
build/
output/
libs/
models/

@ -0,0 +1,88 @@
# PaddleSpeech TTS 文本到语音 ARM Linux Demo
修改自 [demos/TTSAndroid](../TTSAndroid)，模型也来自该安卓Demo。
### 配置编译选项
打开 [config.sh](config.sh) 按需修改配置。
默认编译64位版本，如果要编译32位版本，请把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。
### 安装依赖
```
# Ubuntu
sudo apt install build-essential cmake wget tar unzip
# CentOS
sudo yum groupinstall "Development Tools"
sudo yum install cmake wget tar unzip
```
### 下载Paddle Lite库文件和模型文件
预编译的二进制使用与安卓Demo版本相同的Paddle-Lite推理库（[Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449)）和模型（[fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz)）。
可用以下命令下载:
```
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
cd PaddleSpeech/demos/TTSArmLinux
./download.sh
```
### 编译Demo
```
./build.sh
```
预编译的二进制兼容 Ubuntu 16.04 到 20.04。
如果编译或链接失败，说明发行版与预编译库不兼容，请尝试手动编译Paddle Lite库，具体步骤在最下面。
### 运行
```
./run.sh
```
将把[src/main.cc](src/main.cc)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件，保存在`output`文件夹中。
-----
### 手动编译Paddle Lite库
预编译的二进制兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。
注意,我们只能保证 [Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449) 与通过`download.sh`下载的模型兼容。
如果使用其他版本的Paddle Lite库，可能需要用对应版本的opt工具重新导出模型。
此外，[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与TTS不兼容，无法导出或运行TTS模型，需要使用更新的版本（比如`develop`分支中的代码）。
但`develop`分支中的代码可能与通过`download.sh`下载的模型不兼容，Demo运行起来可能会崩溃。
#### 安装Paddle Lite的编译依赖
```
# Ubuntu
sudo apt install build-essential cmake git python
# CentOS
sudo yum groupinstall "Development Tools"
sudo yum install cmake git python
```
#### 编译Paddle Lite 68b66fd35
```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
git checkout 68b66fd356c875c92167d311ad458e6093078449
./lite/tools/build_linux.sh --with_extra=ON
```
编译完成后打开Demo的[config.sh](config.sh),把 `PADDLE_LITE_DIR` 改成以下值即可(注意替换`/path/to/`为实际目录):
```
PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"
```

@ -0,0 +1,20 @@
#!/bin/bash
# Configure and compile the TTS demo: sources live in ./src, the build tree
# (and the resulting binary) in ./build.
set -e

# Run from the script's own directory so config.sh and the relative paths
# below resolve regardless of where the script was invoked from.
cd "$(dirname "$(realpath "$0")")"

# Settings used here: ARM_ABI and PADDLE_LITE_DIR.
source ./config.sh

echo "ARM_ABI is ${ARM_ABI}"
echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}"

# Always start from an empty build tree.
build_dir=build
rm -rf "$build_dir"
mkdir -p "$build_dir"
cd "$build_dir"

# Both settings are forwarded to CMake as -D definitions.
cmake_args=(
    -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}"
    -DARM_ABI="${ARM_ABI}"
)
cmake "${cmake_args[@]}" ../src
make

echo "make successful!"

@ -0,0 +1,14 @@
#!/bin/bash
# Remove everything the demo scripts generate: synthesized output, downloaded
# libraries and models, and the build tree.
set -e
cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

# remove dirs
# ${VAR:?} aborts the script if config.sh left a directory variable unset or
# empty, instead of silently running `rm -rf` on an empty path.
set -x
rm -rf "${OUTPUT_DIR:?}"
rm -rf "${LIBS_DIR:?}"
rm -rf "${MODELS_DIR:?}"
# build.sh creates ./build next to this script; remove it as well so a clean
# leaves no generated directory behind.
rm -rf build

@ -0,0 +1,14 @@
# configuration
#
# Shared settings, sourced by build.sh, clean.sh, download.sh and run.sh.
# Each of those scripts changes into its own directory before sourcing this
# file, so ${PWD} below refers to the demo directory.
#
# Target ABI forwarded to CMake by build.sh: armv8 (64-bit) or armv7hf (32-bit).
ARM_ABI=armv8
#ARM_ABI=armv7hf
# Directory download.sh extracts the model archive into.
MODELS_DIR="${PWD}/models"
# Directory download.sh extracts the prebuilt Paddle Lite archives into.
LIBS_DIR="${PWD}/libs"
# Directory run.sh recreates and fills with the generated .wav files.
OUTPUT_DIR="${PWD}/output"
# Root of the Paddle Lite C++ library forwarded to CMake by build.sh.
# Presumably this is the directory the downloaded archive unpacks to - confirm
# against the archive layout.
PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
# Alternative: a Paddle Lite tree compiled from source (replace /path/to/).
#PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"
# Model files handed to the demo binary by run.sh (first and second argument).
AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"

@ -0,0 +1,56 @@
#!/bin/bash
# Download the prebuilt Paddle Lite libraries and the TTS models, verify their
# MD5 checksums and extract them into LIBS_DIR / MODELS_DIR.
set -e
cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

mkdir -p "$LIBS_DIR" "$MODELS_DIR"

# Print the MD5 digest of a file.
#   $1 - path of the file
md5_of() {
    md5sum "$1" | awk '{ print $1 }'
}

# Fetch an archive (unless a copy with the expected checksum already exists)
# and extract it next to where it was downloaded.
#   $1 - archive file name
#   $2 - download URL
#   $3 - expected MD5 digest
#   $4 - directory to download into and extract in
# Returns non-zero when the freshly downloaded file fails verification; under
# `set -e` this stops the script before a corrupt archive is extracted.
download() {
    local file="$1"
    local url="$2"
    local md5="$3"
    local dir="$4"
    local path="$dir/$file"
    local fileMd5

    if [ -f "$path" ] && [ "$(md5_of "$path")" = "$md5" ]; then
        echo "File $file (MD5: $md5) has been downloaded."
    else
        echo "Downloading $file..."
        wget -O "$path" "$url"

        # MD5 verify
        fileMd5="$(md5_of "$path")"
        if [ "$fileMd5" = "$md5" ]; then
            echo "File $file (MD5: $md5) has been downloaded."
        else
            echo "MD5 mismatch, file may be corrupt" >&2
            echo "$file MD5: $fileMd5, it should be $md5" >&2
            return 1
        fi
    fi

    echo "Extracting $file..."
    echo '-----------------------'
    # -C extracts inside the target directory without changing the caller's
    # working directory.
    tar -vxf "$path" -C "$dir"
    echo '======================='
}

download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
    'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
    '39e0c6604f97c70f5d13c573d7e709b9' \
    "$LIBS_DIR"

download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \
    'https://paddlespeech.bj.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \
    'f5ceb509f0b610dafb8379889c5f36f8' \
    "$LIBS_DIR"

download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
    'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
    '93ef17d44b498aff3bea93e2c5c09a1e' \
    "$MODELS_DIR"

echo "Done."

@ -0,0 +1,18 @@
#!/bin/bash
# Synthesize each of the ten demo sentences into "$OUTPUT_DIR/<index>.wav".
set -e
cd "$(dirname "$(realpath "$0")")"

# Settings used here: AM_MODEL_PATH, VOC_MODEL_PATH and OUTPUT_DIR.
source ./config.sh

# Start from an empty output directory.
rm -rf "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"

# One demo invocation per sentence index; the subshell keeps the command
# tracing (set -x) limited to the demo call itself.
for ((idx = 1; idx <= 10; idx++)); do
    (
        set -x
        ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $idx "$OUTPUT_DIR/$idx.wav"
    )
done

# Show what was produced.
ls -lh "$OUTPUT_DIR"/*.wav

@ -0,0 +1,47 @@
cmake_minimum_required(VERSION 3.10)

# Build description for the PaddleSpeech TTS demo on ARM Linux.
#
# Expected -D variables (build.sh passes both):
#   ARM_ABI          "armv8" or "armv7hf"
#   PADDLE_LITE_DIR  root of the Paddle Lite C++ library; must contain
#                    include/ and either libs/<ARM_ABI>/ or lib/

# Target-system description, set before project() as in the original file.
# NOTE(review): settings like these conventionally live in a toolchain file.
set(CMAKE_SYSTEM_NAME Linux)

if("${ARM_ABI}" STREQUAL "armv8")
  set(CMAKE_SYSTEM_PROCESSOR aarch64)
  set(tts_arch_flags "-march=armv8-a")
  # Uncomment to select the compilers explicitly:
  #set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
  #set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
elseif("${ARM_ABI}" STREQUAL "armv7hf")
  set(CMAKE_SYSTEM_PROCESSOR arm)
  set(tts_arch_flags "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4")
  # Uncomment to select the compilers explicitly:
  #set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
  #set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
else()
  # FATAL_ERROR stops processing, so nothing below runs for an unknown ABI.
  message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.")
endif()

project(paddlespeech_tts_demo)

message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")

# Fail at configure time with a clear message instead of at compile/link time.
if("${PADDLE_LITE_DIR}" STREQUAL "")
  message(FATAL_ERROR "PADDLE_LITE_DIR is not set; pass -DPADDLE_LITE_DIR=<Paddle Lite cxx directory>.")
endif()
# A relative path is resolved against this source directory.
get_filename_component(tts_paddle_lite_dir "${PADDLE_LITE_DIR}"
  ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
if(NOT EXISTS "${tts_paddle_lite_dir}/include/paddle_api.h")
  message(FATAL_ERROR "paddle_api.h not found in ${tts_paddle_lite_dir}/include; check PADDLE_LITE_DIR.")
endif()

# The architecture/ABI flags are prepended so that user-supplied flags can
# still override them. They stay in the language-wide flag variables because
# those are used for both the compile and the link invocations.
set(CMAKE_C_FLAGS "${tts_arch_flags} ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${tts_arch_flags} ${CMAKE_CXX_FLAGS}")

# Paddle Lite ships no CMake package, so wrap the shared library in an imported
# target that carries its include directory. The two hint directories are the
# same ones previously passed to link_directories().
find_library(TTS_PADDLE_LITE_LIBRARY
  NAMES paddle_light_api_shared
  HINTS
    "${tts_paddle_lite_dir}/libs/${ARM_ABI}"
    "${tts_paddle_lite_dir}/lib"
  NO_CMAKE_FIND_ROOT_PATH
)
if(NOT TTS_PADDLE_LITE_LIBRARY)
  message(FATAL_ERROR
    "paddle_light_api_shared not found in ${tts_paddle_lite_dir}/libs/${ARM_ABI} "
    "or ${tts_paddle_lite_dir}/lib; check PADDLE_LITE_DIR and ARM_ABI.")
endif()
add_library(PaddleLite::light_api UNKNOWN IMPORTED)
set_target_properties(PaddleLite::light_api PROPERTIES
  IMPORTED_LOCATION "${TTS_PADDLE_LITE_LIBRARY}"
  INTERFACE_INCLUDE_DIRECTORIES "${tts_paddle_lite_dir}/include"
)

# REQUIRED makes CMake stop with its own error when OpenMP is unavailable.
find_package(OpenMP REQUIRED)
message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}")
message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}")
message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}")

add_executable(paddlespeech_tts_demo main.cc)

# Strict C++11 (-std=c++11 rather than -std=gnu++11), as before.
set_target_properties(paddlespeech_tts_demo PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
  CXX_EXTENSIONS OFF
)

# OpenMP::OpenMP_CXX supplies both the OpenMP compile flag and runtime library.
target_link_libraries(paddlespeech_tts_demo
  PRIVATE
    PaddleLite::light_api
    OpenMP::OpenMP_CXX
)

@ -0,0 +1,232 @@
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h"
using namespace paddle::lite_api;
typedef int16_t WavDataType;
class Predictor {
public:
bool Init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
// Release model if exists
ReleaseModel();
AM_predictor_ = LoadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
if (AM_predictor_ == nullptr) {
return false;
}
VOC_predictor_ = LoadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
if (VOC_predictor_ == nullptr) {
return false;
}
return true;
}
~Predictor() {
ReleaseModel();
ReleaseWav();
}
std::shared_ptr<PaddlePredictor> LoadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
if (modelPath.empty()) {
return nullptr;
}
// 设置MobileConfig
MobileConfig config;
config.set_model_from_file(modelPath);
config.set_threads(cpuThreadNum);
if (cpuPowerMode == "LITE_POWER_HIGH") {
config.set_power_mode(PowerMode::LITE_POWER_HIGH);
} else if (cpuPowerMode == "LITE_POWER_LOW") {
config.set_power_mode(PowerMode::LITE_POWER_LOW);
} else if (cpuPowerMode == "LITE_POWER_FULL") {
config.set_power_mode(PowerMode::LITE_POWER_FULL);
} else if (cpuPowerMode == "LITE_POWER_NO_BIND") {
config.set_power_mode(PowerMode::LITE_POWER_NO_BIND);
} else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") {
config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH);
} else if (cpuPowerMode == "LITE_POWER_RAND_LOW") {
config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW);
} else {
std::cerr << "Unknown cpu power mode!" << std::endl;
return nullptr;
}
return CreatePaddlePredictor<MobileConfig>(config);
}
void ReleaseModel() {
AM_predictor_ = nullptr;
VOC_predictor_ = nullptr;
}
bool RunModel(const std::vector<int64_t> &phones) {
if (!IsLoaded()) {
return false;
}
// 计时开始
auto start = std::chrono::system_clock::now();
// 执行推理
VOCOutputToWav(GetAMOutput(phones));
// 计时结束
auto end = std::chrono::system_clock::now();
// 计算用时
std::chrono::duration<float> duration = end - start;
inference_time_ = duration.count() * 1000; // 单位:毫秒
return true;
}
std::unique_ptr<const Tensor> GetAMOutput(const std::vector<int64_t> &phones) {
auto phones_handle = AM_predictor_->GetInput(0);
phones_handle->Resize({static_cast<int64_t>(phones.size())});
phones_handle->CopyFromCpu(phones.data());
AM_predictor_->Run();
// 获取输出Tensor
auto am_output_handle = AM_predictor_->GetOutput(0);
// 打印输出Tensor的shape
std::cout << "AM Output shape: ";
auto shape = am_output_handle->shape();
for (auto s : shape) {
std::cout << s << ", ";
}
std::cout << std::endl;
return am_output_handle;
}
void VOCOutputToWav(std::unique_ptr<const Tensor> &&input) {
auto mel_handle = VOC_predictor_->GetInput(0);
// [?, 80]
auto dims = input->shape();
mel_handle->Resize(dims);
auto am_output_data = input->mutable_data<float>();
mel_handle->CopyFromCpu(am_output_data);
VOC_predictor_->Run();
// 获取输出Tensor
auto voc_output_handle = VOC_predictor_->GetOutput(0);
// 打印输出Tensor的shape
std::cout << "VOC Output shape: ";
auto shape = voc_output_handle->shape();
for (auto s : shape) {
std::cout << s << ", ";
}
std::cout << std::endl;
// 获取输出Tensor的数据
int64_t output_size = 1;
for (auto dim : voc_output_handle->shape()) {
output_size *= dim;
}
auto output_data = voc_output_handle->mutable_data<float>();
SaveFloatWav(output_data, output_size);
}
inline float Abs(float number) {
return (number < 0) ? -number : number;
}
void SaveFloatWav(float *floatWav, int64_t size) {
wav_.resize(size);
float maxSample = 0.01;
// 寻找最大采样值
for (int64_t i=0; i<size; i++) {
float sample = Abs(floatWav[i]);
if (sample > maxSample) {
maxSample = sample;
}
}
// 把采样值缩放到 int_16 范围
for (int64_t i=0; i<size; i++) {
wav_[i] = floatWav[i] * 32767.0f / maxSample;
}
}
bool IsLoaded() {
return AM_predictor_ != nullptr && VOC_predictor_ != nullptr;
}
float GetInferenceTime() {
return inference_time_;
}
const std::vector<WavDataType> & GetWav() {
return wav_;
}
int GetWavSize() {
return wav_.size() * sizeof(WavDataType);
}
void ReleaseWav() {
wav_.clear();
}
struct WavHeader {
// RIFF 头
char riff[4] = {'R', 'I', 'F', 'F'};
uint32_t size = 0;
char wave[4] = {'W', 'A', 'V', 'E'};
// FMT 头
char fmt[4] = {'f', 'm', 't', ' '};
uint32_t fmt_size = 16;
uint16_t audio_format = 1; // 1为整数编码3为浮点编码
uint16_t num_channels = 1;
// 如果播放速度和音调异常,请修改采样率
// 常见采样率16000, 24000, 32000, 44100, 48000, 96000
uint32_t sample_rate = 24000;
uint32_t byte_rate = 64000;
uint16_t block_align = 2;
uint16_t bits_per_sample = sizeof(WavDataType) * 8;
// DATA 头
char data[4] = {'d', 'a', 't', 'a'};
uint32_t data_size = 0;
};
bool WriteWavToFile(const std::string &wavPath) {
std::ofstream fout(wavPath, std::ios::binary);
if (!fout.is_open()) {
return false;
}
// 写入头信息
WavHeader header;
header.data_size = GetWavSize();
header.size = sizeof(header) - 8 + header.data_size;
header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
header.block_align = header.num_channels * header.bits_per_sample / 8;
fout.write(reinterpret_cast<const char*>(&header), sizeof(header));
// 写入wav数据
fout.write(reinterpret_cast<const char*>(wav_.data()), header.data_size);
fout.close();
return true;
}
private:
float inference_time_ = 0;
std::shared_ptr<PaddlePredictor> AM_predictor_ = nullptr;
std::shared_ptr<PaddlePredictor> VOC_predictor_ = nullptr;
std::vector<WavDataType> wav_;
};

@ -0,0 +1,72 @@
#include <cstdlib>
#include <iostream>
#include <memory>
#include "paddle_api.h"
#include "Predictor.hpp"
using namespace paddle::lite_api;
// Built-in demo inputs: one sequence of integer phone IDs per sentence, passed
// to Predictor::RunModel. main() selects an entry by its 1-based command-line
// index. Each comment gives the six-digit ID from the original comment
// (presumably a corpus utterance ID - confirm), the Chinese sentence, and an
// English gloss.
std::vector<std::vector<int64_t>> sentencesToChoose = {
    // 009901: "昨日,这名“伤者”与医生全部被警方依法刑事拘留。"
    // (Yesterday the "injured" person and the doctor were all placed under criminal detention by the police.)
    {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141},
    // 009902: "钱伟长想到上海来办学校是经过深思熟虑的。"
    // (Qian Weichang's wish to come to Shanghai and run a school was carefully considered.)
    {174, 83, 213, 39, 20, 260, 89, 40, 30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45},
    // 009903: "她见我一进门就骂,吃饭时也骂,骂得我抬不起头。"
    // (She scolded me the moment I came in and again at meals, until I could not hold my head up.)
    {182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168},
    // 009904: "李述德在离开之前,只说了一句“柱驼杀父亲了”。"
    // (Before leaving, Li Shude said only one sentence: "Zhutuo killed father.")
    {153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45},
    // 009905: "这种车票和保险单捆绑出售属于重复性购买。"
    // (Selling this kind of ticket bundled with an insurance policy amounts to a duplicate purchase.)
    {262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9},
    // 009906: "戴佩妮的男友西米露接唱情歌,让她非常开心。"
    // (Penny Tai's boyfriend Ximilu joined in singing a love song, which made her very happy.)
    {40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120},
    // 009907: "观大势、谋大局、出大策始终是该院的办院方针。"
    // (Watching major trends, planning for the big picture and producing major strategies has always been the institute's guiding policy.)
    {70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51},
    // 009908: "他们骑着摩托回家,正好为农忙时的父母帮忙。"
    // (They rode home on motorbikes, just in time to help their parents during the busy farming season.)
    {182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20},
    // 009909: "但是因为还没到退休年龄,只能掰着指头捱日子。"
    // (But not yet having reached retirement age, one can only count the days on one's fingers.)
    {40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112},
    // 009910: "这几天雨水不断,人们恨不得待在家里不出门。"
    // (It has rained nonstop these days, and people would rather stay home than go out.)
    {262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52},
};
// Prints the command-line synopsis to stderr.
//   binName  program name shown in the synopsis (main passes argv[0])
void usage(const char *binName) {
    std::cerr << "Usage:" << std::endl;
    std::cerr << "\t" << binName;
    std::cerr << " <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>";
    std::cerr << std::endl;
}
// Converts a 1-based sentence index given as text into a 0-based position in
// a table of `count` entries.
// Returns false when `text` is not a plain decimal integer within [1, count].
// strtol is used instead of atoi because atoi has undefined behaviour on
// overflow and silently accepts trailing garbage such as "3abc".
static bool ParseSentenceIndex(const char *text, std::size_t count, std::size_t *index) {
    char *end = nullptr;
    const long value = std::strtol(text, &end, 10);
    if (end == text || *end != '\0') {
        return false;  // no digits at all, or extra characters after the number
    }
    // On overflow strtol returns LONG_MIN/LONG_MAX, which this range check
    // rejects as well.
    if (value < 1 || static_cast<unsigned long>(value) > count) {
        return false;
    }
    *index = static_cast<std::size_t>(value - 1);
    return true;
}

// Entry point: <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>.
// Synthesizes the selected built-in sentence and writes it to the given WAV
// file. Returns 0 on success and -1 on any error.
int main(int argc, char *argv[]) {
    if (argc < 5) {
        usage(argv[0]);
        return -1;
    }
    const char *AMModelPath = argv[1];
    const char *VOCModelPath = argv[2];
    const char *outputWavPath = argv[4];

    std::size_t sentencesIndex = 0;
    if (!ParseSentenceIndex(argv[3], sentencesToChoose.size(), &sentencesIndex)) {
        std::cerr << "sentences-index out of range" << std::endl;
        return -1;
    }

    Predictor predictor;
    if (!predictor.Init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) {
        std::cerr << "predictor init failed" << std::endl;
        return -1;
    }

    if (!predictor.RunModel(sentencesToChoose[sentencesIndex])) {
        std::cerr << "predictor run model failed" << std::endl;
        return -1;
    }

    std::cout << "Inference time: " << predictor.GetInferenceTime() << " ms, "
              << "WAV size (without header): " << predictor.GetWavSize() << " bytes" << std::endl;

    if (!predictor.WriteWavToFile(outputWavPath)) {
        std::cerr << "write wav file failed" << std::endl;
        return -1;
    }
    return 0;
}
Loading…
Cancel
Save