add a TTS demo for ARM Linux

pull/2991/head
彭逸豪 3 years ago
parent 5430a366ab
commit 96827a00dd

@ -0,0 +1,4 @@
build/
output/
libs/
models/

@ -0,0 +1,43 @@
# PaddleSpeech TTS 文本到语音 ARM Linux Demo
修改自[demos/TTSAndroid](../TTSAndroid),模型也来自该安卓Demo。
使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35))。
该库兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。
该Demo自带的模型与[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12)不兼容,运行会崩溃,需要使用更新的版本。
不过如果换成用 Paddle-Lite 2.12 opt 工具优化的模型,应该可以兼容。
### 配置
打开 [config.sh](config.sh) 按需修改配置。
默认编译64位版本;如果要编译32位版本,把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。
### 下载Paddle Lite库文件和模型文件
```
./download.sh
```
### 安装依赖
以 Ubuntu 18.04 为例:
```
sudo apt install build-essential cmake libopencv-dev
```
### 编译
```
./build.sh
```
### 运行
```
./run.sh
```
将把[src/main.cc](src/main.cc)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件,保存在`output`文件夹中。

@ -0,0 +1,20 @@
#!/bin/bash
# Configures and compiles the demo binary into ./build.
set -e

# Work from the directory that contains this script so relative paths resolve.
script_dir="$(dirname "$(realpath "$0")")"
cd "$script_dir"

# Pull in ARM_ABI, PADDLE_LITE_DIR and the other settings.
source ./config.sh

# Report the settings that drive the build.
echo "ARM_ABI is ${ARM_ABI}"
echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}"

# Always start from an empty build tree.
rm -rf build
mkdir -p build
cd build

cmake \
    -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}" \
    -DARM_ABI="${ARM_ABI}" \
    ../src
make

echo "make successful!"

@ -0,0 +1,14 @@
# configuration
# Shared settings, meant to be sourced by the demo scripts. Every path below is
# derived from ${PWD}, so the sourcing script must already have changed into
# the demo directory before loading this file.

# Target ABI: armv8 is the 64-bit build; use armv7hf for the 32-bit build.
ARM_ABI=armv8
#ARM_ABI=armv7hf
# Where the model archive and the Paddle-Lite library archive are unpacked.
MODELS_DIR="${PWD}/models"
LIBS_DIR="${PWD}/libs"
# Prebuilt Paddle-Lite inference library matching ARM_ABI, and the directory
# inside the unpacked archive that is handed to CMake as PADDLE_LITE_DIR.
PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz"
PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
# Model archive and the two model files the demo binary receives as its
# first (acoustic model) and second (vocoder) arguments.
MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz"
AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"

@ -0,0 +1,27 @@
#!/bin/bash
# Downloads and unpacks the Paddle-Lite inference library and the TTS models.
#
# The URLs and target directories come from config.sh, so the library that is
# fetched always matches the ARM_ABI that build.sh will compile against.
set -e

cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

mkdir -p "$LIBS_DIR" "$MODELS_DIR"

# download <file> <url> <dir>
# Fetches <url> into <dir>/<file> and extracts the archive inside <dir>.
# Absolute paths are used instead of `cd`, so the caller's working directory
# is left untouched.
download() {
    local file="$1"
    local url="$2"
    local dir="$3"

    echo "Downloading $file..."
    wget -O "$dir/$file" "$url"

    echo "Extracting $file..."
    tar -vxf "$dir/$file" -C "$dir"
}

download "$(basename "$PADDLE_LITE_DOWNLOAD_URL")" \
    "$PADDLE_LITE_DOWNLOAD_URL" \
    "$LIBS_DIR"

download "$(basename "$MODEL_DOWNLOAD_URL")" \
    "$MODEL_DOWNLOAD_URL" \
    "$MODELS_DIR"

@ -0,0 +1,18 @@
#!/bin/bash
# Runs the demo binary once per built-in sentence and writes ./output/<n>.wav.
set -e

# Work from the directory that contains this script so relative paths resolve.
script_dir="$(dirname "$(realpath "$0")")"
cd "$script_dir"

# Pull in AM_MODEL_PATH, VOC_MODEL_PATH and the other settings.
source ./config.sh

# Start from an empty output directory.
rm -rf ./output
mkdir -p ./output

# One invocation per sentence index; the subshell keeps `set -x` tracing
# limited to the command being run.
for ((i = 1; i <= 10; i++)); do
    (
        set -x
        ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" "$i" "./output/$i.wav"
    )
done

# Show what was produced.
ls -lh "$PWD"/output/*.wav

@ -0,0 +1,58 @@
cmake_minimum_required(VERSION 3.10)

# Target platform and compilers are chosen before project(), because project()
# is the command that probes the toolchain.
set(CMAKE_SYSTEM_NAME Linux)
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_SYSTEM_PROCESSOR aarch64)
    set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
    set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_SYSTEM_PROCESSOR arm)
    set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
    set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
else()
    # FATAL_ERROR stops processing, so nothing may follow it in this branch.
    message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.")
endif()

project(paddlespeech_tts_demo)

# Fail early with a clear message instead of a missing-header error later on.
if(NOT PADDLE_LITE_DIR)
    message(FATAL_ERROR "PADDLE_LITE_DIR is not set, pass -DPADDLE_LITE_DIR=<path to the Paddle-Lite cxx directory>.")
endif()

message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")

# Paddle-Lite headers and libraries.
include_directories(${PADDLE_LITE_DIR}/include)
link_directories(${PADDLE_LITE_DIR}/libs/${ARM_ABI})
link_directories(${PADDLE_LITE_DIR}/lib)

# Language level plus the architecture flags for the selected ABI.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}")
endif()

# REQUIRED makes find_package abort the configure step when the package is
# missing, so no "not found" branch is needed afterwards.
find_package(OpenMP REQUIRED)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}")
message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}")
message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}")

find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
message(STATUS " libraries: ${OpenCV_LIBS}")
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")

add_executable(paddlespeech_tts_demo main.cc)
target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared)

@ -0,0 +1,208 @@
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h"
using namespace paddle::lite_api;
/**
 * Two-stage speech synthesis on top of Paddle-Lite.
 *
 * An acoustic model (AM) receives the phone id sequence; its first output is
 * fed into a vocoder model (VOC), whose first output is stored as the float
 * waveform. The waveform can then be saved as a 32-bit float WAV file.
 */
class Predictor {
private:
    // Duration of the most recent successful runModel() call, in milliseconds.
    float inferenceTime = 0;
    std::shared_ptr<PaddlePredictor> AMPredictor = nullptr;
    std::shared_ptr<PaddlePredictor> VOCPredictor = nullptr;
    // Samples produced by the most recent vocoder run.
    std::vector<float> wav;

public:
    /**
     * Loads both models, replacing any that were loaded before.
     *
     * @param AMModelPath   path of the acoustic model file
     * @param VOCModelPath  path of the vocoder model file
     * @param cpuThreadNum  thread count handed to Paddle-Lite
     * @param cpuPowerMode  name of a PowerMode value, e.g. "LITE_POWER_HIGH"
     * @return true only when both models were loaded
     */
    bool init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
        // Release model if exists
        releaseModel();

        AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
        if (AMPredictor == nullptr) {
            return false;
        }

        VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
        if (VOCPredictor == nullptr) {
            // Do not keep a half-initialized pipeline around.
            releaseModel();
            return false;
        }

        return true;
    }

    ~Predictor() {
        releaseModel();
        releaseWav();
    }

    /**
     * Creates one predictor from a model file.
     *
     * @return the predictor, or nullptr when modelPath is empty or
     *         cpuPowerMode is not a recognized mode name
     */
    std::shared_ptr<PaddlePredictor> loadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
        if (modelPath.empty()) {
            return nullptr;
        }

        // Build the MobileConfig
        MobileConfig config;
        config.set_model_from_file(modelPath);
        config.set_threads(cpuThreadNum);

        if (cpuPowerMode == "LITE_POWER_HIGH") {
            config.set_power_mode(PowerMode::LITE_POWER_HIGH);
        } else if (cpuPowerMode == "LITE_POWER_LOW") {
            config.set_power_mode(PowerMode::LITE_POWER_LOW);
        } else if (cpuPowerMode == "LITE_POWER_FULL") {
            config.set_power_mode(PowerMode::LITE_POWER_FULL);
        } else if (cpuPowerMode == "LITE_POWER_NO_BIND") {
            config.set_power_mode(PowerMode::LITE_POWER_NO_BIND);
        } else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") {
            config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH);
        } else if (cpuPowerMode == "LITE_POWER_RAND_LOW") {
            config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW);
        } else {
            std::cerr << "Unknown cpu power mode!" << std::endl;
            return nullptr;
        }

        return CreatePaddlePredictor<MobileConfig>(config);
    }

    /** Drops both predictors; isLoaded() is false afterwards. */
    void releaseModel() {
        AMPredictor = nullptr;
        VOCPredictor = nullptr;
    }

    /**
     * Runs the full pipeline for one phone sequence and stores the waveform.
     *
     * @param phones phone ids, passed as floats to the acoustic model
     * @return false when the models are not loaded or phones is empty
     */
    bool runModel(const std::vector<float> &phones) {
        if (!isLoaded() || phones.empty()) {
            return false;
        }

        // A monotonic clock keeps the measurement immune to wall-clock changes.
        const auto start = std::chrono::steady_clock::now();

        // Run inference
        VOCOutputToWav(getAMOutput(phones));

        const auto end = std::chrono::steady_clock::now();

        const std::chrono::duration<float> duration = end - start;
        inferenceTime = duration.count() * 1000;  // unit: milliseconds

        return true;
    }

    /**
     * Feeds the phone ids to the acoustic model and returns its first output.
     * The output shape is printed to stdout.
     */
    std::unique_ptr<const Tensor> getAMOutput(const std::vector<float> &phones) {
        auto phones_handle = AMPredictor->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
        AMPredictor->Run();

        // Fetch the output tensor
        auto am_output_handle = AMPredictor->GetOutput(0);

        // Print the output tensor shape
        std::cout << "AM Output shape: ";
        for (auto s : am_output_handle->shape()) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        return am_output_handle;
    }

    /**
     * Feeds the acoustic model output to the vocoder and copies the vocoder's
     * first output into the waveform buffer. The output shape is printed to
     * stdout.
     */
    void VOCOutputToWav(std::unique_ptr<const Tensor> &&input) {
        auto mel_handle = VOCPredictor->GetInput(0);
        // [?, 80]
        mel_handle->Resize(input->shape());
        mel_handle->CopyFromCpu(input->data<float>());
        VOCPredictor->Run();

        // Fetch the output tensor
        auto voc_output_handle = VOCPredictor->GetOutput(0);
        const auto shape = voc_output_handle->shape();

        // Print the output tensor shape
        std::cout << "VOC Output shape: ";
        for (auto s : shape) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        // The element count is the product of all dimensions.
        int64_t output_size = 1;
        for (auto dim : shape) {
            output_size *= dim;
        }

        wav.resize(static_cast<std::size_t>(output_size));
        std::copy_n(voc_output_handle->data<float>(), output_size, wav.data());
    }

    bool isLoaded() const {
        return AMPredictor != nullptr && VOCPredictor != nullptr;
    }

    /** Duration of the last successful runModel() call, in milliseconds. */
    float getInferenceTime() const {
        return inferenceTime;
    }

    const std::vector<float> &getWav() const {
        return wav;
    }

    void releaseWav() {
        wav.clear();
    }

    /**
     * WAV file header as written to disk. The struct is dumped byte for byte
     * in host byte order, so the result is only a valid WAV file on
     * little-endian hosts.
     */
    struct WavHeader {
        // RIFF header
        char riff[4] = {'R', 'I', 'F', 'F'};
        uint32_t size = 0;
        char wave[4] = {'W', 'A', 'V', 'E'};

        // FMT header
        char fmt[4] = {'f', 'm', 't', ' '};
        uint32_t fmt_size = 16;
        uint16_t audio_format = 3;
        uint16_t num_channels = 1;

        // If playback speed or pitch is wrong, change the sample rate.
        // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
        uint32_t sample_rate = 24000;

        uint32_t byte_rate = 64000;
        uint16_t block_align = 4;
        uint16_t bits_per_sample = 32;

        // DATA header
        char data[4] = {'d', 'a', 't', 'a'};
        uint32_t data_size = 0;
    };
    static_assert(sizeof(WavHeader) == 44, "WavHeader must match the 44-byte on-disk layout");

    /**
     * Writes the current waveform as a WAV file.
     *
     * @return false when the file cannot be opened, a write fails, or the
     *         waveform is too large for the 32-bit RIFF size fields
     */
    bool writeWavToFile(const std::string &wavPath) {
        // The RIFF size counts every byte after the 8-byte "RIFF"+size prefix:
        // the rest of the header plus the sample data.
        const uint32_t headerPayload = static_cast<uint32_t>(sizeof(WavHeader) - 8);
        if (wav.size() > (UINT32_MAX - headerPayload) / sizeof(float)) {
            return false;
        }

        std::ofstream fout(wavPath, std::ios::binary);
        if (!fout.is_open()) {
            return false;
        }

        // Write the header
        WavHeader header;
        header.data_size = static_cast<uint32_t>(wav.size() * sizeof(float));
        header.size = headerPayload + header.data_size;
        header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
        header.block_align = static_cast<uint16_t>(header.num_channels * header.bits_per_sample / 8);
        fout.write(reinterpret_cast<const char *>(&header), sizeof(header));

        // Write the samples
        fout.write(reinterpret_cast<const char *>(wav.data()), header.data_size);

        // close() flushes; a failure in any step above leaves failbit set.
        fout.close();
        return !fout.fail();
    }
};

@ -0,0 +1,71 @@
#include <cstdlib>
#include <iostream>
#include <memory>
#include "paddle_api.h"
#include "Predictor.hpp"
using namespace paddle::lite_api;
// Built-in demo inputs, one entry per sentence. main() selects an entry by its
// 1-based index and passes it to Predictor::runModel() as the phone sequence.
// Each comment gives the sentence label and its original Chinese text,
// followed by an English gloss.
// NOTE(review): the six-digit labels look like corpus utterance ids - confirm.
std::vector<std::vector<float>> sentencesToChoose = {
// 009901 昨日,这名“伤者”与医生全部被警方依法刑事拘留。
// (Yesterday, the "injured person" and the doctors were all placed under criminal detention by the police in accordance with the law.)
{261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141},
// 009902 钱伟长想到上海来办学校是经过深思熟虑的。
// (Qian Weichang's wish to come to Shanghai to run a school was carefully thought through.)
{174, 83, 213, 39, 20, 260, 89, 40, 30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45},
// 009903 她见我一进门就骂,吃饭时也骂,骂得我抬不起头。
// (She scolded me the moment I walked in, and at mealtimes too, until I could not hold my head up.)
{182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168},
// 009904 李述德在离开之前,只说了一句“柱驼杀父亲了”。
// (Before leaving, Li Shude said only one sentence: "Zhutuo killed father.")
{153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45},
// 009905 这种车票和保险单捆绑出售属于重复性购买。
// (Selling this kind of ticket bundled with an insurance policy amounts to a duplicate purchase.)
{262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9},
// 009906 戴佩妮的男友西米露接唱情歌,让她非常开心。
// (Penny Tai's boyfriend Ximilu took over singing a love song, which made her very happy.)
{40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120},
// 009907 观大势、谋大局、出大策始终是该院的办院方针。
// (Observing major trends, planning for the big picture and producing major strategies has always been the institute's guiding policy.)
{70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51},
// 009908 他们骑着摩托回家,正好为农忙时的父母帮忙。
// (They rode motorcycles home, just in time to help their parents during the busy farming season.)
{182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20},
// 009909 但是因为还没到退休年龄,只能掰着指头捱日子。
// (But since they have not reached retirement age yet, they can only count the days on their fingers.)
{40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112},
// 009910 这几天雨水不断,人们恨不得待在家里不出门。
// (It has been raining nonstop these days, and people wish they could just stay home.)
{262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52},
};
// Prints the command-line synopsis to standard error.
// binName is the program name shown at the start of the synopsis line.
void usage(const char *binName) {
    std::cerr << "Usage:" << std::endl;
    std::cerr << "\t" << binName
              << " <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>"
              << std::endl;
}
// Entry point: synthesizes one of the built-in sentences into a WAV file.
//
// Arguments: <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>
// Returns 0 on success and -1 on any failure (bad arguments, model loading,
// inference, or writing the output file).
int main(int argc, char *argv[]) {
    if (argc < 5) {
        usage(argv[0]);
        return -1;
    }

    const char *AMModelPath = argv[1];
    const char *VOCModelPath = argv[2];
    const char *outputWavPath = argv[4];

    // strtol reports where parsing stopped, so empty input and trailing
    // garbage such as "3abc" can be rejected. On overflow it saturates to
    // LONG_MIN/LONG_MAX, which the range check below rejects as well.
    char *parseEnd = nullptr;
    const long sentenceNumber = std::strtol(argv[3], &parseEnd, 10);
    const long sentenceCount = static_cast<long>(sentencesToChoose.size());
    const bool parsedWholeArgument = parseEnd != argv[3] && *parseEnd == '\0';
    if (!parsedWholeArgument || sentenceNumber < 1 || sentenceNumber > sentenceCount) {
        std::cerr << "sentences-index out of range" << std::endl;
        return -1;
    }
    // The command line is 1-based; the table is 0-based.
    const std::size_t sentencesIndex = static_cast<std::size_t>(sentenceNumber - 1);

    Predictor predictor;
    if (!predictor.init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) {
        std::cerr << "predictor init failed" << std::endl;
        return -1;
    }

    if (!predictor.runModel(sentencesToChoose[sentencesIndex])) {
        std::cerr << "predictor run model failed" << std::endl;
        return -1;
    }

    std::cout << "Inference time: " << predictor.getInferenceTime() << "ms, WAV size: " << predictor.getWav().size() << std::endl;

    if (!predictor.writeWavToFile(outputWavPath)) {
        std::cerr << "write wav file failed" << std::endl;
        return -1;
    }

    return 0;
}
Loading…
Cancel
Save