diff --git a/demos/TTSArmLinux/.gitignore b/demos/TTSArmLinux/.gitignore new file mode 100644 index 000000000..13135e376 --- /dev/null +++ b/demos/TTSArmLinux/.gitignore @@ -0,0 +1,4 @@ +build/ +output/ +libs/ +models/ diff --git a/demos/TTSArmLinux/README.md b/demos/TTSArmLinux/README.md new file mode 100644 index 000000000..6fe66c5b6 --- /dev/null +++ b/demos/TTSArmLinux/README.md @@ -0,0 +1,43 @@ +# PaddleSpeech TTS 文本到语音 ARM Linux Demo + +修改自[demos/TTSAndroid](../TTSAndroid),模型也来自该安卓Demo。 + +使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库([Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)), +该库兼容 Ubuntu 16.04 到 20.04,如果你的发行版与其不兼容,可以自行从源代码编译。 + +该Demo自带的模型与[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12)不兼容,运行会崩溃,需要使用更新的版本。 +不过如果换成用 Paddle-Lite 2.12 opt 工具优化的模型,应该可以兼容。 + +### 配置 + +打开 [config.sh](config.sh) 按需修改配置。 + +默认编译64位版本,如果要编译32位版本,把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。 + +### 下载Paddle Lite库文件和模型文件 + +``` +./download.sh +``` + +### 安装依赖 + +以 Ubuntu 18.04 为例: + +``` +sudo apt install build-essential cmake libopencv-dev +``` + +### 编译 + +``` +./build.sh +``` + +### 运行 + +``` +./run.sh +``` + +将把[src/main.cc](src/main.cc)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件,保存在`output`文件夹中。 diff --git a/demos/TTSArmLinux/build.sh b/demos/TTSArmLinux/build.sh new file mode 100755 index 000000000..c872e5749 --- /dev/null +++ b/demos/TTSArmLinux/build.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -e + +cd "$(dirname "$(realpath "$0")")" + +# load configure +. ./config.sh + +# build +echo "ARM_ABI is ${ARM_ABI}" +echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}" + +rm -rf build +mkdir -p build +cd build + +cmake -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}" -DARM_ABI="${ARM_ABI}" ../src +make + +echo "make successful!" 
diff --git a/demos/TTSArmLinux/config.sh b/demos/TTSArmLinux/config.sh
new file mode 100644
index 000000000..9b895aa75
--- /dev/null
+++ b/demos/TTSArmLinux/config.sh
@@ -0,0 +1,14 @@
+# configuration
+
+ARM_ABI=armv8
+#ARM_ABI=armv7hf
+
+MODELS_DIR="${PWD}/models"
+LIBS_DIR="${PWD}/libs"
+
+PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz"
+PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
+
+MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz"
+AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
+VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
diff --git a/demos/TTSArmLinux/download.sh b/demos/TTSArmLinux/download.sh
new file mode 100755
index 000000000..6114dd7a6
--- /dev/null
+++ b/demos/TTSArmLinux/download.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+set -e
+
+cd "$(dirname "$(realpath "$0")")"
+
+# load configure (ARM_ABI, download URLs, target directories)
+. ./config.sh
+
+mkdir -p "$LIBS_DIR" "$MODELS_DIR"
+
+# Download an archive into a directory and unpack it there.
+#   $1: archive file name  $2: source URL  $3: target directory
+download() {
+    file="$1"
+    url="$2"
+    dir="$3"
+
+    cd "$dir"
+    echo "Downloading $file..."
+    wget -O "$file" "$url"
+    echo "Extracting $file..."
+    tar -vxf "$file"
+}
+
+# Paddle-Lite inference library matching the ARM_ABI chosen in config.sh
+download "$(basename "$PADDLE_LITE_DOWNLOAD_URL")" \
+    "$PADDLE_LITE_DOWNLOAD_URL" \
+    "$LIBS_DIR"
+
+# model archive that provides AM_MODEL_PATH and VOC_MODEL_PATH
+download "$(basename "$MODEL_DOWNLOAD_URL")" \
+    "$MODEL_DOWNLOAD_URL" \
+    "$MODELS_DIR"
diff --git a/demos/TTSArmLinux/run.sh b/demos/TTSArmLinux/run.sh
new file mode 100755
index 000000000..69a9a1b22
--- /dev/null
+++ b/demos/TTSArmLinux/run.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -e
+
+cd "$(dirname "$(realpath "$0")")"
+
+# load configure
+. ./config.sh
+
+# create dir
+rm -rf ./output
+mkdir -p ./output
+
+# run
+for i in {1..10}; do
+    (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i ./output/$i.wav)
+done
+
+ls -lh "$PWD"/output/*.wav
diff --git a/demos/TTSArmLinux/src/CMakeLists.txt b/demos/TTSArmLinux/src/CMakeLists.txt
new file mode 100644
index 000000000..b15d89934
--- /dev/null
+++ b/demos/TTSArmLinux/src/CMakeLists.txt
@@ -0,0 +1,58 @@
+cmake_minimum_required(VERSION 3.10)
+set(CMAKE_SYSTEM_NAME Linux)
+if(ARM_ABI STREQUAL "armv8")
+    set(CMAKE_SYSTEM_PROCESSOR aarch64)
+    set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
+    set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
+elseif(ARM_ABI STREQUAL "armv7hf")
+    set(CMAKE_SYSTEM_PROCESSOR arm)
+    set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
+    set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
+else()
+    message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.")
+    return()
+endif()
+
+project(paddlespeech_tts_demo)
+message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
+message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")
+
+include_directories(${PADDLE_LITE_DIR}/include)
+link_directories(${PADDLE_LITE_DIR}/libs/${ARM_ABI})
+link_directories(${PADDLE_LITE_DIR}/lib)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+if(ARM_ABI STREQUAL "armv8")
+    set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
+elseif(ARM_ABI STREQUAL "armv7hf")
+    set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
+    set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" )
+endif()
+
+find_package(OpenMP REQUIRED)
+if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
+    message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}")
+    message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}")
+    message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
+    message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}")
+else()
+    message(FATAL_ERROR "Could not found OpenMP!")
+    return()
+endif()
+find_package(OpenCV REQUIRED)
+if(OpenCV_FOUND OR OpenCV_CXX_FOUND)
+    include_directories(${OpenCV_INCLUDE_DIRS})
+    message(STATUS "OpenCV library status:")
+    message(STATUS " version: ${OpenCV_VERSION}")
+    message(STATUS " libraries: ${OpenCV_LIBS}")
+    message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
+else()
+    message(FATAL_ERROR "Could not found OpenCV!")
+    return()
+endif()
+
+add_executable(paddlespeech_tts_demo main.cc)
+target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared)
diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp
new file mode 100644
index 000000000..8c4f4655d
--- /dev/null
+++ b/demos/TTSArmLinux/src/Predictor.hpp
@@ -0,0 +1,232 @@
+#pragma once
+
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle_api.h"
+
+using namespace paddle::lite_api;
+
+// Two-stage TTS pipeline on Paddle-Lite: the acoustic model (AM) output is
+// fed to the vocoder (VOC), whose output is kept as float samples that can be
+// saved as a WAV file.
+class Predictor {
+private:
+    float inferenceTime = 0;  // duration of the last runModel() call, in ms
+    std::shared_ptr<PaddlePredictor> AMPredictor = nullptr;
+    std::shared_ptr<PaddlePredictor> VOCPredictor = nullptr;
+    std::vector<float> wav;  // samples produced by the last runModel() call
+
+public:
+    // Loads both models, releasing any previously loaded ones first.
+    // Returns false as soon as one of them fails to load.
+    bool init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
+        // Release model if exists
+        releaseModel();
+
+        AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
+        if (AMPredictor == nullptr) {
+            return false;
+        }
+        VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
+        if (VOCPredictor == nullptr) {
+            return false;
+        }
+
+        return true;
+    }
+
+    ~Predictor() {
+        releaseModel();
+        releaseWav();
+    }
+
+    // Builds a predictor for the model file at modelPath.
+    // Returns nullptr if the path is empty or cpuPowerMode is not one of the
+    // LITE_POWER_* names handled below.
+    std::shared_ptr<PaddlePredictor> loadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
+        if (modelPath.empty()) {
+            return nullptr;
+        }
+
+        // Set up the MobileConfig
+        MobileConfig config;
+        config.set_model_from_file(modelPath);
+        config.set_threads(cpuThreadNum);
+
+        if (cpuPowerMode == "LITE_POWER_HIGH") {
+            config.set_power_mode(PowerMode::LITE_POWER_HIGH);
+        } else if (cpuPowerMode == "LITE_POWER_LOW") {
+            config.set_power_mode(PowerMode::LITE_POWER_LOW);
+        } else if (cpuPowerMode == "LITE_POWER_FULL") {
+            config.set_power_mode(PowerMode::LITE_POWER_FULL);
+        } else if (cpuPowerMode == "LITE_POWER_NO_BIND") {
+            config.set_power_mode(PowerMode::LITE_POWER_NO_BIND);
+        } else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") {
+            config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH);
+        } else if (cpuPowerMode == "LITE_POWER_RAND_LOW") {
+            config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW);
+        } else {
+            std::cerr << "Unknown cpu power mode!" << std::endl;
+            return nullptr;
+        }
+
+        return CreatePaddlePredictor<MobileConfig>(config);
+    }
+
+    void releaseModel() {
+        AMPredictor = nullptr;
+        VOCPredictor = nullptr;
+    }
+
+    // Runs AM then VOC on the given phone IDs and stores the samples in wav.
+    // Returns false when the models are not loaded.
+    bool runModel(const std::vector<float> &phones) {
+        if (!isLoaded()) {
+            return false;
+        }
+
+        // Start timing; steady_clock is monotonic, so the measurement is not
+        // affected by wall-clock adjustments
+        auto start = std::chrono::steady_clock::now();
+
+        // Run inference
+        VOCOutputToWav(getAMOutput(phones));
+
+        // Stop timing
+        auto end = std::chrono::steady_clock::now();
+
+        // Elapsed time
+        std::chrono::duration<float> duration = end - start;
+        inferenceTime = duration.count() * 1000;  // unit: milliseconds
+
+        return true;
+    }
+
+    // Feeds the phone IDs to the AM and returns its first output tensor.
+    std::unique_ptr<const Tensor> getAMOutput(const std::vector<float> &phones) {
+        auto phones_handle = AMPredictor->GetInput(0);
+        phones_handle->Resize({static_cast<int64_t>(phones.size())});
+        phones_handle->CopyFromCpu(phones.data());
+        AMPredictor->Run();
+
+        // Fetch the output tensor
+        auto am_output_handle = AMPredictor->GetOutput(0);
+        // Print the output tensor's shape
+        std::cout << "AM Output shape: ";
+        auto shape = am_output_handle->shape();
+        for (auto s : shape) {
+            std::cout << s << ", ";
+        }
+        std::cout << std::endl;
+
+        return am_output_handle;
+    }
+
+    // Feeds the AM output to the VOC and copies the VOC output into wav.
+    void VOCOutputToWav(std::unique_ptr<const Tensor> &&input) {
+        auto mel_handle = VOCPredictor->GetInput(0);
+        // [?, 80]
+        auto dims = input->shape();
+        mel_handle->Resize(dims);
+        auto am_output_data = input->mutable_data<float>();
+        mel_handle->CopyFromCpu(am_output_data);
+        VOCPredictor->Run();
+
+        // Fetch the output tensor
+        auto voc_output_handle = VOCPredictor->GetOutput(0);
+        // Print the output tensor's shape
+        std::cout << "VOC Output shape: ";
+        auto shape = voc_output_handle->shape();
+        for (auto s : shape) {
+            std::cout << s << ", ";
+        }
+        std::cout << std::endl;
+
+        // Copy the output tensor's data; element count = product of dims
+        int64_t output_size = 1;
+        for (auto dim : voc_output_handle->shape()) {
+            output_size *= dim;
+        }
+        wav.resize(output_size);
+        auto output_data = voc_output_handle->mutable_data<float>();
+        std::copy_n(output_data, output_size, wav.data());
+    }
+
+    bool isLoaded() const {
+        return AMPredictor != nullptr && VOCPredictor != nullptr;
+    }
+
+    float getInferenceTime() const {
+        return inferenceTime;
+    }
+
+    const std::vector<float> & getWav() const {
+        return wav;
+    }
+
+    void releaseWav() {
+        wav.clear();
+    }
+
+    // Header of a mono 32-bit float WAV file. The defaults of size and
+    // data_size are placeholders that writeWavToFile() fills in.
+    struct WavHeader {
+        // RIFF header
+        char riff[4] = {'R', 'I', 'F', 'F'};
+        uint32_t size = 0;
+        char wave[4] = {'W', 'A', 'V', 'E'};
+
+        // FMT header
+        char fmt[4] = {'f', 'm', 't', ' '};
+        uint32_t fmt_size = 16;
+        uint16_t audio_format = 3;  // 3 = IEEE float
+        uint16_t num_channels = 1;
+
+        // If playback speed or pitch sounds wrong, change the sample rate
+        // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
+        uint32_t sample_rate = 24000;
+
+        uint32_t byte_rate = 96000;  // recomputed by writeWavToFile()
+        uint16_t block_align = 4;
+        uint16_t bits_per_sample = 32;
+
+        // DATA header
+        char data[4] = {'d', 'a', 't', 'a'};
+        uint32_t data_size = 0;
+    };
+
+    // Writes wav to wavPath as a WAV file.
+    // Returns false if the file cannot be opened or a write fails.
+    bool writeWavToFile(const std::string &wavPath) {
+        std::ofstream fout(wavPath, std::ios::binary);
+        if (!fout.is_open()) {
+            return false;
+        }
+
+        // Fill in the header
+        WavHeader header;
+        header.data_size = static_cast<uint32_t>(wav.size() * sizeof(float));
+        // The RIFF chunk size counts everything after the 8-byte
+        // "RIFF" + size prefix: the rest of the header AND the sample data
+        header.size = static_cast<uint32_t>(sizeof(header) - 8) + header.data_size;
+        header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
+        header.block_align = header.num_channels * header.bits_per_sample / 8;
+        fout.write(reinterpret_cast<const char *>(&header), sizeof(header));
+
+        // Write the samples
+        fout.write(reinterpret_cast<const char *>(wav.data()), header.data_size);
+
+        fout.close();
+        // Report failed writes (e.g. disk full) instead of claiming success
+        return !fout.fail();
+    }
+};
+
+// The struct is written to disk verbatim, so it must contain no padding.
+static_assert(sizeof(Predictor::WavHeader) == 44, "WavHeader must be exactly 44 bytes");
diff --git a/demos/TTSArmLinux/src/main.cc b/demos/TTSArmLinux/src/main.cc
new file mode 100644
index 000000000..64aeaa857
--- /dev/null
+++ b/demos/TTSArmLinux/src/main.cc
@@ -0,0 +1,76 @@
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include "paddle_api.h"
+#include "Predictor.hpp"
+
+using namespace paddle::lite_api;
+
+// Phone-ID sequences of the ten demo sentences; the comment above each entry
+// gives the sentence ID and its text.
+std::vector<std::vector<float>> sentencesToChoose = {
+    // 009901 昨日,这名“伤者”与医生全部被警方依法刑事拘留。
+    {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141},
+    // 009902 钱伟长想到上海来办学校是经过深思熟虑的。
+    {174, 83, 213, 39, 20, 260, 89, 40, 30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45},
+    // 009903 她见我一进门就骂,吃饭时也骂,骂得我抬不起头。
+    {182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168},
+    // 009904 李述德在离开之前,只说了一句“柱驼杀父亲了”。
+    {153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45},
+    // 009905 这种车票和保险单捆绑出售属于重复性购买。
+    {262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9},
+    // 009906 戴佩妮的男友西米露接唱情歌,让她非常开心。
+    {40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120},
+    // 009907 观大势、谋大局、出大策始终是该院的办院方针。
+    {70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51},
+    // 009908 他们骑着摩托回家,正好为农忙时的父母帮忙。
+    {182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20},
+    // 009909 但是因为还没到退休年龄,只能掰着指头捱日子。
+    {40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112},
+    // 009910 这几天雨水不断,人们恨不得待在家里不出门。
+    {262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52},
+};
+
+// Prints the command-line synopsis to stderr.
+void usage(const char *binName) {
+    std::cerr << "Usage:" << std::endl
+              << "\t" << binName << " <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>" << std::endl;
+}
+
+int main(int argc, char *argv[]) {
+    if (argc < 5) {
+        usage(argv[0]);
+        return -1;
+    }
+    const char *AMModelPath = argv[1];
+    const char *VOCModelPath = argv[2];
+    // The index is 1-based on the command line
+    int sentencesIndex = atoi(argv[3]) - 1;
+    const char *outputWavPath = argv[4];
+
+    // The cast is safe: the negative case is rejected first
+    if (sentencesIndex < 0 || static_cast<std::size_t>(sentencesIndex) >= sentencesToChoose.size()) {
+        std::cerr << "sentences-index out of range" << std::endl;
+        return -1;
+    }
+
+    Predictor predictor;
+    if (!predictor.init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) {
+        std::cerr << "predictor init failed" << std::endl;
+        return -1;
+    }
+
+    if (!predictor.runModel(sentencesToChoose[sentencesIndex])) {
+        std::cerr << "predictor run model failed" << std::endl;
+        return -1;
+    }
+
+    std::cout << "Inference time: " << predictor.getInferenceTime() << "ms, WAV size: " << predictor.getWav().size() << std::endl;
+
+    if (!predictor.writeWavToFile(outputWavPath)) {
+        std::cerr << "write wav file failed" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}