add a TTS demo for ARM Linux

3 years ago · 96827a00dd
parent 5430a366ab
commit 96827a00dd
9 changed files with 463 additions and 0 deletions
--- a/demos/TTSArmLinux/.gitignore
+++ b/demos/TTSArmLinux/.gitignore
@ -0,0 +1,4 @@
+build/
+output/
+libs/
+models/
--- a/demos/TTSArmLinux/README.md
+++ b/demos/TTSArmLinux/README.md
@ -0,0 +1,43 @@
+# PaddleSpeech TTS 文本到语音 ARM Linux Demo
+
+修改自[demos/TTSAndroid](../TTSAndroid)，模型也来自该安卓Demo。
+
+使用与安卓Demo版本相同的[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)推理库（[Paddle-Lite:68b66fd35](https://github.com/SwimmingTiger/Paddle-Lite/releases/tag/68b66fd35)），
+该库兼容 Ubuntu 16.04 到 20.04，如果你的发行版与其不兼容，可以自行从源代码编译。
+
+该Demo自带的模型与[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12)不兼容，运行会崩溃，需要使用更新的版本。
+不过如果换成用 Paddle-Lite 2.12 opt 工具优化的模型，应该可以兼容。
+
+### 配置
+
+打开 [config.sh](config.sh) 按需修改配置。
+
+默认编译64位版本，如果要编译32位版本，把`ARM_ABI=armv8`改成`ARM_ABI=armv7hf`。
+
+### 下载Paddle Lite库文件和模型文件
+
+```
+./download.sh
+```
+
+### 安装依赖
+
+以 Ubuntu 18.04 为例：
+
+```
+sudo apt install build-essential cmake libopencv-dev
+```
+
+### 编译
+
+```
+./build.sh
+```
+
+### 运行
+
+```
+./run.sh
+```
+
+将把[src/main.cc](src/main.cc)里定义在`sentencesToChoose`数组中的十句话转换为`wav`文件，保存在`output`文件夹中。
--- a/demos/TTSArmLinux/build.sh
+++ b/demos/TTSArmLinux/build.sh
@ -0,0 +1,20 @@
+#!/bin/bash
+# Configure and compile the demo from scratch into ./build.
+set -e
+
+# Work from the directory that holds this script so relative paths resolve.
+cd "$(dirname "$(realpath "$0")")"
+
+# Pull in ARM_ABI and PADDLE_LITE_DIR.
+source ./config.sh
+
+printf 'ARM_ABI is %s\n' "$ARM_ABI"
+printf 'PADDLE_LITE_DIR is %s\n' "$PADDLE_LITE_DIR"
+
+# Always start from an empty build tree.
+rm -rf build
+mkdir -p build
+cd build
+
+cmake \
+    -DPADDLE_LITE_DIR="$PADDLE_LITE_DIR" \
+    -DARM_ABI="$ARM_ABI" \
+    ../src
+make
+
+printf '%s\n' "make successful!"
--- a/demos/TTSArmLinux/config.sh
+++ b/demos/TTSArmLinux/config.sh
@ -0,0 +1,14 @@
+# configuration
+
+ARM_ABI=armv8
+#ARM_ABI=armv7hf
+
+MODELS_DIR="${PWD}/models"
+LIBS_DIR="${PWD}/libs"
+
+PADDLE_LITE_DOWNLOAD_URL="https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv.tar.gz"
+PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
+
+MODEL_DOWNLOAD_URL="https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz"
+AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
+VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
--- a/demos/TTSArmLinux/download.sh
+++ b/demos/TTSArmLinux/download.sh
@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+cd "$(dirname "$(realpath "$0")")"
+basedir="$PWD"
+
+mkdir -p ./libs ./models
+
+download() {
+    file="$1"
+    url="$2"
+    dir="$3"
+
+    cd "$dir"
+    echo "Downloading $file..."
+    wget -O "$file" "$url"
+    echo "Extracting $file..."
+    tar -vxf "$file"
+}
+
+download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
+    'https://github.com/SwimmingTiger/Paddle-Lite/releases/download/68b66fd35/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
+    "$basedir/libs"
+
+download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
+    'https://paddlespeech.bj.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
+    "$basedir/models"
--- a/demos/TTSArmLinux/run.sh
+++ b/demos/TTSArmLinux/run.sh
@ -0,0 +1,18 @@
+#!/bin/bash
+set -e
+
+cd "$(dirname "$(realpath "$0")")"
+
+# load configure
+. ./config.sh
+
+# create dir
+rm -rf ./output
+mkdir -p ./output
+
+# run
+for i in {1..10}; do
+    (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i ./output/$i.wav)
+done
+
+ls -lh "$PWD"/output/*.wav
--- a/demos/TTSArmLinux/src/CMakeLists.txt
+++ b/demos/TTSArmLinux/src/CMakeLists.txt
@ -0,0 +1,58 @@
+cmake_minimum_required(VERSION 3.10)
+
+# ---------------------------------------------------------------------------
+# Inputs (pass with -D on the command line, see build.sh):
+#   ARM_ABI          armv8 (64-bit) or armv7hf (32-bit hard-float)
+#   PADDLE_LITE_DIR  the "cxx" directory of an unpacked Paddle-Lite package
+#
+# The compiler has to be chosen before project(), because project() is the
+# call that probes and caches the toolchain.
+# ---------------------------------------------------------------------------
+set(CMAKE_SYSTEM_NAME Linux)
+if("${ARM_ABI}" STREQUAL "armv8")
+    set(CMAKE_SYSTEM_PROCESSOR aarch64)
+    set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
+    set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
+    set(TTS_DEMO_ARCH_FLAGS "-march=armv8-a")
+elseif("${ARM_ABI}" STREQUAL "armv7hf")
+    set(CMAKE_SYSTEM_PROCESSOR arm)
+    set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
+    set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
+    set(TTS_DEMO_ARCH_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4")
+else()
+    # FATAL_ERROR stops processing, so no return() is needed afterwards.
+    message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.")
+endif()
+
+if(NOT PADDLE_LITE_DIR)
+    message(FATAL_ERROR "PADDLE_LITE_DIR is not set; pass -DPADDLE_LITE_DIR=<paddle-lite-package>/cxx.")
+endif()
+
+project(paddlespeech_tts_demo)
+message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
+message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")
+
+# Architecture flags stay in CMAKE_<LANG>_FLAGS (as before) so they are used
+# for compiling and for linking. They are prepended so that flags supplied by
+# the user come later on the command line.
+set(CMAKE_C_FLAGS "${TTS_DEMO_ARCH_FLAGS} ${CMAKE_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "${TTS_DEMO_ARCH_FLAGS} ${CMAKE_CXX_FLAGS}")
+
+# REQUIRED makes find_package() abort on failure, so no manual else() branch
+# is needed for either dependency.
+find_package(OpenMP REQUIRED)
+message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
+message(STATUS "OpenMP C flags:  ${OpenMP_C_FLAGS}")
+message(STATUS "OpenMP CXX flags:  ${OpenMP_CXX_FLAGS}")
+message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES:  ${OpenMP_CXX_LIB_NAMES}")
+message(STATUS "OpenMP OpenMP_CXX_LIBRARIES:  ${OpenMP_CXX_LIBRARIES}")
+
+# OpenCV remains a build prerequisite and its headers are exposed to the
+# target, matching the previous behaviour. Nothing is linked from it.
+find_package(OpenCV REQUIRED)
+message(STATUS "OpenCV library status:")
+message(STATUS "    version: ${OpenCV_VERSION}")
+message(STATUS "    libraries: ${OpenCV_LIBS}")
+message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
+
+# Locate the Paddle-Lite shared library explicitly instead of relying on
+# directory-scoped link_directories(); a missing package is then reported at
+# configure time rather than as a linker error.
+find_library(PADDLE_LITE_LIBRARY
+    NAMES paddle_light_api_shared
+    HINTS
+        "${PADDLE_LITE_DIR}/libs/${ARM_ABI}"
+        "${PADDLE_LITE_DIR}/lib"
+)
+if(NOT PADDLE_LITE_LIBRARY)
+    message(FATAL_ERROR "Could not find paddle_light_api_shared under ${PADDLE_LITE_DIR}; run download.sh first.")
+endif()
+message(STATUS "PADDLE LITE LIB: ${PADDLE_LITE_LIBRARY}")
+
+add_executable(paddlespeech_tts_demo main.cc)
+
+# Equivalent to the former "-std=c++11" flag, expressed as target properties.
+set_target_properties(paddlespeech_tts_demo PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON
+    CXX_EXTENSIONS OFF
+)
+
+target_include_directories(paddlespeech_tts_demo PRIVATE
+    "${PADDLE_LITE_DIR}/include"
+    ${OpenCV_INCLUDE_DIRS}
+)
+
+# OpenMP::OpenMP_CXX carries the OpenMP compile flag and runtime library.
+target_link_libraries(paddlespeech_tts_demo PRIVATE
+    "${PADDLE_LITE_LIBRARY}"
+    OpenMP::OpenMP_CXX
+)
--- a/demos/TTSArmLinux/src/Predictor.hpp
+++ b/demos/TTSArmLinux/src/Predictor.hpp
@ -0,0 +1,208 @@
+#include <algorithm>
+#include <chrono>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle_api.h"
+
+using namespace paddle::lite_api;
+
+/**
+ * Two-stage text-to-speech pipeline built on Paddle-Lite predictors.
+ *
+ * An acoustic model (AM) consumes a sequence of phone values and produces an
+ * intermediate tensor; a vocoder model (VOC) consumes that tensor and yields
+ * float samples, which are kept in memory and can be saved as a WAV file.
+ */
+class Predictor {
+private:
+    float inferenceTime = 0;  // duration of the last runModel() call, in ms
+    std::shared_ptr<PaddlePredictor> AMPredictor = nullptr;
+    std::shared_ptr<PaddlePredictor> VOCPredictor = nullptr;
+    std::vector<float> wav;   // samples produced by the last runModel() call
+
+public:
+    /**
+     * Load both models, replacing any that were loaded before.
+     *
+     * @param AMModelPath   path of the acoustic model file
+     * @param VOCModelPath  path of the vocoder model file
+     * @param cpuThreadNum  thread count passed to MobileConfig::set_threads
+     * @param cpuPowerMode  one of the LITE_POWER_* names accepted by loadModel
+     * @return true when both predictors were created; on failure no model
+     *         stays loaded.
+     */
+    bool init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
+        // Release model if exists
+        releaseModel();
+
+        AMPredictor = loadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
+        if (AMPredictor == nullptr) {
+            return false;
+        }
+        VOCPredictor = loadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
+        if (VOCPredictor == nullptr) {
+            // Do not keep a half-initialised pipeline around.
+            releaseModel();
+            return false;
+        }
+
+        return true;
+    }
+
+    ~Predictor() {
+        releaseModel();
+        releaseWav();
+    }
+
+    /**
+     * Create a predictor for a single model file.
+     *
+     * @return the predictor, or nullptr when modelPath is empty or
+     *         cpuPowerMode is not a recognised LITE_POWER_* name.
+     */
+    std::shared_ptr<PaddlePredictor> loadModel(const std::string &modelPath, int cpuThreadNum, const std::string &cpuPowerMode) {
+        if (modelPath.empty()) {
+            return nullptr;
+        }
+
+        // Set up the MobileConfig
+        MobileConfig config;
+        config.set_model_from_file(modelPath);
+        config.set_threads(cpuThreadNum);
+
+        if (cpuPowerMode == "LITE_POWER_HIGH") {
+            config.set_power_mode(PowerMode::LITE_POWER_HIGH);
+        } else if (cpuPowerMode == "LITE_POWER_LOW") {
+            config.set_power_mode(PowerMode::LITE_POWER_LOW);
+        } else if (cpuPowerMode == "LITE_POWER_FULL") {
+            config.set_power_mode(PowerMode::LITE_POWER_FULL);
+        } else if (cpuPowerMode == "LITE_POWER_NO_BIND") {
+            config.set_power_mode(PowerMode::LITE_POWER_NO_BIND);
+        } else if (cpuPowerMode == "LITE_POWER_RAND_HIGH") {
+            config.set_power_mode(PowerMode::LITE_POWER_RAND_HIGH);
+        } else if (cpuPowerMode == "LITE_POWER_RAND_LOW") {
+            config.set_power_mode(PowerMode::LITE_POWER_RAND_LOW);
+        } else {
+            std::cerr << "Unknown cpu power mode!" << std::endl;
+            return nullptr;
+        }
+
+        return CreatePaddlePredictor<MobileConfig>(config);
+    }
+
+    /** Drop both predictors. */
+    void releaseModel() {
+        AMPredictor = nullptr;
+        VOCPredictor = nullptr;
+    }
+
+    /**
+     * Run AM followed by VOC on the given phones and store the samples.
+     *
+     * @return false when the models are not loaded, true otherwise. The
+     *         elapsed time is available through getInferenceTime().
+     */
+    bool runModel(const std::vector<float> &phones) {
+        if (!isLoaded()) {
+            return false;
+        }
+
+        // Start timing. steady_clock is monotonic, so the measurement cannot
+        // be distorted by wall-clock adjustments.
+        auto start = std::chrono::steady_clock::now();
+
+        // Run inference
+        VOCOutputToWav(getAMOutput(phones));
+
+        // Stop timing
+        auto end = std::chrono::steady_clock::now();
+
+        // Compute the elapsed time
+        std::chrono::duration<float> duration = end - start;
+        inferenceTime = duration.count() * 1000; // unit: milliseconds
+
+        return true;
+    }
+
+    /**
+     * Feed the phones to the acoustic model and return its first output
+     * tensor. The output shape is printed to stdout.
+     */
+    std::unique_ptr<const Tensor> getAMOutput(const std::vector<float> &phones) {
+        auto phones_handle = AMPredictor->GetInput(0);
+        phones_handle->Resize({static_cast<int64_t>(phones.size())});
+        phones_handle->CopyFromCpu(phones.data());
+        AMPredictor->Run();
+
+        // Fetch the output tensor
+        auto am_output_handle = AMPredictor->GetOutput(0);
+        // Print the shape of the output tensor
+        std::cout << "AM Output shape: ";
+        auto shape = am_output_handle->shape();
+        for (auto s : shape) {
+            std::cout << s << ", ";
+        }
+        std::cout << std::endl;
+
+        return am_output_handle;
+    }
+
+    /**
+     * Feed the acoustic-model output to the vocoder and copy the resulting
+     * samples into `wav`. The output shape is printed to stdout.
+     */
+    void VOCOutputToWav(std::unique_ptr<const Tensor> &&input) {
+        auto mel_handle = VOCPredictor->GetInput(0);
+        // [?, 80]
+        auto dims = input->shape();
+        mel_handle->Resize(dims);
+        auto am_output_data = input->mutable_data<float>();
+        mel_handle->CopyFromCpu(am_output_data);
+        VOCPredictor->Run();
+
+        // Fetch the output tensor
+        auto voc_output_handle = VOCPredictor->GetOutput(0);
+        // Print the shape of the output tensor
+        std::cout << "VOC Output shape: ";
+        auto shape = voc_output_handle->shape();
+        for (auto s : shape) {
+            std::cout << s << ", ";
+        }
+        std::cout << std::endl;
+
+        // Element count is the product of all dimensions
+        int64_t output_size = 1;
+        for (auto dim : shape) {
+            output_size *= dim;
+        }
+        wav.resize(output_size);
+        auto output_data = voc_output_handle->mutable_data<float>();
+        std::copy_n(output_data, output_size, wav.data());
+    }
+
+    /** @return true when both the AM and the VOC predictor exist. */
+    bool isLoaded() const {
+        return AMPredictor != nullptr && VOCPredictor != nullptr;
+    }
+
+    /** @return duration of the last runModel() call in milliseconds. */
+    float getInferenceTime() const {
+        return inferenceTime;
+    }
+
+    /** @return the samples produced by the last runModel() call. */
+    const std::vector<float> & getWav() const {
+        return wav;
+    }
+
+    /** Discard the stored samples. */
+    void releaseWav() {
+        wav.clear();
+    }
+
+    /**
+     * 44-byte RIFF/WAVE header for mono 32-bit IEEE-float samples
+     * (audio_format 3). `size`, `data_size`, `byte_rate` and `block_align`
+     * are recomputed by writeWavToFile() before the header is written.
+     */
+    struct WavHeader {
+        // RIFF header
+        char riff[4] = {'R', 'I', 'F', 'F'};
+        uint32_t size = 0;
+        char wave[4] = {'W', 'A', 'V', 'E'};
+
+        // FMT header
+        char fmt[4] = {'f', 'm', 't', ' '};
+        uint32_t fmt_size = 16;
+        uint16_t audio_format = 3;
+        uint16_t num_channels = 1;
+
+        // If playback speed or pitch sounds wrong, change the sample rate.
+        // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
+        uint32_t sample_rate = 24000;
+
+        // sample_rate * num_channels * bits_per_sample / 8
+        uint32_t byte_rate = 96000;
+        uint16_t block_align = 4;
+        uint16_t bits_per_sample = 32;
+
+        // DATA header
+        char data[4] = {'d', 'a', 't', 'a'};
+        uint32_t data_size = 0;
+    };
+
+    /**
+     * Write the stored samples to wavPath as a float WAV file.
+     *
+     * @return false when the file cannot be opened or any write fails.
+     */
+    bool writeWavToFile(const std::string &wavPath) {
+        std::ofstream fout(wavPath, std::ios::binary);
+        if (!fout.is_open()) {
+            return false;
+        }
+
+        // Fill in and write the header
+        WavHeader header;
+        header.data_size = static_cast<uint32_t>(wav.size() * sizeof(float));
+        // The RIFF chunk size counts everything after the 8-byte
+        // "RIFF" + size prefix, i.e. the rest of the header AND the samples.
+        header.size = static_cast<uint32_t>(sizeof(header) - 8) + header.data_size;
+        header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
+        header.block_align = header.num_channels * header.bits_per_sample / 8;
+        fout.write(reinterpret_cast<const char*>(&header), sizeof(header));
+
+        // Write the sample data
+        fout.write(reinterpret_cast<const char*>(wav.data()), header.data_size);
+
+        fout.close();
+        // fail() is set if either write or the final flush in close() failed.
+        return !fout.fail();
+    }
+};
--- a/demos/TTSArmLinux/src/main.cc
+++ b/demos/TTSArmLinux/src/main.cc
@ -0,0 +1,71 @@
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include "paddle_api.h"
+#include "Predictor.hpp"
+
+using namespace paddle::lite_api;
+
+// Built-in demo inputs, one inner vector per sentence; main() selects one by
+// its 1-based index and passes it to Predictor::runModel() as `phones`.
+// Each entry is preceded by an ID and the sentence text it encodes
+// (presumably utterance IDs from the training corpus -- TODO confirm).
+std::vector<std::vector<float>> sentencesToChoose = {
+    // 009901 昨日，这名“伤者”与医生全部被警方依法刑事拘留。
+    // (Yesterday the "injured man" and the doctors were all placed under criminal detention by the police according to law.)
+    {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141},
+    // 009902 钱伟长想到上海来办学校是经过深思熟虑的。
+    // (Qian Weichang's wish to come to Shanghai and run a school was carefully thought through.)
+    {174, 83, 213, 39, 20, 260, 89, 40, 30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45},
+    // 009903 她见我一进门就骂，吃饭时也骂，骂得我抬不起头。
+    // (She scolded me the moment I came in, and again at meals, until I could not hold my head up.)
+    {182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168},
+    // 009904 李述德在离开之前，只说了一句“柱驼杀父亲了”。
+    // (Before leaving, Li Shude said only one sentence: "Zhutuo killed father.")
+    {153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45},
+    // 009905 这种车票和保险单捆绑出售属于重复性购买。
+    // (Selling such tickets bundled with an insurance policy amounts to a duplicate purchase.)
+    {262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9},
+    // 009906 戴佩妮的男友西米露接唱情歌，让她非常开心。
+    // (Penny Tai's boyfriend Ximilu took over singing a love song, which made her very happy.)
+    {40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120},
+    // 009907 观大势、谋大局、出大策始终是该院的办院方针。
+    // (Watching the major trends, planning for the big picture and producing major strategies has always been the institute's guiding policy.)
+    {70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51},
+    // 009908 他们骑着摩托回家，正好为农忙时的父母帮忙。
+    // (They rode home by motorcycle, just in time to help their parents during the busy farming season.)
+    {182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20},
+    // 009909 但是因为还没到退休年龄，只能掰着指头捱日子。
+    // (But with retirement age not yet reached, all one could do was count the days on one's fingers.)
+    {40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112},
+    // 009910 这几天雨水不断，人们恨不得待在家里不出门。
+    // (It has rained nonstop these days, and people would rather stay home than go out.)
+    {262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52},
+};
+
+// Print the command-line synopsis of this program to stderr.
+// binName is the executable name shown in the synopsis (argv[0] in main).
+void usage(const char *binName) {
+    std::cerr << "Usage:" << std::endl;
+    std::cerr << "\t" << binName
+              << " <AM-model-path> <VOC-model-path> <sentences-index:1-10> <output-wav-path>"
+              << std::endl;
+}
+
+// Entry point: synthesize one built-in sentence and save it as a WAV file.
+//
+// Arguments: <AM-model-path> <VOC-model-path> <sentences-index> <output-wav-path>
+// The sentence index is 1-based and selects an entry of sentencesToChoose.
+// Returns 0 on success and -1 on any error (after printing a message to stderr).
+int main(int argc, char *argv[]) {
+    if (argc < 5) {
+        usage(argv[0]);
+        return -1;
+    }
+    const char *AMModelPath = argv[1];
+    const char *VOCModelPath = argv[2];
+    const char *outputWavPath = argv[4];
+
+    // Parse the index strictly: atoi() would silently accept input such as
+    // "3abc". strtol() saturates on overflow, which the range test rejects.
+    char *parseEnd = nullptr;
+    const long requestedIndex = std::strtol(argv[3], &parseEnd, 10);
+    const long sentenceCount = static_cast<long>(sentencesToChoose.size());
+    const bool isNumber = parseEnd != argv[3] && *parseEnd == '\0';
+    if (!isNumber || requestedIndex < 1 || requestedIndex > sentenceCount) {
+        std::cerr << "sentences-index out of range" << std::endl;
+        return -1;
+    }
+    // Convert from the 1-based command-line value to a 0-based index.
+    const int sentencesIndex = static_cast<int>(requestedIndex) - 1;
+
+    Predictor predictor;
+    if (!predictor.init(AMModelPath, VOCModelPath, 1, "LITE_POWER_HIGH")) {
+        std::cerr << "predictor init failed" << std::endl;
+        return -1;
+    }
+
+    if (!predictor.runModel(sentencesToChoose[sentencesIndex])) {
+        std::cerr << "predictor run model failed" << std::endl;
+        return -1;
+    }
+
+    std::cout << "Inference time: " << predictor.getInferenceTime() << "ms, WAV size: " << predictor.getWav().size() << std::endl;
+
+    if (!predictor.writeWavToFile(outputWavPath)) {
+        std::cerr << "write wav file failed" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}