diff --git a/demos/TTSArmLinux/config.sh b/demos/TTSArmLinux/config.sh
index 0a04f18ee..19d53781a 100644
--- a/demos/TTSArmLinux/config.sh
+++ b/demos/TTSArmLinux/config.sh
@@ -10,5 +10,5 @@ OUTPUT_DIR="${PWD}/output"
 
 PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
 #PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"
-AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
-VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
+ACOUSTIC_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
+VOCODER_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
diff --git a/demos/TTSArmLinux/run.sh b/demos/TTSArmLinux/run.sh
index efcb61b5b..2adcc1b56 100755
--- a/demos/TTSArmLinux/run.sh
+++ b/demos/TTSArmLinux/run.sh
@@ -12,7 +12,7 @@ mkdir -p "$OUTPUT_DIR"
 
 # run
 for i in {1..10}; do
-    (set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i "$OUTPUT_DIR/$i.wav")
+    (set -x; ./build/paddlespeech_tts_demo "$ACOUSTIC_MODEL_PATH" "$VOCODER_PATH" $i "$OUTPUT_DIR/$i.wav")
 done
 
 ls -lh "$OUTPUT_DIR"/*.wav
diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp
index 5c59d417f..0878c9d65 100644
--- a/demos/TTSArmLinux/src/Predictor.hpp
+++ b/demos/TTSArmLinux/src/Predictor.hpp
@@ -45,8 +45,8 @@ public:
     inline uint16_t GetWavAudioFormat();
 
     bool Init(
-        const std::string &AMModelPath,
-        const std::string &VOCModelPath,
+        const std::string &AcousticModelPath,
+        const std::string &VocoderPath,
         PowerMode cpuPowerMode,
        int cpuThreadNum,
        // WAV sample rate (must match the model output)
@@ -57,12 +57,12 @@ public:
        // Release model if exists
        ReleaseModel();
 
-        AM_predictor_ = LoadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
-        if (AM_predictor_ == nullptr) {
+        acoustic_model_predictor_ = LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
+        if (acoustic_model_predictor_ == nullptr) {
            return false;
        }
-        VOC_predictor_ = LoadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
-        if (VOC_predictor_ == nullptr) {
+        vocoder_predictor_ = LoadModel(VocoderPath, cpuThreadNum, cpuPowerMode);
+        if (vocoder_predictor_ == nullptr) {
            return false;
        }
 
@@ -91,8 +91,8 @@
    }
 
    void ReleaseModel() {
-        AM_predictor_ = nullptr;
-        VOC_predictor_ = nullptr;
+        acoustic_model_predictor_ = nullptr;
+        vocoder_predictor_ = nullptr;
    }
 
    bool RunModel(const std::vector<int64_t> &phones) {
@@ -104,7 +104,7 @@
        auto start = std::chrono::system_clock::now();
 
        // run inference
-        VOCOutputToWav(GetVOCOutput(GetAMOutput(phones)));
+        VocoderOutputToWav(GetVocoderOutput(GetAcousticModelOutput(phones)));
 
        // stop timing
        auto end = std::chrono::system_clock::now();
@@ -116,16 +116,16 @@
        return true;
    }
 
-    std::unique_ptr<const Tensor> GetAMOutput(const std::vector<int64_t> &phones) {
-        auto phones_handle = AM_predictor_->GetInput(0);
+    std::unique_ptr<const Tensor> GetAcousticModelOutput(const std::vector<int64_t> &phones) {
+        auto phones_handle = acoustic_model_predictor_->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
-        AM_predictor_->Run();
+        acoustic_model_predictor_->Run();
 
        // get the output tensor
-        auto am_output_handle = AM_predictor_->GetOutput(0);
+        auto am_output_handle = acoustic_model_predictor_->GetOutput(0);
        // print the shape of the output tensor
-        std::cout << "AM Output shape: ";
+        std::cout << "Acoustic Model Output shape: ";
        auto shape = am_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
@@ -135,19 +135,19 @@ public:
        return am_output_handle;
    }
 
-    std::unique_ptr<const Tensor> GetVOCOutput(std::unique_ptr<const Tensor> &&amOutput) {
-        auto mel_handle = VOC_predictor_->GetInput(0);
+    std::unique_ptr<const Tensor> GetVocoderOutput(std::unique_ptr<const Tensor> &&amOutput) {
+        auto mel_handle = vocoder_predictor_->GetInput(0);
        // [?, 80]
        auto dims = amOutput->shape();
        mel_handle->Resize(dims);
        auto am_output_data = amOutput->mutable_data<float>();
        mel_handle->CopyFromCpu(am_output_data);
-        VOC_predictor_->Run();
+        vocoder_predictor_->Run();
 
        // get the output tensor
-        auto voc_output_handle = VOC_predictor_->GetOutput(0);
+        auto voc_output_handle = vocoder_predictor_->GetOutput(0);
        // print the shape of the output tensor
-        std::cout << "VOC Output shape: ";
+        std::cout << "Vocoder Output shape: ";
        auto shape = voc_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
@@ -157,7 +157,7 @@ public:
        return voc_output_handle;
    }
 
-    void VOCOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) {
+    void VocoderOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) {
        // get the output tensor data
        int64_t output_size = 1;
        for (auto dim : vocOutput->shape()) {
@@ -175,7 +175,7 @@
    void SaveFloatWav(float *floatWav, int64_t size);
 
    bool IsLoaded() {
-        return AM_predictor_ != nullptr && VOC_predictor_ != nullptr;
+        return acoustic_model_predictor_ != nullptr && vocoder_predictor_ != nullptr;
    }
 
    float GetInferenceTime() {
@@ -231,8 +231,8 @@ private:
    float inference_time_ = 0;
    uint32_t wav_sample_rate_ = 0;
    std::vector<WavDataType> wav_;
-    std::shared_ptr<PaddlePredictor> AM_predictor_ = nullptr;
-    std::shared_ptr<PaddlePredictor> VOC_predictor_ = nullptr;
+    std::shared_ptr<PaddlePredictor> acoustic_model_predictor_ = nullptr;
+    std::shared_ptr<PaddlePredictor> vocoder_predictor_ = nullptr;
 };
 
 template<>
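
For context on the renamed members: both `acoustic_model_predictor_` and `vocoder_predictor_` are plain Paddle Lite predictors loaded from `.nb` files by the `LoadModel` helper referenced in the hunks above. That helper is outside this diff; the sketch below only illustrates the usual Paddle Lite pattern such a helper follows (`MobileConfig` plus `CreatePaddlePredictor`), not necessarily the demo's exact implementation.

```cpp
// Minimal sketch of a LoadModel-style helper, assuming Paddle Lite's C++ API
// (paddle_api.h). The demo's real helper may differ in details.
#include <memory>
#include <string>

#include "paddle_api.h"

using paddle::lite_api::CreatePaddlePredictor;
using paddle::lite_api::MobileConfig;
using paddle::lite_api::PaddlePredictor;
using paddle::lite_api::PowerMode;

std::shared_ptr<PaddlePredictor> LoadModel(const std::string &modelPath,
                                           int cpuThreadNum,
                                           PowerMode cpuPowerMode) {
    // Configure an optimized (.nb) model for ARM CPU inference.
    MobileConfig config;
    config.set_model_from_file(modelPath);
    config.set_threads(cpuThreadNum);
    config.set_power_mode(cpuPowerMode);

    // The acoustic model and the vocoder are created the same way, which is
    // why both renamed members share the std::shared_ptr<PaddlePredictor> type.
    return CreatePaddlePredictor<MobileConfig>(config);
}
```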