demos/TTSArmLinux: Use the full spellings Acoustic Model and Vocoder instead of the abbreviations AM and VOC

pull/3018/head
彭逸豪 3 years ago
parent 47e1621e69
commit bc739c7b78

@ -10,5 +10,5 @@ OUTPUT_DIR="${PWD}/output"
PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
#PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"
AM_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOC_MODEL_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
ACOUSTIC_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOCODER_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"

@ -12,7 +12,7 @@ mkdir -p "$OUTPUT_DIR"
# run
for i in {1..10}; do
(set -x; ./build/paddlespeech_tts_demo "$AM_MODEL_PATH" "$VOC_MODEL_PATH" $i "$OUTPUT_DIR/$i.wav")
(set -x; ./build/paddlespeech_tts_demo "$ACOUSTIC_MODEL_PATH" "$VOCODER_PATH" $i "$OUTPUT_DIR/$i.wav")
done
ls -lh "$OUTPUT_DIR"/*.wav

@ -45,8 +45,8 @@ public:
inline uint16_t GetWavAudioFormat();
bool Init(
const std::string &AMModelPath,
const std::string &VOCModelPath,
const std::string &AcousticModelPath,
const std::string &VocoderPath,
PowerMode cpuPowerMode,
int cpuThreadNum,
// WAV采样率必须与模型输出匹配
@ -57,12 +57,12 @@ public:
// Release model if exists
ReleaseModel();
AM_predictor_ = LoadModel(AMModelPath, cpuThreadNum, cpuPowerMode);
if (AM_predictor_ == nullptr) {
acoustic_model_predictor_ = LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
if (acoustic_model_predictor_ == nullptr) {
return false;
}
VOC_predictor_ = LoadModel(VOCModelPath, cpuThreadNum, cpuPowerMode);
if (VOC_predictor_ == nullptr) {
vocoder_predictor_ = LoadModel(VocoderPath, cpuThreadNum, cpuPowerMode);
if (vocoder_predictor_ == nullptr) {
return false;
}
@ -91,8 +91,8 @@ public:
}
void ReleaseModel() {
AM_predictor_ = nullptr;
VOC_predictor_ = nullptr;
acoustic_model_predictor_ = nullptr;
vocoder_predictor_ = nullptr;
}
bool RunModel(const std::vector<int64_t> &phones) {
@ -104,7 +104,7 @@ public:
auto start = std::chrono::system_clock::now();
// 执行推理
VOCOutputToWav(GetVOCOutput(GetAMOutput(phones)));
VocoderOutputToWav(GetVocoderOutput(GetAcousticModelOutput(phones)));
// 计时结束
auto end = std::chrono::system_clock::now();
@ -116,16 +116,16 @@ public:
return true;
}
std::unique_ptr<const Tensor> GetAMOutput(const std::vector<int64_t> &phones) {
auto phones_handle = AM_predictor_->GetInput(0);
std::unique_ptr<const Tensor> GetAcousticModelOutput(const std::vector<int64_t> &phones) {
auto phones_handle = acoustic_model_predictor_->GetInput(0);
phones_handle->Resize({static_cast<int64_t>(phones.size())});
phones_handle->CopyFromCpu(phones.data());
AM_predictor_->Run();
acoustic_model_predictor_->Run();
// 获取输出Tensor
auto am_output_handle = AM_predictor_->GetOutput(0);
auto am_output_handle = acoustic_model_predictor_->GetOutput(0);
// 打印输出Tensor的shape
std::cout << "AM Output shape: ";
std::cout << "Acoustic Model Output shape: ";
auto shape = am_output_handle->shape();
for (auto s : shape) {
std::cout << s << ", ";
@ -135,19 +135,19 @@ public:
return am_output_handle;
}
std::unique_ptr<const Tensor> GetVOCOutput(std::unique_ptr<const Tensor> &&amOutput) {
auto mel_handle = VOC_predictor_->GetInput(0);
std::unique_ptr<const Tensor> GetVocoderOutput(std::unique_ptr<const Tensor> &&amOutput) {
auto mel_handle = vocoder_predictor_->GetInput(0);
// [?, 80]
auto dims = amOutput->shape();
mel_handle->Resize(dims);
auto am_output_data = amOutput->mutable_data<float>();
mel_handle->CopyFromCpu(am_output_data);
VOC_predictor_->Run();
vocoder_predictor_->Run();
// 获取输出Tensor
auto voc_output_handle = VOC_predictor_->GetOutput(0);
auto voc_output_handle = vocoder_predictor_->GetOutput(0);
// 打印输出Tensor的shape
std::cout << "VOC Output shape: ";
std::cout << "Vocoder Output shape: ";
auto shape = voc_output_handle->shape();
for (auto s : shape) {
std::cout << s << ", ";
@ -157,7 +157,7 @@ public:
return voc_output_handle;
}
void VOCOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) {
void VocoderOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) {
// 获取输出Tensor的数据
int64_t output_size = 1;
for (auto dim : vocOutput->shape()) {
@ -175,7 +175,7 @@ public:
void SaveFloatWav(float *floatWav, int64_t size);
bool IsLoaded() {
return AM_predictor_ != nullptr && VOC_predictor_ != nullptr;
return acoustic_model_predictor_ != nullptr && vocoder_predictor_ != nullptr;
}
float GetInferenceTime() {
@ -231,8 +231,8 @@ private:
float inference_time_ = 0;
uint32_t wav_sample_rate_ = 0;
std::vector<WavDataType> wav_;
std::shared_ptr<PaddlePredictor> AM_predictor_ = nullptr;
std::shared_ptr<PaddlePredictor> VOC_predictor_ = nullptr;
std::shared_ptr<PaddlePredictor> acoustic_model_predictor_ = nullptr;
std::shared_ptr<PaddlePredictor> vocoder_predictor_ = nullptr;
};
template<>

Loading…
Cancel
Save