From ac47ceeeb1d8321b327f69982b433111175df857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=80=B8=E8=B1=AA?= Date: Tue, 7 Mar 2023 02:02:41 +0000 Subject: [PATCH] demos/TTSArmLinux: save wav in 16-bit PCM format to reduce compatibility issues --- demos/TTSArmLinux/src/Predictor.hpp | 40 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/demos/TTSArmLinux/src/Predictor.hpp b/demos/TTSArmLinux/src/Predictor.hpp index d32ed1fae..268d98c10 100644 --- a/demos/TTSArmLinux/src/Predictor.hpp +++ b/demos/TTSArmLinux/src/Predictor.hpp @@ -9,6 +9,8 @@ using namespace paddle::lite_api; +typedef int16_t WavDataType; + class Predictor { public: bool Init(const std::string &AMModelPath, const std::string &VOCModelPath, int cpuThreadNum, const std::string &cpuPowerMode) { @@ -104,8 +106,6 @@ public: } std::cout << std::endl; - // 获取输出Tensor的数据 - auto am_output_data = am_output_handle->mutable_data(); return am_output_handle; } @@ -133,9 +133,29 @@ public: for (auto dim : voc_output_handle->shape()) { output_size *= dim; } - wav_.resize(output_size); auto output_data = voc_output_handle->mutable_data(); - std::copy_n(output_data, output_size, wav_.data()); + + SaveFloatWav(output_data, output_size); + } + + inline float Abs(float number) { + return (number < 0) ? -number : number; + } + + void SaveFloatWav(float *floatWav, int64_t size) { + wav_.resize(size); + float maxSample = 0.01; + // 寻找最大采样值 + for (int64_t i=0; i maxSample) { + maxSample = sample; + } + } + // 把采样值缩放到 int_16 范围 + for (int64_t i=0; i & GetWav() { + const std::vector & GetWav() { return wav_; } int GetWavSize() { - return wav_.size() * sizeof(float); + return wav_.size() * sizeof(WavDataType); } void ReleaseWav() { @@ -167,7 +187,7 @@ public: // FMT 头 char fmt[4] = {'f', 'm', 't', ' '}; uint32_t fmt_size = 16; - uint16_t audio_format = 3; + uint16_t audio_format = 1; // 1为整数编码,3为浮点编码 uint16_t num_channels = 1; // 如果播放速度和音调异常,请修改采样率 @@ -175,8 +195,8 @@ public: uint32_t sample_rate = 24000; uint32_t byte_rate = 64000; - uint16_t block_align = 4; - uint16_t bits_per_sample = 32; + uint16_t block_align = 2; + uint16_t bits_per_sample = sizeof(WavDataType) * 8; // DATA 头 char data[4] = {'d', 'a', 't', 'a'}; @@ -208,5 +228,5 @@ private: float inference_time_ = 0; std::shared_ptr AM_predictor_ = nullptr; std::shared_ptr VOC_predictor_ = nullptr; - std::vector wav_; + std::vector wav_; };