[TTS]Add license and reformat for TTSCppFrontend (#3030)

3 years ago · 1aa7495dab
parent 259f4936ee
commit 1aa7495dab
13 changed files with 1323 additions and 848 deletions
--- a/demos/TTSArmLinux/src/Predictor.hpp
+++ b/demos/TTSArmLinux/src/Predictor.hpp
@ -1,7 +1,20 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <algorithm>
 #include <chrono>
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <memory>
 #include <string>
 #include <vector>
@ -10,24 +23,28 @@
 using namespace paddle::lite_api;

 class PredictorInterface {
-public:
+  public:
    virtual ~PredictorInterface() = 0;
-    virtual bool Init(
-            const std::string &AcousticModelPath,
-            const std::string &VocoderPath,
-            PowerMode cpuPowerMode,
-            int cpuThreadNum,
-            // WAV采样率（必须与模型输出匹配）
-            // 如果播放速度和音调异常，请修改采样率
-            // 常见采样率：16000, 24000, 32000, 44100, 48000, 96000
-            uint32_t wavSampleRate
-    ) = 0;
-    virtual std::shared_ptr<PaddlePredictor> LoadModel(const std::string &modelPath, int cpuThreadNum, PowerMode cpuPowerMode) = 0;
+    virtual bool Init(const std::string &AcousticModelPath,
+                      const std::string &VocoderPath,
+                      PowerMode cpuPowerMode,
+                      int cpuThreadNum,
+                      // WAV采样率（必须与模型输出匹配）
+                      // 如果播放速度和音调异常，请修改采样率
+                      // 常见采样率：16000, 24000, 32000, 44100, 48000, 96000
+                      uint32_t wavSampleRate) = 0;
+    virtual std::shared_ptr<PaddlePredictor> LoadModel(
+        const std::string &modelPath,
+        int cpuThreadNum,
+        PowerMode cpuPowerMode) = 0;
    virtual void ReleaseModel() = 0;
    virtual bool RunModel(const std::vector<int64_t> &phones) = 0;
-    virtual std::unique_ptr<const Tensor> GetAcousticModelOutput(const std::vector<int64_t> &phones) = 0;
-    virtual std::unique_ptr<const Tensor> GetVocoderOutput(std::unique_ptr<const Tensor> &&amOutput) = 0;
-    virtual void VocoderOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) = 0;
+    virtual std::unique_ptr<const Tensor> GetAcousticModelOutput(
+        const std::vector<int64_t> &phones) = 0;
+    virtual std::unique_ptr<const Tensor> GetVocoderOutput(
+        std::unique_ptr<const Tensor> &&amOutput) = 0;
+    virtual void VocoderOutputToWav(
+        std::unique_ptr<const Tensor> &&vocOutput) = 0;
    virtual void SaveFloatWav(float *floatWav, int64_t size) = 0;
    virtual bool IsLoaded() = 0;
    virtual float GetInferenceTime() = 0;
@ -45,23 +62,22 @@ PredictorInterface::~PredictorInterface() {}
 // WavDataType: WAV数据类型
 // 可在 int16_t 和 float 之间切换，
 // 用于生成 16-bit PCM 或 32-bit IEEE float 格式的 WAV
-template<typename WavDataType>
+template <typename WavDataType>
 class Predictor : public PredictorInterface {
-public:
-    virtual bool Init(
-            const std::string &AcousticModelPath,
-            const std::string &VocoderPath,
-            PowerMode cpuPowerMode,
-            int cpuThreadNum,
-            // WAV采样率（必须与模型输出匹配）
-            // 如果播放速度和音调异常，请修改采样率
-            // 常见采样率：16000, 24000, 32000, 44100, 48000, 96000
-            uint32_t wavSampleRate
-    ) override {
+  public:
+    bool Init(const std::string &AcousticModelPath,
+              const std::string &VocoderPath,
+              PowerMode cpuPowerMode,
+              int cpuThreadNum,
+              // WAV采样率（必须与模型输出匹配）
+              // 如果播放速度和音调异常，请修改采样率
+              // 常见采样率：16000, 24000, 32000, 44100, 48000, 96000
+              uint32_t wavSampleRate) override {
        // Release model if exists
        ReleaseModel();

-        acoustic_model_predictor_ = LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
+        acoustic_model_predictor_ =
+            LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
        if (acoustic_model_predictor_ == nullptr) {
            return false;
        }
@ -80,7 +96,10 @@ public:
        ReleaseWav();
    }

-    virtual std::shared_ptr<PaddlePredictor> LoadModel(const std::string &modelPath, int cpuThreadNum, PowerMode cpuPowerMode) override {
+    std::shared_ptr<PaddlePredictor> LoadModel(
+        const std::string &modelPath,
+        int cpuThreadNum,
+        PowerMode cpuPowerMode) override {
        if (modelPath.empty()) {
            return nullptr;
        }
@ -94,12 +113,12 @@ public:
        return CreatePaddlePredictor<MobileConfig>(config);
    }

-    virtual void ReleaseModel() override {
+    void ReleaseModel() override {
        // Reset both predictor handles to null; IsLoaded() returns false
        // afterwards until both are assigned again.
        acoustic_model_predictor_ = nullptr;
        vocoder_predictor_ = nullptr;
    }

-    virtual bool RunModel(const std::vector<int64_t> &phones) override {
+    bool RunModel(const std::vector<int64_t> &phones) override {
        if (!IsLoaded()) {
            return false;
        }
@ -115,12 +134,13 @@ public:

        // 计算用时
        std::chrono::duration<float> duration = end - start;
-        inference_time_ = duration.count() * 1000; // 单位：毫秒
+        inference_time_ = duration.count() * 1000;  // 单位：毫秒

        return true;
    }

-    virtual std::unique_ptr<const Tensor> GetAcousticModelOutput(const std::vector<int64_t> &phones) override {
+    std::unique_ptr<const Tensor> GetAcousticModelOutput(
+        const std::vector<int64_t> &phones) override {
        auto phones_handle = acoustic_model_predictor_->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
@ -139,7 +159,8 @@ public:
        return am_output_handle;
    }

-    virtual std::unique_ptr<const Tensor> GetVocoderOutput(std::unique_ptr<const Tensor> &&amOutput) override {
+    std::unique_ptr<const Tensor> GetVocoderOutput(
+        std::unique_ptr<const Tensor> &&amOutput) override {
        auto mel_handle = vocoder_predictor_->GetInput(0);
        // [?, 80]
        auto dims = amOutput->shape();
@ -161,7 +182,8 @@ public:
        return voc_output_handle;
    }

-    virtual void VocoderOutputToWav(std::unique_ptr<const Tensor> &&vocOutput) override {
+    void VocoderOutputToWav(
+        std::unique_ptr<const Tensor> &&vocOutput) override {
        // 获取输出Tensor的数据
        int64_t output_size = 1;
        for (auto dim : vocOutput->shape()) {
@ -172,39 +194,31 @@ public:
        SaveFloatWav(output_data, output_size);
    }

-    virtual void SaveFloatWav(float *floatWav, int64_t size) override;
    // Declared only; the bodies are explicit specializations per WavDataType
    // defined after the class.
+    void SaveFloatWav(float *floatWav, int64_t size) override;

-    virtual bool IsLoaded() override {
-        return acoustic_model_predictor_ != nullptr && vocoder_predictor_ != nullptr;
+    bool IsLoaded() override {
        // Loaded means BOTH the acoustic-model and the vocoder predictor are
        // non-null; one alone is not enough.
+        return acoustic_model_predictor_ != nullptr &&
+               vocoder_predictor_ != nullptr;
    }

-    virtual float GetInferenceTime() override {
-        return inference_time_;
-    }
    // Returns inference_time_, which RunModel() stores in milliseconds
    // (0 until a run has completed).
+    float GetInferenceTime() override { return inference_time_; }

-    const std::vector<WavDataType> & GetWav() {
-        return wav_;
-    }
    // Read-only reference to the sample buffer wav_ (filled by SaveFloatWav).
+    const std::vector<WavDataType> &GetWav() { return wav_; }

-    virtual int GetWavSize() override {
-        return wav_.size() * sizeof(WavDataType);
-    }
    // Size of the sample data in bytes: sample count * sizeof(WavDataType).
    // The size_t product is narrowed to int on return.
+    int GetWavSize() override { return wav_.size() * sizeof(WavDataType); }

    // Get the WAV duration (unit: milliseconds).
    // bytes / sizeof(WavDataType) = sample count; / sample rate = seconds;
    // * 1000 = milliseconds.
    // NOTE(review): wav_sample_rate_ is declared with an initial value of 0; if
    // it is still 0 when this runs, the float division does not produce a
    // finite duration - confirm it is always set before this is called.
-    virtual float GetWavDuration() override {
-        return static_cast<float>(GetWavSize()) / sizeof(WavDataType) / static_cast<float>(wav_sample_rate_) * 1000;
+    float GetWavDuration() override {
+        return static_cast<float>(GetWavSize()) / sizeof(WavDataType) /
+               static_cast<float>(wav_sample_rate_) * 1000;
    }

    // Get the RTF (synthesis time / audio duration).
    // Both operands are in milliseconds, so the ratio is unitless. An empty
    // wav_ gives a zero duration and therefore a non-finite result.
-    virtual float GetRTF() override {
-        return GetInferenceTime() / GetWavDuration();
-    }
+    float GetRTF() override { return GetInferenceTime() / GetWavDuration(); }

-    virtual void ReleaseWav() override {
-        wav_.clear();
-    }
    // Empties the sample buffer wav_.
+    void ReleaseWav() override { wav_.clear(); }

-    virtual bool WriteWavToFile(const std::string &wavPath) override {
+    bool WriteWavToFile(const std::string &wavPath) override {
        std::ofstream fout(wavPath, std::ios::binary);
        if (!fout.is_open()) {
            return false;
@ -216,18 +230,20 @@ public:
        header.data_size = GetWavSize();
        header.size = sizeof(header) - 8 + header.data_size;
        header.sample_rate = wav_sample_rate_;
-        header.byte_rate = header.sample_rate * header.num_channels * header.bits_per_sample / 8;
+        header.byte_rate = header.sample_rate * header.num_channels *
+                           header.bits_per_sample / 8;
        header.block_align = header.num_channels * header.bits_per_sample / 8;
-        fout.write(reinterpret_cast<const char*>(&header), sizeof(header));
+        fout.write(reinterpret_cast<const char *>(&header), sizeof(header));

        // 写入wav数据
-        fout.write(reinterpret_cast<const char*>(wav_.data()), header.data_size);
+        fout.write(reinterpret_cast<const char *>(wav_.data()),
+                   header.data_size);

        fout.close();
        return true;
    }

-protected:
+  protected:
    struct WavHeader {
        // RIFF 头
        char riff[4] = {'R', 'I', 'F', 'F'};
@ -250,19 +266,17 @@ protected:
    };

    // Audio format codes; GetWavAudioFormat() returns one of these depending
    // on WavDataType.
    enum WavAudioFormat {
-        WAV_FORMAT_16BIT_PCM   = 1, // 16-bit PCM format
+        WAV_FORMAT_16BIT_PCM = 1,   // 16-bit PCM format
        WAV_FORMAT_32BIT_FLOAT = 3  // 32-bit IEEE float format
    };

-protected:
+  protected:
    // The return value is determined by WavDataType via template
    // specialization (the specializations are defined after the class).
    inline uint16_t GetWavAudioFormat();

-    inline float Abs(float number) {
-        return (number < 0) ? -number : number;
-    }
    // Absolute value of a float: negates the argument when it is below zero.
+    inline float Abs(float number) { return (number < 0) ? -number : number; }

-protected:
+  protected:
    float inference_time_ = 0;
    uint32_t wav_sample_rate_ = 0;
    std::vector<WavDataType> wav_;
@ -270,36 +284,36 @@ protected:
    std::shared_ptr<PaddlePredictor> vocoder_predictor_ = nullptr;
 };

-template<>
+template <>
 // int16_t samples: report the 16-bit PCM format code.
 // NOTE(review): this explicit specialization is defined in a header
 // (Predictor.hpp) without `inline`; if the header is ever included from more
 // than one translation unit this looks like a multiple-definition hazard -
 // confirm, and consider marking it inline.
 uint16_t Predictor<int16_t>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_16BIT_PCM;
 }

-template<>
+template <>
 // float samples: report the 32-bit IEEE float format code.
 // NOTE(review): this explicit specialization is defined in a header
 // (Predictor.hpp) without `inline`; if the header is ever included from more
 // than one translation unit this looks like a multiple-definition hazard -
 // confirm, and consider marking it inline.
 uint16_t Predictor<float>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_32BIT_FLOAT;
 }

 // Save the WAV in 16-bit PCM format.
 // Copies `size` float samples from floatWav into wav_, rescaled so that the
 // largest absolute sample maps to 32767.
 // NOTE(review): explicit specialization defined in a header without `inline`;
 // possible multiple-definition hazard if included from several translation
 // units - confirm.
-template<>
+template <>
 void Predictor<int16_t>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    // The peak starts at 0.01 rather than 0, so the divisor used below is never
    // smaller than 0.01 (presumably to avoid dividing by zero and blowing up
    // near-silent output - confirm intent).
    float maxSample = 0.01;
    // Find the largest absolute sample value
-    for (int64_t i=0; i<size; i++) {
+    for (int64_t i = 0; i < size; i++) {
        float sample = Abs(floatWav[i]);
        if (sample > maxSample) {
            maxSample = sample;
        }
    }
    // Scale the sample values into the int16 range
-    for (int64_t i=0; i<size; i++) {
+    for (int64_t i = 0; i < size; i++) {
        wav_[i] = floatWav[i] * 32767.0f / maxSample;
    }
 }

 // 保存 32-bit IEEE float 格式 WAV
-template<>
+template <>
 void Predictor<float>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    std::copy_n(floatWav, size, wav_.data());
--- a/demos/TTSArmLinux/src/main.cc
+++ b/demos/TTSArmLinux/src/main.cc
@ -1,23 +1,48 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <front/front_interface.h>
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <paddle_api.h>
 #include <cstdlib>
 #include <iostream>
+#include <map>
 #include <memory>
 #include <string>
-#include <map>
-#include <glog/logging.h>
-#include <gflags/gflags.h>
-#include <paddle_api.h>
-#include <front/front_interface.h>
 #include "Predictor.hpp"

 using namespace paddle::lite_api;

-DEFINE_string(sentence, "你好，欢迎使用语音合成服务", "Text to be synthesized (Chinese only. English will crash the program.)");
+DEFINE_string(
+    sentence,
+    "你好，欢迎使用语音合成服务",
+    "Text to be synthesized (Chinese only. English will crash the program.)");
 // Command-line flags (gflags); each is read in main() as FLAGS_<name>.
 DEFINE_string(front_conf, "./front.conf", "Front configuration file");
-DEFINE_string(acoustic_model, "./models/cpu/fastspeech2_csmsc_arm.nb", "Acoustic model .nb file");
-DEFINE_string(vocoder, "./models/cpu/fastspeech2_csmsc_arm.nb", "vocoder .nb file");
+DEFINE_string(acoustic_model,
+              "./models/cpu/fastspeech2_csmsc_arm.nb",
+              "Acoustic model .nb file");
+DEFINE_string(vocoder,
+              "./models/cpu/fastspeech2_csmsc_arm.nb",
+              "vocoder .nb file");
 // NOTE(review): the default path of --vocoder is identical to the default of
 // --acoustic_model; this looks like a copy-paste slip - confirm which vocoder
 // .nb file should be the default.
 DEFINE_string(output_wav, "./output/tts.wav", "Output WAV file");
-DEFINE_string(wav_bit_depth, "16", "WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)");
-DEFINE_string(wav_sample_rate, "24000", "WAV sample rate, should match the output of the vocoder");
+DEFINE_string(wav_bit_depth,
+              "16",
+              "WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)");
+DEFINE_string(wav_sample_rate,
+              "24000",
+              "WAV sample rate, should match the output of the vocoder");
 // wav_bit_depth, wav_sample_rate and cpu_thread hold numbers but are declared
 // as string flags, so their values must be converted before numeric use.
 DEFINE_string(cpu_thread, "1", "CPU thread numbers");

 int main(int argc, char *argv[]) {
@ -53,7 +78,7 @@ int main(int argc, char *argv[]) {

    // 繁体转简体
    std::wstring sentence_simp;
-    front_inst->Trand2Simp(ws_sentence, sentence_simp); 
+    front_inst->Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
@ -63,28 +88,30 @@ int main(int argc, char *argv[]) {

    // 根据标点进行分句
    LOG(INFO) << "Start to segment sentences by punctuation";
-    front_inst->SplitByPunc(ws_sentence, sentence_part); 
+    front_inst->SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // 分句后获取音素id
-    LOG(INFO) << "Start to get the phoneme and tone id sequence of each sentence";
-    for(int i = 0; i < sentence_part.size(); i++) {
-
-        LOG(INFO) << "Raw sentence is: " << ppspeech::wstring2utf8string(sentence_part[i]);
-        front_inst->SentenceNormalize(sentence_part[i]);
+    LOG(INFO)
+        << "Start to get the phoneme and tone id sequence of each sentence";
+    for (int i = 0; i < sentence_part.size(); i++) {
+        LOG(INFO) << "Raw sentence is: "
+                  << ppspeech::wstring2utf8string(sentence_part[i]);
+        front_inst->SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;
-        
-        if (0 != front_inst->GetSentenceIds(s_sentence, phoneids, toneids)) {
+
+        if (0 != front_inst->GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
-            
    }
-    LOG(INFO) << "The phoneids of the sentence is: " << limonp::Join(phoneids.begin(), phoneids.end(), " ");
-    LOG(INFO) << "The toneids of the sentence is: " << limonp::Join(toneids.begin(), toneids.end(), " ");
+    LOG(INFO) << "The phoneids of the sentence is: "
+              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
+    LOG(INFO) << "The toneids of the sentence is: "
+              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";
- 
+

    /////////////////////////// 后端：音素转音频 ///////////////////////////

@ -99,13 +126,19 @@ int main(int argc, char *argv[]) {
    // CPU电源模式
    const PowerMode cpuPowerMode = PowerMode::LITE_POWER_HIGH;

-    if (!predictor->Init(FLAGS_acoustic_model, FLAGS_vocoder, cpuPowerMode, cpuThreadNum, wavSampleRate)) {
+    if (!predictor->Init(FLAGS_acoustic_model,
+                         FLAGS_vocoder,
+                         cpuPowerMode,
+                         cpuThreadNum,
+                         wavSampleRate)) {
        LOG(ERROR) << "predictor init failed" << std::endl;
        return -1;
    }

    std::vector<int64_t> phones(phoneids.size());
-    std::transform(phoneids.begin(), phoneids.end(), phones.begin(), [](int x) { return static_cast<int64_t>(x); });
+    std::transform(phoneids.begin(), phoneids.end(), phones.begin(), [](int x) {
+        return static_cast<int64_t>(x);
+    });

    if (!predictor->RunModel(phones)) {
        LOG(ERROR) << "predictor run model failed" << std::endl;
@ -113,7 +146,8 @@ int main(int argc, char *argv[]) {
    }

    LOG(INFO) << "Inference time: " << predictor->GetInferenceTime() << " ms, "
-              << "WAV size (without header): " << predictor->GetWavSize() << " bytes, "
+              << "WAV size (without header): " << predictor->GetWavSize()
+              << " bytes, "
              << "WAV duration: " << predictor->GetWavDuration() << " ms, "
              << "RTF: " << predictor->GetRTF() << std::endl;

--- a/demos/TTSCppFrontend/README.md
+++ b/demos/TTSCppFrontend/README.md
@ -38,6 +38,7 @@ If the download speed is too slow, you can open [third-party/CMakeLists.txt](thi
 ```

 ## Run
+You can change `--phone2id_path` in `./front_demo/front.conf` to the `phone_id_map.txt` of your own acoustic model.

 ```
 ./run_front_demo.sh
--- a/demos/TTSCppFrontend/front_demo/front_demo.cpp
+++ b/demos/TTSCppFrontend/front_demo/front_demo.cpp
@ -1,19 +1,32 @@
-#include <string>
-//#include "utils/dir_utils.h"
-#include "front/front_interface.h"
-#include <glog/logging.h>
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include <gflags/gflags.h>
+#include <glog/logging.h>
 #include <map>
+#include <string>
+#include "front/front_interface.h"

 // Command-line flags (gflags) for the front-end demo. FLAGS_front_conf is
 // passed to the FrontEngineInterface constructor in main().
 DEFINE_string(sentence, "你好，欢迎使用语音合成服务", "Text to be synthesized");
 DEFINE_string(front_conf, "./front_demo/front.conf", "Front conf file");
-//DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");
+// DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");


 int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    // 实例化文本前端引擎
-    ppspeech::FrontEngineInterface *front_inst = nullptr;
+    ppspeech::FrontEngineInterface* front_inst = nullptr;
    front_inst = new ppspeech::FrontEngineInterface(FLAGS_front_conf);
    if ((!front_inst) || (front_inst->init())) {
        LOG(ERROR) << "Creater tts engine failed!";
@ -28,7 +41,7 @@ int main(int argc, char** argv) {

    // 繁体转简体
    std::wstring sentence_simp;
-    front_inst->Trand2Simp(ws_sentence, sentence_simp); 
+    front_inst->Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
@ -38,28 +51,29 @@ int main(int argc, char** argv) {

    // 根据标点进行分句
    LOG(INFO) << "Start to segment sentences by punctuation";
-    front_inst->SplitByPunc(ws_sentence, sentence_part); 
+    front_inst->SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // 分句后获取音素id
-    LOG(INFO) << "Start to get the phoneme and tone id sequence of each sentence";
-    for(int i = 0; i < sentence_part.size(); i++) {
-
-        LOG(INFO) << "Raw sentence is: " << ppspeech::wstring2utf8string(sentence_part[i]);
-        front_inst->SentenceNormalize(sentence_part[i]);
+    LOG(INFO)
+        << "Start to get the phoneme and tone id sequence of each sentence";
+    for (int i = 0; i < sentence_part.size(); i++) {
+        LOG(INFO) << "Raw sentence is: "
+                  << ppspeech::wstring2utf8string(sentence_part[i]);
+        front_inst->SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;
-        
-        if (0 != front_inst->GetSentenceIds(s_sentence, phoneids, toneids)) {
+
+        if (0 != front_inst->GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
-            
    }
-    LOG(INFO) << "The phoneids of the sentence is: " << limonp::Join(phoneids.begin(), phoneids.end(), " ");
-    LOG(INFO) << "The toneids of the sentence is: " << limonp::Join(toneids.begin(), toneids.end(), " ");
+    LOG(INFO) << "The phoneids of the sentence is: "
+              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
+    LOG(INFO) << "The toneids of the sentence is: "
+              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";
- 
+
    return EXIT_SUCCESS;
 }
-
--- a/demos/TTSCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
+++ b/demos/TTSCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
@ -1,19 +1,28 @@
-# !/usr/bin/env python3
-# -*- coding: utf-8 -*-
-########################################################################
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 #
-# Copyright     2021    liangyunming(liangyunming@baidu.com)
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-# Execute the script when PaddleSpeech has been installed
-# PaddleSpeech: https://github.com/PaddlePaddle/PaddleSpeech
-
-########################################################################
-
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import configparser
+
 from paddlespeech.t2s.frontend.zh_frontend import Frontend

-def get_phone(frontend, word, merge_sentences=True, print_info=False, robot=False, get_tone_ids=False):
+
+def get_phone(frontend,
+              word,
+              merge_sentences=True,
+              print_info=False,
+              robot=False,
+              get_tone_ids=False):
    phonemes = frontend.get_phonemes(word, merge_sentences, print_info, robot)
    # Some optimizations
    phones, tones = frontend._get_phone_tone(phonemes[0], get_tone_ids)
@ -22,7 +31,10 @@ def get_phone(frontend, word, merge_sentences=True, print_info=False, robot=Fals
    return phones, tones


-def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=False):
+def gen_word2phone_dict(frontend,
+                        jieba_words_dict,
+                        word2phone_dict,
+                        get_tone=False):
    with open(jieba_words_dict, "r") as f1, open(word2phone_dict, "w+") as f2:
        for line in f1.readlines():
            word = line.split(" ")[0]
@ -30,9 +42,9 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa
            phone_str = ""

            if tone:
-                assert(len(phone) == len(tone))
+                assert (len(phone) == len(tone))
                for i in range(len(tone)):
-                    phone_tone = phone[i] + tone[i] 
+                    phone_tone = phone[i] + tone[i]
                    phone_str += (" " + phone_tone)
                phone_str = phone_str.strip("sp0").strip(" ")
            else:
@ -45,43 +57,55 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa


 def main():
    # Entry point: parse --config / --am_type, read the dictionary paths from
    # the config file, build a paddlespeech Frontend and write the
    # word -> phone dictionary via gen_word2phone_dict().
-    parser = argparse.ArgumentParser(
-        description="Generate dictionary")
+    parser = argparse.ArgumentParser(description="Generate dictionary")
    parser.add_argument(
        "--config", type=str, default="./config.ini", help="config file.")
    parser.add_argument(
-        "--am_type", type=str, default="fastspeech2", help="fastspeech2 or speedyspeech")
+        "--am_type",
+        type=str,
+        default="fastspeech2",
+        help="fastspeech2 or speedyspeech")
    args = parser.parse_args()

    # Read config
    cf = configparser.ConfigParser()
    cf.read(args.config)
-    jieba_words_dict_file = cf.get("jieba", "jieba_words_dict")  # get words dict
+    jieba_words_dict_file = cf.get("jieba",
+                                   "jieba_words_dict")  # get words dict

    # The acoustic-model type doubles as the config section name below.
    am_type = args.am_type
-    if(am_type == "fastspeech2"):
+    if (am_type == "fastspeech2"):
        # fastspeech2: phone vocabulary only, dictionary without tones.
        phone2id_dict_file = cf.get(am_type, "phone2id_dict")
        word2phone_dict_file = cf.get(am_type, "word2phone_dict")

        frontend = Frontend(phone_vocab_path=phone2id_dict_file)
        print("frontend done!")

-        gen_word2phone_dict(frontend, jieba_words_dict_file, word2phone_dict_file, get_tone=False)
-        
-    elif(am_type == "speedyspeech"):
+        gen_word2phone_dict(
+            frontend,
+            jieba_words_dict_file,
+            word2phone_dict_file,
+            get_tone=False)
+
+    elif (am_type == "speedyspeech"):
        # speedyspeech: also loads the tone vocabulary and requests tones.
        phone2id_dict_file = cf.get(am_type, "phone2id_dict")
        tone2id_dict_file = cf.get(am_type, "tone2id_dict")
        word2phone_dict_file = cf.get(am_type, "word2phone_dict")

-        frontend = Frontend(phone_vocab_path=phone2id_dict_file, tone_vocab_path=tone2id_dict_file)
+        frontend = Frontend(
+            phone_vocab_path=phone2id_dict_file,
+            tone_vocab_path=tone2id_dict_file)
        print("frontend done!")

-        gen_word2phone_dict(frontend, jieba_words_dict_file, word2phone_dict_file, get_tone=True)
-        
+        gen_word2phone_dict(
+            frontend,
+            jieba_words_dict_file,
+            word2phone_dict_file,
+            get_tone=True)

    else:
        # Unknown type: only a message is printed; nothing is generated.
        print("Please set correct am type, fastspeech2 or speedyspeech.")
-     
-    
+
+
 if __name__ == "__main__":
    main()
--- a/demos/TTSCppFrontend/front_demo/gentools/genid.py
+++ b/demos/TTSCppFrontend/front_demo/gentools/genid.py
@ -1,10 +1,23 @@
-#from parakeet.frontend.vocab import Vocab
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 PHONESFILE = "./dict/phones.txt"
 PHONES_ID_FILE = "./dict/phonesid.dict"
 TONESFILE = "./dict/tones.txt"
 TONES_ID_FILE = "./dict/tonesid.dict"

+
 def GenIdFile(file, idfile):
    id = 2
    with open(file, 'r') as f1, open(idfile, "w+") as f2:
@ -16,7 +29,7 @@ def GenIdFile(file, idfile):
            f2.write(phone + " " + str(id) + "\n")
            id += 1

+
 if __name__ == "__main__":
    GenIdFile(PHONESFILE, PHONES_ID_FILE)
    GenIdFile(TONESFILE, TONES_ID_FILE)
-
--- a/demos/TTSCppFrontend/front_demo/gentools/word2phones.py
+++ b/demos/TTSCppFrontend/front_demo/gentools/word2phones.py
@ -1,9 +1,25 @@
-from pypinyin import lazy_pinyin, Style
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import re

+from pypinyin import lazy_pinyin
+from pypinyin import Style
+
 worddict = "./dict/jieba_part.dict.utf8"
 newdict = "./dict/word_phones.dict"

+
 def GenPhones(initials, finals, seperate=True):

    phones = []
@ -14,9 +30,9 @@ def GenPhones(initials, finals, seperate=True):
            elif c in ['zh', 'ch', 'sh', 'r']:
                v = re.sub('i', 'iii', v)
        if c:
-            if seperate == True:
+            if seperate is True:
                phones.append(c + '0')
-            elif seperate == False:
+            elif seperate is False:
                phones.append(c)
            else:
                print("Not sure whether phone and tone need to be separated")
@ -28,8 +44,10 @@ def GenPhones(initials, finals, seperate=True):
 with open(worddict, "r") as f1, open(newdict, "w+") as f2:
    for line in f1.readlines():
        word = line.split(" ")[0]
-        initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
-        finals = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
+        initials = lazy_pinyin(
+            word, neutral_tone_with_five=True, style=Style.INITIALS)
+        finals = lazy_pinyin(
+            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)

        phones = GenPhones(initials, finals, True)

--- a/demos/TTSCppFrontend/src/base/type_conv.cpp
+++ b/demos/TTSCppFrontend/src/base/type_conv.cpp
@ -1,18 +1,28 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "base/type_conv.h"

 namespace ppspeech {
 // wstring to string
 // Encodes a wide string as UTF-8 bytes using
 // std::wstring_convert<std::codecvt_utf8<wchar_t>>::to_bytes.
 // NOTE(review): the converter is a function-local static shared by every
 // caller and nothing here synchronizes access to it - confirm this is never
 // called from several threads at once.
-std::string wstring2utf8string(const std::wstring& str)
-{
-    static std::wstring_convert<std::codecvt_utf8<wchar_t> > strCnv;
+std::string wstring2utf8string(const std::wstring& str) {
+    static std::wstring_convert<std::codecvt_utf8<wchar_t>> strCnv;
    return strCnv.to_bytes(str);
 }
- 
-// string to wstring 
-std::wstring utf8string2wstring(const std::string& str)
-{
-    static std::wstring_convert< std::codecvt_utf8<wchar_t> > strCnv;
-    return strCnv.from_bytes(str);
-}

 // The function below is the inverse direction: it decodes UTF-8 bytes into a
 // wide string with from_bytes, using its own function-local static converter
 // (same review note as above).
+// string to wstring
+std::wstring utf8string2wstring(const std::string& str) {
+    static std::wstring_convert<std::codecvt_utf8<wchar_t>> strCnv;
+    return strCnv.from_bytes(str);
 }
+}  // namespace ppspeech
--- a/demos/TTSCppFrontend/src/base/type_conv.h
+++ b/demos/TTSCppFrontend/src/base/type_conv.h
@ -1,18 +1,31 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #ifndef BASE_TYPE_CONVC_H
 #define BASE_TYPE_CONVC_H

-#include <string>
-#include <locale>
 #include <codecvt>
+#include <locale>
+#include <string>


 namespace ppspeech {
 // Converts a std::wstring to a UTF-8 encoded std::string.
 std::string wstring2utf8string(const std::wstring& str);
- 
-// Converts a UTF-8 encoded std::string to a std::wstring. 
-std::wstring utf8string2wstring(const std::string& str);

+// Converts a UTF-8 encoded std::string to a std::wstring.
+std::wstring utf8string2wstring(const std::string& str);
 }

 #endif  // BASE_TYPE_CONVC_H
--- a/demos/TTSCppFrontend/src/front/front_interface.cpp
+++ b/demos/TTSCppFrontend/src/front/front_interface.cpp
--- a/demos/TTSCppFrontend/src/front/front_interface.h
+++ b/demos/TTSCppFrontend/src/front/front_interface.h
@ -1,156 +1,198 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H
 #define PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H

+#include <glog/logging.h>
+#include <fstream>
 #include <map>
-#include <string>
 #include <memory>
-#include <fstream>
-#include <glog/logging.h>
+#include <string>
 //#include "utils/dir_utils.h"
 #include <cppjieba/Jieba.hpp>
-#include "front/text_normalize.h"
 #include "absl/strings/str_split.h"
+#include "front/text_normalize.h"


 namespace ppspeech {
-    
-    class FrontEngineInterface : public TextNormalizer{
-        public:
-            FrontEngineInterface(std::string conf) : _conf_file(conf) {
-                TextNormalizer();
-                _jieba = nullptr;
-                _initialed = false;
-                init();
-            }
-
-            int init();
-            ~FrontEngineInterface() {
-
-            }
-
-            // Read the configuration file
-            int ReadConfFile();
-
-            // Convert Traditional Chinese to Simplified. NOTE(review): the original comment said the reverse; confirm
-            int Trand2Simp(const std::wstring &sentence, std::wstring &sentence_simp);
-
-            // Build a dictionary from a file
-            int GenDict(const std::string &file, std::map<std::string, std::string> &map);
-
-            // Reduce (word, part-of-speech) segmentation results to words only
-            int GetSegResult(std::vector<std::pair<std::string, std::string>> &seg, std::vector<std::string> &seg_words);
-
-            // Produce phoneme and tone ids for a sentence. If phonemes and tones are not separated, toneids is empty (fastspeech2); otherwise non-empty (speedyspeech)
-            int GetSentenceIds(const std::string &sentence, std::vector<int> &phoneids, std::vector<int> &toneids);
-
-            // Get phoneme and tone ids for the words of a segmentation result, adjusting pronunciations as needed (ModifyTone). If phonemes and tones are not separated, toneids is empty (fastspeech2); otherwise non-empty (speedyspeech)
-            int GetWordsIds(const std::vector<std::pair<std::string, std::string>> &cut_result, std::vector<int> &phoneids, std::vector<int> &toneids);
-
-            // Segment with jieba into (word, part-of-speech) pairs, then adjust the segmentation result (MergeforModify)
-            int Cut(const std::string &sentence, std::vector<std::pair<std::string, std::string>> &cut_result);
-
-            // Map a word/character to its phonemes by dictionary lookup
-            int GetPhone(const std::string &word, std::string &phone);
-
-            // Map phonemes to phoneme ids
-            int Phone2Phoneid(const std::string &phone, std::vector<int> &phoneid, std::vector<int> &toneids);
-
-
-            // Decide from the finals whether every character of the word is third tone; true means all are third tone
-            bool AllToneThree(const std::vector<std::string> &finals);
-
-            // Whether the word is a reduplicated word
-            bool IsReduplication(const std::string &word);
-
-            // Get the lists of initials and finals for the word
-            int GetInitialsFinals(const std::string &word, std::vector<std::string> &word_initials, std::vector<std::string> &word_finals);
-
-            // Get the list of finals for the word
-            int GetFinals(const std::string &word, std::vector<std::string> &word_finals);

-            // Turn the whole word into a vector with one element per character
-            int Word2WordVec(const std::string &word, std::vector<std::wstring> &wordvec);
+class FrontEngineInterface : public TextNormalizer {
+  public:
+    explicit FrontEngineInterface(std::string conf) : _conf_file(conf) {
+        TextNormalizer();  // NOTE(review): this statement builds and discards a temporary object
+        _jieba = nullptr;
+        _initialed = false;
+        init();  // the int result of init() is not checked
+    }

-            // Re-segment the whole word with a full cut so that each resulting piece is in the dictionary
-            int SplitWord(const std::string &word, std::vector<std::string> &fullcut_word);
-    
-            // Post-process the segmentation result: tidy entries containing "不"
-            std::vector<std::pair<std::string, std::string>> MergeBu(std::vector<std::pair<std::string, std::string>> &seg_result);
+    int init();
+    ~FrontEngineInterface() {}  // NOTE(review): _jieba is a raw pointer and is not deleted here; confirm ownership

-            // Post-process the segmentation result: tidy entries containing "一"
-            std::vector<std::pair<std::string, std::string>> Mergeyi(std::vector<std::pair<std::string, std::string>> &seg_result);
+    // Read the configuration file
+    int ReadConfFile();

-            // Post-process the segmentation result: merge two adjacent identical characters
-            std::vector<std::pair<std::string, std::string>> MergeReduplication(std::vector<std::pair<std::string, std::string>> &seg_result);
+    // Convert Traditional Chinese to Simplified. NOTE(review): the original comment said the reverse; confirm
+    int Trand2Simp(const std::wstring &sentence, std::wstring *sentence_simp);

-            // Merge a word with the following word when both are read entirely in the third tone
-            std::vector<std::pair<std::string, std::string>> MergeThreeTones(std::vector<std::pair<std::string, std::string>> &seg_result);
+    // Build a dictionary from a file
+    int GenDict(const std::string &file,
+                std::map<std::string, std::string> *map);

-            // Merge two words when the last syllable of the first and the first syllable of the next are both third tone
-            std::vector<std::pair<std::string, std::string>> MergeThreeTones2(std::vector<std::pair<std::string, std::string>> &seg_result);
+    // Reduce (word, part-of-speech) segmentation results to words only
+    int GetSegResult(std::vector<std::pair<std::string, std::string>> *seg,
+                     std::vector<std::string> *seg_words);

-            // Post-process the segmentation result: tidy entries containing "儿"
-            std::vector<std::pair<std::string, std::string>> MergeEr(std::vector<std::pair<std::string, std::string>> &seg_result);
+    // Produce phoneme and tone ids for a sentence. If phonemes and tones are
+    // not separated, toneids is empty (fastspeech2); otherwise non-empty (speedyspeech)
+    int GetSentenceIds(const std::string &sentence,
+                       std::vector<int> *phoneids,
+                       std::vector<int> *toneids);

-            // Post-process and modify the segmentation result
-            int MergeforModify(std::vector<std::pair<std::string, std::string>> &seg_result, std::vector<std::pair<std::string, std::string>> &merge_seg_result);
+    // Get phoneme and tone ids for the words of a segmentation result,
+    // adjusting pronunciations as needed (ModifyTone). If phonemes and tones are
+    // not separated, toneids is empty (fastspeech2); otherwise non-empty (speedyspeech)
+    int GetWordsIds(
+        const std::vector<std::pair<std::string, std::string>> &cut_result,
+        std::vector<int> *phoneids,
+        std::vector<int> *toneids);

+    // Segment with jieba into (word, part-of-speech) pairs, then adjust the
+    // segmentation result (MergeforModify)
+    int Cut(const std::string &sentence,
+            std::vector<std::pair<std::string, std::string>> *cut_result);

-            // Adjust the tones of words containing "不"
-            int BuSandi(const std::string &word, std::vector<std::string> &finals);
+    // Map a word/character to its phonemes by dictionary lookup
+    int GetPhone(const std::string &word, std::string *phone);

-            // Adjust the tones of words containing "一"
-            int YiSandhi(const std::string &word, std::vector<std::string> &finals);
+    // Map phonemes to phoneme ids
+    int Phone2Phoneid(const std::string &phone,
+                      std::vector<int> *phoneid,
+                      std::vector<int> *toneids);

-            // Adjust the tones of certain special words (measure words, modal particles, etc.)
-            int NeuralSandhi(const std::string &word, const std::string &pos, std::vector<std::string> &finals);

-            // Adjust the tones of words containing third tones
-            int ThreeSandhi(const std::string &word, std::vector<std::string> &finals);
+    // Decide from the finals whether every character of the word is third tone; true means all are third tone
+    bool AllToneThree(const std::vector<std::string> &finals);

-            // Process and modify the tones of a word
-            int ModifyTone(const std::string &word, const std::string &pos, std::vector<std::string> &finals);
+    // Whether the word is a reduplicated word
+    bool IsReduplication(const std::string &word);
+
+    // Get the lists of initials and finals for the word
+    int GetInitialsFinals(const std::string &word,
+                          std::vector<std::string> *word_initials,
+                          std::vector<std::string> *word_finals);

+    // Get the list of finals for the word
+    int GetFinals(const std::string &word,
+                  std::vector<std::string> *word_finals);
+
+    // Turn the whole word into a vector with one element per character
+    int Word2WordVec(const std::string &word,
+                     std::vector<std::wstring> *wordvec);
+
+    // Re-segment the whole word with a full cut so that each resulting piece is in the dictionary
+    int SplitWord(const std::string &word,
+                  std::vector<std::string> *fullcut_word);
+
+    // Post-process the segmentation result: tidy entries containing "不"
+    std::vector<std::pair<std::string, std::string>> MergeBu(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Post-process the segmentation result: tidy entries containing "一"
+    std::vector<std::pair<std::string, std::string>> Mergeyi(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Post-process the segmentation result: merge two adjacent identical characters
+    std::vector<std::pair<std::string, std::string>> MergeReduplication(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Merge a word with the following word when both are read entirely in the third tone
+    std::vector<std::pair<std::string, std::string>> MergeThreeTones(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Merge two words when the last syllable of the first and the first syllable of the next are both third tone
+    std::vector<std::pair<std::string, std::string>> MergeThreeTones2(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Post-process the segmentation result: tidy entries containing "儿"
+    std::vector<std::pair<std::string, std::string>> MergeEr(
+        std::vector<std::pair<std::string, std::string>> *seg_result);
+
+    // Post-process and modify the segmentation result
+    int MergeforModify(
+        std::vector<std::pair<std::string, std::string>> *seg_result,
+        std::vector<std::pair<std::string, std::string>> *merge_seg_result);

-            // Handle erhua (the "儿" suffix sound)
-            std::vector<std::vector<std::string>> MergeErhua(const std::vector<std::string> &initials, const std::vector<std::string> &finals, const std::string &word, const std::string &pos);

-        
+    // Adjust the tones of words containing "不"
+    int BuSandi(const std::string &word, std::vector<std::string> *finals);

-        private:
-            bool _initialed;
-            cppjieba::Jieba *_jieba;
-            std::vector<std::string> _punc;
-            std::vector<std::string> _punc_omit;
+    // Adjust the tones of words containing "一"
+    int YiSandhi(const std::string &word, std::vector<std::string> *finals);
+
+    // Adjust the tones of certain special words (measure words, modal particles, etc.)
+    int NeuralSandhi(const std::string &word,
+                     const std::string &pos,
+                     std::vector<std::string> *finals);

-            std::string _conf_file;
-            std::map<std::string, std::string> conf_map;
-            std::map<std::string, std::string> word_phone_map;
-            std::map<std::string, std::string> phone_id_map;
-            std::map<std::string, std::string> tone_id_map;
-            std::map<std::string, std::string> trand_simp_map;
+    // Adjust the tones of words containing third tones
+    int ThreeSandhi(const std::string &word, std::vector<std::string> *finals);
+
+    // Process and modify the tones of a word
+    int ModifyTone(const std::string &word,
+                   const std::string &pos,
+                   std::vector<std::string> *finals);


-            std::string _jieba_dict_path;
-            std::string _jieba_hmm_path;
-            std::string _jieba_user_dict_path;
-            std::string _jieba_idf_path;
-            std::string _jieba_stop_word_path;
+    // Handle erhua (the "儿" suffix sound)
+    std::vector<std::vector<std::string>> MergeErhua(
+        const std::vector<std::string> &initials,
+        const std::vector<std::string> &finals,
+        const std::string &word,
+        const std::string &pos);
+

-            std::string _seperate_tone;
-            std::string _word2phone_path;
-            std::string _phone2id_path;
-            std::string _tone2id_path;
-            std::string _trand2simp_path;
+  private:
+    bool _initialed;
+    cppjieba::Jieba *_jieba;
+    std::vector<std::string> _punc;
+    std::vector<std::string> _punc_omit;

-            std::vector<std::string> must_erhua;
-            std::vector<std::string> not_erhua;
+    std::string _conf_file;
+    std::map<std::string, std::string> conf_map;
+    std::map<std::string, std::string> word_phone_map;
+    std::map<std::string, std::string> phone_id_map;
+    std::map<std::string, std::string> tone_id_map;
+    std::map<std::string, std::string> trand_simp_map;

-            std::vector<std::string> must_not_neural_tone_words;
-            std::vector<std::string> must_neural_tone_words;

+    std::string _jieba_dict_path;
+    std::string _jieba_hmm_path;
+    std::string _jieba_user_dict_path;
+    std::string _jieba_idf_path;
+    std::string _jieba_stop_word_path;

+    std::string _seperate_tone;
+    std::string _word2phone_path;
+    std::string _phone2id_path;
+    std::string _tone2id_path;
+    std::string _trand2simp_path;
+
+    std::vector<std::string> must_erhua;
+    std::vector<std::string> not_erhua;

-    };
-}
+    std::vector<std::string> must_not_neural_tone_words;
+    std::vector<std::string> must_neural_tone_words;
+};
+}  // namespace ppspeech
 #endif
--- a/demos/TTSCppFrontend/src/front/text_normalize.cpp
+++ b/demos/TTSCppFrontend/src/front/text_normalize.cpp
@ -1,10 +1,22 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "front/text_normalize.h"

 namespace ppspeech {

 // 初始化 digits_map and unit_map
 int TextNormalizer::InitMap() {
-   
    digits_map["0"] = "零";
    digits_map["1"] = "一";
    digits_map["2"] = "二";
@ -21,77 +33,84 @@ int TextNormalizer::InitMap() {
    units_map[3] = "千";
    units_map[4] = "万";
    units_map[8] = "亿";
-   
+
    return 0;
 }

 // Replaces the len characters of the sentence that start at pos with repstr
 // (erase followed by insert at the same position). Always returns 0.
-int TextNormalizer::Replace(std::wstring &sentence, const int &pos, const int &len, const std::wstring &repstr) {
+int TextNormalizer::Replace(std::wstring *sentence,
+                            const int &pos,
+                            const int &len,
+                            const std::wstring &repstr) {
    // Remove the original span
-    sentence.erase(pos, len);
+    sentence->erase(pos, len);
    // Insert the replacement text
-    sentence.insert(pos, repstr);
+    sentence->insert(pos, repstr);
    return 0;
-
 }

 // Splits a sentence at punctuation marks. Each piece appended to
 // sentence_part runs up to and including the punctuation mark that ends it;
 // any text left after the last mark is appended as a final piece.
 // Always returns 0.
-int TextNormalizer::SplitByPunc(const std::wstring &sentence, std::vector<std::wstring> &sentence_part) {
+int TextNormalizer::SplitByPunc(const std::wstring &sentence,
+                                std::vector<std::wstring> *sentence_part) {
    std::wstring temp = sentence;
    std::wregex reg(L"[：，；。？！,;?!]");
    std::wsmatch match;

-    while (std::regex_search (temp, match, reg)) {
-        sentence_part.push_back(temp.substr(0, match.position(0) + match.length(0)));
-        Replace(temp, 0, match.position(0) + match.length(0), L"");
+    while (std::regex_search(temp, match, reg)) {
+        sentence_part->push_back(
+            temp.substr(0, match.position(0) + match.length(0)));
+        Replace(&temp, 0, match.position(0) + match.length(0), L"");
    }
    // Text remaining when the sentence does not end with a punctuation mark
-    if(temp != L"") {
-        sentence_part.push_back(temp);
+    if (temp != L"") {
+        sentence_part->push_back(temp);
    }
    return 0;
 }

-//Digits to text, 10200 - > 一万零二百. Recursive: splits at the largest unit and joins both halves with units_map.
-std::string TextNormalizer::CreateTextValue(const std::string &num_str, bool use_zero) {
-
-    std::string num_lstrip = std::string(absl::StripPrefix(num_str, "0")).data();
+// Digits to text, 10200 - > 一万零二百. Recursive: splits at the largest unit and joins both halves with units_map.
+std::string TextNormalizer::CreateTextValue(const std::string &num_str,
+                                            bool use_zero) {
+    std::string num_lstrip =
+        std::string(absl::StripPrefix(num_str, "0")).data();  // NOTE(review): StripPrefix removes the prefix once, not every leading "0"
    int len = num_lstrip.length();
-    
-    if(len == 0) {
+
+    if (len == 0) {
        return "";
    } else if (len == 1) {
-        if(use_zero && (len < num_str.length())) {
+        if (use_zero && (len < num_str.length())) {
            return digits_map["0"] + digits_map[num_lstrip];
        } else {
            return digits_map[num_lstrip];
        }
    } else {
-        int largest_unit = 0; // largest unit (key into units_map)
+        int largest_unit = 0;  // largest unit (key into units_map)
        std::string first_part;
        std::string second_part;

-        if (len > 1 and len <= 2) {
+        if (len > 1 && len <= 2) {
            largest_unit = 1;
-        } else if (len > 2 and len <= 3) {
+        } else if (len > 2 && len <= 3) {
            largest_unit = 2;
-        } else if (len > 3 and len <= 4) {
+        } else if (len > 3 && len <= 4) {
            largest_unit = 3;
-        } else if (len > 4 and len <= 8) {
+        } else if (len > 4 && len <= 8) {
            largest_unit = 4;
        } else if (len > 8) {
-            largest_unit = 8;  
-        }  
+            largest_unit = 8;
+        }

        // NOTE(review): the split uses num_str, while largest_unit was chosen from the stripped length; confirm
        first_part = num_str.substr(0, num_str.length() - largest_unit);
        second_part = num_str.substr(num_str.length() - largest_unit);
-        
-        return CreateTextValue(first_part, use_zero) + units_map[largest_unit] + CreateTextValue(second_part, use_zero);
+
+        return CreateTextValue(first_part, use_zero) + units_map[largest_unit] +
+               CreateTextValue(second_part, use_zero);
    }
 }

-//  数字一个一个对应，可直接用于年份，电话，手机，
-std::string TextNormalizer::SingleDigit2Text(const std::string &num_str, bool alt_one) {
+// 数字一个一个对应，可直接用于年份，电话，手机，
+std::string TextNormalizer::SingleDigit2Text(const std::string &num_str,
+                                             bool alt_one) {
    std::string text = "";
    if (alt_one) {
        digits_map["1"] = "幺";
@ -110,13 +129,16 @@ std::string TextNormalizer::SingleDigit2Text(const std::string &num_str, bool al
    return text;
 }

 // Wide-string overload: converts num to UTF-8 and forwards to the
 // std::string overload with the same alt_one flag.
-std::string TextNormalizer::SingleDigit2Text(const std::wstring &num, bool alt_one) {
+std::string TextNormalizer::SingleDigit2Text(const std::wstring &num,
+                                             bool alt_one) {
    std::string num_str = wstring2utf8string(num);
    return SingleDigit2Text(num_str, alt_one);
 }

 //  数字整体对应，可直接用于月份，日期，数值整数部分
-std::string TextNormalizer::MultiDigit2Text(const std::string &num_str, bool alt_one, bool use_zero) {
+std::string TextNormalizer::MultiDigit2Text(const std::string &num_str,
+                                            bool alt_one,
+                                            bool use_zero) {
    LOG(INFO) << "aaaaaaaaaaaaaaaa: " << alt_one << use_zero;
    if (alt_one) {
        digits_map["1"] = "幺";
@ -124,18 +146,22 @@ std::string TextNormalizer::MultiDigit2Text(const std::string &num_str, bool alt
        digits_map["1"] = "一";
    }

-    std::wstring result = utf8string2wstring(CreateTextValue(num_str, use_zero));
+    std::wstring result =
+        utf8string2wstring(CreateTextValue(num_str, use_zero));
    std::wstring result_0(1, result[0]);
    std::wstring result_1(1, result[1]);
    // 一十八 --> 十八
-    if ((result_0 == utf8string2wstring(digits_map["1"])) && (result_1 == utf8string2wstring(units_map[1]))) {
-        return wstring2utf8string(result.substr(1,result.length()));
+    if ((result_0 == utf8string2wstring(digits_map["1"])) &&
+        (result_1 == utf8string2wstring(units_map[1]))) {
+        return wstring2utf8string(result.substr(1, result.length()));
    } else {
        return wstring2utf8string(result);
    }
 }

 // Wide-string overload: converts num to UTF-8 and forwards to the
 // std::string overload with the same alt_one and use_zero flags.
-std::string TextNormalizer::MultiDigit2Text(const std::wstring &num, bool alt_one, bool use_zero) {
+std::string TextNormalizer::MultiDigit2Text(const std::wstring &num,
+                                            bool alt_one,
+                                            bool use_zero) {
    std::string num_str = wstring2utf8string(num);
    return MultiDigit2Text(num_str, alt_one, use_zero);
 }
@ -145,15 +171,20 @@ std::string TextNormalizer::Digits2Text(const std::string &num_str) {
    std::string text;
    std::vector<std::string> integer_decimal;
    integer_decimal = absl::StrSplit(num_str, ".");
-    
-    if(integer_decimal.size() == 1) {  // 整数
+
+    if (integer_decimal.size() == 1) {  // 整数
        text = MultiDigit2Text(integer_decimal[0]);
-    } else if(integer_decimal.size() == 2) {   // 小数
-        if(integer_decimal[0] == "") {  // 无整数的小数类型，例如：.22
-            text = "点" + SingleDigit2Text(std::string(absl::StripSuffix(integer_decimal[1], "0")).data());
+    } else if (integer_decimal.size() == 2) {  // 小数
+        if (integer_decimal[0] == "") {  // 无整数的小数类型，例如：.22
+            text = "点" +
+                   SingleDigit2Text(
+                       std::string(absl::StripSuffix(integer_decimal[1], "0"))
+                           .data());
        } else {  // 常规小数类型，例如：12.34
-            text = MultiDigit2Text(integer_decimal[0]) + "点" + \
-                   SingleDigit2Text(std::string(absl::StripSuffix(integer_decimal[1], "0")).data());
+            text = MultiDigit2Text(integer_decimal[0]) + "点" +
+                   SingleDigit2Text(
+                       std::string(absl::StripSuffix(integer_decimal[1], "0"))
+                           .data());
        }
    } else {
        return "The value does not conform to the numeric format";
@ -168,23 +199,28 @@ std::string TextNormalizer::Digits2Text(const std::wstring &num) {
 }

 // 日期，2021年8月18日 --> 二零二一年八月十八日
-int TextNormalizer::ReData(std::wstring &sentence) {
-    std::wregex reg(L"(\\d{4}|\\d{2})年((0?[1-9]|1[0-2])月)?(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?");
+int TextNormalizer::ReData(std::wstring *sentence) {
+    std::wregex reg(
+        L"(\\d{4}|\\d{2})年((0?[1-9]|1[0-2])月)?(((0?[1-9])|((1|2)[0-9])|30|31)"
+        L"([日号]))?");
    std::wsmatch match;
    std::string rep;

-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        rep += SingleDigit2Text(match[1]) + "年";
-        if(match[3] != L"") {
+        if (match[3] != L"") {
            rep += MultiDigit2Text(match[3], false, false) + "月";
        }
-        if(match[5] != L"") {
-            rep += MultiDigit2Text(match[5], false, false) + wstring2utf8string(match[9]);
+        if (match[5] != L"") {
+            rep += MultiDigit2Text(match[5], false, false) +
+                   wstring2utf8string(match[9]);
        }

-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
-
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
@ -192,255 +228,301 @@ int TextNormalizer::ReData(std::wstring &sentence) {


 // Numeric dates written XX-XX-XX or XX/XX/XX, e.g. 2021/08/18 --> 二零二一年八月十八日
 // The pattern needs a 4-digit year and the same separator (one of '-', ' ',
 // '/', '.') in both positions. Matches are rewritten in place in the
 // sentence; always returns 0.
-int TextNormalizer::ReData2(std::wstring &sentence) {
-    std::wregex reg(L"(\\d{4})([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])");
+int TextNormalizer::ReData2(std::wstring *sentence) {
+    std::wregex reg(
+        L"(\\d{4})([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])");
    std::wsmatch match;
    std::string rep;
-     
-    while (std::regex_search (sentence, match, reg)) {
+
+    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        rep += (SingleDigit2Text(match[1]) + "年");  // year goes through the single-digit converter
        rep += (MultiDigit2Text(match[3], false, false) + "月");  // month goes through the multi-digit converter
        rep += (MultiDigit2Text(match[4], false, false) + "日");  // day goes through the multi-digit converter
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
-
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }
-    
+
    return 0;
 }

 // Times written XX:XX or XX:XX:XX, e.g. 09:09:02 --> 九点零九分零二秒
 // Matches are rewritten in place in the sentence; always returns 0.
-int TextNormalizer::ReTime(std::wstring &sentence) {
+int TextNormalizer::ReTime(std::wstring *sentence) {
    std::wregex reg(L"([0-1]?[0-9]|2[0-3]):([0-5][0-9])(:([0-5][0-9]))?");
    std::wsmatch match;
    std::string rep;
-    
-    while (std::regex_search (sentence, match, reg)) {
+
+    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        rep += (MultiDigit2Text(match[1], false, false) + "点");
        // A "零" is written out explicitly when the minutes start with "0"
-        if(absl::StartsWith(wstring2utf8string(match[2]), "0")) {
+        if (absl::StartsWith(wstring2utf8string(match[2]), "0")) {
            rep += "零";
        }
        rep += (MultiDigit2Text(match[2]) + "分");
        // Same explicit "零" for seconds that start with "0"
-        if(absl::StartsWith(wstring2utf8string(match[4]), "0")) {
+        if (absl::StartsWith(wstring2utf8string(match[4]), "0")) {
            rep += "零";
        }
        // NOTE(review): the seconds group is optional in the pattern, yet this
        // line runs even when it did not match; confirm that is intended.
        rep += (MultiDigit2Text(match[4]) + "秒");

-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
 }

 // Temperatures, e.g. -24.3℃ --> 负二十四点三度
 // A leading '-' is rendered as "负". The unit is kept as "摄氏度" only when
 // the input used "摄氏度"; every other accepted unit becomes "度".
 // Matches are rewritten in place in the sentence; always returns 0.
-int TextNormalizer::ReTemperature(std::wstring &sentence) {
-    std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)(°C|℃|度|摄氏度)"); 
+int TextNormalizer::ReTemperature(std::wstring *sentence) {
+    std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)(°C|℃|度|摄氏度)");
    std::wsmatch match;
    std::string rep;
    std::string sign;
    std::vector<std::string> integer_decimal;  // not used in this function
    std::string unit;

-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        match[1] == L"-" ? sign = "负" : sign = "";
-        match[4] == L"摄氏度"? unit = "摄氏度" : unit = "度";
+        match[4] == L"摄氏度" ? unit = "摄氏度" : unit = "度";
        rep = sign + Digits2Text(match[2]) + unit;
-        
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));

+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
-
 }

 // Fractions, e.g. 1/3 --> 三分之一
 // The denominator is written first, then "分之", then the numerator; a
 // leading '-' is rendered as "负". Matches are rewritten in place in the
 // sentence; always returns 0.
-int TextNormalizer::ReFrac(std::wstring &sentence) {
-    std::wregex reg(L"(-?)(\\d+)/(\\d+)"); 
+int TextNormalizer::ReFrac(std::wstring *sentence) {
+    std::wregex reg(L"(-?)(\\d+)/(\\d+)");
    std::wsmatch match;
    std::string sign;
    std::string rep;
-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        match[1] == L"-" ? sign = "负" : sign = "";
-        rep = sign + MultiDigit2Text(match[3]) + "分之" + MultiDigit2Text(match[2]);
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        rep = sign + MultiDigit2Text(match[3]) + "分之" +
+              MultiDigit2Text(match[2]);
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
 }

 // Percentages, e.g. 45.5% --> 百分之四十五点五
 // A leading '-' is rendered as "负" in front of "百分之". Matches are
 // rewritten in place in the sentence; always returns 0.
-int TextNormalizer::RePercentage(std::wstring &sentence) {
-    std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)%"); 
+int TextNormalizer::RePercentage(std::wstring *sentence) {
+    std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)%");
    std::wsmatch match;
    std::string sign;
    std::string rep;
    std::vector<std::string> integer_decimal;  // not used in this function
-    
-    while (std::regex_search (sentence, match, reg)) {
+
+    while (std::regex_search(*sentence, match, reg)) {
        match[1] == L"-" ? sign = "负" : sign = "";
        rep = sign + "百分之" + Digits2Text(match[2]);
-         
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }
-    
+
    return 0;
 }

 // Mobile phone numbers, e.g. +86 18883862235 --> 八六幺八八八三八六二二三五
 // The whole match is split on '+' and each part is converted with
 // SingleDigit2Text(part, true). Matches are rewritten in place in the
 // sentence; always returns 0.
-int TextNormalizer::ReMobilePhone(std::wstring &sentence) {
-    std::wregex reg(L"(\\d)?((\\+?86 ?)?1([38]\\d|5[0-35-9]|7[678]|9[89])\\d{8})(\\d)?");
+int TextNormalizer::ReMobilePhone(std::wstring *sentence) {
+    std::wregex reg(
+        L"(\\d)?((\\+?86 ?)?1([38]\\d|5[0-35-9]|7[678]|9[89])\\d{8})(\\d)?");
    std::wsmatch match;
    std::string rep;
    std::vector<std::string> country_phonenum;

-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        country_phonenum = absl::StrSplit(wstring2utf8string(match[0]), "+");
        rep = "";
-        for(int i = 0; i < country_phonenum.size(); i++) {
+        for (int i = 0; i < country_phonenum.size(); i++) {
            LOG(INFO) << country_phonenum[i];  // logs every part at INFO level
            rep += SingleDigit2Text(country_phonenum[i], true);
        }
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
-
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }
-    
+
    return 0;
 }

 // Landline phone numbers, e.g. 010-51093154 --> 零幺零五幺零九三幺五四
 // The whole match is split on '-' and each part is converted with
 // SingleDigit2Text(part, true). Matches are rewritten in place in the
 // sentence; always returns 0.
-int TextNormalizer::RePhone(std::wstring &sentence) {
-    std::wregex reg(L"(\\d)?((0(10|2[1-3]|[3-9]\\d{2})-?)?[1-9]\\d{6,7})(\\d)?");
+int TextNormalizer::RePhone(std::wstring *sentence) {
+    std::wregex reg(
+        L"(\\d)?((0(10|2[1-3]|[3-9]\\d{2})-?)?[1-9]\\d{6,7})(\\d)?");
    std::wsmatch match;
    std::vector<std::string> zone_phonenum;
    std::string rep;

-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        zone_phonenum = absl::StrSplit(wstring2utf8string(match[0]), "-");
-        for(int i = 0; i < zone_phonenum.size(); i ++) {
+        for (int i = 0; i < zone_phonenum.size(); i++) {
            rep += SingleDigit2Text(zone_phonenum[i], true);
        }
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
 }

 // Ranges, e.g. 60~90 --> 六十到九十
 // Two numbers joined by '-' or '~' are rewritten as "<first>到<second>".
 // Each side is either an optionally signed number (groups 2/3 and 9/10) or
 // the ".digits" form (groups 6 and 13); a '-' sign is rendered as "负".
 // Matches are rewritten in place in the sentence; always returns 0.
-int TextNormalizer::ReRange(std::wstring &sentence) {
-    std::wregex reg(L"((-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+)))[-~]((-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+)))");
+int TextNormalizer::ReRange(std::wstring *sentence) {
+    std::wregex reg(
+        L"((-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+)))[-~]((-?)((\\d+)(\\.\\d+)?)|(\\.("
+        L"\\d+)))");
    std::wsmatch match;
    std::string rep;
    std::string sign1;
    std::string sign2;

-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        match[2] == L"-" ? sign1 = "负" : sign1 = "";
-        if(match[6] != L"") {
+        if (match[6] != L"") {
            rep += sign1 + Digits2Text(match[6]) + "到";
        } else {
            rep += sign1 + Digits2Text(match[3]) + "到";
        }
        match[9] == L"-" ? sign2 = "负" : sign2 = "";
-        if(match[13] != L"") {
+        if (match[13] != L"") {
            rep += sign2 + Digits2Text(match[13]);
        } else {
            rep += sign2 + Digits2Text(match[10]);
        }

-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
    }

    return 0;
 }

 // Integers with a minus sign, e.g. -10 --> 负十
 //
 // Replaces every "-<digits>" in *sentence with "负" followed by
 // MultiDigit2Text of the digits (group 2). Always returns 0.
-int TextNormalizer::ReInterger(std::wstring &sentence) {
-    std::wregex reg(L"(-)(\\d+)"); 
+int TextNormalizer::ReInterger(std::wstring *sentence) {
+    std::wregex reg(L"(-)(\\d+)");
     std::wsmatch match;
     std::string rep;
-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
         rep = "负" + MultiDigit2Text(match[2]);
         // The whole match, minus sign included, is replaced.
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
     }
-    
+
     return 0;
 }

 // Pure decimal numbers
 //
 // Matches either "[-]<digits>.<digits>" (fraction required) or ".<digits>"
 // in *sentence and replaces each whole match with the Digits2Text reading,
 // prefixed with "负" when group 1 captured a minus sign. Always returns 0.
-int TextNormalizer::ReDecimalNum(std::wstring &sentence) {
-    std::wregex reg(L"(-?)((\\d+)(\\.\\d+))|(\\.(\\d+))"); 
+int TextNormalizer::ReDecimalNum(std::wstring *sentence) {
     // Groups: 1 = optional "-", 2 = "<digits>.<digits>",
     //         5 = ".<digits>" alternative (includes the leading dot).
+    std::wregex reg(L"(-?)((\\d+)(\\.\\d+))|(\\.(\\d+))");
     std::wsmatch match;
     std::string sign;
     std::string rep;
-    //std::vector<std::string> integer_decimal;
-    while (std::regex_search (sentence, match, reg)) {
+    // std::vector<std::string> integer_decimal;
+    while (std::regex_search(*sentence, match, reg)) {
         match[1] == L"-" ? sign = "负" : sign = "";
         // Group 5 is non-empty only when the ".<digits>" alternative matched.
-        if(match[5] != L"") {
+        if (match[5] != L"") {
             rep = sign + Digits2Text(match[5]);
         } else {
             rep = sign + Digits2Text(match[2]);
         }

-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
     }

     return 0;
 }

 // Positive integer + quantifier (measure word)
 //
 // Finds "<digits>[多余几]?<quantifier>" in *sentence and replaces only the
 // digits (group 1) with MultiDigit2Text; the optional 多/余/几 and the
 // quantifier stay in the sentence untouched. Always returns 0.
-int TextNormalizer::RePositiveQuantifiers(std::wstring &sentence) {
     // NOTE(review): the removed literal below used backslash line
     // continuations inside the string, so the space before each backslash and
     // the next line's indentation became part of some alternatives. The
     // adjacent-literal form that replaces it has no such whitespace, so the
     // two patterns are not identical.
-    std::wstring common_quantifiers = L"(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲| \
-    墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂| \
-    课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘| \
-    毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日| \
-    季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万| \
-    万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)";
-    std::wregex reg(L"(\\d+)([多余几])?" + common_quantifiers); 
+int TextNormalizer::RePositiveQuantifiers(std::wstring *sentence) {
     // One big alternation of quantifiers/units that may follow the number.
+    std::wstring common_quantifiers =
+        L"(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|"
+        L"担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|"
+        L"溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|"
+        L"本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|"
+        L"毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|"
+        L"合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|"
+        L"卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|"
+        L"夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|"
+        L"元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|"
+        L"百万|万|千|百|)块|角|毛|分)";
+    std::wregex reg(L"(\\d+)([多余几])?" + common_quantifiers);
     std::wsmatch match;
     std::string rep;
-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
         rep = MultiDigit2Text(match[1]);
         // Position/length of group 1 only: the quantifier is kept as-is.
-        Replace(sentence, match.position(1), match.length(1), utf8string2wstring(rep));
+        Replace(sentence,
+                match.position(1),
+                match.length(1),
+                utf8string2wstring(rep));
     }

     return 0;
 }

 // ID-style numbers, e.g. 89757 --> 八九七五七
 //
 // Replaces every run of three or more digits in *sentence with its
 // digit-by-digit reading from SingleDigit2Text (default `alt_one`).
 // Always returns 0.
-int TextNormalizer::ReDefalutNum(std::wstring &sentence) {
-    std::wregex reg(L"\\d{3}\\d*"); 
+int TextNormalizer::ReDefalutNum(std::wstring *sentence) {
+    std::wregex reg(L"\\d{3}\\d*");
     std::wsmatch match;
-    while (std::regex_search (sentence, match, reg)) {
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(SingleDigit2Text(match[0])));
+    while (std::regex_search(*sentence, match, reg)) {
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(SingleDigit2Text(match[0])));
     }

     return 0;
 }

-int TextNormalizer::ReNumber(std::wstring &sentence) {
-    std::wregex reg(L"(-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+))"); 
+int TextNormalizer::ReNumber(std::wstring *sentence) {
     // Generic number pass: matches "[-]<digits>[.<digits>]" or ".<digits>" in
     // *sentence and replaces each whole match with its Digits2Text reading,
     // prefixed with "负" when group 1 captured a minus sign. Always returns 0.
     // Groups: 1 = optional "-", 2 = digits with optional fraction,
     //         5 = ".<digits>" alternative (includes the leading dot).
+    std::wregex reg(L"(-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+))");
     std::wsmatch match;
     std::string sign;
     std::string rep;
-    while (std::regex_search (sentence, match, reg)) {
+    while (std::regex_search(*sentence, match, reg)) {
         match[1] == L"-" ? sign = "负" : sign = "";
         // Group 5 is non-empty only when the ".<digits>" alternative matched.
-        if(match[5] != L"") {
+        if (match[5] != L"") {
             rep = sign + Digits2Text(match[5]);
         } else {
             rep = sign + Digits2Text(match[2]);
         }
-        
-        Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep));
+
+        Replace(sentence,
+                match.position(0),
+                match.length(0),
+                utf8string2wstring(rep));
     }
     return 0;
 }

 // Whole-sentence normalization: applies the regex passes in a fixed order.
 //
 // Each pass edits *sentence in place; their return values are ignored and
 // this function always returns 0. The hunk boundary below hides the calls
 // between ReTime and ReRange, so the full order is not visible here.
-int TextNormalizer::SentenceNormalize(std::wstring &sentence) {
+int TextNormalizer::SentenceNormalize(std::wstring *sentence) {
     ReData(sentence);
     ReData2(sentence);
     ReTime(sentence);
@ -452,11 +534,9 @@ int TextNormalizer::SentenceNormalize(std::wstring &sentence) {
     ReRange(sentence);
     ReInterger(sentence);
     ReDecimalNum(sentence);
-    RePositiveQuantifiers(sentence);  
+    RePositiveQuantifiers(sentence);
     ReDefalutNum(sentence);
     // Most generic number pattern, run after the more specific passes above.
     ReNumber(sentence);
-    return 0;   
+    return 0;
 }
-
-
-}
+}  // namespace ppspeech
--- a/demos/TTSCppFrontend/src/front/text_normalize.h
+++ b/demos/TTSCppFrontend/src/front/text_normalize.h
@ -1,11 +1,24 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H
 #define PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H

+#include <glog/logging.h>
+#include <codecvt>
 #include <map>
 #include <regex>
 #include <string>
-#include <codecvt>
-#include <glog/logging.h>
 #include "absl/strings/str_split.h"
 #include "absl/strings/strip.h"
 #include "base/type_conv.h"
@ -13,50 +26,52 @@
 namespace ppspeech {

 // Text normalizer for wide-string sentences. The Re* passes whose definitions
 // appear alongside this header in the same diff (RePhone through ReNumber)
 // rewrite one kind of numeric expression in place and return 0; the remaining
 // Re* passes presumably follow the same contract - TODO confirm.
 class TextNormalizer {
-public:
-    TextNormalizer() {
-        InitMap();
-    }
-    ~TextNormalizer() {
-
-    }
+  public:
     // Construction calls InitMap(); the destructor does nothing.
+    TextNormalizer() { InitMap(); }
+    ~TextNormalizer() {}

     // Presumably fills digits_map / units_map - implementation not shown here.
     int InitMap();
-    int Replace(std::wstring &sentence, const int &pos, const int &len, const std::wstring &repstr);
-    int SplitByPunc(const std::wstring &sentence, std::vector<std::wstring> &sentence_part);
     // Replace: callers pass a match position and length plus the replacement
     // text; presumably substitutes that span of *sentence - TODO confirm.
     // SplitByPunc: presumably splits `sentence` at punctuation into
     // *sentence_part - TODO confirm.
     // NOTE(review): std::vector is used here but <vector> is not among the
     // includes visible in this diff; presumably pulled in transitively.
+    int Replace(std::wstring *sentence,
+                const int &pos,
+                const int &len,
+                const std::wstring &repstr);
+    int SplitByPunc(const std::wstring &sentence,
+                    std::vector<std::wstring> *sentence_part);

     // Number-to-text helpers. Each returns a std::string; the wstring
     // overloads take the same digits as wide text.
-    std::string CreateTextValue(const std::string &num,  bool use_zero=true);
-    std::string SingleDigit2Text(const std::string &num_str, bool alt_one = false);
+    std::string CreateTextValue(const std::string &num, bool use_zero = true);
+    std::string SingleDigit2Text(const std::string &num_str,
+                                 bool alt_one = false);
     std::string SingleDigit2Text(const std::wstring &num, bool alt_one = false);
-    std::string MultiDigit2Text(const std::string &num_str, bool alt_one = false, bool use_zero = true);
-    std::string MultiDigit2Text(const std::wstring &num, bool alt_one = false, bool use_zero = true);
+    std::string MultiDigit2Text(const std::string &num_str,
+                                bool alt_one = false,
+                                bool use_zero = true);
+    std::string MultiDigit2Text(const std::wstring &num,
+                                bool alt_one = false,
+                                bool use_zero = true);
     std::string Digits2Text(const std::string &num_str);
     std::string Digits2Text(const std::wstring &num);

-    int ReData(std::wstring &sentence);
-    int ReData2(std::wstring &sentence);
-    int ReTime(std::wstring &sentence);
-    int ReTemperature(std::wstring &sentence);
-    int ReFrac(std::wstring &sentence);
-    int RePercentage(std::wstring &sentence);
-    int ReMobilePhone(std::wstring &sentence);
-    int RePhone(std::wstring &sentence);
-    int ReRange(std::wstring &sentence);
-    int ReInterger(std::wstring &sentence);
-    int ReDecimalNum(std::wstring &sentence);
-    int RePositiveQuantifiers(std::wstring &sentence);
-    int ReDefalutNum(std::wstring &sentence);
-    int ReNumber(std::wstring &sentence);
-    int SentenceNormalize(std::wstring &sentence);
-
-
-private:
-    std::map<std::string, std::string> digits_map;
-    std::map<int, std::string> units_map;
     // Regex passes. This change switches the sentence argument from a
     // non-const reference to a pointer; SentenceNormalize calls the passes
     // in sequence.
+    int ReData(std::wstring *sentence);
+    int ReData2(std::wstring *sentence);
+    int ReTime(std::wstring *sentence);
+    int ReTemperature(std::wstring *sentence);
+    int ReFrac(std::wstring *sentence);
+    int RePercentage(std::wstring *sentence);
+    int ReMobilePhone(std::wstring *sentence);
+    int RePhone(std::wstring *sentence);
+    int ReRange(std::wstring *sentence);
+    int ReInterger(std::wstring *sentence);
+    int ReDecimalNum(std::wstring *sentence);
+    int RePositiveQuantifiers(std::wstring *sentence);
+    int ReDefalutNum(std::wstring *sentence);
+    int ReNumber(std::wstring *sentence);
+    int SentenceNormalize(std::wstring *sentence);


+  private:
     // Lookup tables, presumably for the number-to-text helpers (digit string
     // -> text, and an int key -> unit text) - TODO confirm against InitMap.
+    std::map<std::string, std::string> digits_map;
+    std::map<int, std::string> units_map;
 };
-
-}
+}  // namespace ppspeech

 #endif