diff --git a/demos/TTSCppFrontend/front_demo/front_demo.cpp b/demos/TTSCppFrontend/front_demo/front_demo.cpp index a0bbd5913..19f16758b 100644 --- a/demos/TTSCppFrontend/front_demo/front_demo.cpp +++ b/demos/TTSCppFrontend/front_demo/front_demo.cpp @@ -60,7 +60,7 @@ int main(int argc, char** argv) { for (int i = 0; i < sentence_part.size(); i++) { LOG(INFO) << "Raw sentence is: " << ppspeech::wstring2utf8string(sentence_part[i]); - front_inst->SentenceNormalize(sentence_part[i]); + front_inst->SentenceNormalize(&sentence_part[i]); s_sentence = ppspeech::wstring2utf8string(sentence_part[i]); LOG(INFO) << "After normalization sentence is: " << s_sentence; diff --git a/demos/TTSCppFrontend/src/front/text_normalize.cpp b/demos/TTSCppFrontend/src/front/text_normalize.cpp index 0d8e33e33..8420e8407 100644 --- a/demos/TTSCppFrontend/src/front/text_normalize.cpp +++ b/demos/TTSCppFrontend/src/front/text_normalize.cpp @@ -199,14 +199,14 @@ std::string TextNormalizer::Digits2Text(const std::wstring &num) { } // 日期,2021年8月18日 --> 二零二一年八月十八日 -int TextNormalizer::ReData(std::wstring &sentence) { +int TextNormalizer::ReData(std::wstring *sentence) { std::wregex reg( L"(\\d{4}|\\d{2})年((0?[1-9]|1[0-2])月)?(((0?[1-9])|((1|2)[0-9])|30|31)" L"([日号]))?"); std::wsmatch match; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = ""; rep += SingleDigit2Text(match[1]) + "年"; if (match[3] != L"") { @@ -217,7 +217,7 @@ int TextNormalizer::ReData(std::wstring &sentence) { wstring2utf8string(match[9]); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -228,18 +228,18 @@ int TextNormalizer::ReData(std::wstring &sentence) { // XX-XX-XX or XX/XX/XX 例如:2021/08/18 --> 二零二一年八月十八日 -int TextNormalizer::ReData2(std::wstring &sentence) { +int TextNormalizer::ReData2(std::wstring *sentence) { std::wregex reg( L"(\\d{4})([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])"); std::wsmatch match; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = ""; rep += (SingleDigit2Text(match[1]) + "年"); rep += (MultiDigit2Text(match[3], false, false) + "月"); rep += (MultiDigit2Text(match[4], false, false) + "日"); - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -249,12 +249,12 @@ int TextNormalizer::ReData2(std::wstring &sentence) { } // XX:XX:XX 09:09:02 --> 九点零九分零二秒 -int TextNormalizer::ReTime(std::wstring &sentence) { +int TextNormalizer::ReTime(std::wstring *sentence) { std::wregex reg(L"([0-1]?[0-9]|2[0-3]):([0-5][0-9])(:([0-5][0-9]))?"); std::wsmatch match; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = ""; rep += (MultiDigit2Text(match[1], false, false) + "点"); if (absl::StartsWith(wstring2utf8string(match[2]), "0")) { @@ -266,7 +266,7 @@ int TextNormalizer::ReTime(std::wstring &sentence) { } rep += (MultiDigit2Text(match[4]) + "秒"); - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -276,7 +276,7 @@ int TextNormalizer::ReTime(std::wstring &sentence) { } // 温度,例如:-24.3℃ --> 零下二十四点三度 -int TextNormalizer::ReTemperature(std::wstring &sentence) { +int TextNormalizer::ReTemperature(std::wstring *sentence) { std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)(°C|℃|度|摄氏度)"); std::wsmatch match; std::string rep; @@ -284,12 +284,12 @@ int TextNormalizer::ReTemperature(std::wstring &sentence) { std::vector integer_decimal; std::string unit; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { match[1] == L"-" ? sign = "负" : sign = ""; match[4] == L"摄氏度" ? unit = "摄氏度" : unit = "度"; rep = sign + Digits2Text(match[2]) + unit; - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -299,16 +299,16 @@ int TextNormalizer::ReTemperature(std::wstring &sentence) { } // 分数,例如: 1/3 --> 三分之一 -int TextNormalizer::ReFrac(std::wstring &sentence) { +int TextNormalizer::ReFrac(std::wstring *sentence) { std::wregex reg(L"(-?)(\\d+)/(\\d+)"); std::wsmatch match; std::string sign; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { match[1] == L"-" ? sign = "负" : sign = ""; rep = sign + MultiDigit2Text(match[3]) + "分之" + MultiDigit2Text(match[2]); - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -318,18 +318,18 @@ int TextNormalizer::ReFrac(std::wstring &sentence) { } // 百分数,例如:45.5% --> 百分之四十五点五 -int TextNormalizer::RePercentage(std::wstring &sentence) { +int TextNormalizer::RePercentage(std::wstring *sentence) { std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)%"); std::wsmatch match; std::string sign; std::string rep; std::vector integer_decimal; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { match[1] == L"-" ? sign = "负" : sign = ""; rep = sign + "百分之" + Digits2Text(match[2]); - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -339,21 +339,21 @@ int TextNormalizer::RePercentage(std::wstring &sentence) { } // 手机号码,例如:+86 18883862235 --> 八六幺八八八三八六二二三五 -int TextNormalizer::ReMobilePhone(std::wstring &sentence) { +int TextNormalizer::ReMobilePhone(std::wstring *sentence) { std::wregex reg( L"(\\d)?((\\+?86 ?)?1([38]\\d|5[0-35-9]|7[678]|9[89])\\d{8})(\\d)?"); std::wsmatch match; std::string rep; std::vector country_phonenum; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { country_phonenum = absl::StrSplit(wstring2utf8string(match[0]), "+"); rep = ""; for (int i = 0; i < country_phonenum.size(); i++) { LOG(INFO) << country_phonenum[i]; rep += SingleDigit2Text(country_phonenum[i], true); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -363,20 +363,20 @@ int TextNormalizer::ReMobilePhone(std::wstring &sentence) { } // 座机号码,例如:010-51093154 --> 零幺零五幺零九三幺五四 -int TextNormalizer::RePhone(std::wstring &sentence) { +int TextNormalizer::RePhone(std::wstring *sentence) { std::wregex reg( L"(\\d)?((0(10|2[1-3]|[3-9]\\d{2})-?)?[1-9]\\d{6,7})(\\d)?"); std::wsmatch match; std::vector zone_phonenum; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = ""; zone_phonenum = absl::StrSplit(wstring2utf8string(match[0]), "-"); for (int i = 0; i < zone_phonenum.size(); i++) { rep += SingleDigit2Text(zone_phonenum[i], true); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -386,7 +386,7 @@ int TextNormalizer::RePhone(std::wstring &sentence) { } // 范围,例如:60~90 --> 六十到九十 -int TextNormalizer::ReRange(std::wstring &sentence) { +int TextNormalizer::ReRange(std::wstring *sentence) { std::wregex reg( L"((-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+)))[-~]((-?)((\\d+)(\\.\\d+)?)|(\\.(" L"\\d+)))"); @@ -395,7 +395,7 @@ int TextNormalizer::ReRange(std::wstring &sentence) { std::string sign1; std::string sign2; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = ""; match[2] == L"-" ? sign1 = "负" : sign1 = ""; if (match[6] != L"") { @@ -410,7 +410,7 @@ int TextNormalizer::ReRange(std::wstring &sentence) { rep += sign2 + Digits2Text(match[10]); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -420,13 +420,13 @@ int TextNormalizer::ReRange(std::wstring &sentence) { } // 带负号的整数,例如:-10 --> 负十 -int TextNormalizer::ReInterger(std::wstring &sentence) { +int TextNormalizer::ReInterger(std::wstring *sentence) { std::wregex reg(L"(-)(\\d+)"); std::wsmatch match; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = "负" + MultiDigit2Text(match[2]); - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -436,13 +436,13 @@ int TextNormalizer::ReInterger(std::wstring &sentence) { } // 纯小数 -int TextNormalizer::ReDecimalNum(std::wstring &sentence) { +int TextNormalizer::ReDecimalNum(std::wstring *sentence) { std::wregex reg(L"(-?)((\\d+)(\\.\\d+))|(\\.(\\d+))"); std::wsmatch match; std::string sign; std::string rep; // std::vector integer_decimal; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { match[1] == L"-" ? sign = "负" : sign = ""; if (match[5] != L"") { rep = sign + Digits2Text(match[5]); @@ -450,7 +450,7 @@ int TextNormalizer::ReDecimalNum(std::wstring &sentence) { rep = sign + Digits2Text(match[2]); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -460,7 +460,7 @@ int TextNormalizer::ReDecimalNum(std::wstring &sentence) { } // 正整数 + 量词 -int TextNormalizer::RePositiveQuantifiers(std::wstring &sentence) { +int TextNormalizer::RePositiveQuantifiers(std::wstring *sentence) { std::wstring common_quantifiers = L"(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|" L"担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|" @@ -475,9 +475,9 @@ int TextNormalizer::RePositiveQuantifiers(std::wstring &sentence) { std::wregex reg(L"(\\d+)([多余几])?" + common_quantifiers); std::wsmatch match; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { rep = MultiDigit2Text(match[1]); - Replace(&sentence, + Replace(sentence, match.position(1), match.length(1), utf8string2wstring(rep)); @@ -487,11 +487,11 @@ int TextNormalizer::RePositiveQuantifiers(std::wstring &sentence) { } // 编号类数字,例如: 89757 --> 八九七五七 -int TextNormalizer::ReDefalutNum(std::wstring &sentence) { +int TextNormalizer::ReDefalutNum(std::wstring *sentence) { std::wregex reg(L"\\d{3}\\d*"); std::wsmatch match; - while (std::regex_search(sentence, match, reg)) { - Replace(&sentence, + while (std::regex_search(*sentence, match, reg)) { + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(SingleDigit2Text(match[0]))); @@ -500,12 +500,12 @@ int TextNormalizer::ReDefalutNum(std::wstring &sentence) { return 0; } -int TextNormalizer::ReNumber(std::wstring &sentence) { +int TextNormalizer::ReNumber(std::wstring *sentence) { std::wregex reg(L"(-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+))"); std::wsmatch match; std::string sign; std::string rep; - while (std::regex_search(sentence, match, reg)) { + while (std::regex_search(*sentence, match, reg)) { match[1] == L"-" ? sign = "负" : sign = ""; if (match[5] != L"") { rep = sign + Digits2Text(match[5]); @@ -513,7 +513,7 @@ int TextNormalizer::ReNumber(std::wstring &sentence) { rep = sign + Digits2Text(match[2]); } - Replace(&sentence, + Replace(sentence, match.position(0), match.length(0), utf8string2wstring(rep)); @@ -522,7 +522,7 @@ int TextNormalizer::ReNumber(std::wstring &sentence) { } // 整体正则,按顺序 -int TextNormalizer::SentenceNormalize(std::wstring &sentence) { +int TextNormalizer::SentenceNormalize(std::wstring *sentence) { ReData(sentence); ReData2(sentence); ReTime(sentence); diff --git a/demos/TTSCppFrontend/src/front/text_normalize.h b/demos/TTSCppFrontend/src/front/text_normalize.h index d80d39559..4383fa1b4 100644 --- a/demos/TTSCppFrontend/src/front/text_normalize.h +++ b/demos/TTSCppFrontend/src/front/text_normalize.h @@ -51,21 +51,21 @@ class TextNormalizer { std::string Digits2Text(const std::string &num_str); std::string Digits2Text(const std::wstring &num); - int ReData(std::wstring &sentence); - int ReData2(std::wstring &sentence); - int ReTime(std::wstring &sentence); - int ReTemperature(std::wstring &sentence); - int ReFrac(std::wstring &sentence); - int RePercentage(std::wstring &sentence); - int ReMobilePhone(std::wstring &sentence); - int RePhone(std::wstring &sentence); - int ReRange(std::wstring &sentence); - int ReInterger(std::wstring &sentence); - int ReDecimalNum(std::wstring &sentence); - int RePositiveQuantifiers(std::wstring &sentence); - int ReDefalutNum(std::wstring &sentence); - int ReNumber(std::wstring &sentence); - int SentenceNormalize(std::wstring &sentence); + int ReData(std::wstring *sentence); + int ReData2(std::wstring *sentence); + int ReTime(std::wstring *sentence); + int ReTemperature(std::wstring *sentence); + int ReFrac(std::wstring *sentence); + int RePercentage(std::wstring *sentence); + int ReMobilePhone(std::wstring *sentence); + int RePhone(std::wstring *sentence); + int ReRange(std::wstring *sentence); + int ReInterger(std::wstring *sentence); + int ReDecimalNum(std::wstring *sentence); + int RePositiveQuantifiers(std::wstring *sentence); + int ReDefalutNum(std::wstring *sentence); + int ReNumber(std::wstring *sentence); + int SentenceNormalize(std::wstring *sentence); private: