// Text normalization (TN) / inverse text normalization (ITN) helpers.
//
// Reconstructed from the flattened patch "[Engine] add TN/ITN functions
// (#3047)" (runtime/engine/common/utils/text_process.{h,cc}).
// The gtest cases that accompanied the patch are not reproduced here: the
// flattened text lost the whitespace runs and tag markup their expected
// strings depend on, and the final case is cut off.

#include <cctype>
#include <cstddef>
#include <string>

namespace ppspeech {

namespace {

// Marker that surrounds a fraction on both sides: <tag>num1/num2<tag>.
// NOTE(review): the literal was blank in the flattened source; it is restored
// here as a 5-character tag to agree with the original `len_tag = 5`.
// Confirm against the upstream file.
constexpr char kTag[] = "<tag>";
constexpr std::size_t kTagLen = sizeof(kTag) - 1;

// <cctype> classifiers have undefined behaviour for negative arguments, and
// the bytes of multi-byte UTF-8 characters are negative where `char` is
// signed, so always classify through `unsigned char`.
inline bool IsAlpha(char c) {
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
}

}  // namespace

// Removes every ' ' from `str`, except that one space is kept between two
// alphabetic characters that were separated by one or more spaces in the
// input (so English words stay separated).
//
// Returns the filtered copy; `str` itself is not modified.
std::string DelBlank(const std::string& str) {
    std::string out;
    out.reserve(str.size());
    bool skipped_space = false;  // true when the previous input char was ' '
    for (const char c : str) {
        if (c == ' ') {
            skipped_space = true;
            continue;
        }
        // `out.back()` is the last non-space character emitted so far.
        if (skipped_space && !out.empty() && IsAlpha(c) &&
            IsAlpha(out.back())) {
            out += ' ';
        }
        out += c;
        skipped_space = false;
    }
    return out;
}

// Surrounds every maximal run of alphabetic characters with spaces.
//
// A leading space is added unless the run is already preceded by ' ' in the
// input; a trailing space is always added. All other characters are copied
// unchanged, so a run that is followed by ' ' in the input ends up followed
// by two spaces in the output.
std::string AddBlank(const std::string& str) {
    const std::size_t size = str.size();
    std::string out;
    out.reserve(size + 2);
    std::size_t pos = 0;
    while (pos < size) {
        if (!IsAlpha(str[pos])) {
            out += str[pos];
            ++pos;
            continue;
        }
        if (pos == 0 || str[pos - 1] != ' ') {
            out += ' ';  // pre-space for the word
        }
        const std::size_t word_begin = pos;
        // Bounded explicitly instead of relying on reading str[size()].
        while (pos < size && IsAlpha(str[pos])) {
            ++pos;
        }
        out.append(str, word_begin, pos - word_begin);
        out += ' ';  // post-space for the word
    }
    return out;
}

// Rewrites each tagged fraction `<tag>num1/num2<tag>` as `num2/num1`,
// dropping the two tags. Text outside tagged fractions is copied unchanged.
//
// If an opening tag is not followed by both a '/' and a closing tag, the rest
// of the input (from the end of the last rewritten fraction) is copied
// verbatim rather than being indexed with "not found" positions.
std::string ReverseFraction(const std::string& str) {
    std::string out;
    out.reserve(str.size());
    std::string::size_type pos = 0;  // first input char not yet emitted
    while (pos < str.size()) {
        const std::string::size_type open = str.find(kTag, pos);
        if (open == std::string::npos) {
            break;
        }
        const std::string::size_type num_begin = open + kTagLen;
        const std::string::size_type slash = str.find('/', num_begin);
        if (slash == std::string::npos) {
            break;
        }
        const std::string::size_type close = str.find(kTag, slash + 1);
        if (close == std::string::npos) {
            break;
        }
        out.append(str, pos, open - pos);               // text before the tag
        out.append(str, slash + 1, close - slash - 1);  // num2
        out += '/';
        out.append(str, num_begin, slash - num_begin);  // num1
        pos = close + kTagLen;
    }
    out.append(str, pos, std::string::npos);  // untouched remainder
    return out;
}

}  // namespace ppspeech