namespace ppspeech {

// Strips ' ' characters from `str`.
//
// Every space is dropped, except that one space is kept between two
// alphabetic characters that were separated by one or more spaces in the
// input, so English words stay separated while spaces around other bytes
// (e.g. the UTF-8 bytes of CJK text, digits, punctuation) disappear.
// Only the space character ' ' is treated as blank; tabs and newlines are
// copied through unchanged.
//
// Returns the processed copy; the input is not modified.
std::string RemoveBlk(const std::string& str) {
    // <cctype> classification functions require a value representable as
    // unsigned char; passing a negative plain char (any non-ASCII byte when
    // char is signed) is undefined behaviour, hence the cast.
    const auto is_alpha = [](char c) {
        return isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size());

    // Hop from one non-space character to the next.
    std::string::size_type pos = str.find_first_not_of(' ');
    while (pos != std::string::npos) {
        // A gap exists if the character right before this one is a space.
        const bool after_gap = pos > 0 && str[pos - 1] == ' ';
        // out.back() is always the previously kept input character.
        if (after_gap && !out.empty() && is_alpha(out.back()) &&
            is_alpha(str[pos])) {
            out += ' ';
        }
        out += str[pos];
        pos = str.find_first_not_of(' ', pos + 1);
    }
    return out;
}

}  // namespace ppspeech
namespace ppspeech {

namespace {

// True when `c` is alphabetic according to isalpha().
// The cast is required: <cctype> functions have undefined behaviour for
// negative arguments, which is what non-ASCII bytes are when char is signed.
bool IsAlpha(char c) { return isalpha(static_cast<unsigned char>(c)) != 0; }

}  // namespace

// Strips ' ' characters from `str`.
//
// Every space is dropped, except that one space is kept between two
// alphabetic characters that were separated by one or more spaces in the
// input. Only ' ' counts as blank; other whitespace is copied unchanged.
std::string RemoveBlk(const std::string& str) {
    std::string out;
    out.reserve(str.size());

    bool gap = false;  // spaces were skipped since the last kept character
    for (const char c : str) {
        if (c == ' ') {
            gap = true;
            continue;
        }
        // out.back() is the previously kept input character.
        if (gap && !out.empty() && IsAlpha(out.back()) && IsAlpha(c)) {
            out += ' ';
        }
        out += c;
        gap = false;
    }
    return out;
}

// Surrounds every maximal run of alphabetic characters in `str` with spaces.
//
// A leading space is inserted unless the input character just before the run
// is already ' '. A trailing space is always inserted, so a run that is
// already followed by a space ends up followed by two. All other characters
// are copied unchanged.
std::string AddBlk(const std::string& str) {
    const std::string::size_type len = str.size();

    std::string out;
    out.reserve(len);

    std::string::size_type pos = 0;
    while (pos < len) {
        if (!IsAlpha(str[pos])) {
            out += str[pos];
            ++pos;
            continue;
        }

        if (pos == 0 || str[pos - 1] != ' ') {
            out += ' ';  // pre-space for the word
        }
        // Locate the end of the alphabetic run with an explicit bound.
        std::string::size_type word_end = pos;
        while (word_end < len && IsAlpha(str[word_end])) {
            ++word_end;
        }
        out.append(str, pos, word_end - pos);
        out += ' ';  // post-space for the word
        pos = word_end;
    }
    return out;
}

// Swaps numerator and denominator of every tagged fraction in `str`.
//
// Each occurrence of  left_tag A '/' B right_tag  is replaced by  B '/' A
// (the tags themselves are removed); the split happens at the first '/'
// after the left tag. Text outside tagged segments is copied unchanged.
//
// Malformed input is passed through instead of being mangled:
//   - if either tag is empty nothing can be delimited, so `str` is returned;
//   - a left tag without a following right tag leaves the rest untouched;
//   - a tagged segment that contains no '/' is copied verbatim, tags included.
std::string ReverseFrac(const std::string& str,
                        const std::string& left_tag,
                        const std::string& right_tag) {
    using Index = std::string::size_type;

    // find("") matches at every position and would never advance.
    if (left_tag.empty() || right_tag.empty()) {
        return str;
    }

    std::string out;
    out.reserve(str.size());

    Index cursor = 0;  // first input character not yet written to `out`
    while (cursor < str.size()) {
        const Index left = str.find(left_tag, cursor);
        if (left == std::string::npos) {
            break;
        }
        const Index num_begin = left + left_tag.size();
        const Index right = str.find(right_tag, num_begin);
        if (right == std::string::npos) {
            break;  // unterminated segment: keep the remainder as is
        }
        const Index resume = right + right_tag.size();
        const Index slash = str.find('/', num_begin);

        if (slash == std::string::npos || slash >= right) {
            // No '/' between the tags: not a fraction, copy it verbatim.
            out.append(str, cursor, resume - cursor);
        } else {
            out.append(str, cursor, left - cursor);          // text before tag
            out.append(str, slash + 1, right - slash - 1);   // denominator
            out += '/';
            out.append(str, num_begin, slash - num_begin);   // numerator
        }
        cursor = resume;
    }

    // Everything after the last processed segment (or the whole input when
    // no segment was found).
    out.append(str, cursor, std::string::npos);
    return out;
}

}  // namespace ppspeech
out_str.compare("how are you today"); + EXPECT_EQ(ret, 0); + + test_str = "我 的 paper 在 哪里?"; + out_str = ppspeech::RemoveBlk(test_str); + ret = out_str.compare("我的paper在哪里?"); + EXPECT_EQ(ret, 0); +} + +TEST(TextProcess, AddBlkTest) { + std::string test_str = "how are you"; + std::string out_str = ppspeech::AddBlk(test_str); + int ret = out_str.compare(" how are you "); + EXPECT_EQ(ret, 0); + + test_str = "欢迎来到China。"; + out_str = ppspeech::AddBlk(test_str); + ret = out_str.compare("欢迎来到 China 。"); + EXPECT_EQ(ret, 0); +} + +TEST(TextProcess, ReverseFracTest) { + std::string test_str = "3/1"; + std::string out_str = ppspeech::ReverseFrac(test_str); + int ret = out_str.compare("1/3"); + EXPECT_EQ(ret, 0); + + test_str = "3/1 100/10000"; + out_str = ppspeech::ReverseFrac(test_str); + ret = out_str.compare("1/3 10000/100"); + EXPECT_EQ(ret, 0); +} \ No newline at end of file