Add AddBlk and ReverseFrac functions

pull/3047/head
jlqian 3 years ago
parent 6417547bee
commit 242834d3ee

@ -1,26 +0,0 @@
#include "utils/blank_process.h"
namespace ppspeech {
// Returns a copy of `str` with every ASCII space removed, except that a single
// space is kept between two alphabetic characters (as judged by isalpha) that
// were separated by one or more spaces in the input. This keeps English words
// apart while gluing CJK / digit / punctuation tokens together.
std::string RemoveBlk(const std::string& str) {
    // isalpha has undefined behaviour for negative values, which plain `char`
    // takes for UTF-8 bytes, so route every call through unsigned char.
    const auto is_letter = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size());
    bool skipped_space = false;  // true when space(s) directly precede the current char
    for (const char ch : str) {
        if (ch == ' ') {
            skipped_space = true;
            continue;
        }
        // out.back() is the previously emitted input character.
        if (skipped_space && !out.empty() && is_letter(ch) && is_letter(out.back())) {
            out += ' ';
        }
        out += ch;
        skipped_space = false;
    }
    return out;
}
} // namespace ppspeech

@ -1,9 +0,0 @@
#include <string>
#include <vector>
#include <cctype>
namespace ppspeech {
// Blank-handling helper: takes the input text by const reference and returns
// the processed text as a new string.
// NOTE(review): the visible source that includes "utils/blank_process.h"
// defines RemoveBlk rather than BlankProcess — confirm which name is intended.
std::string BlankProcess(const std::string& str);
} // namespace ppspeech

@ -1,26 +0,0 @@
#include "utils/blank_process.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
// Exercises BlankProcess on Chinese-only, English-only and mixed input.
// Strings are compared directly so a failure prints both values.
TEST(BlankProcess, BlankProcessTest) {
    // Spaces between Chinese words and digits are dropped.
    std::string test_str = "我 今天 去 了 超市 花了 120 元。";
    EXPECT_EQ(ppspeech::BlankProcess(test_str), "我今天去了超市花了120元。");

    // A single space between English words is preserved.
    test_str = "how are you today";
    EXPECT_EQ(ppspeech::BlankProcess(test_str), "how are you today");

    // Mixed text: punctuation in the input (the trailing '?') is kept.
    test_str = "我 的 paper 在 哪里?";
    EXPECT_EQ(ppspeech::BlankProcess(test_str), "我的paper在哪里?");

    // Runs of several spaces are removed just like single spaces.
    test_str = "我  今天   去  了  超市   花了  120  元。";
    EXPECT_EQ(ppspeech::BlankProcess(test_str), "我今天去了超市花了120元。");
}

@ -0,0 +1,77 @@
#include "utils/text_process.h"
namespace ppspeech {
// Returns a copy of `str` with every ASCII space removed, except that a single
// space is kept between two alphabetic characters (as judged by isalpha) that
// were separated by one or more spaces in the input. This keeps English words
// apart while gluing CJK / digit / punctuation tokens together.
std::string RemoveBlk(const std::string& str) {
    // isalpha has undefined behaviour for negative values, which plain `char`
    // takes for UTF-8 bytes, so route every call through unsigned char.
    const auto is_letter = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size());
    bool skipped_space = false;  // true when space(s) directly precede the current char
    for (const char ch : str) {
        if (ch == ' ') {
            skipped_space = true;
            continue;
        }
        // out.back() is the previously emitted input character.
        if (skipped_space && !out.empty() && is_letter(ch) && is_letter(out.back())) {
            out += ' ';
        }
        out += ch;
        skipped_space = false;
    }
    return out;
}
// Returns a copy of `str` in which every run of alphabetic characters (as
// judged by isalpha) is set off by spaces: a space is inserted before the run
// unless the preceding input character is already a space (or the run starts
// the string, in which case a space is always inserted), and a space is
// always appended after the run. All other characters are copied unchanged,
// so a word that is already followed by a space ends up followed by two.
std::string AddBlk(const std::string& str) {
    // isalpha has undefined behaviour for negative values, which plain `char`
    // takes for UTF-8 bytes, so route every call through unsigned char.
    const auto is_letter = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    const std::string::size_type end = str.size();
    std::string out;
    out.reserve(end + 2);
    std::string::size_type pos = 0;
    while (pos < end) {
        if (!is_letter(str[pos])) {
            out += str[pos];
            ++pos;
            continue;
        }
        if (pos == 0 || str[pos - 1] != ' ') {
            out += ' ';  // pre-space for an English word
        }
        // Explicit bound instead of relying on str[size()] being '\0'.
        while (pos < end && is_letter(str[pos])) {
            out += str[pos];
            ++pos;
        }
        out += ' ';  // post-space for an English word
    }
    return out;
}
// Swaps numerator and denominator of every tagged fraction in `str`.
//
// Each occurrence of  left_tag num1 '/' num2 right_tag  is replaced by
// "num2/num1" (the tags themselves are dropped); text outside the tags is
// copied unchanged.
//
// Input without any tag is returned unchanged. If a left tag is not followed
// by a right tag with a '/' between them, the rest of the input starting at
// that left tag is copied verbatim and processing stops.
std::string ReverseFrac(const std::string& str,
                        const std::string& left_tag,
                        const std::string& right_tag) {
    const std::string::size_type npos = std::string::npos;

    std::string out;
    out.reserve(str.size());
    std::string::size_type pos = 0;  // first input character not yet emitted
    while (pos < str.size()) {
        const std::string::size_type left = str.find(left_tag, pos);
        if (left == npos) {
            break;
        }
        const std::string::size_type num_begin = left + left_tag.size();
        const std::string::size_type right = str.find(right_tag, num_begin);
        const std::string::size_type slash =
            (right == npos) ? npos : str.find('/', num_begin);
        // The '/' must lie strictly inside the tagged span; otherwise the
        // span is malformed and is left untouched.
        if (right == npos || slash == npos || slash >= right) {
            break;
        }
        out.append(str, pos, left - pos);               // text before the left tag
        out.append(str, slash + 1, right - slash - 1);  // num2
        out += '/';
        out.append(str, num_begin, slash - num_begin);  // num1
        pos = right + right_tag.size();
    }
    out.append(str, pos, npos);  // whatever follows the last rewritten span
    return out;
}
} // namespace ppspeech

@ -0,0 +1,15 @@
#include <string>
#include <vector>
#include <cctype>
namespace ppspeech {
// Returns a copy of `str` with ASCII spaces removed; a single space is kept
// between two alphabetic characters that were separated by space(s).
std::string RemoveBlk(const std::string& str);
// Returns a copy of `str` in which each run of alphabetic characters gets a
// space appended after it and a space inserted before it unless the preceding
// character is already a space.
std::string AddBlk(const std::string& str);
// Returns a copy of `str` in which each span  left_tag num1/num2 right_tag  is
// replaced by "num2/num1" (tags dropped). The tags default to "<tag>" and
// "</tag>".
std::string ReverseFrac(const std::string& str,
const std::string& left_tag = "<tag>",
const std::string& right_tag = "</tag>");
} // namespace ppspeech

@ -0,0 +1,45 @@
#include "utils/text_process.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
// Exercises RemoveBlk on Chinese-only, English-only and mixed input.
// Strings are compared directly so a failure prints both values.
TEST(TextProcess, RemoveBlkTest) {
    // Spaces between Chinese words and digits are dropped.
    std::string test_str = "我 今天 去 了 超市 花了 120 元。";
    EXPECT_EQ(ppspeech::RemoveBlk(test_str), "我今天去了超市花了120元。");

    // A single space between English words is preserved.
    test_str = "how are you today";
    EXPECT_EQ(ppspeech::RemoveBlk(test_str), "how are you today");

    // Mixed text: punctuation in the input (the trailing '?') is kept.
    test_str = "我 的 paper 在 哪里?";
    EXPECT_EQ(ppspeech::RemoveBlk(test_str), "我的paper在哪里?");
}
// Exercises AddBlk on English-only and mixed Chinese/English input.
// Strings are compared directly so a failure prints both values.
TEST(TextProcess, AddBlkTest) {
    // AddBlk appends a space after every word and then copies the original
    // separator as well, so words that were separated by one space end up
    // separated by two.
    std::string test_str = "how are you";
    EXPECT_EQ(ppspeech::AddBlk(test_str), " how  are  you ");

    // An English word embedded in Chinese text is set off by one space on each side.
    test_str = "欢迎来到China。";
    EXPECT_EQ(ppspeech::AddBlk(test_str), "欢迎来到 China 。");
}
// Verifies that ReverseFrac swaps numerator and denominator inside the
// default "<tag>"/"</tag>" markers, for one and for several fractions.
TEST(TextProcess, ReverseFracTest) {
    // A single tagged fraction.
    const std::string single_input = "<tag>3/1</tag>";
    const std::string single_output = ppspeech::ReverseFrac(single_input);
    EXPECT_EQ(single_output.compare("1/3"), 0);

    // Two tagged fractions separated by a space.
    const std::string double_input = "<tag>3/1</tag> <tag>100/10000</tag>";
    const std::string double_output = ppspeech::ReverseFrac(double_input);
    EXPECT_EQ(double_output.compare("1/3 10000/100"), 0);
}
Loading…
Cancel
Save