add text blank preprocess, test=asr (#3025)

pull/3071/head
jlqian98 2 years ago committed by GitHub
parent f0ef6f1caf
commit b9bdeca6c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,26 @@
#include "utils/blank_process.h"
namespace ppspeech {

// Strips blanks (' ') from `str`, keeping a single separating blank only
// between two alphabetic characters.
//
// Every ' ' in the input is dropped. When a run of one or more blanks sits
// between two characters that std::isalpha classifies as letters, exactly one
// ' ' is written in its place so that English words stay separated. Blanks
// next to anything else (digits, punctuation, bytes of multi-byte UTF-8
// characters) are removed entirely, as are leading and trailing blanks.
//
// Only the space character is treated as a blank; tabs and newlines are
// copied through unchanged.
//
// @param str  input text, processed byte by byte.
// @return     a new string with the blanks normalized as described above.
std::string BlankProcess(const std::string& str) {
    // <cctype> functions have undefined behavior for negative arguments, and
    // the bytes of multi-byte UTF-8 characters are negative when `char` is
    // signed, so the value must go through `unsigned char` first.
    const auto is_alpha = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size());

    // True when at least one blank was skipped since the last emitted char.
    bool blank_pending = false;
    for (const char c : str) {
        if (c == ' ') {
            blank_pending = true;
            continue;
        }
        // Re-insert one blank only when the skipped run separated two letters;
        // `out.back()` is always the previously emitted non-blank character.
        if (blank_pending && !out.empty() && is_alpha(c) &&
            is_alpha(out.back())) {
            out += ' ';
        }
        out += c;
        blank_pending = false;
    }
    return out;
}

}  // namespace ppspeech

@ -0,0 +1,9 @@
#include <string>
#include <vector>
#include <cctype>
namespace ppspeech {
// Returns a copy of `str` with its space characters (' ') removed, except
// that a run of spaces separating two alphabetic characters (as classified by
// isalpha) is replaced by a single space. Leading and trailing spaces are
// always dropped. Only ' ' is treated as a blank; other whitespace is copied
// through unchanged. The input string is not modified.
std::string BlankProcess(const std::string& str);
} // namespace ppspeech

@ -0,0 +1,26 @@
#include "utils/blank_process.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
// Checks that BlankProcess drops blanks around non-alphabetic text, keeps a
// single blank between English words, and leaves every non-blank character
// (including punctuation) in place.
TEST(BlankProcess, BlankProcessTest) {
    // Blanks between Chinese words, digits and punctuation are removed.
    EXPECT_EQ(ppspeech::BlankProcess("我 今天 去 了 超市 花了 120 元。"),
              std::string("我今天去了超市花了120元。"));

    // Single blanks between English words are preserved.
    EXPECT_EQ(ppspeech::BlankProcess("how are you today"),
              std::string("how are you today"));

    // Mixed text: blanks around the English word vanish because its
    // neighbours are not alphabetic; the trailing '?' must be kept.
    EXPECT_EQ(ppspeech::BlankProcess("我 的 paper 在 哪里?"),
              std::string("我的paper在哪里?"));

    // Runs of blanks collapse to one between letters; leading and trailing
    // blanks are dropped.
    EXPECT_EQ(ppspeech::BlankProcess("  how   are  you  "),
              std::string("how are you"));

    // Empty and blank-only inputs produce an empty string.
    EXPECT_EQ(ppspeech::BlankProcess(""), std::string(""));
    EXPECT_EQ(ppspeech::BlankProcess("   "), std::string(""));
}
Loading…
Cancel
Save