fix SplitWord

pull/3030/head
TianYuan 3 years ago
parent 9c208f7ff2
commit 620d10993d

@ -449,7 +449,7 @@ int FrontEngineInterface::Word2WordVec(const std::string &word,
// yuantian01解释:把一个词再进行一次分词。例子:小雨伞 --> 小 雨伞 或者 小雨 伞
int FrontEngineInterface::SplitWord(const std::string &word,
std::vector<std::string> &new_word_vec) {
std::vector<std::string> *new_word_vec) {
std::vector<std::string> word_vec;
std::string second_subword;
_jieba->CutForSearch(word, word_vec);
@ -461,12 +461,12 @@ int FrontEngineInterface::SplitWord(const std::string &word,
int first_begin_idx = word.find_first_of(first_subword);
if (first_begin_idx == 0) {
second_subword = word.substr(first_subword.length());
new_word_vec.push_back(first_subword);
new_word_vec.push_back(second_subword);
new_word_vec->push_back(first_subword);
new_word_vec->push_back(second_subword);
} else {
second_subword = word.substr(0, word.length() - first_subword.length());
new_word_vec.push_back(second_subword);
new_word_vec.push_back(first_subword);
new_word_vec->push_back(second_subword);
new_word_vec->push_back(first_subword);
}
return 0;
@ -940,7 +940,7 @@ int FrontEngineInterface::NeuralSandhi(const std::string &word,
// 进行进一步分词,把长词切分更短些
std::vector<std::string> word_list;
if (0 != SplitWord(word, word_list)) {
if (0 != SplitWord(word, &word_list)) {
LOG(ERROR) << "Failed to split word.";
return -1;
}
@ -997,7 +997,7 @@ int FrontEngineInterface::ThreeSandhi(const std::string &word,
} else if (word_num == 3) {
// 进行进一步分词,把长词切分更短些
std::vector<std::string> word_list;
if (0 != SplitWord(word, word_list)) {
if (0 != SplitWord(word, &word_list)) {
LOG(ERROR) << "Failed to split word.";
return -1;
}

@ -102,7 +102,7 @@ class FrontEngineInterface : public TextNormalizer {
// 将整个词重新进行 full cut 分词,分词后的各个词都会在词典中
int SplitWord(const std::string &word,
std::vector<std::string> &fullcut_word);
std::vector<std::string> *fullcut_word);
// 对分词结果进行处理:对包含“不”字的分词结果进行整理
std::vector<std::pair<std::string, std::string>> MergeBu(

Loading…
Cancel
Save