|
|
|
@ -4,7 +4,7 @@
|
|
|
|
|
#include <cmath>
|
|
|
|
|
#include <limits>
|
|
|
|
|
|
|
|
|
|
size_t get_utf8_str_len(const std::string& str) {
|
|
|
|
|
size_t get_utf8_str_len(const std::string &str) {
|
|
|
|
|
size_t str_len = 0;
|
|
|
|
|
for (char c : str) {
|
|
|
|
|
str_len += ((c & 0xc0) != 0x80);
|
|
|
|
@ -12,7 +12,7 @@ size_t get_utf8_str_len(const std::string& str) {
|
|
|
|
|
return str_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::string> split_utf8_str(const std::string& str) {
|
|
|
|
|
std::vector<std::string> split_utf8_str(const std::string &str) {
|
|
|
|
|
std::vector<std::string> result;
|
|
|
|
|
std::string out_str;
|
|
|
|
|
|
|
|
|
@ -31,8 +31,8 @@ std::vector<std::string> split_utf8_str(const std::string& str) {
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<std::string> split_str(const std::string& s,
|
|
|
|
|
const std::string& delim) {
|
|
|
|
|
std::vector<std::string> split_str(const std::string &s,
|
|
|
|
|
const std::string &delim) {
|
|
|
|
|
std::vector<std::string> result;
|
|
|
|
|
std::size_t start = 0, delim_len = delim.size();
|
|
|
|
|
while (true) {
|
|
|
|
@ -51,7 +51,7 @@ std::vector<std::string> split_str(const std::string& s,
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool prefix_compare(const PathTrie* x, const PathTrie* y) {
|
|
|
|
|
bool prefix_compare(const PathTrie *x, const PathTrie *y) {
|
|
|
|
|
if (x->score == y->score) {
|
|
|
|
|
if (x->character == y->character) {
|
|
|
|
|
return false;
|
|
|
|
@ -63,8 +63,8 @@ bool prefix_compare(const PathTrie* x, const PathTrie* y) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void add_word_to_fst(const std::vector<int>& word,
|
|
|
|
|
fst::StdVectorFst* dictionary) {
|
|
|
|
|
void add_word_to_fst(const std::vector<int> &word,
|
|
|
|
|
fst::StdVectorFst *dictionary) {
|
|
|
|
|
if (dictionary->NumStates() == 0) {
|
|
|
|
|
fst::StdVectorFst::StateId start = dictionary->AddState();
|
|
|
|
|
assert(start == 0);
|
|
|
|
@ -81,16 +81,16 @@ void add_word_to_fst(const std::vector<int>& word,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool add_word_to_dictionary(
|
|
|
|
|
const std::string& word,
|
|
|
|
|
const std::unordered_map<std::string, int>& char_map,
|
|
|
|
|
const std::string &word,
|
|
|
|
|
const std::unordered_map<std::string, int> &char_map,
|
|
|
|
|
bool add_space,
|
|
|
|
|
int SPACE_ID,
|
|
|
|
|
fst::StdVectorFst* dictionary) {
|
|
|
|
|
fst::StdVectorFst *dictionary) {
|
|
|
|
|
auto characters = split_utf8_str(word);
|
|
|
|
|
|
|
|
|
|
std::vector<int> int_word;
|
|
|
|
|
|
|
|
|
|
for (auto& c : characters) {
|
|
|
|
|
for (auto &c : characters) {
|
|
|
|
|
if (c == " ") {
|
|
|
|
|
int_word.push_back(SPACE_ID);
|
|
|
|
|
} else {
|
|
|
|
@ -108,5 +108,5 @@ bool add_word_to_dictionary(
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
add_word_to_fst(int_word, dictionary);
|
|
|
|
|
return true;
|
|
|
|
|
return true; // return with successful adding
|
|
|
|
|
}
|
|
|
|
|