|
|
@@ -135,13 +135,13 @@ class TextFeaturizer():
|
|
|
|
"""Character detokenizer.
|
|
|
|
"""Character detokenizer.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
tokens (List[str]): tokens.
|
|
|
|
tokens (str): tokens.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
str: text string.
|
|
|
|
str: text string.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
tokens = [t.replace(SPACE, " ") for t in tokens ]
|
|
|
|
tokens = tokens.replace(SPACE, " ")
|
|
|
|
return "".join(tokens)
|
|
|
|
return tokens
|
|
|
|
|
|
|
|
|
|
|
|
def word_tokenize(self, text):
|
|
|
|
def word_tokenize(self, text):
|
|
|
|
"""Word tokenizer, separate by <space>."""
|
|
|
|
"""Word tokenizer, separate by <space>."""
|
|
|
|