Add detailed errors about out-of-vocabulary symbols

pull/188/head
Rai220 8 years ago
parent dad1c2727e
commit f52ad5e5a8

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""Contains the text featurizer class.""" """Contains the text featurizer class."""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
@ -32,8 +33,12 @@ class TextFeaturizer(object):
:return: List of char-level token indices. :return: List of char-level token indices.
:rtype: list :rtype: list
""" """
tokens = self._char_tokenize(text) result = []
return [self._vocab_dict[token] for token in tokens] try:
result = [self._vocab_dict[token] for token in text]
except KeyError, e:
print('Incorrect symbol "%s" found in string: ' % str(e).encode('utf-8'), text.encode('utf-8'))
return result
@property @property
def vocab_size(self): def vocab_size(self):

Loading…
Cancel
Save