Modify language model scoring to return the probability of the last word given its prefix

pull/2/head
Yibing Liu 7 years ago
parent ac370eca85
commit 21ff590e6d

@ -52,13 +52,19 @@ class Scorer(object):
""" """
def __init__(self, alpha, beta, model_path):
    """Keep the two weights and load the KenLM model found at ``model_path``.

    Args:
        alpha: Stored unchanged as ``self._alpha``. Presumably the language
            model weight -- confirm against the code that reads it.
        beta: Stored unchanged as ``self._beta``. Presumably the word-count
            weight -- confirm against the code that reads it.
        model_path: Passed straight to ``kenlm.LanguageModel``.
    """
    self._alpha, self._beta = alpha, beta
    self._language_model = kenlm.LanguageModel(model_path)
def language_model_score(self, sentence, bos=True, eos=False):
    """Return the probability of the last word of ``sentence`` given its prefix.

    The value is ``10 ** (score(sentence) - score(prefix))``, where
    ``prefix`` is ``sentence`` without its last word. The scores returned
    by ``self._language_model.score`` are treated as base-10 log
    probabilities. A sentence with at most one word has no prefix, so its
    own score is used directly.

    Args:
        sentence: Text to score; words are separated by any whitespace.
        bos: Forwarded to the model for both the sentence and the prefix.
        eos: Forwarded to the model for the full sentence only.

    Returns:
        ``np.power(10, log_prob)`` for the log probability described above.
    """
    # Split on any whitespace run (not just ' ') so tabs, newlines and
    # repeated spaces cannot hide a word boundary from the prefix logic.
    words = sentence.split()
    if len(words) <= 1:
        # Empty or single-word input: nothing to condition on.
        log_prob = self._language_model.score(sentence, bos, eos)
    else:
        prefix_sent = ' '.join(words[:-1])
        # The prefix is never a finished sentence, so it is scored without
        # the end-of-sentence term even when the caller asks for eos=True;
        # otherwise the difference is not a conditional log probability.
        log_prob = (self._language_model.score(sentence, bos, eos) -
                    self._language_model.score(prefix_sent, bos, False))
    return np.power(10, log_prob)
def word_count(self, sentence): def word_count(self, sentence):

Loading…
Cancel
Save