Merge pull request #747 from LittleChenCc/develop

refine the code and correct yaml
pull/748/head
Hui Zhang 4 years ago committed by GitHub
commit 5e8e46ed79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -35,6 +35,7 @@ def bleu(hypothesis, reference):
return sacrebleu.corpus_bleu(hypothesis, reference) return sacrebleu.corpus_bleu(hypothesis, reference)
def char_bleu(hypothesis, reference): def char_bleu(hypothesis, reference):
"""Calculate BLEU. BLEU compares reference text and """Calculate BLEU. BLEU compares reference text and
hypothesis text in char-level using sacrebleu. hypothesis text in char-level using sacrebleu.
@@ -47,7 +48,8 @@ def char_bleu(hypothesis, reference):
:type hypothesis: list[str] :type hypothesis: list[str]
:raises ValueError: If the reference number is zero. :raises ValueError: If the reference number is zero.
""" """
hypothesis =[' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis] hypothesis = [' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref ]for ref in reference ] reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref]
for ref in reference]
return sacrebleu.corpus_bleu(hypothesis, reference) return sacrebleu.corpus_bleu(hypothesis, reference)

@@ -44,9 +44,11 @@ def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix) print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = [] json_lines = []
data_types_infos = [('train', 'train-split/train-segment', 'En-Zh/train.en-zh'), data_types_infos = [
('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'), ('train', 'train-split/train-segment', 'En-Zh/train.en-zh'),
('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')] ('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'),
('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')
]
for data_info in data_types_infos: for data_info in data_types_infos:
dtype, audio_relative_dir, text_relative_path = data_info dtype, audio_relative_dir, text_relative_path = data_info
del json_lines[:] del json_lines[:]

@@ -3,8 +3,8 @@ data:
train_manifest: data/manifest.train train_manifest: data/manifest.train
dev_manifest: data/manifest.dev dev_manifest: data/manifest.dev
test_manifest: data/manifest.test test_manifest: data/manifest.test
min_input_len: 0.5 # second min_input_len: 0.05 # second
max_input_len: 3000.0 # second max_input_len: 30.0 # second
min_output_len: 0.0 # tokens min_output_len: 0.0 # tokens
max_output_len: 400.0 # tokens max_output_len: 400.0 # tokens
min_output_input_ratio: 0.01 min_output_input_ratio: 0.01

Loading…
Cancel
Save