refine the code

pull/747/head
Junkun 3 years ago
parent 9d05a749e2
commit 515497ae1f

@ -563,7 +563,7 @@ class KaldiPrePorocessedCollator(SpeechCollator):
@property
def feature_size(self):
return self._feat_dim
@property
def stride_ms(self):
return self._stride_ms

@ -35,6 +35,7 @@ def bleu(hypothesis, reference):
return sacrebleu.corpus_bleu(hypothesis, reference)
def char_bleu(hypothesis, reference):
"""Calculate BLEU. BLEU compares reference text and
hypothesis text at char-level using sacrebleu.
@ -47,7 +48,8 @@ def char_bleu(hypothesis, reference):
:type hypothesis: list[str]
:raises ValueError: If the reference number is zero.
"""
hypothesis =[' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref ]for ref in reference ]
hypothesis = [' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref]
for ref in reference]
return sacrebleu.corpus_bleu(hypothesis, reference)
return sacrebleu.corpus_bleu(hypothesis, reference)

@ -44,9 +44,11 @@ def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = []
data_types_infos = [('train', 'train-split/train-segment', 'En-Zh/train.en-zh'),
('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'),
('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')]
data_types_infos = [
('train', 'train-split/train-segment', 'En-Zh/train.en-zh'),
('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'),
('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')
]
for data_info in data_types_infos:
dtype, audio_relative_dir, text_relative_path = data_info
del json_lines[:]
@ -63,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix):
continue
audio_id, trancription, translation = line.split('\t')
utt = audio_id.split('.')[0]
audio_path = os.path.join(audio_dir, audio_id)
if os.path.exists(audio_path):
if os.path.getsize(audio_path) < 30000:

Loading…
Cancel
Save