From 515497ae1f3616efc1a8c020897d795fce5ecea1 Mon Sep 17 00:00:00 2001
From: Junkun <junkun.chen.cn@gmail.com>
Date: Wed, 4 Aug 2021 21:33:23 -0700
Subject: [PATCH] refine the code: fix whitespace and line wrapping

---
 deepspeech/io/collator_st.py            |  2 +-
 deepspeech/utils/bleu_score.py          |  8 +++++---
 examples/dataset/ted_en_zh/ted_en_zh.py | 10 ++++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/deepspeech/io/collator_st.py b/deepspeech/io/collator_st.py
index 34933312..1be6445d 100644
--- a/deepspeech/io/collator_st.py
+++ b/deepspeech/io/collator_st.py
@@ -563,7 +563,7 @@ class KaldiPrePorocessedCollator(SpeechCollator):
     @property
     def feature_size(self):
         return self._feat_dim
-    
+
     @property
     def stride_ms(self):
         return self._stride_ms
diff --git a/deepspeech/utils/bleu_score.py b/deepspeech/utils/bleu_score.py
index 580fbf61..f1bf5261 100644
--- a/deepspeech/utils/bleu_score.py
+++ b/deepspeech/utils/bleu_score.py
@@ -35,6 +35,7 @@ def bleu(hypothesis, reference):
 
     return sacrebleu.corpus_bleu(hypothesis, reference)
 
+
 def char_bleu(hypothesis, reference):
     """Calculate BLEU. BLEU compares reference text and
     hypothesis text in char-level using scarebleu.
@@ -47,7 +48,8 @@ def char_bleu(hypothesis, reference):
     :type hypothesis: list[str]
     :raises ValueError: If the reference number is zero.
     """
-    hypothesis =[' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
-    reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref ]for ref in reference ]
+    hypothesis = [' '.join(list(hyp.replace(' ', ''))) for hyp in hypothesis]
+    reference = [[' '.join(list(ref_i.replace(' ', ''))) for ref_i in ref]
+                 for ref in reference]
 
-    return sacrebleu.corpus_bleu(hypothesis, reference)
\ No newline at end of file
+    return sacrebleu.corpus_bleu(hypothesis, reference)
diff --git a/examples/dataset/ted_en_zh/ted_en_zh.py b/examples/dataset/ted_en_zh/ted_en_zh.py
index 08f15119..14bef01d 100644
--- a/examples/dataset/ted_en_zh/ted_en_zh.py
+++ b/examples/dataset/ted_en_zh/ted_en_zh.py
@@ -44,9 +44,11 @@ def create_manifest(data_dir, manifest_path_prefix):
     print("Creating manifest %s ..." % manifest_path_prefix)
     json_lines = []
 
-    data_types_infos = [('train', 'train-split/train-segment', 'En-Zh/train.en-zh'), 
-                ('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'), 
-                ('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')]
+    data_types_infos = [
+        ('train', 'train-split/train-segment', 'En-Zh/train.en-zh'),
+        ('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'),
+        ('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')
+    ]
     for data_info in data_types_infos:
         dtype, audio_relative_dir, text_relative_path = data_info
         del json_lines[:]
@@ -63,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix):
                 continue
             audio_id, trancription, translation = line.split('\t')
             utt = audio_id.split('.')[0]
-            
+
             audio_path = os.path.join(audio_dir, audio_id)
             if os.path.exists(audio_path):
                 if os.path.getsize(audio_path) < 30000: