From 0f11e93e7f0815f51401a5eeace9fcdc3e267430 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Sun, 7 Feb 2021 07:24:36 +0000
Subject: [PATCH] data: pad texts and return text_lens instead of masks

---
 data_utils/data.py    | 37 ++++++++++++++-----------------------
 data_utils/dataset.py | 17 +++++++++++++++++
 2 files changed, 31 insertions(+), 23 deletions(-)
 create mode 100644 data_utils/dataset.py

diff --git a/data_utils/data.py b/data_utils/data.py
index 43a14718e..1ff4a9138 100644
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -27,8 +27,10 @@ from data_utils.featurizer.speech_featurizer import SpeechFeaturizer
 from data_utils.speech import SpeechSegment
 from data_utils.normalizer import FeatureNormalizer
 
+__all__ = ['DataGenerator']
 
-class DataGenerator(object):
+
+class DataGenerator():
     """
     DataGenerator provides basic audio data preprocessing pipeline, and offers
     data reader interfaces of PaddlePaddle requirements.
@@ -310,43 +312,32 @@ class DataGenerator(object):
                 raise ValueError("If padding_to is not -1, it should be larger "
                                  "than any instance's shape in the batch")
             max_length = padding_to
+        max_text_length = max([len(text) for audio, text in batch])
         # padding
         padded_audios = []
-        texts, text_lens = [], []
         audio_lens = []
-        masks = []
+        texts, text_lens = [], []
         for audio, text in batch:
             padded_audio = np.zeros([audio.shape[0], max_length])
             padded_audio[:, :audio.shape[1]] = audio
             if flatten:
                 padded_audio = padded_audio.flatten()
             padded_audios.append(padded_audio)
+            audio_lens.append(audio.shape[1])
             if self._is_training:
-                texts += text
+                padded_text = np.zeros([max_text_length])
+                padded_text[:len(text)] = text
+                texts.append(padded_text)
             else:
                 texts.append(text)
             text_lens.append(len(text))
-            audio_lens.append(audio.shape[1])
-            mask_shape0 = (audio.shape[0] - 1) // 2 + 1
-            mask_shape1 = (audio.shape[1] - 1) // 3 + 1
-            mask_max_len = (max_length - 1) // 3 + 1
-            mask_ones = np.ones((mask_shape0, mask_shape1))
-            mask_zeros = np.zeros((mask_shape0, mask_max_len - mask_shape1))
-            mask = np.repeat(
-                np.reshape(
-                    np.concatenate((mask_ones, mask_zeros), axis=1),
-                    (1, mask_shape0, mask_max_len)),
-                32,
-                axis=0)
-            masks.append(mask)
+
         padded_audios = np.array(padded_audios).astype('float32')
+        audio_lens = np.array(audio_lens).astype('int64')
         if self._is_training:
-            texts = np.expand_dims(np.array(texts).astype('int32'), axis=-1)
-            texts = fluid.create_lod_tensor(
-                texts, recursive_seq_lens=[text_lens], place=self._place)
-        audio_lens = np.array(audio_lens).astype('int64').reshape([-1, 1])
-        masks = np.array(masks).astype('float32')
-        return padded_audios, texts, audio_lens, masks
+            texts = np.array(texts).astype('int32')
+            text_lens = np.array(text_lens).astype('int64')
+        return padded_audios, texts, audio_lens, text_lens
 
     def _batch_shuffle(self, manifest, batch_size, clipped=False):
         """Put similarly-sized instances into minibatches for better efficiency
diff --git a/data_utils/dataset.py b/data_utils/dataset.py
new file mode 100644
index 000000000..06ebc78fb
--- /dev/null
+++ b/data_utils/dataset.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle.io import Dataset
+from paddle.io import DataLoader