From f38d948193a1fb6ef967e2036e5c7cbceabaec16 Mon Sep 17 00:00:00 2001
From: yangyaming <mxscmxsc@gmail.com>
Date: Fri, 10 Nov 2017 14:43:05 +0800
Subject: [PATCH] Add more comments.

---
 data_utils/data.py     | 5 +++++
 model_utils/network.py | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/data_utils/data.py b/data_utils/data.py
index 1469beb0..d913e48a 100644
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -320,6 +320,9 @@ class DataGenerator(object):
             if flatten:
                 padded_audio = padded_audio.flatten()
 
+            # Stride size for conv0 is (3, 2)
+            # Stride size for conv1 to convN is (1, 2)
+            # These must match the network's strides; hard-coded here
             padded_instance = [padded_audio, text]
             padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1
             padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1
@@ -327,6 +330,8 @@ class DataGenerator(object):
             padded_instance += [
                 [0],  # sequence offset, always 0
                 [valid_w],  # valid sequence length
+                # Index ranges for channel, height and width
+                # Please refer to the scale_sub_region layer for details
                 [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w]
             ]
             pre_padded_h = padded_conv0_h
diff --git a/model_utils/network.py b/model_utils/network.py
index 2053e906..7b4b8ab2 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -270,7 +270,7 @@ def deep_speech_v2_network(audio_data,
         block_x=1,
         block_y=conv_group_height)
     # remove padding part
-    remove_padding = paddle.layer.sub_seq(
+    remove_padding_data = paddle.layer.sub_seq(
         input=conv2seq,
         offsets=seq_offset_data,
         sizes=seq_len_data,
@@ -278,7 +278,7 @@ def deep_speech_v2_network(audio_data,
         bias_attr=False)
     # rnn group
     rnn_group_output = rnn_group(
-        input=remove_padding,
+        input=remove_padding_data,
         size=rnn_size,
         num_stacks=num_rnn_layers,
         use_gru=use_gru,