From 4b26bf620cc32c908964d7ec68b7ec6bec491206 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Mon, 14 Aug 2017 20:42:09 +0800 Subject: [PATCH 1/3] Rename self.local_data to self._local_data in class DataGenerator. --- data_utils/data.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/data_utils/data.py b/data_utils/data.py index 98180b4b..33fcadc7 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -85,9 +85,9 @@ class DataGenerator(object): self._rng = random.Random(random_seed) self._epoch = 0 # for caching tar files info - self.local_data = local() - self.local_data.tar2info = {} - self.local_data.tar2object = {} + self._local_data = local() + self._local_data.tar2info = {} + self._local_data.tar2object = {} def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -240,16 +240,16 @@ class DataGenerator(object): """ if file.startswith('tar:'): tarpath, filename = file.split(':', 1)[1].split('#', 1) - if 'tar2info' not in self.local_data.__dict__: - self.local_data.tar2info = {} - if 'tar2object' not in self.local_data.__dict__: - self.local_data.tar2object = {} - if tarpath not in self.local_data.tar2info: + if 'tar2info' not in self._local_data.__dict__: + self._local_data.tar2info = {} + if 'tar2object' not in self._local_data.__dict__: + self._local_data.tar2object = {} + if tarpath not in self._local_data.tar2info: object, infoes = self._parse_tar(tarpath) - self.local_data.tar2info[tarpath] = infoes - self.local_data.tar2object[tarpath] = object - return self.local_data.tar2object[tarpath].extractfile( - self.local_data.tar2info[tarpath][filename]) + self._local_data.tar2info[tarpath] = infoes + self._local_data.tar2object[tarpath] = object + return self._local_data.tar2object[tarpath].extractfile( + self._local_data.tar2info[tarpath][filename]) else: return open(file, 'r') From be37b03f0c4c181f3921697bfaf5a17a50f11b51 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 17 Aug 2017 11:10:49 +0800 Subject: [PATCH 2/3] Fix a typo caused exception for audio_featurizer.py. --- data_utils/featurizer/audio_featurizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index f0d223cf..39f45301 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -57,7 +57,7 @@ class AudioFeaturizer(object): def featurize(self, audio_segment, allow_downsampling=True, - allow_upsamplling=True): + allow_upsampling=True): """Extract audio features from AudioSegment or SpeechSegment. :param audio_segment: Audio/speech segment to extract features from. From 1d163ad15f7bd37799c7015024cbebb110680b95 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 31 Aug 2017 12:22:27 +0800 Subject: [PATCH 3/3] Fixed a serious mistake of bidirectional simple rnn for DS2. --- cloud/pcloud_submit.sh | 4 ++-- layer.py | 16 ++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cloud/pcloud_submit.sh b/cloud/pcloud_submit.sh index a7fb42cb..3c9a1c26 100644 --- a/cloud/pcloud_submit.sh +++ b/cloud/pcloud_submit.sh @@ -1,6 +1,6 @@ TRAIN_MANIFEST="cloud/cloud.manifest.train" DEV_MANIFEST="cloud/cloud.manifest.dev" -CLOUD_MODEL_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/model" +CLOUD_MODEL_DIR="./checkpoints" BATCH_SIZE=256 NUM_GPU=8 NUM_NODE=1 @@ -11,7 +11,7 @@ DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} paddlecloud submit \ --image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ +-image bootstrapper:5000/paddlepaddle/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ -cpu ${NUM_GPU} \ -gpu ${NUM_GPU} \ diff --git a/layer.py b/layer.py index 3b492645..ef25c0a1 100644 --- a/layer.py +++ b/layer.py @@ -55,16 +55,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): :rtype: LayerOutput """ # input-hidden weights shared across bi-direcitonal rnn. - input_proj = paddle.layer.fc( + input_proj_forward = paddle.layer.fc( input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) - # batch norm is only performed on input-state projection - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) + input_proj_backward = paddle.layer.fc( + input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) + # batch norm is only performed on input-state projection + input_proj_bn_forward = paddle.layer.batch_norm( + input=input_proj_forward, act=paddle.activation.Linear()) + input_proj_bn_backward = paddle.layer.batch_norm( + input=input_proj_backward, act=paddle.activation.Linear()) # forward and backward in time forward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=False) + input=input_proj_bn_forward, act=act, reverse=False) backward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=True) + input=input_proj_bn_backward, act=act, reverse=True) return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn])