parent
bbe47a4318
commit
9fa9a352ac
@@ -0,0 +1,45 @@
# DeepSpeech2 on PaddlePaddle Cloud

## Run DS2 with public data

**Step 1:** Make sure the current directory is `models/deep_speech_2/cloud/`.

**Step 2:** Submit the job by running: `sh pcloud_submit.sh`

```
$ sh pcloud_submit.sh
uploading: deepspeech.tar.gz...
uploading: pcloud_prepare_data.py...
uploading: pcloud_split_data.py...
uploading: pcloud_submit.sh...
uploading: pcloud_train.sh...
deepspeech20170727130129 submited.
```

The job name, `deepspeech20170727130129` in this example, is printed on the last line.
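
If the submission is scripted, the job name can be captured instead of copied by hand. A minimal sketch, assuming the name is always the first token of the last output line:

```
JOB_NAME=$(sh pcloud_submit.sh | tail -n 1 | awk '{print $1}')
# Reuse it for the log command in Step 3.
paddlecloud logs -n 10000 ${JOB_NAME}
```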

**Step 3:** Fetch the job's logs from PaddlePaddle Cloud: `paddlecloud logs -n 10000 deepspeech20170727130129`.

```
$ paddlecloud logs -n 10000 deepspeech20170727130129
==========================deepspeech20170727130129-trainer-6vk3m==========================
label selector: paddle-job-pserver=deepspeech20170727130129, desired: 1
running pod list:  [('Running', '10.1.3.6')]
label selector: paddle-job=deepspeech20170727130129, desired: 1
running pod list:  [('Running', '10.1.83.14')]
Starting training job:  /pfs/dlnel/home/yanxu05@baidu.com/jobs/deepspeech20170727130129, num_gradient_servers: 1, trainer_id:  0, version: v2
I0727 05:01:42.969719    25 Util.cpp:166] commandline:  --num_gradient_servers=1 --ports_num_for_sparse=1 --use_gpu=1 --trainer_id=0 --pservers=10.1.3.6 --trainer_count=4 --num_passes=1 --ports_num=1 --port=7164
[INFO 2017-07-27 05:01:50,279 layers.py:2430] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968
[WARNING 2017-07-27 05:01:50,280 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better
[INFO 2017-07-27 05:01:50,283 layers.py:2430] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848
[WARNING 2017-07-27 05:01:50,283 layers.py:2789] brelu is not recommend for batch normalization's activation, maybe the relu is better
[WARNING 2017-07-27 05:01:50,287 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better
[WARNING 2017-07-27 05:01:50,291 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better
[WARNING 2017-07-27 05:01:50,295 layers.py:2789] is not recommend for batch normalization's activation, maybe the relu is better
I0727 05:01:50.316176    25 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=4 numDevices=4
I0727 05:01:50.454787    25 GradientMachine.cpp:85] Initing parameters..
I0727 05:01:50.690007    25 GradientMachine.cpp:92] Init parameters done.
```

[More options and commands for PaddlePaddle Cloud](https://github.com/PaddlePaddle/cloud/blob/develop/doc/usage_cn.md)

## Run DS2 with customized data

TODO
@@ -0,0 +1,50 @@
"""
This tool splits data among the nodes of PaddlePaddle Cloud,
according to the total trainer count and the current trainer id.
A trainer is one instance in the k8s cluster.
This script is intended to be run on PaddlePaddle Cloud.
"""
import os
import json
import argparse

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--in_manifest_path",
    default='./cloud/data/dev.mani',
    type=str,
    help="Input manifest path. (default: %(default)s)")
parser.add_argument(
    "--data_tar_path",
    default='./cloud/data/dev.tar',
    type=str,
    help="Data tar file path. (default: %(default)s)")
parser.add_argument(
    "--out_manifest_path",
    default='./cloud/data/dev.mani.split',
    type=str,
    help="Output manifest file path. (default: %(default)s)")
args = parser.parse_args()


def split_data(in_manifest, tar_path, out_manifest):
    # PaddlePaddle Cloud writes this node's id and the total trainer
    # count into these two files.
    with open("/trainer_id", "r") as f:
        trainer_id = int(f.readline()[:-1])
    with open("/trainer_count", "r") as f:
        trainer_count = int(f.readline()[:-1])

    tar_path = os.path.abspath(tar_path)
    result = []
    # Round-robin split: keep every trainer_count-th line, offset by this
    # node's trainer_id, and point each audio path into the shared tar.
    for index, json_line in enumerate(open(in_manifest)):
        if (index % trainer_count) == trainer_id:
            json_data = json.loads(json_line)
            json_data['audio_filepath'] = "tar:%s#%s" % (
                tar_path, json_data['audio_filepath'])
            result.append("%s\n" % json.dumps(json_data))
    with open(out_manifest, 'w') as manifest:
        manifest.writelines(result)


if __name__ == '__main__':
    split_data(args.in_manifest_path, args.data_tar_path,
               args.out_manifest_path)
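
To make the rewriting concrete, here is a usage sketch with the script's default paths. With `trainer_count=2` and `trainer_id=0`, lines 0, 2, 4, ... of the input manifest are kept:

```
# Run on a cloud node; /trainer_id and /trainer_count are provided
# by PaddlePaddle Cloud.
python pcloud_split_data.py \
--in_manifest_path='./cloud/data/dev.mani' \
--data_tar_path='./cloud/data/dev.tar' \
--out_manifest_path='./cloud/data/dev.mani.split'
# Each kept line's "audio_filepath" is rewritten from, e.g.,
#   {"audio_filepath": "foo/bar.wav", ...}
# to (with the tar path made absolute)
#   {"audio_filepath": "tar:/abs/path/to/cloud/data/dev.tar#foo/bar.wav", ...}
```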
@@ -0,0 +1,17 @@
DS2_PATH=../
tar -czf deepspeech.tar.gz ${DS2_PATH}
JOB_NAME=deepspeech`date +%Y%m%d%H%M%S`
cp pcloud_train.sh ${DS2_PATH}
paddlecloud submit \
-image wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \
-jobname ${JOB_NAME} \
-cpu 4 \
-gpu 4 \
-memory 10Gi \
-parallelism 1 \
-pscpu 1 \
-pservers 1 \
-psmemory 10Gi \
-passes 1 \
-entry "sh pcloud_train.sh" \
.
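
For a quick smoke test before committing four GPUs, the same call can be scaled down; the deleted version of this script further below used `-cpu 1 -gpu 0`. A sketch with illustrative values (note that `pcloud_train.sh` passes `--use_gpu=1`, which would also need changing for a CPU-only run):

```
# Single CPU-only trainer for debugging (illustrative values).
paddlecloud submit \
-image wanghaoshuang/pcloud_ds2:latest-gpu-cudnn \
-jobname deepspeech-debug`date +%Y%m%d%H%M%S` \
-cpu 1 \
-gpu 0 \
-memory 10Gi \
-parallelism 1 \
-pscpu 1 \
-pservers 1 \
-psmemory 10Gi \
-passes 1 \
-entry "sh pcloud_train.sh" \
.
```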
@@ -0,0 +1,37 @@
DATA_PATH=/pfs/dlnel/public/dataset/speech/libri
# set by user
TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train
# set by user
DEV_MANI=${DATA_PATH}/manifest_pcloud.dev
# set by user
TRAIN_TAR=${DATA_PATH}/data.train.tar
# set by user
DEV_TAR=${DATA_PATH}/data.dev.tar
# set by user
VOCAB_PATH=${DATA_PATH}/eng_vocab.txt
# set by user
MEAN_STD_FILE=${DATA_PATH}/mean_std.npz

tar -xzf deepspeech.tar.gz
rm -rf ./cloud/data/*

# split train data for each pcloud node
python ./cloud/pcloud_split_data.py \
--in_manifest_path=$TRAIN_MANI \
--data_tar_path=$TRAIN_TAR \
--out_manifest_path='./cloud/data/train.mani'

# split dev data for each pcloud node
python ./cloud/pcloud_split_data.py \
--in_manifest_path=$DEV_MANI \
--data_tar_path=$DEV_TAR \
--out_manifest_path='./cloud/data/dev.mani'

python train.py \
--use_gpu=1 \
--trainer_count=4 \
--batch_size=256 \
--mean_std_filepath=$MEAN_STD_FILE \
--train_manifest_path='./cloud/data/train.mani' \
--dev_manifest_path='./cloud/data/dev.mani' \
--vocab_filepath=$VOCAB_PATH \
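
As a head start on the customized-data TODO above, the `# set by user` variables are the only pieces tied to the public LibriSpeech layout. A hedged sketch with hypothetical paths, assuming the manifests and tars are built the same way as the public set:

```
# Hypothetical layout under your own /pfs home; only these variables change.
DATA_PATH=/pfs/dlnel/home/USER/my_speech_data
TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train
DEV_MANI=${DATA_PATH}/manifest_pcloud.dev
TRAIN_TAR=${DATA_PATH}/data.train.tar
DEV_TAR=${DATA_PATH}/data.dev.tar
VOCAB_PATH=${DATA_PATH}/eng_vocab.txt
MEAN_STD_FILE=${DATA_PATH}/mean_std.npz
```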
@@ -1,47 +0,0 @@
import os
import json
import argparse


def split_data(inManifest, tar_path, outManifest):
    trainer_id = 1
    trainer_count = 2
    #with open("/trainer_id", "r") as f:
    #    trainer_id = int(f.readline()[:-1])
    #with open("/trainer_count", "r") as f:
    #    trainer_count = int(f.readline()[:-1])

    tarPath = os.path.abspath(tar_path)
    result = []
    for index, json_line in enumerate(open(inManifest)):
        if (index % trainer_count) == trainer_id:
            json_data = json.loads(json_line)
            json_data['audio_filepath'] = "tar:%s#%s" % (
                tarPath, json_data['audio_filepath'])
            result.append("%s\n" % json.dumps(json_data))
    with open(outManifest, 'w') as manifest:
        manifest.writelines(result)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument(
        "--in_manifest_path",
        default='datasets/dev.mani',
        type=str,
        help="Input manifest path. (default: %(default)s)")
    parser.add_argument(
        "--data_tar_path",
        default='datasets/dev.tar',
        type=str,
        help="Data tar file path. (default: %(default)s)")
    parser.add_argument(
        "--out_manifest_path",
        default='datasets/dev.mani.split',
        type=str,
        help="Out manifest file path. (default: %(default)s)")
    args = parser.parse_args()

    split_data(args.in_manifest_path, args.data_tar_path,
               args.out_manifest_path)
@@ -1,13 +0,0 @@
paddlecloud submit \
-image wanghaoshuang/pcloud_ds2 \
-jobname ds23 \
-cpu 1 \
-gpu 0 \
-memory 10Gi \
-parallelism 1 \
-pscpu 1 \
-pservers 1 \
-psmemory 10Gi \
-passes 1 \
-entry "sh pcloud_train.sh" \
./deep_speech_2
@@ -1,32 +1,37 @@
+DATA_PATH=/pfs/dlnel/public/dataset/speech/libri
 #setted by user
-TRAIN_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani'
+TRAIN_MANI=${DATA_PATH}/manifest_pcloud.train
 #setted by user
-DEV_MANI='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.mani'
+DEV_MANI=${DATA_PATH}/manifest_pcloud.dev
 #setted by user
-TRAIN_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar'
+TRAIN_TAR=${DATA_PATH}/data.train.tar
 #setted by user
-DEV_TAR='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/demo.tar'
+DEV_TAR=${DATA_PATH}/data.dev.tar
 #setted by user
-VOCAB_PATH='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/eng_vocab.txt'
+VOCAB_PATH=${DATA_PATH}/eng_vocab.txt
 #setted by user
-MEAN_STD_FILE='/pfs/dlnel/home/yanxu05@baidu.com/wanghaoshuang/data/ds2_data/mean_std.npz'
+MEAN_STD_FILE=${DATA_PATH}/mean_std.npz
 
+tar -xzvf deepspeech.tar.gz
+rm -rf ./cloud/data/*
+
 # split train data for each pcloud node
-python pcloud_split_data.py \
+python ./cloud/pcloud_split_data.py \
 --in_manifest_path=$TRAIN_MANI \
 --data_tar_path=$TRAIN_TAR \
---out_manifest_path='./train.mani'
+--out_manifest_path='./cloud/data/train.mani'
 
 # split dev data for each pcloud node
 python pcloud_split_data.py \
 --in_manifest_path=$DEV_MANI \
 --data_tar_path=$DEV_TAR \
---out_manifest_path='./dev.mani'
+--out_manifest_path='./cloud/data/dev.mani'
 
 python train.py \
---use_gpu=0 \
+--use_gpu=1 \
 --trainer_count=4 \
---batch_size=2 \
+--batch_size=256 \
 --mean_std_filepath=$MEAN_STD_FILE \
---train_manifest_path='./train.mani' \
+--train_manifest_path='./cloud/data/train.mani' \
---dev_manifest_path='./dev.mani' \
+--dev_manifest_path='./cloud/data/dev.mani' \
 --vocab_filepath=$VOCAB_PATH \