# Configure input data set in local filesystem TRAIN_MANIFEST="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" TEST_MANIFEST="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/manifest.dev" VOCAB_FILE="/home/work/demo/ds2/pcloud/models/deep_speech_2/datasets/vocab/eng_vocab.txt" MEAN_STD_FILE="/home/work/demo/ds2/pcloud/models/deep_speech_2/mean_std.npz" # Configure output path in PaddleCloud filesystem CLOUD_DATA_DIR="/pfs/dlnel/home/demo/deepspeech2/data" CLOUD_MODEL_DIR="/pfs/dlnel/home/demo/deepspeech2/model" # Pack and upload local data to PaddleCloud filesystem python upload_data.py \ --train_manifest_path=${TRAIN_MANIFEST} \ --test_manifest_path=${TEST_MANIFEST} \ --vocab_file=${VOCAB_FILE} \ --mean_std_file=${MEAN_STD_FILE} \ --cloud_data_path=${CLOUD_DATA_DIR} if [ $? -ne 0 ] then echo "upload data failed!" exit 1 fi JOB_NAME=deepspeech`date +%Y%m%d%H%M%S` DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} # Configure computation resource and submit job to PaddleCloud paddlecloud submit \ -image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ -cpu 4 \ -gpu 4 \ -memory 10Gi \ -parallelism 2 \ -pscpu 1 \ -pservers 1 \ -psmemory 10Gi \ -passes 1 \ -entry "sh pcloud_train.sh ${CLOUD_DATA_DIR} ${CLOUD_MODEL_DIR}" \ ${DS2_PATH} rm ${DS2_PATH}/pcloud_train.sh