add barrier (#2309)
parent
7cc1d66863
commit
f795d6f034
@@ -0,0 +1,10 @@
|
||||
# Multi-node start-up barrier (requires bash: uses [[ ]] and `local`).
#
# When more than one node is configured, repeatedly runs
#   "$PYTHON" -m paddle.distributed.launch run_check
# with each attempt bounded by timeout(1), until an attempt exits 0.
# With a single node nothing is launched.
#
# Arguments:
#   $1 - per-attempt duration handed to timeout(1); defaults to "10m".
# Environment:
#   PADDLE_TRAINERS_NUM - node count; defaults to "1".
#   PYTHON              - interpreter to invoke; defaults to "python".
set -ex

NNODES=${PADDLE_TRAINERS_NUM:-"1"}
PYTHON=${PYTHON:-"python"}
TIMEOUT=${1:-"10m"}

#######################################
# Retry the launch check until it succeeds.
# Globals:   NNODES, PYTHON, TIMEOUT (all read)
# Arguments: none
# Outputs:   "Retry barrier ......" on stdout after each failed attempt;
#            a diagnostic on stderr when giving up.
# Returns:   0 once an attempt succeeds (or immediately when NNODES <= 1);
#            125, 126 or 127 when timeout(1) reports that it could not run
#            the command at all, since retrying cannot fix that.
#######################################
wait_for_barrier() {
  local status

  if [[ "$NNODES" -gt 1 ]]; then
    while true; do
      # `|| status=$?` captures the exit code without tripping `set -e`.
      status=0
      timeout "$TIMEOUT" "$PYTHON" -m paddle.distributed.launch run_check \
        || status=$?

      case "$status" in
        0)
          break
          ;;
        125 | 126 | 127)
          # GNU timeout: 125 = timeout itself failed (e.g. bad duration),
          # 126 = command found but not invocable, 127 = command not found.
          # These never recover, so fail instead of spinning forever.
          echo "barrier: cannot run launch check (exit ${status}), aborting" >&2
          return "$status"
          ;;
      esac

      # Anything else (timed out, check failed, killed) is worth another try.
      echo "Retry barrier ......"
    done
  fi
}

wait_for_barrier
|
Loading…
Reference in new issue