add barrier (#2309)
parent
7cc1d66863
commit
f795d6f034
@ -0,0 +1,10 @@
|
|||||||
|
# Multi-node start-up barrier.
#
# When more than one node is configured, repeatedly runs
# `paddle.distributed.launch run_check` until it succeeds, so that the script
# only returns once the check has passed.
#
# Usage:  <script> [TIMEOUT]
#   TIMEOUT              per-attempt limit passed to timeout(1); default "10m".
# Environment:
#   PADDLE_TRAINERS_NUM  number of nodes; the barrier is skipped when it is
#                        not greater than 1 (default "1").
#   PYTHON               interpreter used to run the check (default "python").
set -ex

NNODES=${PADDLE_TRAINERS_NUM:-"1"}
PYTHON=${PYTHON:-"python"}
TIMEOUT=${1:-"10m"}

#######################################
# Run the distributed check until it succeeds.
# Globals:   TIMEOUT (read), PYTHON (read)
# Arguments: none
# Outputs:   "Retry barrier ......" on stdout before each retry;
#            a diagnostic on stderr when giving up.
# Returns:   0 once the check succeeds; 125, 126 or 127 when the check
#            could not be started at all.
#######################################
run_barrier() {
  local status
  while true; do
    status=0
    # `|| status=$?` records the failure without tripping `set -e`.
    timeout "$TIMEOUT" "$PYTHON" -m paddle.distributed.launch run_check \
      || status=$?
    case "$status" in
      0)
        return 0
        ;;
      125 | 126 | 127)
        # 125: timeout(1) itself failed (e.g. invalid duration);
        # 126: command found but not executable; 127: command not found.
        # Retrying cannot fix any of these, so stop instead of spinning.
        echo "barrier: cannot run '$PYTHON' under timeout '$TIMEOUT'" \
          "(exit status $status)" >&2
        return "$status"
        ;;
    esac
    echo "Retry barrier ......"
  done
}

if [[ "$NNODES" -gt 1 ]]; then
  run_barrier
fi
|
Loading…
Reference in new issue