changed examples scripts
This commit is contained in:
parent
30b25c8146
commit
cf7dbf6d7c
|
@ -53,6 +53,13 @@ source activate YourEnv
|
||||||
export NCCL_DEBUG=INFO
|
export NCCL_DEBUG=INFO
|
||||||
export PYTHONFAULTHANDLER=1
|
export PYTHONFAULTHANDLER=1
|
||||||
|
|
||||||
|
# on your cluster you might need these:
|
||||||
|
# set the network interface
|
||||||
|
export NCCL_SOCKET_IFNAME=^docker0,lo
|
||||||
|
|
||||||
|
# might need the latest cuda
|
||||||
|
module load NCCL/2.4.7-1-cuda.10.0
|
||||||
|
|
||||||
# random port between 12k and 20k
|
# random port between 12k and 20k
|
||||||
# RANDOM yields 0..32767; modulo 8000 keeps the port within 12000-19999
export MASTER_PORT=$((12000 + RANDOM % 8000))
|
# RANDOM yields 0..32767; modulo 8000 keeps the port within 12000-19999
export MASTER_PORT=$((12000 + RANDOM % 8000))
|
||||||
|
|
||||||
|
|
|
@ -10,5 +10,21 @@
|
||||||
# activate conda env
|
# activate conda env
|
||||||
conda activate my_env
|
conda activate my_env
|
||||||
|
|
||||||
|
# -------------------------
|
||||||
|
# debugging flags (optional)
|
||||||
|
# export NCCL_DEBUG=INFO
|
||||||
|
# export PYTHONFAULTHANDLER=1
|
||||||
|
|
||||||
|
# on your cluster you might need these:
|
||||||
|
# set the network interface
|
||||||
|
# export NCCL_SOCKET_IFNAME=^docker0,lo
|
||||||
|
|
||||||
|
# might need the latest cuda
|
||||||
|
# module load NCCL/2.4.7-1-cuda.10.0
|
||||||
|
# -------------------------
|
||||||
|
|
||||||
|
# random port between 12k and 20k
|
||||||
|
# RANDOM yields 0..32767; modulo 8000 keeps the port within 12000-19999
export MASTER_PORT=$((12000 + RANDOM % 8000))
|
||||||
|
|
||||||
# run script from above
|
# run script from above
|
||||||
python minimal_multi_node_demo.py
|
python minimal_multi_node_demo.py
|
Loading…
Reference in New Issue