[App] Fix multi-node pytorch example CI (#15753)

This commit is contained in:
Ethan Harris 2022-11-21 16:02:30 +00:00 committed by GitHub
parent 1ffbe1bf1e
commit bc797fd376
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 5 additions and 5 deletions

View File

@@ -246,7 +246,7 @@ subprojects:
- ".github/workflows/ci-app-examples.yml" - ".github/workflows/ci-app-examples.yml"
- "src/lightning_app/**" - "src/lightning_app/**"
- "tests/tests_app_examples/**" - "tests/tests_app_examples/**"
- "examples/app_*" - "examples/app_*/**"
- "requirements/app/**" - "requirements/app/**"
- "setup.py" - "setup.py"
- ".actions/**" - ".actions/**"

View File

@@ -11,7 +11,7 @@ on:
- ".github/workflows/ci-app-examples.yml" - ".github/workflows/ci-app-examples.yml"
- "src/lightning_app/**" - "src/lightning_app/**"
- "tests/tests_app_examples/**" - "tests/tests_app_examples/**"
- "examples/app_*" - "examples/app_*/**"
- "requirements/app/**" - "requirements/app/**"
- "setup.py" - "setup.py"
- ".actions/**" - ".actions/**"

View File

@@ -22,7 +22,7 @@ def distributed_train(local_rank: int, main_address: str, main_port: int, num_no
# 2. PREPARE DISTRIBUTED MODEL # 2. PREPARE DISTRIBUTED MODEL
model = torch.nn.Linear(32, 2) model = torch.nn.Linear(32, 2)
device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu") device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
model = DistributedDataParallel(model, device_ids=[local_rank]).to(device) model = DistributedDataParallel(model, device_ids=[local_rank] if torch.cuda.is_available() else None).to(device)
# 3. SETUP LOSS AND OPTIMIZER # 3. SETUP LOSS AND OPTIMIZER
criterion = torch.nn.MSELoss() criterion = torch.nn.MSELoss()

View File

@@ -23,7 +23,7 @@ def distributed_train(local_rank: int, main_address: str, main_port: int, num_no
# 2. PREPARE DISTRIBUTED MODEL # 2. PREPARE DISTRIBUTED MODEL
model = torch.nn.Linear(32, 2) model = torch.nn.Linear(32, 2)
device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu") device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
model = DistributedDataParallel(model, device_ids=[local_rank]).to(device) model = DistributedDataParallel(model, device_ids=[local_rank] if torch.cuda.is_available() else None).to(device)
# 3. SETUP LOSS AND OPTIMIZER # 3. SETUP LOSS AND OPTIMIZER
criterion = torch.nn.MSELoss() criterion = torch.nn.MSELoss()
@@ -55,7 +55,7 @@ class PyTorchDistributed(L.LightningWork):
) )
# 32 GPUs: (8 nodes x 4 v 100) # 8 GPUs: (2 nodes x 4 v 100)
compute = L.CloudCompute("gpu-fast-multi") # 4xV100 compute = L.CloudCompute("gpu-fast-multi") # 4xV100
component = MultiNode(PyTorchDistributed, num_nodes=2, cloud_compute=compute) component = MultiNode(PyTorchDistributed, num_nodes=2, cloud_compute=compute)
app = L.LightningApp(component) app = L.LightningApp(component)