2022-11-08 12:55:31 +00:00
|
|
|
import torch
|
|
|
|
|
|
|
|
import lightning as L
|
|
|
|
from lightning.app.components import LiteMultiNode
|
|
|
|
from lightning.lite import LightningLite
|
|
|
|
|
|
|
|
|
|
|
|
class LitePyTorchDistributed(L.LightningWork):
    """Lightning work that trains a tiny two-layer network with LightningLite.

    The model is a ``Linear(1, 1) -> ReLU -> Linear(1, 1)`` stack optimized
    with SGD against an MSE loss on a single fixed sample.
    """

    def run(self):
        """Set up the model with LightningLite (DDP, 16-bit) and train it.

        Runs 1000 optimization steps and prints the global rank, the step
        index and the loss on every step.
        """
        # 1. Prepare the model
        model = torch.nn.Sequential(
            torch.nn.Linear(1, 1),
            torch.nn.ReLU(),
            torch.nn.Linear(1, 1),
        )

        # 2. Create LightningLite.
        lite = LightningLite(strategy="ddp", precision=16)
        model, optimizer = lite.setup(model, torch.optim.SGD(model.parameters(), lr=0.01))
        criterion = torch.nn.MSELoss()

        # The training sample is the same on every step, so build it once and
        # directly on the target device instead of re-allocating on the CPU
        # and copying it over on each of the 1000 iterations.
        x = torch.tensor([0.8], device=lite.device)
        target = torch.tensor([1.0], device=lite.device)

        # 3. Train the model for 1000 steps.
        for step in range(1000):
            model.zero_grad()
            output = model(x)
            loss = criterion(output, target)
            print(f"global_rank: {lite.global_rank} step: {step} loss: {loss}")
            # Let Lite drive the backward pass rather than calling
            # ``loss.backward()`` directly.
            lite.backward(loss)
            optimizer.step()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the training work on 2 nodes, each a 4 x V100 machine.
_multi_node_work = LiteMultiNode(
    LitePyTorchDistributed,
    cloud_compute=L.CloudCompute("gpu-fast-multi"),  # 4 x V100
    num_nodes=2,
)

# Module-level app object wrapping the multi-node component.
app = L.LightningApp(_multi_node_work)
|