2020-10-13 11:18:07 +00:00
|
|
|
# Copyright The PyTorch Lightning team.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2020-10-01 05:21:38 +00:00
|
|
|
import io
|
|
|
|
from typing import Any
|
2020-12-08 22:02:10 +00:00
|
|
|
|
|
|
|
import torch
|
2020-10-01 05:21:38 +00:00
|
|
|
from torch import distributed as torch_distrib
|
|
|
|
|
2020-12-14 14:49:05 +00:00
|
|
|
from pytorch_lightning.utilities import _GROUP_AVAILABLE
|
2020-12-08 22:02:10 +00:00
|
|
|
|
|
|
|
# Default process group used by ``LightningDistributed``. ``group.WORLD`` is only
# looked up when the ``_GROUP_AVAILABLE`` flag is set; otherwise ``WORLD`` is
# ``None`` and the broadcast helper calls ``torch.distributed.broadcast``
# without an explicit ``group`` argument.
if _GROUP_AVAILABLE:
    from torch.distributed import group

    WORLD = group.WORLD
else:
    WORLD = None
|
|
|
|
|
2020-10-01 05:21:38 +00:00
|
|
|
|
|
|
|
class LightningDistributed:
    """Broadcast an arbitrary Python object from rank 0 to the other ranks.

    The object is serialized with ``torch.save`` into an in-memory buffer and
    sent as two collective broadcasts: first a one-element tensor holding the
    payload length, then a ``uint8`` tensor holding the payload bytes. Ranks
    other than 0 receive both tensors and rebuild the object with
    ``torch.load``.

    Args:
        rank: Rank of the current process; rank ``0`` acts as the sender.
        device: Device the intermediate tensors are moved to before each
            broadcast.
    """

    def __init__(self, rank=None, device=None):
        self.rank = rank
        self.device = device

    def broadcast(self, obj: Any, group=WORLD):
        """Send ``obj`` from rank 0 and return the shared object on every rank.

        Args:
            obj: Object to broadcast. Only the value passed on rank 0 is sent;
                on other ranks the argument is replaced by the received object.
            group: Process group forwarded to ``torch.distributed.broadcast``.

        Returns:
            On rank 0 the ``obj`` that was passed in; on every other rank the
            object deserialized from the received bytes.
        """
        if self.rank == 0:
            self._emit(obj, group)
        else:
            obj = self._receive(group)
        return obj

    def _broadcast(self, tensor, src=0, group=WORLD):
        """Call ``torch.distributed.broadcast`` on ``tensor``.

        Args:
            tensor: Tensor that is sent (on ``src``) or filled in place
                (on the other ranks).
            src: Rank that owns the data to broadcast.
            group: Process group to use. When ``None`` the ``group`` argument
                is omitted from the underlying call.

        Returns:
            Whatever ``torch.distributed.broadcast`` returns.
        """
        if group is None:
            return torch_distrib.broadcast(tensor, src=src)
        # Forward the caller's ``src``; previously this branch always used 0.
        return torch_distrib.broadcast(tensor, src=src, group=group)

    def _emit(self, obj: Any, group=WORLD):
        """Serialize ``obj`` and broadcast its length followed by its bytes."""
        buffer = io.BytesIO()
        torch.save(obj, buffer)
        data = bytearray(buffer.getbuffer())

        # Receivers need the payload size first so they can allocate a buffer.
        length_tensor = torch.tensor([len(data)]).long().to(self.device)
        self._broadcast(length_tensor, src=0, group=group)

        data_tensor = torch.ByteTensor(data).to(self.device)
        self._broadcast(data_tensor, src=0, group=group)

    def _receive(self, group=WORLD):
        """Receive the length and payload broadcasts and rebuild the object.

        Returns:
            The object produced by ``torch.load`` on the received bytes.
        """
        # Placeholder that the first broadcast overwrites with the real length.
        length_tensor = torch.tensor([0]).long().to(self.device)
        self._broadcast(length_tensor, src=0, group=group)

        data_tensor = torch.empty([length_tensor.item()], dtype=torch.uint8).to(self.device)
        self._broadcast(data_tensor, src=0, group=group)

        buffer = io.BytesIO(data_tensor.cpu().numpy())
        # NOTE(review): ``torch.load`` unpickles the payload, so this assumes
        # every rank in the group is trusted - confirm for your deployment.
        obj = torch.load(buffer)
        return obj
|