2020-05-05 02:16:54 +00:00
|
|
|
.. testsetup:: *
|
|
|
|
|
|
|
|
from pytorch_lightning.core.lightning import LightningModule
|
|
|
|
|
2020-08-13 22:56:51 +00:00
|
|
|
.. _multiple_loaders:
|
|
|
|
|
2020-04-08 15:38:12 +00:00
|
|
|
Multiple Datasets
|
|
|
|
=================
|
|
|
|
Lightning supports multiple dataloaders in a few ways.
|
|
|
|
|
2020-10-07 18:25:52 +00:00
|
|
|
1. Create a dataloader that iterates multiple datasets under the hood.
|
2020-04-08 15:38:12 +00:00
|
|
|
2. In the validation and test loop you also have the option to return multiple dataloaders,
   which Lightning will call sequentially.
|
|
|
|
|
2020-06-19 06:38:10 +00:00
|
|
|
----------
|
|
|
|
|
2020-04-08 15:38:12 +00:00
|
|
|
Multiple training dataloaders
|
|
|
|
-----------------------------
|
2020-10-07 18:25:52 +00:00
|
|
|
For training, the best way to use multiple datasets is to create a ``Dataset`` class
which wraps your individual datasets, and then feed that combined dataset to a single
``DataLoader`` (this of course also works for testing and validation
datasets).
|
|
|
|
|
|
|
|
(`reference <https://discuss.pytorch.org/t/train-simultaneously-on-two-datasets/649/2>`_)
|
|
|
|
|
2020-05-05 02:16:54 +00:00
|
|
|
.. testcode::
|
2020-04-08 15:38:12 +00:00
|
|
|
|
|
|
|
class ConcatDataset(torch.utils.data.Dataset):
    """Zip several datasets together sample-by-sample.

    Item ``i`` is the tuple of each wrapped dataset's item ``i``; the
    length is that of the shortest wrapped dataset, so indexing never
    runs past the end of any of them.
    """

    def __init__(self, *datasets):
        # Keep the wrapped datasets around for per-index lookup.
        self.datasets = datasets

    def __getitem__(self, idx):
        # One sample from every dataset, paired into a single tuple.
        return tuple(ds[idx] for ds in self.datasets)

    def __len__(self):
        # Bounded by the smallest dataset so every index is valid everywhere.
        return min(map(len, self.datasets))
|
|
|
|
|
|
|
|
class LitModel(LightningModule):
    """Example module whose training data pairs two image folders via ``ConcatDataset``."""

    def train_dataloader(self):
        # Pair the two ImageFolder datasets sample-by-sample, then serve the
        # combined dataset through one ordinary DataLoader.
        combined = ConcatDataset(
            datasets.ImageFolder(traindir_A),
            datasets.ImageFolder(traindir_B)
        )

        return torch.utils.data.DataLoader(
            combined,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True
        )

    def val_dataloader(self):
        # Built the same way as train_dataloader.
        ...

    def test_dataloader(self):
        # Built the same way as train_dataloader.
        ...
|
2020-04-08 15:38:12 +00:00
|
|
|
|
2020-06-19 06:38:10 +00:00
|
|
|
----------
|
|
|
|
|
2020-04-08 15:38:12 +00:00
|
|
|
Test/Val dataloaders
|
|
|
|
--------------------
|
2020-10-07 18:25:52 +00:00
|
|
|
For validation and test dataloaders, Lightning also gives you the additional
option of returning multiple dataloaders from each of these calls.
|
2020-04-08 15:38:12 +00:00
|
|
|
|
|
|
|
See the following for more details:
|
|
|
|
|
2020-09-14 01:04:21 +00:00
|
|
|
- :meth:`~pytorch_lightning.core.datamodule.LightningDataModule.val_dataloader`
|
|
|
|
- :meth:`~pytorch_lightning.core.datamodule.LightningDataModule.test_dataloader`
|
2020-04-08 15:38:12 +00:00
|
|
|
|
2020-05-05 02:16:54 +00:00
|
|
|
.. testcode::
|
2020-04-08 15:38:12 +00:00
|
|
|
|
|
|
|
def val_dataloader(self):
    """Return two validation dataloaders; Lightning evaluates them sequentially.

    Fixed: the original example instantiated ``Dataloader()``, which is a typo —
    no such class exists. The correct class is ``torch.utils.data.DataLoader``.
    ``...`` stands in for the real constructor arguments (dataset, batch size, ...).
    """
    loader_1 = DataLoader(...)
    loader_2 = DataLoader(...)
    return [loader_1, loader_2]
|