lightning/tests/base/datamodules.py

import os
from torch.utils.data import random_split, DataLoader

from pytorch_lightning.core.datamodule import LightningDataModule
from tests.base.datasets import TrialMNIST, MNIST
from torch.utils.data.distributed import DistributedSampler


class TrialMNISTDataModule(LightningDataModule):
    """Data module that serves train/val/test loaders built from ``TrialMNIST``.

    ``non_picklable`` starts out as ``None`` and is replaced by a lambda at the
    end of :meth:`setup`; lambdas cannot be serialized by the standard
    ``pickle`` module.
    """

    def __init__(self, data_dir: str = './'):
        """Remember where the dataset lives.

        Args:
            data_dir: directory passed to ``TrialMNIST`` as its root.
        """
        super().__init__()
        self.non_picklable = None
        self.data_dir = data_dir

    def prepare_data(self):
        """Instantiate the train and test ``TrialMNIST`` splits with ``download=True``."""
        for train_flag in (True, False):
            TrialMNIST(self.data_dir, train=train_flag, download=True)

    def setup(self, stage: str = None):
        """Create the datasets needed for ``stage``.

        Args:
            stage: ``'fit'`` builds the train/val split, ``'test'`` builds the
                test set, and ``None`` builds both.
        """
        every_stage = stage is None

        if every_stage or stage == 'fit':
            full_train = TrialMNIST(root=self.data_dir, train=True, num_samples=64, download=True)
            # NOTE(review): the split sizes sum to 192, so the dataset is presumably
            # 192 items long for num_samples=64 — confirm against TrialMNIST.
            train_part, val_part = random_split(full_train, [128, 64])
            self.mnist_train = train_part
            self.mnist_val = val_part
            first_image = self.mnist_train[0][0]
            self.dims = first_image.shape

        if every_stage or stage == 'test':
            self.mnist_test = TrialMNIST(root=self.data_dir, train=False, num_samples=64, download=True)
            # The fallback shape is read eagerly, before getattr decides whether it is needed.
            fallback_dims = self.mnist_test[0][0].shape
            self.dims = getattr(self, 'dims', fallback_dims)

        # Assigned on every call, whatever the stage.
        self.non_picklable = lambda value: value ** 2

    def train_dataloader(self):
        """Return a loader over the training split in batches of 32."""
        train_loader = DataLoader(self.mnist_train, batch_size=32)
        return train_loader

    def val_dataloader(self):
        """Return a loader over the validation split in batches of 32."""
        val_loader = DataLoader(self.mnist_val, batch_size=32)
        return val_loader

    def test_dataloader(self):
        """Return a loader over the test set in batches of 32."""
        test_loader = DataLoader(self.mnist_test, batch_size=32)
        return test_loader


class MNISTDataModule(LightningDataModule):
    """Data module that serves train and test loaders built from ``MNIST``.

    No validation split is created (see the TODO in :meth:`setup`).
    """

    def __init__(
        self,
        data_dir: str = './',
        batch_size: int = 32,
        dist_sampler: bool = False,
    ) -> None:
        """Store the loader configuration.

        Args:
            data_dir: directory passed to ``MNIST`` as its first argument.
            batch_size: batch size used by every loader of this module.
            dist_sampler: when true, the training loader is given a
                ``DistributedSampler``.
        """
        super().__init__()

        self.data_dir = data_dir
        self.batch_size = batch_size
        self.dist_sampler = dist_sampler

        # self.dims is returned when you call dm.size()
        # It is hard-coded here because the shape is known up front;
        # it could instead be assigned dynamically in dm.setup()
        self.dims = (1, 28, 28)

    def prepare_data(self):
        """Instantiate both ``MNIST`` splits with ``download=True`` (download only)."""
        for train_flag in (True, False):
            MNIST(self.data_dir, train=train_flag, download=True, normalize=(0.1307, 0.3081))

    def setup(self, stage: str = None):
        """Create the datasets needed for ``stage``.

        Args:
            stage: ``'fit'`` builds the training set, ``'test'`` builds the
                test set, and ``None`` builds both.
        """
        every_stage = stage is None
        norm_args = (0.1307, 0.3081)

        # Training dataset used by train_dataloader
        # TODO: need to split using random_split once updated to torch >= 1.6
        if every_stage or stage == 'fit':
            self.mnist_train = MNIST(self.data_dir, train=True, normalize=norm_args)

        # Test dataset used by test_dataloader
        if every_stage or stage == 'test':
            self.mnist_test = MNIST(self.data_dir, train=False, normalize=norm_args)

    def train_dataloader(self):
        """Return an unshuffled training loader, with a distributed sampler if enabled."""
        sampler = DistributedSampler(self.mnist_train, shuffle=False) if self.dist_sampler else None
        return DataLoader(
            self.mnist_train,
            batch_size=self.batch_size,
            sampler=sampler,
            shuffle=False,
        )

    def test_dataloader(self):
        """Return an unshuffled loader over the test set."""
        test_loader = DataLoader(self.mnist_test, batch_size=self.batch_size, shuffle=False)
        return test_loader
updated sync bn (#2838) * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * added ddp_spawn test * updated test * clean * clean Co-authored-by: Jirka Borovec <jirka@pytorchlightning.ai> 2020-08-05 23:12:11 +00:00			`import os`
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00			`from torch.utils.data import random_split, DataLoader`

Enable val/test loop disabling + datamodule tests (#2692) * :art: warn instead of error out on loaders * :bug: test misconfiguration should still fail * :construction: . * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-25 16:57:40 +00:00			`from pytorch_lightning.core.datamodule import LightningDataModule`
updated sync bn (#2838) * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * added ddp_spawn test * updated test * clean * clean Co-authored-by: Jirka Borovec <jirka@pytorchlightning.ai> 2020-08-05 23:12:11 +00:00			`from tests.base.datasets import TrialMNIST, MNIST`
			`from torch.utils.data.distributed import DistributedSampler`
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00

Enable val/test loop disabling + datamodule tests (#2692) * :art: warn instead of error out on loaders * :bug: test misconfiguration should still fail * :construction: . * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-25 16:57:40 +00:00			`class TrialMNISTDataModule(LightningDataModule):`
Call DataModule hooks implicitly in trainer (#2755) * :sparkles: call dm hooks in trainer implicitly * :white_check_mark: update tests * :pencil: remove unused stage arg from dm docs * :white_check_mark: update tests * :white_check_mark: update tests * :construction: include stage in datamodule.setup * :pencil: docs * :pencil: docs * added more dm tests * added more dm tests * :bug: call dm.setup everywhere * :fire: pickle tests now implied by accelerator tests * :art: set dm as attr of trainer * :bug: . * :construction: wip * add can prepare test * add can prepare test * verified setup in fit * fixed setup call * fixed setup call * fixed setup call Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-02 00:17:57 +00:00
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00			`def __init__(self, data_dir: str = './'):`
Enable val/test loop disabling + datamodule tests (#2692) * :art: warn instead of error out on loaders * :bug: test misconfiguration should still fail * :construction: . * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-25 16:57:40 +00:00			`super().__init__()`
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00			`self.data_dir = data_dir`
Call DataModule hooks implicitly in trainer (#2755) * :sparkles: call dm hooks in trainer implicitly * :white_check_mark: update tests * :pencil: remove unused stage arg from dm docs * :white_check_mark: update tests * :white_check_mark: update tests * :construction: include stage in datamodule.setup * :pencil: docs * :pencil: docs * added more dm tests * added more dm tests * :bug: call dm.setup everywhere * :fire: pickle tests now implied by accelerator tests * :art: set dm as attr of trainer * :bug: . * :construction: wip * add can prepare test * add can prepare test * verified setup in fit * fixed setup call * fixed setup call * fixed setup call Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-02 00:17:57 +00:00			`self.non_picklable = None`
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00
			`def prepare_data(self):`
Enable val/test loop disabling + datamodule tests (#2692) * :art: warn instead of error out on loaders * :bug: test misconfiguration should still fail * :construction: . * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj * updated docs with new result obj Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-25 16:57:40 +00:00			`TrialMNIST(self.data_dir, train=True, download=True)`
			`TrialMNIST(self.data_dir, train=False, download=True)`
pytorch 1.6 (#2745) * pt 1.6 * don't use the new zipfile serialization for now * quick flake8 fixes * remove unnecessary f * coalesce strings * remove comma * remove extra commas * Apply suggestions from code review Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> * set _use_new_zipfile_serialization to False only for pytorch 1.6.0 * remove unnecessary comments * flake8 fixes * use pkg_resources instead of packaging * readme * format * version * chlog Co-authored-by: Peter Yu <peter@asapp.com> Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> 2020-07-31 09:18:32 +00:00
Call DataModule hooks implicitly in trainer (#2755) * :sparkles: call dm hooks in trainer implicitly * :white_check_mark: update tests * :pencil: remove unused stage arg from dm docs * :white_check_mark: update tests * :white_check_mark: update tests * :construction: include stage in datamodule.setup * :pencil: docs * :pencil: docs * added more dm tests * added more dm tests * :bug: call dm.setup everywhere * :fire: pickle tests now implied by accelerator tests * :art: set dm as attr of trainer * :bug: . * :construction: wip * add can prepare test * add can prepare test * verified setup in fit * fixed setup call * fixed setup call * fixed setup call Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-02 00:17:57 +00:00			`def setup(self, stage: str = None):`

			`if stage == 'fit' or stage is None:`
			`mnist_full = TrialMNIST(root=self.data_dir, train=True, num_samples=64, download=True)`
			`self.mnist_train, self.mnist_val = random_split(mnist_full, [128, 64])`
			`self.dims = self.mnist_train[0][0].shape`

			`if stage == 'test' or stage is None:`
re-trigger build (#2988) * fixed build * fixed build 2020-08-16 01:13:00 +00:00			`self.mnist_test = TrialMNIST(root=self.data_dir, train=False, num_samples=64, download=True)`
Call DataModule hooks implicitly in trainer (#2755) * :sparkles: call dm hooks in trainer implicitly * :white_check_mark: update tests * :pencil: remove unused stage arg from dm docs * :white_check_mark: update tests * :white_check_mark: update tests * :construction: include stage in datamodule.setup * :pencil: docs * :pencil: docs * added more dm tests * added more dm tests * :bug: call dm.setup everywhere * :fire: pickle tests now implied by accelerator tests * :art: set dm as attr of trainer * :bug: . * :construction: wip * add can prepare test * add can prepare test * verified setup in fit * fixed setup call * fixed setup call * fixed setup call Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-08-02 00:17:57 +00:00			`self.dims = getattr(self, 'dims', self.mnist_test[0][0].shape)`

			`self.non_picklable = lambda x: x**2`
Datamodule (#2668) * :sparkles: Add copy of pl_bolts datamodule to lightning * :sparkles: add datamodule to necessary init files * :construction: add datamodule property to LightningModule * :construction: . * :art: Let DataModule do its own thing * :construction: add back setup and run both hooks implicitly * :construction: . * :bug: fix add_argparse_args * :lipstick: apply black formatting and isort * :pencil: docstrings * :pencil: . * :pencil: . * :bug: overwrite cls prepare_data instead of instance * :pencil: . * :white_check_mark: add some tests * Update datamodule.py * Update datamodule.py * Update datamodule.py Co-authored-by: William Falcon <waf2107@columbia.edu> 2020-07-24 15:42:15 +00:00
			`def train_dataloader(self):`
			`return DataLoader(self.mnist_train, batch_size=32)`

			`def val_dataloader(self):`
			`return DataLoader(self.mnist_val, batch_size=32)`

			`def test_dataloader(self):`
			`return DataLoader(self.mnist_test, batch_size=32)`
updated sync bn (#2838) * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * updated sync bn * added ddp_spawn test * updated test * clean * clean Co-authored-by: Jirka Borovec <jirka@pytorchlightning.ai> 2020-08-05 23:12:11 +00:00

			`class MNISTDataModule(LightningDataModule):`
			`def __init__(`
			`self, data_dir: str = './', batch_size: int = 32, dist_sampler: bool = False`
			`) -> None:`
			`super().__init__()`

			`self.dist_sampler = dist_sampler`
			`self.data_dir = data_dir`
			`self.batch_size = batch_size`

			`# self.dims is returned when you call dm.size()`
			`# Setting default dims here because we know them.`
			`# Could optionally be assigned dynamically in dm.setup()`
			`self.dims = (1, 28, 28)`

			`def prepare_data(self):`
			`# download only`
			`MNIST(self.data_dir, train=True, download=True, normalize=(0.1307, 0.3081))`
			`MNIST(self.data_dir, train=False, download=True, normalize=(0.1307, 0.3081))`

			`def setup(self, stage: str = None):`

			`# Assign train/val datasets for use in dataloaders`
			`# TODO: need to split using random_split once updated to torch >= 1.6`
			`if stage == 'fit' or stage is None:`
			`self.mnist_train = MNIST(self.data_dir, train=True, normalize=(0.1307, 0.3081))`

			`# Assign test dataset for use in dataloader(s)`
			`if stage == 'test' or stage is None:`
			`self.mnist_test = MNIST(self.data_dir, train=False, normalize=(0.1307, 0.3081))`

			`def train_dataloader(self):`
			`dist_sampler = None`
			`if self.dist_sampler:`
			`dist_sampler = DistributedSampler(self.mnist_train, shuffle=False)`

			`return DataLoader(`
			`self.mnist_train, batch_size=self.batch_size, sampler=dist_sampler, shuffle=False`
			`)`

			`def test_dataloader(self):`
			`return DataLoader(self.mnist_test, batch_size=self.batch_size, shuffle=False)`