Parity test (#7832)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>

parent ea13f6021c
commit 4c79b3a5b3
@@ -0,0 +1,27 @@
+name: GPU Parity testing
+
+on:
+  schedule:
+    - cron: "0 0 * * *"  # At the end of every day
+
+jobs:
+  parity-test:
+    timeoutInMinutes: 120
+
+    cancelTimeoutInMinutes: 2
+
+    pool: gridai-spot-pool
+
+    container:
+      # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
+      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
+
+    workspace:
+      clean: all
+
+    steps:
+    - bash: |
+        python -m pytest benchmarks -v --durations=0
+      displayName: 'Testing: benchmarks'
+      env:
+        PL_RUNNING_BENCHMARKS: 1

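Aside (not part of the diff): the scheduled step above is straightforward to reproduce locally. A hedged sketch, assuming a GPU machine with the repository's test dependencies installed; pytest is invoked through its Python API so the example stays self-contained:

import os
import sys

import pytest

# Enable the extended benchmarks exactly as the CI step does via its `env:` block.
os.environ["PL_RUNNING_BENCHMARKS"] = "1"
# Equivalent to the CI command: python -m pytest benchmarks -v --durations=0
sys.exit(pytest.main(["benchmarks", "-v", "--durations=0"]))
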
@@ -13,5 +13,6 @@
 # limitations under the License.
 import os

-BENCHMARK_ROOT = os.path.dirname(__file__)
-PROJECT_ROOT = os.path.dirname(BENCHMARK_ROOT)
+_BENCHMARK_ROOT = os.path.dirname(__file__)
+_PROJECT_ROOT = os.path.dirname(_BENCHMARK_ROOT)
+_PATH_DATASETS = os.path.join(_PROJECT_ROOT, 'Datasets')

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import gc
+import os
 import time

 import numpy as np
@@ -20,7 +21,11 @@ import torch
 from tqdm import tqdm

 from pytorch_lightning import LightningModule, seed_everything, Trainer
-from tests.helpers.advanced_models import ParityModuleMNIST, ParityModuleRNN
+from tests.helpers.advanced_models import ParityModuleCIFAR, ParityModuleMNIST, ParityModuleRNN
+
+_EXTEND_BENCHMARKS = os.getenv("PL_RUNNING_BENCHMARKS", '0') == '1'
+_SHORT_BENCHMARKS = not _EXTEND_BENCHMARKS
+_MARK_SHORT_BM = pytest.mark.skipif(_SHORT_BENCHMARKS, reason="Only run during Benchmarking")


 def assert_parity_relative(pl_values, pt_values, norm_by: float = 1, max_diff: float = 0.1):

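Aside (not part of the diff): the three flags above gate expensive parametrized cases behind the environment variable exported by the scheduled CI job. A minimal, self-contained sketch of the same pattern, with illustrative test and parameter names:

import os

import pytest

# Mirror the flag wiring from the diff; the CI job exports PL_RUNNING_BENCHMARKS=1.
_EXTEND_BENCHMARKS = os.getenv("PL_RUNNING_BENCHMARKS", "0") == "1"
_MARK_SHORT_BM = pytest.mark.skipif(not _EXTEND_BENCHMARKS, reason="Only run during Benchmarking")


@pytest.mark.parametrize(
    "workload, budget_seconds",
    [
        ("small", 1.0),
        # Collected everywhere, but skipped unless the benchmark flag is set in the environment.
        pytest.param("large", 60.0, marks=_MARK_SHORT_BM),
    ],
)
def test_runtime_budget(workload, budget_seconds):
    assert budget_seconds > 0

This is why the `ParityModuleCIFAR` case in the next hunk is collected on every run but only executed when the pipeline sets `PL_RUNNING_BENCHMARKS: 1`.
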
@@ -43,20 +48,16 @@ def assert_parity_absolute(pl_values, pt_values, norm_by: float = 1, max_diff: f

 # ParityModuleMNIST runs with num_workers=1
 @pytest.mark.parametrize(
-    'cls_model,max_diff_speed,max_diff_memory',
+    'cls_model,max_diff_speed,max_diff_memory,num_epochs,num_runs',
     [
-        (ParityModuleRNN, 0.05, 0.001),
-        (ParityModuleMNIST, 0.25, 0.001),  # todo: lower this thr
+        (ParityModuleRNN, 0.05, 0.001, 4, 3),
+        (ParityModuleMNIST, 0.25, 0.001, 4, 3),  # todo: lower this thr
+        pytest.param(ParityModuleCIFAR, 4.0, 0.0002, 2, 2, marks=_MARK_SHORT_BM),
     ]
 )
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
 def test_pytorch_parity(
-    tmpdir,
-    cls_model: LightningModule,
-    max_diff_speed: float,
-    max_diff_memory: float,
-    num_epochs: int = 4,
-    num_runs: int = 3,
+    tmpdir, cls_model: LightningModule, max_diff_speed: float, max_diff_memory: float, num_epochs: int, num_runs: int
 ):
     """
     Verify that the same pytorch and lightning models achieve the same results

@@ -29,7 +29,7 @@ import torchvision
 import torchvision.models as models
 import torchvision.transforms as T

-from pl_examples import cli_lightning_logo
+from pl_examples import _DATASETS_PATH, cli_lightning_logo
 from pytorch_lightning import LightningDataModule, LightningModule
 from pytorch_lightning.utilities.cli import LightningCLI

@@ -75,11 +75,13 @@ class CIFAR10DataModule(LightningDataModule):
     transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])

     def train_dataloader(self, *args, **kwargs):
-        trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=self.transform)
+        trainset = torchvision.datasets.CIFAR10(
+            root=_DATASETS_PATH, train=True, download=True, transform=self.transform
+        )
         return torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=0)

     def val_dataloader(self, *args, **kwargs):
-        valset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=self.transform)
+        valset = torchvision.datasets.CIFAR10(root=_DATASETS_PATH, train=False, download=True, transform=self.transform)
         return torch.utils.data.DataLoader(valset, batch_size=32, shuffle=True, num_workers=0)

@@ -19,8 +19,8 @@ import numpy as np
 _TEST_ROOT = os.path.dirname(__file__)
 _PROJECT_ROOT = os.path.dirname(_TEST_ROOT)
 _TEMP_PATH = os.path.join(_PROJECT_ROOT, 'test_temp')
-PATH_DATASETS = os.path.join(_PROJECT_ROOT, 'Datasets')
-PATH_LEGACY = os.path.join(_PROJECT_ROOT, 'legacy')
+_PATH_DATASETS = os.path.join(_PROJECT_ROOT, 'Datasets')
+_PATH_LEGACY = os.path.join(_PROJECT_ROOT, 'legacy')

 # todo: this setting `PYTHONPATH` may not be used by other evns like Conda for import packages
 if _PROJECT_ROOT not in os.getenv('PYTHONPATH', ""):

@@ -18,7 +18,7 @@ import torch.nn as nn
 import torch.nn.functional as F

 from pytorch_lightning.core.lightning import LightningModule
-from tests import PATH_DATASETS
+from tests import _PATH_DATASETS
 from tests.base.model_optimizers import ConfigureOptimizersPool
 from tests.base.model_test_dataloaders import TestDataloaderVariations
 from tests.base.model_test_epoch_ends import TestEpochEndVariations
@@ -59,7 +59,7 @@ class EvalModelTemplate(
         in_features: int = 28 * 28,
         learning_rate: float = 0.001 * 8,
         optimizer_name: str = 'adam',
-        data_root: str = PATH_DATASETS,
+        data_root: str = _PATH_DATASETS,
         out_features: int = 10,
         hidden_dim: int = 1000,
         b1: float = 0.5,
@@ -131,7 +131,7 @@ class EvalModelTemplate(
             in_features=28 * 28,
             learning_rate=0.001 * 8,
             optimizer_name='adam',
-            data_root=PATH_DATASETS,
+            data_root=_PATH_DATASETS,
             out_features=10,
             hidden_dim=1000,
             b1=0.5,

@@ -18,9 +18,9 @@ import sys
 import pytest

 from pytorch_lightning import Trainer
-from tests import PATH_LEGACY
+from tests import _PATH_LEGACY

-LEGACY_CHECKPOINTS_PATH = os.path.join(PATH_LEGACY, 'checkpoints')
+LEGACY_CHECKPOINTS_PATH = os.path.join(_PATH_LEGACY, 'checkpoints')
 CHECKPOINT_EXTENSION = ".ckpt"

@@ -16,11 +16,19 @@ import sys
 import threading
 from functools import partial, wraps
 from http.server import SimpleHTTPRequestHandler
+from pathlib import Path

 import pytest
 import torch.distributed
 import torch.multiprocessing as mp

+from tests import _PATH_DATASETS
+
+
+@pytest.fixture(scope="session")
+def datadir():
+    return Path(_PATH_DATASETS)
+

 @pytest.fixture(scope="function", autouse=True)
 def preserve_global_rank_variable():

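Aside (not part of the diff): a brief, hedged usage sketch of the new session-scoped `datadir` fixture; the test name and assertion are illustrative only.

from pathlib import Path


def test_datasets_dir_location(datadir: Path):
    # `datadir` resolves to `<project root>/Datasets`, i.e. `_PATH_DATASETS` from `tests/__init__.py`.
    assert datadir.name == "Datasets"
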
@@ -19,9 +19,14 @@ import torch.nn.functional as F
 from torch.utils.data import DataLoader

 from pytorch_lightning.core.lightning import LightningModule
-from tests import PATH_DATASETS
+from pytorch_lightning.utilities.imports import _TORCHVISION_AVAILABLE
+from tests import _PATH_DATASETS
 from tests.helpers.datasets import AverageDataset, MNIST, TrialMNIST

+if _TORCHVISION_AVAILABLE:
+    from torchvision import models, transforms
+    from torchvision.datasets import CIFAR10
+

 class Generator(nn.Module):

@@ -155,7 +160,7 @@ class BasicGAN(LightningModule):
         return [opt_g, opt_d], []

     def train_dataloader(self):
-        return DataLoader(TrialMNIST(root=PATH_DATASETS, train=True, download=True), batch_size=16)
+        return DataLoader(TrialMNIST(root=_PATH_DATASETS, train=True, download=True), batch_size=16)


 class ParityModuleRNN(LightningModule):

@@ -213,7 +218,47 @@ class ParityModuleMNIST(LightningModule):

     def train_dataloader(self):
         return DataLoader(MNIST(
-            root=PATH_DATASETS,
+            root=_PATH_DATASETS,
             train=True,
             download=True,
         ), batch_size=128, num_workers=1)
+
+
+class ParityModuleCIFAR(LightningModule):
+
+    def __init__(self, backbone="resnet101", hidden_dim=1024, learning_rate=1e-3, pretrained=True):
+        super().__init__()
+        self.save_hyperparameters()
+
+        self.learning_rate = learning_rate
+        self.num_classes = 10
+        self.backbone = getattr(models, backbone)(pretrained=pretrained)
+
+        self.classifier = torch.nn.Sequential(
+            torch.nn.Linear(1000, hidden_dim), torch.nn.Linear(hidden_dim, self.num_classes)
+        )
+        self.transform = transforms.Compose([
+            transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+        ])
+
+    def training_step(self, batch, batch_idx):
+        x, y = batch
+        y_hat = self.backbone(x)
+        y_hat = self.classifier(y_hat)
+        loss = F.cross_entropy(y_hat, y)
+        return {'loss': loss}
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+
+    def train_dataloader(self):
+        return DataLoader(
+            CIFAR10(
+                root=_PATH_DATASETS,
+                train=True,
+                download=True,
+                transform=self.transform,
+            ),
+            batch_size=32,
+            num_workers=1
+        )

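Aside (not part of the diff): a rough smoke-run sketch of the new `ParityModuleCIFAR`, assuming a CUDA machine and the Trainer arguments of the PyTorch Lightning 1.x line this commit targets; the benchmark itself instead times the module's Lightning loop against a hand-written PyTorch loop.

import torch

from pytorch_lightning import Trainer, seed_everything
from tests.helpers.advanced_models import ParityModuleCIFAR

if torch.cuda.is_available():
    seed_everything(42)
    # pretrained=False keeps the sketch light; the benchmark default is pretrained=True.
    model = ParityModuleCIFAR(backbone="resnet101", pretrained=False)
    trainer = Trainer(max_epochs=1, gpus=1, limit_train_batches=2)
    trainer.fit(model)
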
@@ -154,7 +154,7 @@ class BoringModel(LightningModule):

 class BoringDataModule(LightningDataModule):

-    def __init__(self, data_dir: str = './'):
+    def __init__(self, data_dir: str = "./"):
         super().__init__()
         self.data_dir = data_dir
         self.non_picklable = None

@@ -16,14 +16,14 @@ import pickle
 import cloudpickle
 import pytest

-from tests import PATH_DATASETS
+from tests import _PATH_DATASETS
 from tests.helpers.datasets import AverageDataset, MNIST, TrialMNIST


 @pytest.mark.parametrize(
     'dataset_cls,args', [
-        (MNIST, dict(root=PATH_DATASETS)),
-        (TrialMNIST, dict(root=PATH_DATASETS)),
+        (MNIST, dict(root=_PATH_DATASETS)),
+        (TrialMNIST, dict(root=_PATH_DATASETS)),
         (AverageDataset, {}),
     ]
 )