lightning/benchmarks/test_basic_parity.py

178 lines
5.8 KiB
Python
Raw Normal View History

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import time
import numpy as np
import pytest
import torch
from tqdm import tqdm
from pytorch_lightning import LightningModule, seed_everything, Trainer
from tests.helpers.advanced_models import ParityModuleMNIST, ParityModuleRNN
def assert_parity_relative(pl_values, pt_values, norm_by: float = 1, max_diff: float = 0.1):
    """Assert that Lightning measurements stay within a relative margin of the reference.

    The mean of ``pt_values`` is subtracted from every entry of ``pl_values``. The
    result is divided by ``norm_by`` and then by that same reference mean. The check
    passes when the mean of these relative differences is strictly below ``max_diff``.

    Args:
        pl_values: measurements collected from the Lightning runs.
        pt_values: measurements collected from the vanilla PyTorch runs.
        norm_by: divisor applied to the raw differences.
        max_diff: exclusive upper bound for the mean relative difference.

    Raises:
        AssertionError: if the mean relative difference is not below ``max_diff``.
    """
    reference = np.mean(pt_values)
    # offset from the reference, normalised, then expressed as a fraction of the reference
    diffs = (np.asarray(pl_values) - reference) / norm_by / reference
    assert np.mean(diffs) < max_diff, f"Lightning diff {diffs} was worse than vanilla PT (threshold {max_diff})"
def assert_parity_absolute(pl_values, pt_values, norm_by: float = 1, max_diff: float = 0.55):
    """Assert that Lightning measurements exceed the reference by less than an absolute margin.

    The mean of ``pt_values`` is subtracted from every entry of ``pl_values`` and the
    result is divided by ``norm_by``. The check passes when the mean of these
    normalised differences is strictly below ``max_diff``.

    Args:
        pl_values: measurements collected from the Lightning runs.
        pt_values: measurements collected from the vanilla PyTorch runs.
        norm_by: divisor applied to the raw differences (e.g. an event count).
        max_diff: exclusive upper bound for the mean normalised difference.

    Raises:
        AssertionError: if the mean normalised difference is not below ``max_diff``.
    """
    reference = np.mean(pt_values)
    # per-run overhead against the reference, scaled down by the event count
    diffs = (np.asarray(pl_values) - reference) / norm_by
    assert np.mean(diffs) < max_diff, f"Lightning {diffs} was worse than vanilla PT (threshold {max_diff})"
2020-11-27 18:36:50 +00:00
# ParityModuleMNIST runs with num_workers=1
@pytest.mark.parametrize(
    'cls_model,max_diff_speed,max_diff_memory',
    [
        (ParityModuleRNN, 0.05, 0.001),
        (ParityModuleMNIST, 0.25, 0.001),  # todo: lower this thr
    ]
)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
def test_pytorch_parity(
    tmpdir,
    cls_model: LightningModule,
    max_diff_speed: float,
    max_diff_memory: float,
    num_epochs: int = 4,
    num_runs: int = 3,
):
    """Check that Lightning matches a plain PyTorch loop in loss, speed and memory.

    The same model class is trained ``num_runs`` times with the Lightning loop and
    ``num_runs`` times with the vanilla loop. Three things are then verified:

    * the final loss of every run agrees to 5 decimal places,
    * the per-epoch duration overhead of Lightning stays below ``max_diff_speed``,
    * the relative memory overhead of Lightning stays below ``max_diff_memory``.

    Args:
        tmpdir: pytest fixture; not used in the body.
        cls_model: the model class (not an instance) to benchmark.
        max_diff_speed: threshold passed to ``assert_parity_absolute``.
        max_diff_memory: threshold passed to ``assert_parity_relative``.
        num_epochs: epochs per measured run; also used to normalise durations.
        num_runs: number of runs per loop kind.
    """
    lightning = measure_loops(cls_model, kind="PT Lightning", num_epochs=num_epochs, num_runs=num_runs)
    vanilla = measure_loops(cls_model, kind="Vanilla PT", num_epochs=num_epochs, num_runs=num_runs)

    # make sure the losses match to 5 decimal places
    print(f"Losses are for... \n vanilla: {vanilla['losses']} \n lightning: {lightning['losses']}")
    # zip() would silently ignore surplus entries, so check the run counts first
    assert len(lightning['losses']) == len(vanilla['losses']), "both loops must report the same number of runs"
    for run_idx, (pl_out, pt_out) in enumerate(zip(lightning['losses'], vanilla['losses'])):
        np.testing.assert_almost_equal(
            pl_out, pt_out, 5, err_msg=f"final loss of run {run_idx} differs between Lightning and vanilla PT"
        )

    # drop the first run for initialize dataset (download & filter)
    assert_parity_absolute(
        lightning['durations'][1:], vanilla['durations'][1:], norm_by=num_epochs, max_diff=max_diff_speed
    )

    assert_parity_relative(lightning['memory'], vanilla['memory'], max_diff=max_diff_memory)
def _hook_memory():
if torch.cuda.is_available():
torch.cuda.synchronize()
used_memory = torch.cuda.max_memory_allocated()
else:
used_memory = np.nan
return used_memory
def measure_loops(cls_model, kind, num_runs=10, num_epochs=10):
    """Run one kind of training loop several times and record loss, duration and memory.

    Args:
        cls_model: the model class handed to the selected loop function on every run.
        kind: ``"PT Lightning"`` selects ``lightning_loop``; any other value selects
            ``vanilla_loop``. Also used in the progress-bar description.
        num_runs: how many times the loop is executed.
        num_epochs: forwarded to the loop function.

    Returns:
        A dict with the keys ``'losses'``, ``'durations'`` and ``'memory'``. Each maps
        to a list holding one entry per run: the final loss returned by the loop, the
        wall-clock seconds spent in the loop, and the memory value returned by the loop.
    """
    hist_losses = []
    hist_durations = []
    hist_memory = []

    device_type = "cuda" if torch.cuda.is_available() else "cpu"
    torch.backends.cudnn.deterministic = True

    # the loop implementation is the same for every run, so choose it once
    # and keep the selection out of the timed region
    _loop = lightning_loop if kind == "PT Lightning" else vanilla_loop

    for i in tqdm(range(num_runs), desc=f'{kind} with {cls_model.__name__}'):
        gc.collect()
        if device_type == 'cuda':
            torch.cuda.empty_cache()
            # reset_peak_memory_stats() already clears every peak counter; the deprecated
            # reset_max_memory_cached()/reset_max_memory_allocated() only forward to it
            torch.cuda.reset_accumulated_memory_stats()
            torch.cuda.reset_peak_memory_stats()
        time.sleep(1)

        time_start = time.perf_counter()
        final_loss, used_memory = _loop(cls_model, idx=i, device_type=device_type, num_epochs=num_epochs)
        time_end = time.perf_counter()

        hist_losses.append(final_loss)
        hist_durations.append(time_end - time_start)
        hist_memory.append(used_memory)

    return {
        'losses': hist_losses,
        'durations': hist_durations,
        'memory': hist_memory,
    }
def vanilla_loop(cls_model, idx, device_type: str = 'cuda', num_epochs=10):
    """Train a fresh ``cls_model`` instance with a hand-written PyTorch loop.

    Args:
        cls_model: model class; it must provide ``train_dataloader``,
            ``configure_optimizers`` and ``training_step``.
        idx: run index. It is used as the random seed, and run ``0`` trains for a
            single epoch only.
        device_type: device string passed to ``torch.device``.
        num_epochs: number of epochs for every run with ``idx > 0``.

    Returns:
        A tuple ``(loss, memory)``: the loss value of the last batch of the last
        epoch, and the value returned by ``_hook_memory()``.
    """
    target = torch.device(device_type)

    # seeding comes first so that model creation is reproducible
    seed_everything(idx)

    # build model, data and optimizer, then move the model to the device
    model = cls_model()
    dl = model.train_dataloader()
    optimizer = model.configure_optimizers()
    model = model.to(target)

    # the first run is skipped by the caller, so a single epoch is enough for it
    epochs_to_run = num_epochs if idx > 0 else 1

    epoch_losses = []
    for _ in range(epochs_to_run):
        # one full pass over the training set
        for step, raw_batch in enumerate(dl):
            batch = [x.to(target) for x in raw_batch]
            loss = model.training_step(batch, step)['loss']
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # remember the loss of the last batch of this epoch
        epoch_losses.append(loss.item())

    final_loss = epoch_losses[-1]
    return final_loss, _hook_memory()
def lightning_loop(cls_model, idx, device_type: str = 'cuda', num_epochs=10):
    """Train a fresh ``cls_model`` instance with the Lightning ``Trainer``.

    Args:
        cls_model: model class to instantiate and fit.
        idx: run index. It is used as the random seed, and run ``0`` trains for a
            single epoch only.
        device_type: ``'cuda'`` requests one GPU; any other value requests none.
        num_epochs: number of epochs for every run with ``idx > 0``.

    Returns:
        A tuple ``(loss, memory)``: the last value of the trainer's running loss,
        and the value returned by ``_hook_memory()``.
    """
    seed_everything(idx)
    model = cls_model()

    # the first run is skipped by the caller, so a single epoch is enough for it
    epochs_to_run = num_epochs if idx > 0 else 1
    gpu_count = 1 if device_type == 'cuda' else 0

    # progress bar, summary, checkpointing and logging are all switched off
    trainer = Trainer(
        max_epochs=epochs_to_run,
        progress_bar_refresh_rate=0,
        weights_summary=None,
        gpus=gpu_count,
        checkpoint_callback=False,
        deterministic=True,
        logger=False,
        replace_sampler_ddp=False,
    )
    trainer.fit(model)

    last_loss = trainer.fit_loop.running_loss.last().item()
    return last_loss, _hook_memory()