658 lines
23 KiB
Python
658 lines
23 KiB
Python
# Copyright The PyTorch Lightning team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import os
|
|
from unittest import mock
|
|
|
|
import pytest
|
|
import torch
|
|
import torch.nn.functional as F
|
|
from torch.utils.data import DistributedSampler
|
|
|
|
from pytorch_lightning import Callback, seed_everything, Trainer
|
|
from pytorch_lightning.accelerators import IPUAccelerator
|
|
from pytorch_lightning.core.module import LightningModule
|
|
from pytorch_lightning.demos.boring_classes import BoringModel
|
|
from pytorch_lightning.plugins import IPUPrecisionPlugin
|
|
from pytorch_lightning.strategies.ipu import IPUStrategy
|
|
from pytorch_lightning.trainer.states import RunningStage, TrainerFn
|
|
from pytorch_lightning.trainer.supporters import CombinedLoader
|
|
from pytorch_lightning.utilities import _IPU_AVAILABLE
|
|
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
|
from tests_pytorch.helpers.datamodules import ClassifDataModule
|
|
from tests_pytorch.helpers.runif import RunIf
|
|
from tests_pytorch.helpers.simple_models import ClassificationModel
|
|
|
|
if _IPU_AVAILABLE:
|
|
import poptorch
|
|
|
|
|
|
class IPUModel(BoringModel):
|
|
def training_step(self, batch, batch_idx):
|
|
output = self(batch)
|
|
loss = self.loss(batch, output)
|
|
return loss
|
|
|
|
def validation_step(self, batch, batch_idx):
|
|
output = self(batch)
|
|
loss = self.loss(batch, output)
|
|
return loss
|
|
|
|
def test_step(self, batch, batch_idx):
|
|
output = self(batch)
|
|
loss = self.loss(batch, output)
|
|
return loss
|
|
|
|
def training_epoch_end(self, outputs) -> None:
|
|
pass
|
|
|
|
def validation_epoch_end(self, outputs) -> None:
|
|
pass
|
|
|
|
def test_epoch_end(self, outputs) -> None:
|
|
pass
|
|
|
|
|
|
class IPUClassificationModel(ClassificationModel):
|
|
def training_step(self, batch, batch_idx):
|
|
x, y = batch
|
|
logits = self(x)
|
|
loss = F.cross_entropy(logits, y)
|
|
return loss
|
|
|
|
def validation_step(self, batch, batch_idx):
|
|
x, y = batch
|
|
logits = self(x)
|
|
acc = self.accuracy(logits, y)
|
|
return acc
|
|
|
|
def test_step(self, batch, batch_idx):
|
|
x, y = batch
|
|
logits = self(x)
|
|
acc = self.accuracy(logits, y)
|
|
return acc
|
|
|
|
def accuracy(self, logits, y):
|
|
# todo (sean): currently IPU poptorch doesn't implicit convert bools to tensor
|
|
# hence we use an explicit calculation for accuracy here. Once fixed in poptorch
|
|
# we can use the accuracy metric.
|
|
acc = torch.sum(torch.eq(torch.argmax(logits, -1), y).to(torch.float32)) / len(y)
|
|
return acc
|
|
|
|
def validation_epoch_end(self, outputs) -> None:
|
|
self.log("val_acc", torch.stack(outputs).mean())
|
|
|
|
def test_epoch_end(self, outputs) -> None:
|
|
self.log("test_acc", torch.stack(outputs).mean())
|
|
|
|
|
|
def test_auto_device_count():
|
|
assert IPUAccelerator.auto_device_count() == 4
|
|
|
|
|
|
@pytest.mark.skipif(_IPU_AVAILABLE, reason="test requires non-IPU machine")
|
|
@mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True)
|
|
def test_fail_if_no_ipus(_, tmpdir):
|
|
with pytest.raises(MisconfigurationException, match="IPU Accelerator requires IPU devices to run"):
|
|
Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accelerator_selected(tmpdir):
|
|
assert IPUAccelerator.is_available()
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_warning_if_ipus_not_used():
|
|
with pytest.warns(UserWarning, match="IPU available but not used. Set `accelerator` and `devices`"):
|
|
Trainer(accelerator="cpu")
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_no_warning_strategy(tmpdir):
|
|
with pytest.warns(None) as record:
|
|
Trainer(default_root_dir=tmpdir, max_epochs=1, strategy=IPUStrategy(training_opts=poptorch.Options()))
|
|
assert len(record) == 0
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
@pytest.mark.parametrize("devices", [1, 4])
|
|
def test_all_stages(tmpdir, devices):
|
|
model = IPUModel()
|
|
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=devices)
|
|
trainer.fit(model)
|
|
trainer.validate(model)
|
|
trainer.test(model)
|
|
trainer.predict(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
@pytest.mark.parametrize("devices", [1, 4])
|
|
def test_inference_only(tmpdir, devices):
|
|
model = IPUModel()
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=devices)
|
|
trainer.validate(model)
|
|
trainer.test(model)
|
|
trainer.predict(model)
|
|
|
|
|
|
@RunIf(ipu=True, sklearn=True)
|
|
@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
|
|
def test_optimization(tmpdir):
|
|
seed_everything(42)
|
|
|
|
dm = ClassifDataModule(length=1024)
|
|
model = IPUClassificationModel()
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, accelerator="ipu", devices=2)
|
|
|
|
# fit model
|
|
trainer.fit(model, dm)
|
|
assert trainer.state.finished, f"Training failed with {trainer.state}"
|
|
assert dm.trainer is not None
|
|
|
|
# validate
|
|
result = trainer.validate(datamodule=dm)
|
|
assert dm.trainer is not None
|
|
assert result[0]["val_acc"] > 0.7
|
|
|
|
# test
|
|
result = trainer.test(model, datamodule=dm)
|
|
assert dm.trainer is not None
|
|
test_result = result[0]["test_acc"]
|
|
assert test_result > 0.6
|
|
|
|
# test saved model
|
|
model_path = os.path.join(tmpdir, "model.pt")
|
|
trainer.save_checkpoint(model_path)
|
|
|
|
model = IPUClassificationModel.load_from_checkpoint(model_path)
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=2)
|
|
|
|
result = trainer.test(model, datamodule=dm)
|
|
saved_result = result[0]["test_acc"]
|
|
assert saved_result == test_result
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_half_precision(tmpdir):
|
|
class TestCallback(Callback):
|
|
def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None:
|
|
assert trainer.strategy.model.precision == 16
|
|
raise SystemExit
|
|
|
|
model = IPUModel()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback()
|
|
)
|
|
assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin)
|
|
assert trainer.strategy.precision_plugin.precision == 16
|
|
with pytest.raises(SystemExit):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_pure_half_precision(tmpdir):
|
|
class TestCallback(Callback):
|
|
def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
|
|
assert trainer.strategy.precision_plugin.precision == 16
|
|
for param in trainer.strategy.model.parameters():
|
|
assert param.dtype == torch.float16
|
|
raise SystemExit
|
|
|
|
model = IPUModel()
|
|
model = model.half()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, precision=16, callbacks=TestCallback()
|
|
)
|
|
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin)
|
|
assert trainer.strategy.precision_plugin.precision == 16
|
|
|
|
changed_dtypes = [torch.float, torch.float64]
|
|
data = [torch.zeros((1), dtype=dtype) for dtype in changed_dtypes]
|
|
new_data = trainer.strategy.batch_to_device(data)
|
|
assert all(val.dtype is torch.half for val in new_data)
|
|
|
|
not_changed_dtypes = [torch.uint8, torch.int8, torch.int32, torch.int64]
|
|
data = [torch.zeros((1), dtype=dtype) for dtype in not_changed_dtypes]
|
|
new_data = trainer.strategy.batch_to_device(data)
|
|
assert all(val.dtype is dtype for val, dtype in zip(new_data, not_changed_dtypes))
|
|
|
|
with pytest.raises(SystemExit):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_device_iterations_ipu_strategy(tmpdir):
|
|
class TestCallback(Callback):
|
|
def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
|
|
assert trainer.strategy.device_iterations == 2
|
|
# assert device iterations has been set correctly within the poptorch options
|
|
poptorch_model = trainer.strategy.poptorch_models[RunningStage.TRAINING]
|
|
assert poptorch_model._options.toDict()["device_iterations"] == 2
|
|
raise SystemExit
|
|
|
|
model = IPUModel()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
fast_dev_run=True,
|
|
accelerator="ipu",
|
|
devices=1,
|
|
strategy=IPUStrategy(device_iterations=2),
|
|
callbacks=TestCallback(),
|
|
)
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
with pytest.raises(SystemExit):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accumulated_batches(tmpdir):
|
|
class TestCallback(Callback):
|
|
def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
|
|
# ensure the accumulation_scheduler is overridden to accumulate every batch
|
|
# since ipu handle accumulation
|
|
assert trainer.accumulation_scheduler.scheduling == {0: 1}
|
|
# assert poptorch option have been set correctly
|
|
poptorch_model = trainer.strategy.poptorch_models[RunningStage.TRAINING]
|
|
assert poptorch_model._options.Training.toDict()["gradient_accumulation"] == 2
|
|
raise SystemExit
|
|
|
|
model = IPUModel()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
fast_dev_run=True,
|
|
accelerator="ipu",
|
|
devices=1,
|
|
accumulate_grad_batches=2,
|
|
callbacks=TestCallback(),
|
|
)
|
|
with pytest.raises(SystemExit):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_stages_correct(tmpdir):
|
|
"""Ensure all stages correctly are traced correctly by asserting the output for each stage."""
|
|
|
|
class StageModel(IPUModel):
|
|
def training_step(self, batch, batch_idx):
|
|
loss = super().training_step(batch, batch_idx)
|
|
# tracing requires a loss value that depends on the model.
|
|
# force it to be a value but ensure we use the loss.
|
|
return (loss - loss) + torch.tensor(1)
|
|
|
|
def validation_step(self, batch, batch_idx):
|
|
loss = super().validation_step(batch, batch_idx)
|
|
return (loss - loss) + torch.tensor(2)
|
|
|
|
def test_step(self, batch, batch_idx):
|
|
loss = super().validation_step(batch, batch_idx)
|
|
return (loss - loss) + torch.tensor(3)
|
|
|
|
def predict_step(self, batch, batch_idx, dataloader_idx=0):
|
|
output = super().predict_step(batch, batch_idx)
|
|
return (output - output) + torch.tensor(4)
|
|
|
|
class TestCallback(Callback):
|
|
def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx) -> None:
|
|
assert outputs["loss"].item() == 1
|
|
|
|
def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None:
|
|
assert outputs.item() == 2
|
|
|
|
def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None:
|
|
assert outputs.item() == 3
|
|
|
|
def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx) -> None:
|
|
assert torch.all(outputs == 4).item()
|
|
|
|
model = StageModel()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir, fast_dev_run=True, accelerator="ipu", devices=1, callbacks=TestCallback()
|
|
)
|
|
trainer.fit(model)
|
|
trainer.test(model)
|
|
trainer.validate(model)
|
|
trainer.predict(model, model.test_dataloader())
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_different_accumulate_grad_batches_fails(tmpdir):
|
|
model = IPUModel()
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1, accumulate_grad_batches={1: 2})
|
|
with pytest.raises(
|
|
MisconfigurationException, match="IPUs currently does not support different `accumulate_grad_batches`"
|
|
):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_clip_gradients_fails(tmpdir):
|
|
model = IPUModel()
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1, gradient_clip_val=10)
|
|
with pytest.raises(MisconfigurationException, match="IPUs currently do not support clipping gradients."):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_autoreport(tmpdir):
|
|
"""Ensure autoreport dumps to a file."""
|
|
model = IPUModel()
|
|
autoreport_path = os.path.join(tmpdir, "report/")
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
accelerator="ipu",
|
|
devices=1,
|
|
fast_dev_run=True,
|
|
strategy=IPUStrategy(autoreport=True, autoreport_dir=autoreport_path),
|
|
)
|
|
trainer.fit(model)
|
|
assert os.path.exists(autoreport_path)
|
|
assert os.path.isfile(autoreport_path + "training/profile.pop")
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_manual_poptorch_dataloader(tmpdir):
|
|
model_options = poptorch.Options()
|
|
|
|
class IPUTestModel(IPUModel):
|
|
def train_dataloader(self):
|
|
dataloader = super().train_dataloader()
|
|
# save to instance to compare the reference later
|
|
self.poptorch_dataloader = poptorch.DataLoader(model_options, dataloader.dataset, drop_last=True)
|
|
return self.poptorch_dataloader
|
|
|
|
model = IPUTestModel()
|
|
other_options = poptorch.Options()
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
fast_dev_run=True,
|
|
accelerator="ipu",
|
|
devices=2,
|
|
strategy=IPUStrategy(training_opts=other_options),
|
|
)
|
|
trainer.fit(model)
|
|
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
assert trainer.strategy.training_opts is other_options
|
|
dataloader = trainer.train_dataloader.loaders
|
|
assert dataloader is model.poptorch_dataloader # exact object, was not recreated
|
|
# dataloader uses the options in the model, not the strategy
|
|
assert dataloader.options is model_options
|
|
assert dataloader.options is not other_options
|
|
assert dataloader.drop_last # was kept
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_manual_poptorch_opts(tmpdir):
|
|
"""Ensure if the user passes manual poptorch Options, we run with the correct object."""
|
|
model = IPUModel()
|
|
inference_opts = poptorch.Options()
|
|
training_opts = poptorch.Options()
|
|
|
|
trainer = Trainer(
|
|
default_root_dir=tmpdir,
|
|
accelerator="ipu",
|
|
devices=2,
|
|
fast_dev_run=True,
|
|
strategy=IPUStrategy(inference_opts=inference_opts, training_opts=training_opts),
|
|
)
|
|
trainer.fit(model)
|
|
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
assert trainer.strategy.training_opts == training_opts
|
|
assert trainer.strategy.inference_opts == inference_opts
|
|
|
|
dataloader = trainer.train_dataloader.loaders
|
|
assert isinstance(dataloader, poptorch.DataLoader)
|
|
assert dataloader.options == training_opts
|
|
assert trainer.num_devices > 1 # testing this only makes sense in a distributed setting
|
|
assert not isinstance(dataloader.sampler, DistributedSampler)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_manual_poptorch_opts_custom(tmpdir):
|
|
"""Ensure if the user passes manual poptorch Options with custom parameters set, we respect them in our
|
|
poptorch options and the dataloaders."""
|
|
|
|
model = IPUModel()
|
|
training_opts = poptorch.Options()
|
|
training_opts.deviceIterations(8)
|
|
training_opts.replicationFactor(2)
|
|
training_opts.Training.gradientAccumulation(2)
|
|
|
|
inference_opts = poptorch.Options()
|
|
inference_opts.deviceIterations(16)
|
|
inference_opts.replicationFactor(1)
|
|
inference_opts.Training.gradientAccumulation(1)
|
|
|
|
class TestCallback(Callback):
|
|
def on_fit_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
|
|
# ensure dataloaders were correctly set up during training.
|
|
strategy = trainer.strategy
|
|
assert isinstance(strategy, IPUStrategy)
|
|
assert strategy.training_opts.replication_factor == 2
|
|
assert strategy.inference_opts.replication_factor == 1
|
|
|
|
val_dataloader = trainer.val_dataloaders[0]
|
|
train_dataloader = trainer.train_dataloader
|
|
assert isinstance(train_dataloader, CombinedLoader)
|
|
train_dataloader = train_dataloader.loaders
|
|
assert isinstance(val_dataloader, poptorch.DataLoader)
|
|
assert isinstance(train_dataloader, poptorch.DataLoader)
|
|
assert train_dataloader.options.replication_factor == 2
|
|
assert val_dataloader.options.replication_factor == 1
|
|
|
|
strategy = IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
|
|
# ensure we default to the training options replication factor
|
|
assert strategy.replication_factor == 2
|
|
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, strategy=strategy, callbacks=TestCallback())
|
|
trainer.fit(model)
|
|
|
|
strategy = trainer.strategy
|
|
assert isinstance(strategy, IPUStrategy)
|
|
|
|
training_opts = strategy.training_opts
|
|
assert training_opts.device_iterations == 8
|
|
assert training_opts.replication_factor == 2
|
|
assert training_opts.Training.gradient_accumulation == 2
|
|
|
|
inference_opts = strategy.inference_opts
|
|
assert inference_opts.device_iterations == 16
|
|
assert inference_opts.replication_factor == 1
|
|
assert inference_opts.Training.gradient_accumulation == 1
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_replication_factor(tmpdir):
|
|
"""Ensure if the user passes manual poptorch Options with custom parameters set, we set them correctly in the
|
|
dataloaders."""
|
|
|
|
strategy = IPUStrategy()
|
|
trainer = Trainer(accelerator="ipu", devices=2, default_root_dir=tmpdir, fast_dev_run=True, strategy=strategy)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
assert trainer.num_devices == 2
|
|
assert trainer.strategy.replication_factor == 2
|
|
|
|
model = BoringModel()
|
|
training_opts = poptorch.Options()
|
|
inference_opts = poptorch.Options()
|
|
training_opts.replicationFactor(8)
|
|
inference_opts.replicationFactor(7)
|
|
strategy = IPUStrategy(inference_opts=inference_opts, training_opts=training_opts)
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1, strategy=strategy)
|
|
trainer.optimizers = model.configure_optimizers()[0]
|
|
strategy._lightning_module = model
|
|
model.trainer = trainer
|
|
trainer.state.fn = TrainerFn.FITTING
|
|
trainer.strategy.setup(trainer)
|
|
|
|
trainer.state.stage = RunningStage.TRAINING
|
|
assert trainer.strategy.replication_factor == 8
|
|
trainer.state.stage = RunningStage.VALIDATING
|
|
assert trainer.strategy.replication_factor == 7
|
|
|
|
for fn, stage in (
|
|
(TrainerFn.VALIDATING, RunningStage.VALIDATING),
|
|
(TrainerFn.TESTING, RunningStage.TESTING),
|
|
(TrainerFn.PREDICTING, RunningStage.PREDICTING),
|
|
):
|
|
trainer.state.fn = fn
|
|
trainer.state.stage = stage
|
|
trainer.strategy.setup(trainer)
|
|
assert trainer.strategy.replication_factor == 7
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_default_opts(tmpdir):
|
|
"""Ensure default opts are set correctly in the IPUStrategy."""
|
|
|
|
model = IPUModel()
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1, fast_dev_run=True)
|
|
trainer.fit(model)
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
inference_opts = trainer.strategy.inference_opts
|
|
training_opts = trainer.strategy.training_opts
|
|
for opts in (inference_opts, training_opts):
|
|
assert isinstance(opts, poptorch.Options)
|
|
assert opts.Training.gradient_accumulation == 1
|
|
assert opts.device_iterations == 1
|
|
assert opts.replication_factor == 1
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_multi_optimizers_fails(tmpdir):
|
|
"""Ensure if there are multiple optimizers, we throw an exception."""
|
|
|
|
class TestModel(IPUModel):
|
|
def configure_optimizers(self):
|
|
return [torch.optim.Adam(self.parameters()), torch.optim.Adam(self.parameters())]
|
|
|
|
model = TestModel()
|
|
|
|
trainer = Trainer(default_root_dir=tmpdir, accelerator="ipu", devices=1)
|
|
with pytest.raises(MisconfigurationException, match="IPUs currently only support one optimizer."):
|
|
trainer.fit(model)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_precision_plugin():
|
|
"""Ensure precision plugin value is set correctly."""
|
|
|
|
plugin = IPUPrecisionPlugin(precision=16)
|
|
assert plugin.precision == 16
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accelerator_ipu():
|
|
trainer = Trainer(accelerator="ipu", devices=1)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
|
|
trainer = Trainer(accelerator="ipu")
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
|
|
trainer = Trainer(accelerator="auto", devices=8)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accelerator_ipu_with_devices():
|
|
|
|
trainer = Trainer(accelerator="ipu", devices=8)
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
assert trainer.num_devices == 8
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accelerator_auto_with_devices_ipu():
|
|
trainer = Trainer(accelerator="auto", devices=8)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
assert trainer.num_devices == 8
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_accelerator_ipu_with_ipus_priority():
|
|
"""Test for checking `ipus` flag takes priority over `devices`."""
|
|
|
|
ipus = 8
|
|
with pytest.warns(UserWarning, match="The flag `devices=1` will be ignored,"):
|
|
trainer = Trainer(accelerator="ipu", devices=1, ipus=ipus)
|
|
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
assert trainer.num_devices == ipus
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_set_devices_if_none_ipu():
|
|
with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v2.0."):
|
|
trainer = Trainer(accelerator="ipu", ipus=8)
|
|
assert trainer.num_devices == 8
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_strategy_choice_ipu_strategy():
|
|
trainer = Trainer(strategy=IPUStrategy(), accelerator="ipu", devices=8)
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_device_type_when_ipu_strategy_passed():
|
|
trainer = Trainer(strategy=IPUStrategy(), accelerator="ipu", devices=8)
|
|
assert isinstance(trainer.strategy, IPUStrategy)
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_poptorch_models_at_different_stages(tmpdir):
|
|
strategy = IPUStrategy()
|
|
trainer = Trainer(default_root_dir=tmpdir, strategy=strategy, accelerator="ipu", devices=8)
|
|
model = BoringModel()
|
|
model.trainer = trainer
|
|
strategy._lightning_module = model
|
|
|
|
trainer.optimizers = model.configure_optimizers()[0]
|
|
trainer.state.fn = TrainerFn.FITTING
|
|
trainer.strategy.setup(trainer)
|
|
assert list(trainer.strategy.poptorch_models) == [
|
|
RunningStage.TRAINING,
|
|
RunningStage.VALIDATING,
|
|
RunningStage.SANITY_CHECKING,
|
|
]
|
|
|
|
for fn, stage in (
|
|
(TrainerFn.VALIDATING, RunningStage.VALIDATING),
|
|
(TrainerFn.TESTING, RunningStage.TESTING),
|
|
(TrainerFn.PREDICTING, RunningStage.PREDICTING),
|
|
):
|
|
trainer.state.fn = fn
|
|
trainer.state.stage = stage
|
|
trainer.strategy.setup(trainer)
|
|
assert list(trainer.strategy.poptorch_models) == [stage]
|
|
|
|
|
|
@RunIf(ipu=True)
|
|
def test_devices_auto_choice_ipu():
|
|
trainer = Trainer(accelerator="auto", devices="auto")
|
|
assert trainer.num_devices == 4
|
|
assert isinstance(trainer.accelerator, IPUAccelerator)
|