2021-03-23 20:43:21 +00:00
|
|
|
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
2021-09-06 12:49:09 +00:00
|
|
|
"""This script will generate 2 traces: one for `training_step` and one for `validation_step`. The traces can be
|
|
|
|
visualized in 2 ways:
|
|
|
|
|
2021-03-23 20:43:21 +00:00
|
|
|
* With Chrome:
|
|
|
|
1. Open Chrome and copy/paste this url: `chrome://tracing/`.
|
|
|
|
2. Once tracing opens, click on `Load` at the top-right and load one of the generated traces.
|
|
|
|
* With PyTorch Tensorboard Profiler (Instructions are here: https://github.com/pytorch/kineto/tree/master/tb_plugin)
|
|
|
|
1. pip install tensorboard torch-tb-profiler
|
|
|
|
2. tensorboard --logdir={FOLDER}
|
|
|
|
"""
|
|
|
|
|
|
|
|
import sys
|
2022-06-15 12:53:51 +00:00
|
|
|
from os import path
|
2021-03-23 20:43:21 +00:00
|
|
|
|
|
|
|
import torch
|
|
|
|
import torchvision
|
|
|
|
import torchvision.models as models
|
|
|
|
import torchvision.transforms as T
|
|
|
|
|
2022-06-15 12:53:51 +00:00
|
|
|
from pytorch_lightning import cli_lightning_logo, LightningDataModule, LightningModule
|
2022-06-22 14:36:31 +00:00
|
|
|
from pytorch_lightning.profilers.pytorch import PyTorchProfiler
|
2021-04-15 15:01:16 +00:00
|
|
|
from pytorch_lightning.utilities.cli import LightningCLI
|
2021-03-23 20:43:21 +00:00
|
|
|
|
|
|
|
# Arguments appended to ``sys.argv`` when the script is launched without any
# command-line options.
DEFAULT_CMD_LINE = (
    "fit",
    "--trainer.max_epochs=1",
    "--trainer.limit_train_batches=15",
    "--trainer.limit_val_batches=15",
    "--trainer.profiler=pytorch",
    # Select the accelerator from what the host actually offers. The previous
    # defaults always asked for `gpu` while deriving the device count from
    # `int(torch.cuda.is_available())`, i.e. "gpu with 0 devices" on a CPU-only
    # machine. On a CUDA machine the resulting arguments are unchanged.
    f"--trainer.accelerator={'gpu' if torch.cuda.is_available() else 'cpu'}",
    "--trainer.devices=1",
)

# Dataset root passed to torchvision: a `Datasets` folder two levels above this file.
DATASETS_PATH = path.join(path.dirname(__file__), "..", "..", "Datasets")
|
2021-03-23 20:43:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
class ModelToProfile(LightningModule):
    """Classifier from ``torchvision.models`` wrapped as a ``LightningModule``.

    Args:
        name: Attribute name looked up on ``torchvision.models`` and called to build the network.
        automatic_optimization: If ``True``, ``training_step`` is bound to
            ``automatic_optimization_training_step``; otherwise to
            ``manual_optimization_training_step``, which drives the optimizer itself.
    """

    def __init__(self, name: str = "resnet18", automatic_optimization: bool = True):
        super().__init__()
        build_model = getattr(models, name)
        self.model = build_model(pretrained=True)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.automatic_optimization = automatic_optimization
        # Bind the training step that matches the requested optimization mode.
        if automatic_optimization:
            self.training_step = self.automatic_optimization_training_step
        else:
            self.training_step = self.manual_optimization_training_step

    def automatic_optimization_training_step(self, batch, batch_idx):
        """Compute the loss for an ``(inputs, labels)`` batch, log it as ``train_loss`` and return it."""
        images, targets = batch
        loss = self.criterion(self.model(images), targets)
        self.log("train_loss", loss)
        return loss

    def manual_optimization_training_step(self, batch, batch_idx):
        """Perform one optimization step by hand: zero grads, forward, log, backward, step."""
        optimizer = self.optimizers()
        optimizer.zero_grad()
        images, targets = batch
        loss = self.criterion(self.model(images), targets)
        self.log("train_loss", loss)
        self.manual_backward(loss)
        optimizer.step()

    def validation_step(self, batch, batch_idx):
        """Compute the loss for an ``(inputs, labels)`` batch and log it as ``val_loss``."""
        images, targets = batch
        self.log("val_loss", self.criterion(self.model(images), targets))

    def predict_step(self, batch, batch_idx, dataloader_idx: int = None):
        """Return the model output for the first element of ``batch``."""
        return self.model(batch[0])

    def configure_optimizers(self):
        """Return an SGD optimizer over all parameters (lr=0.001, momentum=0.9)."""
        optimizer = torch.optim.SGD(self.parameters(), lr=0.001, momentum=0.9)
        return optimizer
|
|
|
|
|
|
|
|
|
|
|
|
class CIFAR10DataModule(LightningDataModule):
    """Data module serving CIFAR10 train and validation loaders.

    Both loaders read from ``DATASETS_PATH`` (downloading the data if needed),
    apply the class-level ``transform`` and use ``batch_size=2`` with
    ``num_workers=0``.
    """

    # Resize to 256, center-crop to 224 and convert to a tensor.
    transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()])

    def _cifar10_loader(self, train: bool, shuffle: bool):
        """Build a CIFAR10 ``DataLoader`` for the requested split.

        Args:
            train: Forwarded to ``torchvision.datasets.CIFAR10`` to pick the split.
            shuffle: Forwarded to the ``DataLoader``.
        """
        dataset = torchvision.datasets.CIFAR10(
            root=DATASETS_PATH, train=train, download=True, transform=self.transform
        )
        return torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=shuffle, num_workers=0)

    def train_dataloader(self, *args, **kwargs):
        """Return the shuffled loader over the CIFAR10 training split."""
        return self._cifar10_loader(train=True, shuffle=True)

    def val_dataloader(self, *args, **kwargs):
        """Return the loader over the CIFAR10 validation split.

        Shuffling is disabled here: evaluation should see the same batches on
        every run, which also keeps the profiled validation trace reproducible.
        """
        return self._cifar10_loader(train=False, shuffle=False)
|
2021-03-23 20:43:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
def cli_main():
    """Run ``LightningCLI`` for ``ModelToProfile`` and ``CIFAR10DataModule``.

    When the script was started without any command-line arguments, the
    entries of ``DEFAULT_CMD_LINE`` are appended to ``sys.argv`` first.
    """
    launched_without_args = len(sys.argv) == 1
    if launched_without_args:
        sys.argv.extend(DEFAULT_CMD_LINE)

    LightningCLI(
        ModelToProfile,
        CIFAR10DataModule,
        save_config_overwrite=True,
        trainer_defaults={"profiler": PyTorchProfiler()},
    )
|
2021-03-23 20:43:21 +00:00
|
|
|
|
|
|
|
|
2021-07-26 11:37:35 +00:00
|
|
|
if __name__ == "__main__":
    # Script entry point: print the Lightning logo, then run the CLI.
    cli_lightning_logo()
    cli_main()
|