Disable quantization aware training observers (#8540)

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: tchaton <thomas@grid.ai>
Co-authored-by: rohitgr7 <rohitgr1998@gmail.com>
manipopopo 2021-10-25 23:46:09 +08:00 committed by GitHub
parent f8a7f3fde0
commit cfb2d87765
3 changed files with 207 additions and 8 deletions


@@ -328,13 +328,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- `pytorch_lightning.utilities.grads.grad_norm` now raises an exception if parameter `norm_type <= 0` ([#9765](https://github.com/PyTorchLightning/pytorch-lightning/pull/9765))
- Updated error message for interactive incompatible plugins ([#9896](https://github.com/PyTorchLightning/pytorch-lightning/pull/9896))
- Updated several places in the loops and trainer to access `training_type_plugin` directly instead of `accelerator` ([#9901](https://github.com/PyTorchLightning/pytorch-lightning/pull/9901))
- Disable quantization aware training observers by default during validating/testing/predicting stages ([#8540](https://github.com/PyTorchLightning/pytorch-lightning/pull/8540))
### Deprecated
@@ -409,6 +410,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Deprecated `GPUStatsMonitor` and `XLAStatsMonitor` in favor of `DeviceStatsMonitor` callback ([#9924](https://github.com/PyTorchLightning/pytorch-lightning/pull/9924))
### Removed
- Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/))
@@ -611,7 +613,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `LearningRateMonitor` logging with multiple param groups optimizer with no scheduler ([#10044](https://github.com/PyTorchLightning/pytorch-lightning/pull/10044))
- Fixed undesired side effects being caused by `Trainer` patching dataloader methods on the `LightningModule` ([#9764](https://github.com/PyTorchLightning/pytorch-lightning/pull/9764))
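
The changelog entry above flips a default: fake-quantization observers now calibrate only during training and are switched off while validating, testing and predicting. A minimal usage sketch of the new `observer_enabled_stages` argument (illustration only, not part of this commit; `model` and `dm` are hypothetical):

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import QuantizationAwareTraining

    # Default after this commit: observers calibrate during the "train" stage only.
    qcb = QuantizationAwareTraining()

    # Opt back into calibration during validation as well, closer to the old
    # behaviour where observers stayed enabled in every stage.
    qcb = QuantizationAwareTraining(observer_enabled_stages=("train", "validate"))

    trainer = Trainer(callbacks=[qcb], max_epochs=3)
    trainer.fit(model, datamodule=dm)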


@@ -16,10 +16,20 @@ Quantization
^^^^^^^^^^^^
"""
import copy
import functools
from typing import Any, Callable, Dict, Optional, Sequence, Union
import torch
from torch import Tensor
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
if _TORCH_GREATER_EQUAL_1_8:
from torch.quantization import FakeQuantizeBase
else:
# For torch 1.6 and 1.7.
from torch.quantization import FakeQuantize as FakeQuantizeBase
import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
@@ -126,11 +136,25 @@ class QuantizationAwareTraining(Callback):
quantize_on_fit_end: perform the quantization in `on_fit_end`.
Note that once converted, the model cannot be put in training mode again.
observer_enabled_stages: allow fake-quantization modules' observers to do calibration during provided stages:
- ``'train'``: the observers can do calibration during training.
- ``'validate'``: the observers can do calibration during validating.
Note that we don't disable observers during the sanity check as the model hasn't been calibrated with
training data yet. After the sanity check, the fake-quantization modules are restored to initial states.
- ``'test'``: the observers can do calibration during testing.
- ``'predict'``: the observers can do calibration during predicting.
Note that we only handle observers belonging to fake-quantization modules. When ``qconfig`` is a ``str`` and
``observer_type`` is ``'histogram'``, the observers won't belong to any fake-quantization modules and will
not be controlled by the callback.
.. _PyTorch Quantization: https://pytorch.org/docs/stable/quantization.html#quantization-aware-training
.. _torch.quantization.QConfig: https://pytorch.org/docs/stable/torch.quantization.html#torch.quantization.QConfig
"""
OBSERVER_TYPES = ("histogram", "average")
OBSERVER_STAGES = ("train", "validate", "test", "predict")
def __init__(
self,
@@ -140,6 +164,7 @@ class QuantizationAwareTraining(Callback):
modules_to_fuse: Optional[Sequence] = None,
input_compatible: bool = True,
quantize_on_fit_end: bool = True,
observer_enabled_stages: Sequence[str] = ("train",),
) -> None:
_valid_qconf_str = isinstance(qconfig, str) and qconfig in torch.backends.quantized.supported_engines
if not isinstance(qconfig, QConfig) and not _valid_qconf_str:
@@ -163,9 +188,20 @@
self.modules_to_fuse = modules_to_fuse
self._input_compatible = input_compatible
self._convert_on_fit_end = quantize_on_fit_end
observer_enabled_stages = set(observer_enabled_stages)
unsupported_stages = observer_enabled_stages - set(self.OBSERVER_STAGES)
if unsupported_stages:
raise MisconfigurationException(
f'Unsupported stages "{tuple(sorted(unsupported_stages))}", allowed are {self.OBSERVER_STAGES}.'
)
self._observer_disabled_stages = set(self.OBSERVER_STAGES) - observer_enabled_stages
self._forward_calls = 0
self._fake_quant_to_initial_state_dict = {}
self._last_fake_quant_to_observer_enabled = {}
def _check_feasible_fuse(self, model: "pl.LightningModule") -> bool:
if not self.modules_to_fuse:
return False
for group in self.modules_to_fuse:
@@ -175,7 +211,20 @@
)
return True
def _collect_observer_enabled(self) -> Dict[FakeQuantizeBase, Tensor]:
return {
fake_quant: fake_quant.observer_enabled.clone() for fake_quant in self._fake_quant_to_initial_state_dict
}
def _disable_observer(self, pl_module: "pl.LightningModule") -> None:
self._last_fake_quant_to_observer_enabled = self._collect_observer_enabled()
pl_module.apply(torch.quantization.disable_observer)
def _restore_last_observer_enabled(self) -> None:
for fake_quant, observer_enabled in self._last_fake_quant_to_observer_enabled.items():
fake_quant.observer_enabled.copy_(observer_enabled)
def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
# QuantStub converts tensors from floating point to quantized
pl_module.quant = torch.quantization.QuantStub()
# DeQuantStub converts tensors from quantized to floating point
@@ -209,7 +258,12 @@
# the model that will observe weight and activation tensors during calibration.
torch.quantization.prepare_qat(pl_module, inplace=True)
fake_quants = tuple(module for module in pl_module.modules() if isinstance(module, FakeQuantizeBase))
self._fake_quant_to_initial_state_dict = {
fake_quant: copy.deepcopy(fake_quant.state_dict()) for fake_quant in fake_quants
}
def on_fit_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if not self._convert_on_fit_end:
pl_module.forward = self.__module_forward
return
@@ -224,3 +278,43 @@
pl_module.forward = wrap_quantize_forward_context(model=pl_module, func=self.__module_forward)
else:
pl_module.forward = self.__module_forward
def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "train" in self._observer_disabled_stages:
self._disable_observer(pl_module)
def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "train" in self._observer_disabled_stages:
self._restore_last_observer_enabled()
def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "validate" in self._observer_disabled_stages and not trainer.sanity_checking:
# ``torch.quantization.MovingAveragePerChannelMinMaxObserver`` and ``torch.quantization.HistogramObserver``
# need to see at least one batch to infer the shapes of quantization ``scale`` and ``zero_point``. So we
# don't disable observers during the sanity check so that they can infer the shapes of quantization
# parameters with validation data.
self._disable_observer(pl_module)
def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "validate" in self._observer_disabled_stages:
if trainer.sanity_checking:
for fake_quant, state_dict in self._fake_quant_to_initial_state_dict.items():
fake_quant.load_state_dict(state_dict)
else:
self._restore_last_observer_enabled()
def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "test" in self._observer_disabled_stages:
self._disable_observer(pl_module)
def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "test" in self._observer_disabled_stages:
self._restore_last_observer_enabled()
def on_predict_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "predict" in self._observer_disabled_stages:
self._disable_observer(pl_module)
def on_predict_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
if "predict" in self._observer_disabled_stages:
self._restore_last_observer_enabled()
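
The observer handling added above follows a standard PyTorch pattern: snapshot each fake-quant module's `observer_enabled` buffer, apply `torch.quantization.disable_observer` for the duration of the stage, then copy the saved flags back so observers the user had already disabled stay disabled. A standalone sketch of that pattern (assumptions: torch >= 1.8 for `FakeQuantizeBase`, and hypothetical names `run_eval_without_calibration`, `eval_fn` and a QAT-prepared `model`):

    import torch
    from torch.quantization import FakeQuantizeBase

    def run_eval_without_calibration(model, eval_fn):
        # Fake-quantization modules inserted by ``torch.quantization.prepare_qat``.
        fake_quants = [m for m in model.modules() if isinstance(m, FakeQuantizeBase)]
        # Snapshot the ``observer_enabled`` buffers before touching them.
        saved = {fq: fq.observer_enabled.clone() for fq in fake_quants}
        # Turn observers off so evaluation data cannot shift scale/zero_point.
        model.apply(torch.quantization.disable_observer)
        try:
            return eval_fn(model)
        finally:
            # Restore the previous flags exactly as they were.
            for fq, enabled in saved.items():
                fq.observer_enabled.copy_(enabled)

Note the extra care in `on_validation_start`/`on_validation_end` above: observers stay enabled during the sanity check so histogram and per-channel observers can see at least one batch and infer the shapes of `scale` and `zero_point`, and the fake-quant modules are then reset to their initial state dicts before real training begins.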


@@ -21,11 +21,19 @@ from torchmetrics.functional import mean_absolute_percentage_error as mape
from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.callbacks import QuantizationAwareTraining
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
from pytorch_lightning.utilities.memory import get_model_size_mb
from tests.helpers.boring_model import RandomDataset
from tests.helpers.datamodules import RegressDataModule
from tests.helpers.runif import RunIf
from tests.helpers.simple_models import RegressionModel
if _TORCH_GREATER_EQUAL_1_8:
from torch.quantization import FakeQuantizeBase
else:
# For torch 1.6 and 1.7.
from torch.quantization import FakeQuantize as FakeQuantizeBase
@pytest.mark.parametrize("observe", ["average", "histogram"]) @pytest.mark.parametrize("observe", ["average", "histogram"])
@pytest.mark.parametrize("fuse", [True, False]) @pytest.mark.parametrize("fuse", [True, False])
@ -45,7 +53,12 @@ def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
org_score = torch.mean(torch.tensor([mape(model(x), y) for x, y in dm.test_dataloader()])) org_score = torch.mean(torch.tensor([mape(model(x), y) for x, y in dm.test_dataloader()]))
fusing_layers = [(f"layer_{i}", f"layer_{i}a") for i in range(3)] if fuse else None fusing_layers = [(f"layer_{i}", f"layer_{i}a") for i in range(3)] if fuse else None
qcb = QuantizationAwareTraining(
observer_type=observe,
modules_to_fuse=fusing_layers,
quantize_on_fit_end=convert,
observer_enabled_stages=("train", "validate"),
)
trainer = Trainer(callbacks=[qcb], **trainer_args)
trainer.fit(qmodel, datamodule=dm)
@@ -105,6 +118,9 @@ def test_quantization_exceptions(tmpdir):
with pytest.raises(MisconfigurationException, match="Unsupported `collect_quantization`"):
QuantizationAwareTraining(collect_quantization=1.2)
with pytest.raises(MisconfigurationException, match="Unsupported stages"):
QuantizationAwareTraining(observer_enabled_stages=("abc",))
fusing_layers = [(f"layers.mlp_{i}", f"layers.NONE-mlp_{i}a") for i in range(3)] fusing_layers = [(f"layers.mlp_{i}", f"layers.NONE-mlp_{i}a") for i in range(3)]
qcb = QuantizationAwareTraining(modules_to_fuse=fusing_layers) qcb = QuantizationAwareTraining(modules_to_fuse=fusing_layers)
trainer = Trainer(callbacks=[qcb], default_root_dir=tmpdir, max_epochs=1) trainer = Trainer(callbacks=[qcb], default_root_dir=tmpdir, max_epochs=1)
@ -140,3 +156,91 @@ def test_quantization_triggers(tmpdir, trigger_fn: Union[None, int, Callable], e
trainer.fit(qmodel, datamodule=dm) trainer.fit(qmodel, datamodule=dm)
assert qcb._forward_calls == expected_count assert qcb._forward_calls == expected_count
def _get_observer_enabled(fake_quant: FakeQuantizeBase):
# ``torch.quantization.FakeQuantize`` checks ``observer_enabled[0] == 1``.
return fake_quant.observer_enabled[0] == 1
@pytest.mark.parametrize(
"observer_enabled_stages",
[("train", "validate", "test", "predict"), ("train",), ("validate",), ("test",), ("predict",), ()],
)
@RunIf(quantization=True)
def test_quantization_disable_observers(tmpdir, observer_enabled_stages):
"""Test disabling observers."""
qmodel = RegressionModel()
qcb = QuantizationAwareTraining(observer_enabled_stages=observer_enabled_stages)
trainer = Trainer(callbacks=[qcb], default_root_dir=tmpdir)
# Quantize qmodel.
qcb.on_fit_start(trainer, qmodel)
fake_quants = list(module for module in qmodel.modules() if isinstance(module, FakeQuantizeBase))
# Disable some of observers before fitting.
for fake_quant in fake_quants[::2]:
fake_quant.disable_observer()
for stage, on_stage_start, on_stage_end in [
("train", qcb.on_train_start, qcb.on_train_end),
("validate", qcb.on_validation_start, qcb.on_validation_end),
("test", qcb.on_test_start, qcb.on_test_end),
("predict", qcb.on_predict_start, qcb.on_predict_end),
]:
before_stage_observer_enabled = torch.as_tensor(list(map(_get_observer_enabled, fake_quants)))
on_stage_start(trainer, qmodel)
expected_stage_observer_enabled = torch.as_tensor(
before_stage_observer_enabled if stage in observer_enabled_stages else [False] * len(fake_quants)
)
assert torch.equal(
torch.as_tensor(list(map(_get_observer_enabled, fake_quants))), expected_stage_observer_enabled
)
on_stage_end(trainer, qmodel)
assert torch.equal(
torch.as_tensor(list(map(_get_observer_enabled, fake_quants))), before_stage_observer_enabled
)
@RunIf(quantization=True)
def test_quantization_val_test_predict(tmpdir):
"""Test the default quantization aware training not affected by validating, testing and predicting."""
seed_everything(42)
num_features = 16
dm = RegressDataModule(num_features=num_features)
qmodel = RegressionModel()
val_test_predict_qmodel = copy.deepcopy(qmodel)
trainer = Trainer(
callbacks=[QuantizationAwareTraining(quantize_on_fit_end=False)],
default_root_dir=tmpdir,
limit_train_batches=1,
limit_val_batches=1,
limit_test_batches=1,
limit_predict_batches=1,
val_check_interval=1,
num_sanity_val_steps=1,
max_epochs=4,
)
trainer.fit(val_test_predict_qmodel, datamodule=dm)
trainer.validate(model=val_test_predict_qmodel, verbose=False)
trainer.test(model=val_test_predict_qmodel, verbose=False)
trainer.predict(
model=val_test_predict_qmodel, dataloaders=[torch.utils.data.DataLoader(RandomDataset(num_features, 16))]
)
expected_qmodel = copy.deepcopy(qmodel)
# No validation in ``expected_qmodel`` fitting.
Trainer(
callbacks=[QuantizationAwareTraining(quantize_on_fit_end=False)],
default_root_dir=tmpdir,
limit_train_batches=1,
limit_val_batches=0,
max_epochs=4,
).fit(expected_qmodel, datamodule=dm)
expected_state_dict = expected_qmodel.state_dict()
for key, value in val_test_predict_qmodel.state_dict().items():
expected_value = expected_state_dict[key]
assert torch.allclose(value, expected_value)
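
Outside the test suite, the same `observer_enabled` check used by `_get_observer_enabled` above gives a quick way to inspect whether observers are currently active on a prepared model (hypothetical snippet; assumes torch >= 1.8 and a QAT-prepared `qmodel`):

    from torch.quantization import FakeQuantizeBase

    def observer_states(model):
        # ``FakeQuantize`` treats ``observer_enabled[0] == 1`` as "observer on".
        return {
            name: bool(module.observer_enabled[0] == 1)
            for name, module in model.named_modules()
            if isinstance(module, FakeQuantizeBase)
        }

    print(observer_states(qmodel))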