diff --git a/CHANGELOG.md b/CHANGELOG.md
index b203856de1..8f76eb75be 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -409,6 +409,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed `Strategy.optimizer_zero_grad` ([#11246](https://github.com/PyTorchLightning/pytorch-lightning/pull/11246))
 
+
+- Removed `Strategy.on_gpu` ([#11537](https://github.com/PyTorchLightning/pytorch-lightning/pull/11537))
+
+
 ### Fixed
 
 - Fixed security vulnerabilities CVE-2020-1747 and CVE-2020-14343 caused by the `PyYAML` dependency ([#11099](https://github.com/PyTorchLightning/pytorch-lightning/pull/11099))
diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py
index 976938a726..4aa67baaed 100644
--- a/pytorch_lightning/strategies/ddp.py
+++ b/pytorch_lightning/strategies/ddp.py
@@ -296,7 +296,7 @@ class DDPStrategy(ParallelStrategy):
         # In 1.8, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
         # Since 1.9, DDP communication hooks can work on all backends.
         if _TORCH_GREATER_EQUAL_1_9 or (
-            _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device
+            _TORCH_GREATER_EQUAL_1_8 and self.root_device.type == "cuda" and self._is_single_process_single_device
         ):
             register_ddp_comm_hook(
                 model=self.model,
@@ -514,7 +514,7 @@ class DDPStrategy(ParallelStrategy):
         if self.sync_batchnorm:
             self.model = _revert_sync_batchnorm(self.model)
 
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # GPU teardown
             log.detail(f"{self.__class__.__name__}: moving model to CPU")
             self.lightning_module.cpu()
diff --git a/pytorch_lightning/strategies/ddp_spawn.py b/pytorch_lightning/strategies/ddp_spawn.py
index 4f95c31545..097992dc19 100644
--- a/pytorch_lightning/strategies/ddp_spawn.py
+++ b/pytorch_lightning/strategies/ddp_spawn.py
@@ -200,7 +200,7 @@ class DDPSpawnStrategy(ParallelStrategy):
     def _register_ddp_hooks(self) -> None:
         # currently, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
         # https://github.com/pytorch/pytorch/blob/v1.8.0/torch/nn/parallel/distributed.py#L1080-L1084
-        if _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device:
+        if _TORCH_GREATER_EQUAL_1_8 and self.root_device.type == "cuda" and self._is_single_process_single_device:
             register_ddp_comm_hook(
                 model=self.model,
                 ddp_comm_state=self._ddp_comm_state,
@@ -378,7 +378,7 @@ class DDPSpawnStrategy(ParallelStrategy):
         if self.sync_batchnorm:
             self.model = _revert_sync_batchnorm(self.model)
 
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # GPU teardown
             self.lightning_module.cpu()
             # clean up memory
diff --git a/pytorch_lightning/strategies/dp.py b/pytorch_lightning/strategies/dp.py
index 300360d085..71d0090e2c 100644
--- a/pytorch_lightning/strategies/dp.py
+++ b/pytorch_lightning/strategies/dp.py
@@ -153,7 +153,7 @@ class DataParallelStrategy(ParallelStrategy):
 
     def teardown(self) -> None:
         super().teardown()
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # GPU teardown
             self.lightning_module.cpu()
             # clean up memory
diff --git a/pytorch_lightning/strategies/fully_sharded.py b/pytorch_lightning/strategies/fully_sharded.py
index f99fa3462d..a3c00a03b8 100644
--- a/pytorch_lightning/strategies/fully_sharded.py
+++ b/pytorch_lightning/strategies/fully_sharded.py
@@ -126,7 +126,7 @@ class DDPFullyShardedStrategy(DDPStrategy):
         return self._process_group
 
     def setup_distributed(self) -> None:
-        if not self.on_gpu:
+        if not self.root_device.type == "cuda":
             raise MisconfigurationException(
                 "You selected strategy to be `ddp_fully_sharded`, but GPU is not available."
             )
diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py
index 19fa1ca3d2..8b68fdd156 100644
--- a/pytorch_lightning/strategies/horovod.py
+++ b/pytorch_lightning/strategies/horovod.py
@@ -125,13 +125,13 @@ class HorovodStrategy(ParallelStrategy):
         return obj
 
     def model_to_device(self):
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # this can potentially be removed after #8312. Not done due to lack of horovod testing
             torch.cuda.set_device(self.root_device)
         self.model.to(self.root_device)
 
     def join(self):
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             hvd.join(self.local_rank)
         else:
             hvd.join()
@@ -201,7 +201,7 @@ class HorovodStrategy(ParallelStrategy):
         self._exit_stack = None
         # Make sure all workers have finished training before returning to the user
         self.join()
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # GPU teardown
             self.lightning_module.cpu()
             # clean up memory
diff --git a/pytorch_lightning/strategies/ipu.py b/pytorch_lightning/strategies/ipu.py
index 7044082a78..22b575a590 100644
--- a/pytorch_lightning/strategies/ipu.py
+++ b/pytorch_lightning/strategies/ipu.py
@@ -335,10 +335,6 @@ class IPUStrategy(ParallelStrategy):
             optimizer = self.optimizers[0]
             self.poptorch_models[RunningStage.TRAINING].setOptimizer(optimizer)
 
-    @property
-    def on_gpu(self) -> bool:
-        return False
-
     @property
     def root_device(self) -> torch.device:
         pass
diff --git a/pytorch_lightning/strategies/parallel.py b/pytorch_lightning/strategies/parallel.py
index ac50cc028e..9c7d94a52f 100644
--- a/pytorch_lightning/strategies/parallel.py
+++ b/pytorch_lightning/strategies/parallel.py
@@ -49,10 +49,6 @@ class ParallelStrategy(Strategy, ABC):
     def root_device(self) -> torch.device:
         """Return the root device."""
 
-    @property
-    def on_gpu(self) -> bool:
-        return self.root_device.type == "cuda" and torch.cuda.is_available()
-
     @property
     def on_tpu(self) -> bool:
         return self.root_device.type == "xla" and _XLA_AVAILABLE
@@ -103,7 +99,7 @@ class ParallelStrategy(Strategy, ABC):
     def torch_distributed_backend(self):
         torch_backend = os.getenv("PL_TORCH_DISTRIBUTED_BACKEND")
         if torch_backend is None:
-            torch_backend = "nccl" if self.on_gpu else "gloo"
+            torch_backend = "nccl" if self.root_device.type == "cuda" else "gloo"
         return torch_backend
 
     @staticmethod
diff --git a/pytorch_lightning/strategies/single_device.py b/pytorch_lightning/strategies/single_device.py
index bccbfa13fa..18bf619420 100644
--- a/pytorch_lightning/strategies/single_device.py
+++ b/pytorch_lightning/strategies/single_device.py
@@ -45,10 +45,6 @@ class SingleDeviceStrategy(Strategy):
     def on_tpu(self) -> bool:
         return self.root_device.type == "xla" and _XLA_AVAILABLE
 
-    @property
-    def on_gpu(self) -> bool:
-        return self.root_device.type == "cuda" and torch.cuda.is_available()
-
     def reduce(self, tensor: Any | torch.Tensor, *args: Any, **kwargs: Any) -> Any | torch.Tensor:
         """Reduces a tensor from several distributed processes to one aggregated tensor. As this plugin only
         operates with a single device, the reduction is simply the identity.
@@ -90,7 +86,7 @@ class SingleDeviceStrategy(Strategy):
 
     def teardown(self) -> None:
         super().teardown()
-        if self.on_gpu:
+        if self.root_device.type == "cuda":
             # GPU teardown
             self.lightning_module.cpu()
             # clean up memory
diff --git a/pytorch_lightning/strategies/strategy.py b/pytorch_lightning/strategies/strategy.py
index 5019890ad4..f966b926b8 100644
--- a/pytorch_lightning/strategies/strategy.py
+++ b/pytorch_lightning/strategies/strategy.py
@@ -228,11 +228,6 @@ class Strategy(ABC):
             return model._apply_batch_transfer_handler(batch, device=device, dataloader_idx=dataloader_idx)
         return move_data_to_device(batch, device)
 
-    @property
-    @abstractmethod
-    def on_gpu(self) -> bool:
-        """Returns whether the current process is done on GPU."""
-
     @property
     @abstractmethod
     def on_tpu(self) -> bool:
diff --git a/tests/strategies/test_ddp_spawn_strategy.py b/tests/strategies/test_ddp_spawn_strategy.py
index 25bb66661c..0fbe0d62cf 100644
--- a/tests/strategies/test_ddp_spawn_strategy.py
+++ b/tests/strategies/test_ddp_spawn_strategy.py
@@ -57,7 +57,6 @@ def test_ddp_cpu():
 
     # assert training type plugin attributes for device setting
     assert isinstance(trainer.strategy, DDPSpawnStrategy)
-    assert not trainer.strategy.on_gpu
     assert not trainer.strategy.on_tpu
     assert trainer.strategy.root_device == torch.device("cpu")
 
@@ -73,7 +72,6 @@ def test_ddp_spawn_extra_parameters(tmpdir):
     trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, strategy="ddp_spawn")
 
     assert isinstance(trainer.strategy, DDPSpawnStrategy)
-    assert trainer.strategy.on_gpu
     assert trainer.strategy.root_device == torch.device("cuda:0")
 
     val: float = 1.0
diff --git a/tests/strategies/test_ddp_strategy.py b/tests/strategies/test_ddp_strategy.py
index fa47380c7d..e56ad131bd 100644
--- a/tests/strategies/test_ddp_strategy.py
+++ b/tests/strategies/test_ddp_strategy.py
@@ -39,7 +39,6 @@ def test_ddp_with_2_gpus():
     trainer = Trainer(gpus=2, strategy="ddp", fast_dev_run=True)
     # assert training type plugin attributes for device setting
     assert isinstance(trainer.strategy, DDPStrategy)
-    assert trainer.strategy.on_gpu
     assert not trainer.strategy.on_tpu
     local_rank = trainer.strategy.local_rank
     assert trainer.strategy.root_device == torch.device(f"cuda:{local_rank}")
diff --git a/tests/strategies/test_single_device_strategy.py b/tests/strategies/test_single_device_strategy.py
index 56b776d402..0007d72129 100644
--- a/tests/strategies/test_single_device_strategy.py
+++ b/tests/strategies/test_single_device_strategy.py
@@ -23,10 +23,9 @@ from tests.helpers.runif import RunIf
 
 
 def test_single_cpu():
-    """Tests if on_gpu and on_tpu is set correctly for single CPU strategy."""
+    """Tests if on_tpu is set correctly for single CPU strategy."""
     trainer = Trainer()
     assert isinstance(trainer.strategy, SingleDeviceStrategy)
-    assert not trainer.strategy.on_gpu
     assert not trainer.strategy.on_tpu
     assert trainer.strategy.root_device == torch.device("cpu")
 
@@ -44,7 +43,6 @@ def test_single_gpu():
     trainer = Trainer(gpus=1, fast_dev_run=True)
     # assert training strategy attributes for device setting
     assert isinstance(trainer.strategy, SingleDeviceStrategy)
-    assert trainer.strategy.on_gpu
     assert not trainer.strategy.on_tpu
     assert trainer.strategy.root_device == torch.device("cuda:0")
 
diff --git a/tests/strategies/test_tpu_spawn.py b/tests/strategies/test_tpu_spawn.py
index b62e175c23..26b59bad54 100644
--- a/tests/strategies/test_tpu_spawn.py
+++ b/tests/strategies/test_tpu_spawn.py
@@ -96,7 +96,6 @@ def test_model_tpu_one_core():
     trainer = Trainer(tpu_cores=1, fast_dev_run=True, strategy=TPUSpawnStrategy(debug=True))
     # assert training strategy attributes for device setting
     assert isinstance(trainer.strategy, TPUSpawnStrategy)
-    assert not trainer.strategy.on_gpu
     assert trainer.strategy.on_tpu
     assert trainer.strategy.root_device == torch.device("xla", index=1)
     model = BoringModelTPU()
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index a50570d0ae..97793cbe39 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1544,13 +1544,10 @@ def test_index_batch_sampler_wrapper_with_iterable_dataset(dataset_cls, tmpdir):
 
 
 @pytest.mark.skipif(_IS_WINDOWS and not _TORCH_GREATER_EQUAL_1_8, reason="torch.distributed support required")
-@patch("torch.cuda.device_count", return_value=2)
-@patch("torch.cuda.is_available", return_value=True)
-@pytest.mark.parametrize("accelerator", ("cpu", "gpu"))
-def test_spawn_predict_return_predictions(_, __, accelerator):
+def test_spawn_predict_return_predictions(tmpdir):
     """Test that `return_predictions=True` raise a MisconfigurationException with spawn training type plugins."""
     model = BoringModel()
-    trainer = Trainer(accelerator=accelerator, strategy="ddp_spawn", devices=2, fast_dev_run=True)
+    trainer = Trainer(default_root_dir=tmpdir, accelerator="cpu", strategy="ddp_spawn", devices=2, fast_dev_run=True)
     assert isinstance(trainer.strategy, DDPSpawnStrategy)
     with pytest.raises(ProcessRaisedException, match="`return_predictions` should be set to `False`"):
         trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=True)
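
Migration note (editor's sketch, not part of the patch): code outside this repository that previously queried `Strategy.on_gpu` can inspect the strategy's root device instead, exactly as the hunks above do internally. One behavioural difference to keep in mind: the removed property also required `torch.cuda.is_available()`, whereas the inlined check only looks at the device type. The snippet below assumes a CPU-only run purely for illustration.

    from pytorch_lightning import Trainer

    trainer = Trainer(accelerator="cpu", devices=1)

    # Before this patch: running_on_gpu = trainer.strategy.on_gpu
    # After this patch: query the root device type directly.
    running_on_gpu = trainer.strategy.root_device.type == "cuda"
    assert not running_on_gpu  # a CPU trainer reports a non-CUDA root device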