diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd92a19c11..927d71bf9c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -835,6 +835,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed to avoid common hook warning if no hook is overridden ([#12131](https://github.com/PyTorchLightning/pytorch-lightning/pull/12131))
 
+- Fixed deepspeed keeping old sub-folders in same ckpt path ([#12194](https://github.com/PyTorchLightning/pytorch-lightning/pull/12194))
+
+
 - Fixed returning logged metrics instead of callback metrics during evaluation ([#12224](https://github.com/PyTorchLightning/pytorch-lightning/pull/12224))
 
 
diff --git a/pytorch_lightning/strategies/deepspeed.py b/pytorch_lightning/strategies/deepspeed.py
index 133dc1ec75..bdec69c43b 100644
--- a/pytorch_lightning/strategies/deepspeed.py
+++ b/pytorch_lightning/strategies/deepspeed.py
@@ -760,6 +760,7 @@ class DeepSpeedStrategy(DDPStrategy):
                 "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                 f" is not supported for `{self.__class__.__name__}` as `CheckpointIO` is not used."
             )
+
         if self.zero_stage_3 and self._multi_device and self.is_global_zero:
             warning_cache.warn(
                 "When saving the DeepSpeed Stage 3 checkpoint, "
@@ -772,7 +773,7 @@ class DeepSpeedStrategy(DDPStrategy):
         # dump states as a checkpoint dictionary object
         _exclude_keys = ["state_dict", "optimizer_states"]
         checkpoint = {k: v for k, v in checkpoint.items() if k not in _exclude_keys}
-        self.deepspeed_engine.save_checkpoint(filepath, client_state=checkpoint)
+        self.deepspeed_engine.save_checkpoint(filepath, client_state=checkpoint, tag="checkpoint")
 
     def load_checkpoint(self, checkpoint_path: _PATH) -> Dict[str, Any]:
         if self.load_full_weights and self.zero_stage_3:
diff --git a/tests/strategies/test_deepspeed_strategy.py b/tests/strategies/test_deepspeed_strategy.py
index 272da2d3b9..6891c0b397 100644
--- a/tests/strategies/test_deepspeed_strategy.py
+++ b/tests/strategies/test_deepspeed_strategy.py
@@ -1085,3 +1085,25 @@ def test_deepspeed_with_meta_device(tmpdir):
     )
     trainer.fit(model)
     assert model.layer.weight.device.type == "cpu"
+
+
+@RunIf(min_gpus=2, deepspeed=True, standalone=True)
+def test_deepspeed_multi_save_same_filepath(tmpdir):
+    """Test that verifies that deepspeed saves only latest checkpoint in the specified path and deletes the old
+    sharded checkpoints."""
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        strategy="deepspeed",
+        accelerator="gpu",
+        devices=2,
+        callbacks=[ModelCheckpoint(save_top_k=1, save_last=True)],
+        limit_train_batches=1,
+        limit_val_batches=0,
+        num_sanity_val_steps=0,
+        max_epochs=2,
+    )
+    trainer.fit(model)
+    ckpt_path = os.path.join(trainer.checkpoint_callback.dirpath, "last.ckpt")
+    expected = ["latest", "zero_to_fp32.py", "checkpoint"]
+    assert set(expected) == set(os.listdir(ckpt_path))