From 9d8faecdb2b873b52b95f2772f4bf48068a0af9a Mon Sep 17 00:00:00 2001 From: Dan Dale Date: Sat, 5 Feb 2022 11:13:21 -0800 Subject: [PATCH] Allow Horovod `teardown()` to complete gracefully if exception thrown in callback setup (#11752) --- CHANGELOG.md | 2 ++ pytorch_lightning/strategies/horovod.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc6c395a3c..6355b86096 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -472,6 +472,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- Fixed an issue where `HorovodStrategy.teardown()` did not complete gracefully if an exception was thrown during callback setup ([#11752](https://github.com/PyTorchLightning/pytorch-lightning/pull/11752)) + - Fixed security vulnerabilities CVE-2020-1747 and CVE-2020-14343 caused by the `PyYAML` dependency ([#11099](https://github.com/PyTorchLightning/pytorch-lightning/pull/11099)) diff --git a/pytorch_lightning/strategies/horovod.py b/pytorch_lightning/strategies/horovod.py index 3eca681add..bf21be4c74 100644 --- a/pytorch_lightning/strategies/horovod.py +++ b/pytorch_lightning/strategies/horovod.py @@ -197,8 +197,10 @@ class HorovodStrategy(ParallelStrategy): def teardown(self) -> None: super().teardown() - self._exit_stack.__exit__(None, None, None) - self._exit_stack = None + # teardown may be called before `_exit_stack` is set + if self._exit_stack: + self._exit_stack.__exit__(None, None, None) + self._exit_stack = None # Make sure all workers have finished training before returning to the user self.join() if self.root_device.type == "cuda":