Ensure the existence of `DDPPlugin._sync_dir` in `reconciliate_processes` (#8939)
Co-authored-by: Yifu Wang <yifuwang@2012@gmail.com>
This commit is contained in:
parent
938a191406
commit
14f1475c25
|
@@ -143,6 +143,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
|
|||
|
||||
### Fixed
|
||||
|
||||
- Ensure the existence of `DDPPlugin._sync_dir` in `reconciliate_processes` ([#8939](https://github.com/PyTorchLightning/pytorch-lightning/pull/8939))
|
||||
|
||||
- Restore original loaders if replaced by entrypoint ([#8885](https://github.com/PyTorchLightning/pytorch-lightning/pull/8885))
|
||||
|
||||
- Fixed `trainer.fit_loop.split_idx` always returning `None` ([#8601](https://github.com/PyTorchLightning/pytorch-lightning/pull/8601))
|
||||
|
|
|
@@ -19,6 +19,7 @@ import subprocess
|
|||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from time import sleep
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
|
@@ -441,6 +442,11 @@ class DDPPlugin(ParallelPlugin):
|
|||
|
||||
sync_dir = self._sync_dir
|
||||
|
||||
# The cluster may be configured to periodically purge the `/tmp`
|
||||
# directory, in which case `sync_dir` may not exist anymore at this
|
||||
# point. Idempotently create it to ensure its existence.
|
||||
Path(sync_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# save a file locally.
|
||||
torch.save(True, os.path.join(sync_dir, f"{self.global_rank}.pl"))
|
||||
|
||||
|
|
Loading…
Reference in New Issue