From 1eff3b53c1ff9d362fc24a1e4fea6c0cfe78696b Mon Sep 17 00:00:00 2001 From: four4fish <88516121+four4fish@users.noreply.github.com> Date: Mon, 21 Mar 2022 04:38:55 -0700 Subject: [PATCH] Update fairscale version (#11567) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Aki Nitta Co-authored-by: Carlos Mocholi Co-authored-by: Jirka Co-authored-by: Adrian Wälchli --- .azure-pipelines/gpu-tests.yml | 2 +- dockers/base-cuda/Dockerfile | 2 +- .../strategies/test_ddp_fully_sharded_with_full_state_dict.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 7c19a0ba8c..bf1bb6664a 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -52,7 +52,7 @@ jobs: - bash: | python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" - pip install fairscale==0.4.0 + pip install fairscale==0.4.5 pip install deepspeed==0.5.7 pip install bagua-cuda102==0.9.0 pip install . --requirement requirements/devel.txt diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index bf57fb9102..e5f3c8d132 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -133,7 +133,7 @@ RUN \ RUN \ # install FairScale - pip install fairscale==0.4.0 && \ + pip install fairscale==0.4.5 && \ python -c "import fairscale; print(fairscale.__version__)" RUN \ diff --git a/tests/strategies/test_ddp_fully_sharded_with_full_state_dict.py b/tests/strategies/test_ddp_fully_sharded_with_full_state_dict.py index 7179029f21..4354a3444f 100644 --- a/tests/strategies/test_ddp_fully_sharded_with_full_state_dict.py +++ b/tests/strategies/test_ddp_fully_sharded_with_full_state_dict.py @@ -83,8 +83,7 @@ class TestFSDPModel(BoringModel): assert isinstance(self.layer, FullyShardedDataParallel) assert isinstance(self.layer.module[0], FullyShardedDataParallel) assert isinstance(self.layer.module[2], FullyShardedDataParallel) - # root should not be resharding - assert self.layer.reshard_after_forward is False + # Assert that the nested layers are set reshard_after_forward to True assert self.layer.module[0].reshard_after_forward is True assert self.layer.module[2].reshard_after_forward is True