From 5cef9772a4c77c77b93a191d38e696b4bf9a2ba2 Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Mon, 5 Jul 2021 17:46:32 +0530 Subject: [PATCH] Add tests for GCS filesystem (#7946) --- requirements/extra.txt | 3 +- tests/utilities/test_remote_filesystem.py | 106 ++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 tests/utilities/test_remote_filesystem.py diff --git a/requirements/extra.txt b/requirements/extra.txt index 291813e05e..47d3a66e0d 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -4,7 +4,8 @@ matplotlib>3.1 horovod>=0.21.2 # no need to install with [pytorch] as pytorch is already installed omegaconf>=2.0.1 torchtext>=0.5 -# onnx>=1.7.0 +onnx>=1.7.0 onnxruntime>=1.3.0 hydra-core>=1.0 jsonargparse[signatures]>=3.15.0 +gcsfs>=2021.5.0 diff --git a/tests/utilities/test_remote_filesystem.py b/tests/utilities/test_remote_filesystem.py new file mode 100644 index 0000000000..15a7dbb4c9 --- /dev/null +++ b/tests/utilities/test_remote_filesystem.py @@ -0,0 +1,106 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Integration tests for checkpointing and logging against a GCS bucket.

Every test is skipped unless the ``GCS_BUCKET_PATH`` environment variable is set.
The ``gs`` filesystem is obtained from ``fsspec`` with default arguments.
"""
import contextlib
import os

import fsspec
import pytest

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from tests.helpers import BoringModel

GCS_BUCKET_PATH = os.getenv("GCS_BUCKET_PATH")
_GCS_BUCKET_PATH_AVAILABLE = GCS_BUCKET_PATH is not None

# Only instantiate the filesystem when a bucket is configured; otherwise every test
# below is skipped and ``gcs_fs`` is never touched.
gcs_fs = fsspec.filesystem("gs") if _GCS_BUCKET_PATH_AVAILABLE else None

# Shared marker so the skip condition and its reason are defined in exactly one place.
_requires_gcs = pytest.mark.skipif(not _GCS_BUCKET_PATH_AVAILABLE, reason="Test requires GCS bucket path")


def gcs_path_join(dir_path):
    """Return ``dir_path`` re-rooted under ``GCS_BUCKET_PATH``.

    Slashes at the seam are normalised so that a trailing ``/`` on the bucket path
    (or a missing leading ``/`` on ``dir_path``) still yields exactly one separator.
    """
    return f"{GCS_BUCKET_PATH.rstrip('/')}/{str(dir_path).lstrip('/')}"


def gcs_rm_dir(dir_path):
    """Recursively delete ``dir_path`` from the bucket.

    Returns ``True`` so that callers can wrap the call in ``assert``.
    """
    gcs_fs.rm(dir_path, recursive=True)
    return True


@contextlib.contextmanager
def _gcs_test_dir(tmpdir):
    """Yield the bucket path that mirrors ``tmpdir`` and delete it on exit.

    The removal lives in ``finally`` so that a failing assertion inside the ``with``
    body does not leave objects behind in the bucket. The ``exists`` guard avoids
    calling ``rm`` on a path that was never created, which could otherwise replace
    the test's real failure with an unrelated error.
    """
    dir_path = gcs_path_join(tmpdir)
    try:
        yield dir_path
    finally:
        if gcs_fs.exists(dir_path):
            assert gcs_rm_dir(dir_path)


@_requires_gcs
def test_gcs_model_checkpoint_contents(tmpdir):
    """``ModelCheckpoint`` writes one checkpoint per epoch plus ``last.ckpt`` to the bucket."""
    epochs = 2
    train_batches = 10

    with _gcs_test_dir(tmpdir) as dir_path:
        model = BoringModel()
        checkpoint_callback = ModelCheckpoint(dirpath=dir_path, save_top_k=-1, save_last=True)

        trainer = Trainer(
            default_root_dir=dir_path,
            callbacks=[checkpoint_callback],
            limit_train_batches=train_batches,
            limit_val_batches=10,
            max_epochs=epochs,
            logger=False,
        )
        trainer.fit(model)

        # Each epoch's checkpoint is named after that epoch's final 0-based global step,
        # i.e. ``epoch=0-step=9.ckpt`` and ``epoch=1-step=19.ckpt`` for the values above.
        epoch_ckpts = [
            f"epoch={epoch}-step={(epoch + 1) * train_batches - 1}.ckpt" for epoch in range(epochs)
        ]

        assert checkpoint_callback.best_model_path == os.path.join(dir_path, epoch_ckpts[-1])
        assert checkpoint_callback.last_model_path == os.path.join(dir_path, "last.ckpt")

        # Compare sorted: the order of a bucket listing is not what this test is about.
        gcs_ckpt_names = [os.path.basename(path) for path in gcs_fs.listdir(dir_path, detail=False)]
        assert sorted(gcs_ckpt_names) == sorted(epoch_ckpts + ["last.ckpt"])


@_requires_gcs
def test_gcs_logging(tmpdir):
    """``TensorBoardLogger`` writes into the requested version directory in the bucket."""
    name = "tb_versioning"
    expected_version = "101"

    with _gcs_test_dir(tmpdir) as dir_path:
        log_dir = os.path.join(dir_path, name)
        gcs_fs.mkdir(log_dir)

        logger = TensorBoardLogger(save_dir=dir_path, name=name, version=expected_version)
        # Mixed key types (str, int, float, complex) are passed on purpose.
        logger.log_hyperparams({"a": 1, "b": 2, 123: 3, 3.5: 4, 5j: 5})

        assert logger.version == expected_version

        # Drop empty basenames (listing entries whose path ends in "/") before comparing.
        gcs_names = [os.path.basename(path) for path in gcs_fs.listdir(log_dir, detail=False)]
        gcs_names = [entry for entry in gcs_names if entry]

        assert gcs_names == [expected_version]
        # The version directory must contain at least one entry.
        assert gcs_fs.listdir(os.path.join(log_dir, expected_version), detail=False)


@_requires_gcs
def test_gcs_save_hparams_to_yaml_file(tmpdir):
    """Fitting with a ``TensorBoardLogger`` leaves an ``hparams.yaml`` in the remote log dir."""
    with _gcs_test_dir(tmpdir) as dir_path:
        model = BoringModel()
        logger = TensorBoardLogger(save_dir=dir_path, default_hp_metric=False)
        trainer = Trainer(max_steps=1, default_root_dir=dir_path, logger=logger)
        assert trainer.log_dir == trainer.logger.log_dir
        trainer.fit(model)

        hparams_file = "hparams.yaml"
        assert gcs_fs.isfile(os.path.join(trainer.log_dir, hparams_file))