From 3d2d0f25362fceea31eded88b1b576fdf42c0ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 19 Nov 2021 15:38:42 +0100 Subject: [PATCH] MANIFEST.in and setup.py clean-up (#7614) --- .github/workflows/ci_pkg-install.yml | 5 +- MANIFEST.in | 67 +----------- .../basic_examples/mnist_datamodule.py | 102 +++++++++++++++++- pl_examples/run_examples.sh | 1 + pl_examples/test_examples.py | 6 +- requirements/test.txt | 1 - setup.cfg | 8 -- setup.py | 4 +- tests/helpers/datasets.py | 5 +- tests/special_tests.sh | 1 + 10 files changed, 114 insertions(+), 86 deletions(-) diff --git a/.github/workflows/ci_pkg-install.yml b/.github/workflows/ci_pkg-install.yml index 1fd7ed49d5..bf7de876d1 100644 --- a/.github/workflows/ci_pkg-install.yml +++ b/.github/workflows/ci_pkg-install.yml @@ -26,12 +26,11 @@ jobs: - name: Prepare env run: | - pip install check-manifest "twine==3.2" setuptools wheel + pip install "twine==3.2" setuptools wheel - name: Create package run: | - check-manifest - # python setup.py check --metadata --strict + python setup.py check --metadata --strict python setup.py sdist bdist_wheel - name: Check package diff --git a/MANIFEST.in b/MANIFEST.in index b810937f1a..a68fc82474 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,69 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -# Manifest syntax https://docs.python.org/2/distutils/sourcedist.html -graft wheelhouse - -recursive-exclude __pycache__ *.py[cod] *.orig - -# Include the README and CHANGELOG -include *.md - -# Include the license file -include LICENSE - -# Include the citation info -include *.cff - -exclude *.sh -exclude *.svg -recursive-include pytorch_lightning *.py - -# Include marker file for PEP 561 -include pytorch_lightning/py.typed - -# include examples -recursive-include pl_examples *.py *.md *.sh *.txt *.toml - -# exclude tests from package -recursive-exclude tests * -recursive-exclude site * -exclude tests - -# Exclude the documentation files -recursive-exclude docs * -exclude docs -recursive-include docs/source/_static/images/logos/ * -recursive-include docs/source/_static/images/general/ pl_overview* tf_* tutorial_* PTL101_* - -# Include the Requirements +include pytorch_lightning/py.typed # marker file for PEP 561 +include CHANGELOG.md recursive-include requirements *.txt -recursive-exclude requirements *.sh *.py include requirements.txt -include pyproject.toml - -# Exclude build configs -exclude *.yml -exclude *.yaml -exclude *.toml -exclude *.jsonnet - -# Exclude pyright config -exclude .pyrightconfig.json - -# Exclude submodules -exclude .gitmodules -exclude _notebooks - -# Exclude Makefile -exclude Makefile - -prune .git -prune .github -prune .circleci -prune temp* -prune test* -prune benchmark* -prune dockers -prune legacy +include *.cff # citation info diff --git a/pl_examples/basic_examples/mnist_datamodule.py b/pl_examples/basic_examples/mnist_datamodule.py index 335a36f0a3..31a2e284dd 100644 --- a/pl_examples/basic_examples/mnist_datamodule.py +++ b/pl_examples/basic_examples/mnist_datamodule.py @@ -11,13 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import platform -from typing import Optional +import random +import time +import urllib +from typing import Optional, Tuple from urllib.error import HTTPError from warnings import warn -from torch.utils.data import DataLoader, random_split +import torch +from torch.utils.data import DataLoader, Dataset, random_split from pl_examples import _DATASETS_PATH from pytorch_lightning import LightningDataModule @@ -27,6 +32,97 @@ if _TORCHVISION_AVAILABLE: from torchvision import transforms as transform_lib +class _MNIST(Dataset): + """Carbon copy of ``tests.helpers.datasets.MNIST``. + + We cannot import the tests as they are not distributed with the package. + See https://github.com/PyTorchLightning/pytorch-lightning/pull/7614#discussion_r671183652 for more context. + """ + + RESOURCES = ( + "https://pl-public-data.s3.amazonaws.com/MNIST/processed/training.pt", + "https://pl-public-data.s3.amazonaws.com/MNIST/processed/test.pt", + ) + + TRAIN_FILE_NAME = "training.pt" + TEST_FILE_NAME = "test.pt" + cache_folder_name = "complete" + + def __init__( + self, root: str, train: bool = True, normalize: tuple = (0.1307, 0.3081), download: bool = True, **kwargs + ): + super().__init__() + self.root = root + self.train = train # training set or test set + self.normalize = normalize + + self.prepare_data(download) + + data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME + self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file)) + + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]: + img = self.data[idx].float().unsqueeze(0) + target = int(self.targets[idx]) + + if self.normalize is not None and len(self.normalize) == 2: + img = self.normalize_tensor(img, *self.normalize) + + return img, target + + def __len__(self) -> int: + return len(self.data) + + @property + def cached_folder_path(self) -> str: + return os.path.join(self.root, "MNIST", self.cache_folder_name) + + def _check_exists(self, data_folder: str) -> bool: + existing = True + for fname in (self.TRAIN_FILE_NAME, self.TEST_FILE_NAME): + existing = existing and os.path.isfile(os.path.join(data_folder, fname)) + return existing + + def prepare_data(self, download: bool = True): + if download and not self._check_exists(self.cached_folder_path): + self._download(self.cached_folder_path) + if not self._check_exists(self.cached_folder_path): + raise RuntimeError("Dataset not found.") + + def _download(self, data_folder: str) -> None: + os.makedirs(data_folder, exist_ok=True) + for url in self.RESOURCES: + logging.info(f"Downloading {url}") + fpath = os.path.join(data_folder, os.path.basename(url)) + urllib.request.urlretrieve(url, fpath) + + @staticmethod + def _try_load(path_data, trials: int = 30, delta: float = 1.0): + """Resolving loading from the same time from multiple concurrent processes.""" + res, exception = None, None + assert trials, "at least some trial has to be set" + assert os.path.isfile(path_data), f"missing file: {path_data}" + for _ in range(trials): + try: + res = torch.load(path_data) + # todo: specify the possible exception + except Exception as e: + exception = e + time.sleep(delta * random.random()) + else: + break + if exception is not None: + # raise the caught exception + raise exception + return res + + @staticmethod + def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor: + mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device) + return tensor.sub(mean).div(std) + + def MNIST(*args, **kwargs): torchvision_mnist_available = not bool(os.getenv("PL_USE_MOCKED_MNIST", False)) if torchvision_mnist_available: @@ -39,7 +135,7 @@ def MNIST(*args, **kwargs): torchvision_mnist_available = False if not torchvision_mnist_available: print("`torchvision.datasets.MNIST` not available. Using our hosted version") - from tests.helpers.datasets import MNIST + MNIST = _MNIST return MNIST(*args, **kwargs) diff --git a/pl_examples/run_examples.sh b/pl_examples/run_examples.sh index 4a15c3367d..a04a57631d 100755 --- a/pl_examples/run_examples.sh +++ b/pl_examples/run_examples.sh @@ -1,6 +1,7 @@ #!/bin/bash set -ex +export PYTHONPATH="${PYTHONPATH}:$(pwd)" dir_path=$(dirname "${BASH_SOURCE[0]}") args=" --data.batch_size=32 diff --git a/pl_examples/test_examples.py b/pl_examples/test_examples.py index 19d09836ef..00ca558c53 100644 --- a/pl_examples/test_examples.py +++ b/pl_examples/test_examples.py @@ -14,9 +14,10 @@ from unittest import mock import pytest +import torch from pl_examples import _DALI_AVAILABLE -from tests.helpers.runif import RunIf +from pytorch_lightning.utilities.imports import _IS_WINDOWS ARGS_DEFAULT = ( "--trainer.default_root_dir %(tmpdir)s " @@ -31,7 +32,8 @@ ARGS_GPU = ARGS_DEFAULT + "--trainer.gpus 1 " @pytest.mark.skipif(not _DALI_AVAILABLE, reason="Nvidia DALI required") -@RunIf(min_gpus=1, skip_windows=True) +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") +@pytest.mark.skipif(_IS_WINDOWS, reason="Not supported on Windows") @pytest.mark.parametrize("cli_args", [ARGS_GPU]) def test_examples_mnist_dali(tmpdir, cli_args): from pl_examples.integration_examples.dali_image_classifier import cli_main diff --git a/requirements/test.txt b/requirements/test.txt index de749e2339..d86137b037 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -2,7 +2,6 @@ coverage>5.2.0 codecov>=2.1 pytest>=6.0 pytest-rerunfailures>=10.2 -check-manifest twine==3.2 mypy>=0.900 flake8>=3.9.2 diff --git a/setup.cfg b/setup.cfg index 4494537280..5ebf1bf73a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -73,14 +73,6 @@ ignore = W503 # Ignore "Line break occurred before a binary operator" E203 # Ignore "whitespace before ':'" -# setup.cfg or tox.ini -[check-manifest] -ignore = - *.yml - .github - .github/* - .circleci - [metadata] license_file = LICENSE diff --git a/setup.py b/setup.py index ddbae8b974..9d54a0d564 100755 --- a/setup.py +++ b/setup.py @@ -74,10 +74,10 @@ setup( url=about.__homepage__, download_url="https://github.com/PyTorchLightning/pytorch-lightning", license=about.__license__, - packages=find_packages(exclude=["tests", "tests/*", "benchmarks", "legacy", "legacy/*"]), + packages=find_packages(exclude=["tests*", "pl_examples*", "legacy*"]), + include_package_data=True, long_description=long_description, long_description_content_type="text/markdown", - include_package_data=True, zip_safe=False, keywords=["deep learning", "pytorch", "AI"], python_requires=">=3.6", diff --git a/tests/helpers/datasets.py b/tests/helpers/datasets.py index 561642ae8c..33bf1d9b8e 100644 --- a/tests/helpers/datasets.py +++ b/tests/helpers/datasets.py @@ -19,7 +19,6 @@ import urllib.request from typing import Optional, Sequence, Tuple import torch -from torch import Tensor from torch.utils.data import Dataset @@ -70,7 +69,7 @@ class MNIST(Dataset): data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file)) - def __getitem__(self, idx: int) -> Tuple[Tensor, int]: + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]: img = self.data[idx].float().unsqueeze(0) target = int(self.targets[idx]) @@ -126,7 +125,7 @@ class MNIST(Dataset): return res @staticmethod - def normalize_tensor(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> Tensor: + def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor: mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device) std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device) return tensor.sub(mean).div(std) diff --git a/tests/special_tests.sh b/tests/special_tests.sh index 6a3701a7ee..27abaa6cc6 100755 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -81,6 +81,7 @@ fi # report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n" # test that a user can manually launch individual processes +export PYTHONPATH="${PYTHONPATH}:$(pwd)" args="--trainer.gpus 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1" MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args} & MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args}