MANIFEST.in and setup.py clean-up (#7614)

This commit is contained in:
Carlos Mocholí 2021-11-19 15:38:42 +01:00 committed by GitHub
parent 94390aba56
commit 3d2d0f2536
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 114 additions and 86 deletions

View File

@@ -26,12 +26,11 @@ jobs:
- name: Prepare env
run: |
pip install check-manifest "twine==3.2" setuptools wheel
pip install "twine==3.2" setuptools wheel
- name: Create package
run: |
check-manifest
# python setup.py check --metadata --strict
python setup.py check --metadata --strict
python setup.py sdist bdist_wheel
- name: Check package

View File

@@ -11,69 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Manifest syntax https://docs.python.org/2/distutils/sourcedist.html
graft wheelhouse
recursive-exclude __pycache__ *.py[cod] *.orig
# Include the README and CHANGELOG
include *.md
# Include the license file
include LICENSE
# Include the citation info
include *.cff
exclude *.sh
exclude *.svg
recursive-include pytorch_lightning *.py
# Include marker file for PEP 561
include pytorch_lightning/py.typed
# include examples
recursive-include pl_examples *.py *.md *.sh *.txt *.toml
# exclude tests from package
recursive-exclude tests *
recursive-exclude site *
exclude tests
# Exclude the documentation files
recursive-exclude docs *
exclude docs
recursive-include docs/source/_static/images/logos/ *
recursive-include docs/source/_static/images/general/ pl_overview* tf_* tutorial_* PTL101_*
# Include the Requirements
include pytorch_lightning/py.typed # marker file for PEP 561
include CHANGELOG.md
recursive-include requirements *.txt
recursive-exclude requirements *.sh *.py
include requirements.txt
include pyproject.toml
# Exclude build configs
exclude *.yml
exclude *.yaml
exclude *.toml
exclude *.jsonnet
# Exclude pyright config
exclude .pyrightconfig.json
# Exclude submodules
exclude .gitmodules
exclude _notebooks
# Exclude Makefile
exclude Makefile
prune .git
prune .github
prune .circleci
prune temp*
prune test*
prune benchmark*
prune dockers
prune legacy
include *.cff # citation info

View File

@@ -11,13 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import platform
from typing import Optional
import random
import time
import urllib
from typing import Optional, Tuple
from urllib.error import HTTPError
from warnings import warn
from torch.utils.data import DataLoader, random_split
import torch
from torch.utils.data import DataLoader, Dataset, random_split
from pl_examples import _DATASETS_PATH
from pytorch_lightning import LightningDataModule
@@ -27,6 +32,97 @@ if _TORCHVISION_AVAILABLE:
from torchvision import transforms as transform_lib
class _MNIST(Dataset):
"""Carbon copy of ``tests.helpers.datasets.MNIST``.
We cannot import the tests as they are not distributed with the package.
See https://github.com/PyTorchLightning/pytorch-lightning/pull/7614#discussion_r671183652 for more context.
"""
RESOURCES = (
"https://pl-public-data.s3.amazonaws.com/MNIST/processed/training.pt",
"https://pl-public-data.s3.amazonaws.com/MNIST/processed/test.pt",
)
TRAIN_FILE_NAME = "training.pt"
TEST_FILE_NAME = "test.pt"
cache_folder_name = "complete"
def __init__(
self, root: str, train: bool = True, normalize: tuple = (0.1307, 0.3081), download: bool = True, **kwargs
):
super().__init__()
self.root = root
self.train = train # training set or test set
self.normalize = normalize
self.prepare_data(download)
data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME
self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file))
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
img = self.data[idx].float().unsqueeze(0)
target = int(self.targets[idx])
if self.normalize is not None and len(self.normalize) == 2:
img = self.normalize_tensor(img, *self.normalize)
return img, target
def __len__(self) -> int:
return len(self.data)
@property
def cached_folder_path(self) -> str:
return os.path.join(self.root, "MNIST", self.cache_folder_name)
def _check_exists(self, data_folder: str) -> bool:
existing = True
for fname in (self.TRAIN_FILE_NAME, self.TEST_FILE_NAME):
existing = existing and os.path.isfile(os.path.join(data_folder, fname))
return existing
def prepare_data(self, download: bool = True):
if download and not self._check_exists(self.cached_folder_path):
self._download(self.cached_folder_path)
if not self._check_exists(self.cached_folder_path):
raise RuntimeError("Dataset not found.")
def _download(self, data_folder: str) -> None:
os.makedirs(data_folder, exist_ok=True)
for url in self.RESOURCES:
logging.info(f"Downloading {url}")
fpath = os.path.join(data_folder, os.path.basename(url))
urllib.request.urlretrieve(url, fpath)
@staticmethod
def _try_load(path_data, trials: int = 30, delta: float = 1.0):
"""Resolving loading from the same time from multiple concurrent processes."""
res, exception = None, None
assert trials, "at least some trial has to be set"
assert os.path.isfile(path_data), f"missing file: {path_data}"
for _ in range(trials):
try:
res = torch.load(path_data)
# todo: specify the possible exception
except Exception as e:
exception = e
time.sleep(delta * random.random())
else:
break
if exception is not None:
# raise the caught exception
raise exception
return res
@staticmethod
def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor:
mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device)
std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device)
return tensor.sub(mean).div(std)
def MNIST(*args, **kwargs):
torchvision_mnist_available = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
if torchvision_mnist_available:
@@ -39,7 +135,7 @@ def MNIST(*args, **kwargs):
torchvision_mnist_available = False
if not torchvision_mnist_available:
print("`torchvision.datasets.MNIST` not available. Using our hosted version")
from tests.helpers.datasets import MNIST
MNIST = _MNIST
return MNIST(*args, **kwargs)

View File

@@ -1,6 +1,7 @@
#!/bin/bash
set -ex
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
dir_path=$(dirname "${BASH_SOURCE[0]}")
args="
--data.batch_size=32

View File

@@ -14,9 +14,10 @@
from unittest import mock
import pytest
import torch
from pl_examples import _DALI_AVAILABLE
from tests.helpers.runif import RunIf
from pytorch_lightning.utilities.imports import _IS_WINDOWS
ARGS_DEFAULT = (
"--trainer.default_root_dir %(tmpdir)s "
@@ -31,7 +32,8 @@ ARGS_GPU = ARGS_DEFAULT + "--trainer.gpus 1 "
@pytest.mark.skipif(not _DALI_AVAILABLE, reason="Nvidia DALI required")
@RunIf(min_gpus=1, skip_windows=True)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")
@pytest.mark.skipif(_IS_WINDOWS, reason="Not supported on Windows")
@pytest.mark.parametrize("cli_args", [ARGS_GPU])
def test_examples_mnist_dali(tmpdir, cli_args):
from pl_examples.integration_examples.dali_image_classifier import cli_main

View File

@@ -2,7 +2,6 @@ coverage>5.2.0
codecov>=2.1
pytest>=6.0
pytest-rerunfailures>=10.2
check-manifest
twine==3.2
mypy>=0.900
flake8>=3.9.2

View File

@@ -73,14 +73,6 @@ ignore =
W503 # Ignore "Line break occurred before a binary operator"
E203 # Ignore "whitespace before ':'"
# setup.cfg or tox.ini
[check-manifest]
ignore =
*.yml
.github
.github/*
.circleci
[metadata]
license_file = LICENSE

View File

@@ -74,10 +74,10 @@ setup(
url=about.__homepage__,
download_url="https://github.com/PyTorchLightning/pytorch-lightning",
license=about.__license__,
packages=find_packages(exclude=["tests", "tests/*", "benchmarks", "legacy", "legacy/*"]),
packages=find_packages(exclude=["tests*", "pl_examples*", "legacy*"]),
include_package_data=True,
long_description=long_description,
long_description_content_type="text/markdown",
include_package_data=True,
zip_safe=False,
keywords=["deep learning", "pytorch", "AI"],
python_requires=">=3.6",

View File

@@ -19,7 +19,6 @@ import urllib.request
from typing import Optional, Sequence, Tuple
import torch
from torch import Tensor
from torch.utils.data import Dataset
@@ -70,7 +69,7 @@ class MNIST(Dataset):
data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME
self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file))
def __getitem__(self, idx: int) -> Tuple[Tensor, int]:
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
img = self.data[idx].float().unsqueeze(0)
target = int(self.targets[idx])
@@ -126,7 +125,7 @@
return res
@staticmethod
def normalize_tensor(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> Tensor:
def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor:
mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device)
std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device)
return tensor.sub(mean).div(std)

View File

@@ -81,6 +81,7 @@ fi
# report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n"
# test that a user can manually launch individual processes
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
args="--trainer.gpus 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1"
MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args} &
MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args}