lightning/.actions/assistant.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

508 lines
20 KiB
Python
Raw Normal View History

# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
Lightning Dataset (including optimized dataloading of s3 buckets) (#17743) * Lightning DataLoader * lightning dataloader * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * init * example * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * env var * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/lightning/pytorch/utilities/data/__init__.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * remove unused functions * extra reqs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/lightning/pytorch/utilities/data/fileio.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * imports work now! 
yay * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * imports * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * missing import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * error handling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update creds for local use case * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeowners * recursive get index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up get index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update imagenet example * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * example cleanup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * changelog * reqs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeowners * requirements * expose LightningDataset too * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * expost LightningDataset at top 
level * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused private methods from init * remove private imports * upper bound on extra requirements * review comments * loosen req * deps * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test updating fabric base req * remove version pin on s3fs to test * recover missing function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * update * random * torchdata >= 0.3.0 * update torchdata version * remove torchdata version to test * try rem torch version pin * req * update bucket in test * req * skips * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * import * update structure to lightning.data * base.txt for data reqs * fix imports * rename to LightningS3Dataset * new workflow * dont need to test warnings * reqs * req * revert data folder in pytorch * test import * tests * req * req * req * torch version * req * req * open dep * reformatted * pin strict * pin strict extra * req * modify workflow, no cache * try * patch * import * fix * dataset test * update getattr * pin everything to test * remove torch preinstall from workflow * workflow * req * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow * workflow * req * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow * print * skip test for now * update path join * revert app dep version bump * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow updates * app base req * req * window test failure * add data req to assistant * try * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * add missing comma * updates * update * typo * requirements * try widening req * older torch version * update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * update * update * update * cleanup tests * typo again * update * remove unnecessary line * Update .github/CODEOWNERS * Discard changes to requirements/pytorch/base.txt * Discard changes to requirements/fabric/base.txt * Discard changes to requirements/app/base.txt * requirements * requirements * one line * app workflow pick only app reqs * rename package * undo * don't use cache * examples CI * pytorch and fabric CI * try remove cache * Apply suggestions from code review * jirka playing * jirka playing * jirka playing * blah * flatten LightningDataset * cleans up dataset class * jirka playing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * jirka playing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * extra * fix dataset test * update checkgroups * Luca's review comments * val error fix * unskip test * min * fix precommit warning * cpu * docstrings * req * 2.0.1 * add return type * typing errors * req * return types with quotations * import for type-checking * no botocore in cloudagnostic code * exit args * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * backends typing * remove oldest from data tests * typing * typing * typing * types * type * typing * typing * typing * import fix * Changelog --------- Co-authored-by: Noha Alon <nohaalon@Nohas-MacBook-Air.local> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: 
Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Jirka <jirka.borovec@seznam.cz> Co-authored-by: Justus Schock <justus.schock@posteo.de>
2023-06-13 10:44:41 +00:00
import logging
import os
import re
import shutil
import tempfile
import urllib.request
from collections.abc import Iterable, Iterator, Sequence
from itertools import chain
from os.path import dirname, isfile
from pathlib import Path
from typing import Any, Optional
from packaging.requirements import Requirement
from packaging.version import Version
# Requirement files that belong to each sub-package, keyed by the package's short name.
REQUIREMENT_FILES = {
    "pytorch": (
        "requirements/pytorch/base.txt",
        "requirements/pytorch/extra.txt",
        "requirements/pytorch/strategies.txt",
        "requirements/pytorch/examples.txt",
    ),
    "fabric": (
        "requirements/fabric/base.txt",
        "requirements/fabric/strategies.txt",
    ),
    "data": ("requirements/data/data.txt",),
}
# Every requirement file listed above, flattened in declaration order.
REQUIREMENT_FILES_ALL = [*chain.from_iterable(REQUIREMENT_FILES.values())]
# The directory that contains the folder this file lives in.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
2023-01-12 16:25:29 +00:00
class _RequirementWithComment(Requirement):
    """A ``Requirement`` that also remembers its trailing comment and a preceding pip argument.

    A requirement whose comment contains ``# strict`` (case-insensitive) is never relaxed by :meth:`adjust`.

    """

    strict_string = "# strict"
    # operators that express an upper version bound
    _upper_bound_operators = ("<", "<=")

    def __init__(self, *args: Any, comment: str = "", pip_argument: Optional[str] = None, **kwargs: Any) -> None:
        """Create the requirement.

        Args:
            *args: forwarded to ``Requirement``.
            comment: the comment that followed the requirement on its line (may be empty).
            pip_argument: a pip option line (e.g. ``--extra-index-url ...``) that preceded the requirement, or None.
            **kwargs: forwarded to ``Requirement``.

        """
        super().__init__(*args, **kwargs)
        self.comment = comment
        assert pip_argument is None or pip_argument  # sanity check that it's not an empty str
        self.pip_argument = pip_argument
        self.strict = self.strict_string in comment.lower()

    def adjust(self, unfreeze: str) -> str:
        """Remove version restrictions unless they are strict.

        >>> _RequirementWithComment("arrow<=1.2.2,>=1.2.0", comment="# anything").adjust("none")
        'arrow<=1.2.2,>=1.2.0'
        >>> _RequirementWithComment("arrow<=1.2.2,>=1.2.0", comment="# strict").adjust("none")
        'arrow<=1.2.2,>=1.2.0  # strict'
        >>> _RequirementWithComment("arrow<=1.2.2,>=1.2.0", comment="# my name").adjust("all")
        'arrow>=1.2.0'
        >>> _RequirementWithComment("arrow>=1.2.0, <=1.2.2", comment="# strict").adjust("all")
        'arrow<=1.2.2,>=1.2.0  # strict'
        >>> _RequirementWithComment("arrow").adjust("all")
        'arrow'
        >>> _RequirementWithComment("arrow<=1.2.2").adjust("all")
        'arrow'
        >>> _RequirementWithComment("arrow!=1.2.1,<=1.2.2").adjust("all")
        'arrow!=1.2.1'
        >>> _RequirementWithComment("arrow>=1.2.0, <=1.2.2", comment="# cool").adjust("major")
        'arrow<2.0,>=1.2.0'
        >>> _RequirementWithComment("arrow>=1.2.0, <=1.2.2", comment="# strict").adjust("major")
        'arrow<=1.2.2,>=1.2.0  # strict'
        >>> _RequirementWithComment("arrow>=1.2.0").adjust("major")
        'arrow>=1.2.0'
        >>> _RequirementWithComment("arrow").adjust("major")
        'arrow'

        Args:
            unfreeze: ``"none"`` keeps the requirement as is, ``"major"`` replaces the upper bound with the next
                major version, ``"all"`` drops upper bounds entirely.

        Returns:
            The requirement rendered as a string; strict requirements get the strict marker appended.

        Raises:
            ValueError: if ``unfreeze`` is not one of the supported values (only checked for non-strict requirements).

        """
        out = str(self)
        if self.strict:
            return f"{out}  {self.strict_string}"
        if unfreeze == "major":
            for spec in self.specifier:
                if spec.operator in self._upper_bound_operators:
                    major = Version(spec.version).major
                    # replace upper bound with major version increased by one
                    return out.replace(f"{spec.operator}{spec.version}", f"<{major + 1}.0")
        elif unfreeze == "all":
            # Rebuild the specifier part without upper bounds instead of deleting the text "<bound>,":
            # that pattern misses an upper bound that is the only or the last specifier in the rendered string.
            rendered_specs = str(self.specifier)
            kept = sorted(str(spec) for spec in self.specifier if spec.operator not in self._upper_bound_operators)
            if rendered_specs and len(kept) != len(self.specifier):
                return out.replace(rendered_specs, ",".join(kept), 1)
        elif unfreeze != "none":
            raise ValueError(f"Unexpected unfreeze: {unfreeze!r} value.")
        return out
def _parse_requirements(lines: Iterable[str]) -> Iterator[_RequirementWithComment]:
    """Adapted from `pkg_resources.parse_requirements` to include comments.

    Blank lines and full-line comments are skipped, a ``--option`` line is attached to the next yielded
    requirement, and ``-r <file>`` references are ignored.

    >>> txt = ['# ignored', '', 'this # is an', '--piparg', 'example', 'foo # strict', 'thing', '-r different/file.txt']
    >>> [r.adjust('none') for r in _parse_requirements(txt)]
    ['this', 'example', 'foo  # strict', 'thing']

    """
    pending_argument = None
    for raw_line in lines:
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        # Split off a trailing comment -- a hash without a leading space may be part of a URL.
        entry, marker, remainder = entry.partition(" #")
        note = marker + remainder
        if entry.startswith("--"):
            # a pip argument applies to the requirement that follows it
            pending_argument = entry
            continue
        if entry.startswith("-r "):
            # linked requirement files are unsupported
            continue
        yield _RequirementWithComment(entry, comment=note, pip_argument=pending_argument)
        pending_argument = None
def load_requirements(path_dir: str, file_name: str = "base.txt", unfreeze: str = "all") -> list[str]:
    """Load requirements from a file and adjust their version bounds.

    >>> path_req = os.path.join(_PROJECT_ROOT, "requirements")
    >>> load_requirements(path_req, "docs.txt", unfreeze="major")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    ['sphinx<...]

    Args:
        path_dir: folder that holds the requirement file.
        file_name: name of the requirement file inside ``path_dir``.
        unfreeze: one of ``"none"``, ``"major"`` or ``"all"``; passed to each requirement's ``adjust``.

    Returns:
        The adjusted requirement strings, or an empty list (with a logged warning) when the file does not exist.

    """
    assert unfreeze in {"none", "major", "all"}
    path = Path(path_dir) / file_name
    if not path.exists():
        logging.warning(f"Folder {path_dir} does not have any base requirements.")
        return []
    # read with an explicit encoding so the result does not depend on the platform's locale
    text = path.read_text(encoding="utf-8").splitlines()
    return [req.adjust(unfreeze) for req in _parse_requirements(text)]
def load_readme_description(path_dir: str, homepage: str, version: str) -> str:
    """Load the README as the package description.

    >>> load_readme_description(_PROJECT_ROOT, "", "")  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    '...PyTorch Lightning is just organized PyTorch...'

    Args:
        path_dir: folder that contains ``README.md``.
        homepage: base URL of the repository, used to build absolute links.
        version: release identifier substituted into links and badges.

    Returns:
        The README text with relative links and badges rewritten for ``version`` and the section between
        the PyPI skip markers replaced by an empty HTML comment.

    """
    # URLs always use forward slashes; ``os.path.join`` would emit backslashes on Windows
    import posixpath

    path_readme = os.path.join(path_dir, "README.md")
    with open(path_readme, encoding="utf-8") as fo:
        text = fo.read()

    # drop images from readme
    text = text.replace(
        "![PT to PL](docs/source-pytorch/_static/images/general/pl_quick_start_full_compressed.gif)", ""
    )

    # https://github.com/Lightning-AI/lightning/raw/master/docs/source/_static/images/lightning_module/pt_to_pl.png
    github_source_url = posixpath.join(homepage, "raw", version)
    # replace relative repository path to absolute link to the release
    #  do not replace all "docs" as in the readme we refer some other sources with particular path to docs
    # NOTE(review): the target folder is `source-app` while the source is `source-pytorch` -- confirm it is intended
    text = text.replace(
        "docs/source-pytorch/_static/", f"{posixpath.join(github_source_url, 'docs/source-app/_static/')}"
    )

    # readthedocs badge
    text = text.replace("badge/?version=stable", f"badge/?version={version}")
    text = text.replace("pytorch-lightning.readthedocs.io/en/stable/", f"pytorch-lightning.readthedocs.io/en/{version}")
    # codecov badge
    text = text.replace("/branch/master/graph/badge.svg", f"/release/{version}/graph/badge.svg")
    # github actions badge
    text = text.replace("badge.svg?branch=master&event=push", f"badge.svg?tag={version}")
    # azure pipelines badge
    text = text.replace("?branchName=master", f"?branchName=refs%2Ftags%2F{version}")

    skip_begin = r"<!-- following section will be skipped from PyPI description -->"
    skip_end = r"<!-- end skipping PyPI description -->"
    # todo: wrap content as commented description
    return re.sub(rf"{skip_begin}.+?{skip_end}", "<!--  -->", text, flags=re.IGNORECASE | re.DOTALL)

    # # https://github.com/Borda/pytorch-lightning/releases/download/1.1.0a6/codecov_badge.png
    # github_release_url = os.path.join(homepage, "releases", "download", version)
    # # download badge and replace url with local file
    # text = _parse_for_badge(text, github_release_url)
def distribute_version(src_folder: str, ver_file: str = "version.info") -> None:
    """Copy the global version file into every package folder.

    A package folder is any direct sub-folder of ``src_folder`` that contains ``__version__.py``.
    An existing copy of ``ver_file`` in such a folder is removed before the new one is written.

    """
    template = os.path.join(src_folder, ver_file)
    marker_pattern = os.path.join(src_folder, "*", "__version__.py")
    for marker in glob.glob(marker_pattern):
        package_dir = os.path.dirname(marker)
        destination = os.path.join(package_dir, ver_file)
        print("Distributing the version to", destination)
        if os.path.isfile(destination):
            os.remove(destination)
        shutil.copy2(template, destination)
def _load_aggregate_requirements(req_dir: str = "requirements", freeze_requirements: bool = False) -> None:
"""Load all base requirements from all particular packages and prune duplicates.
>>> _load_aggregate_requirements(os.path.join(_PROJECT_ROOT, "requirements"))
"""
requires = [
2023-01-12 16:25:29 +00:00
load_requirements(d, unfreeze="none" if freeze_requirements else "major")
for d in glob.glob(os.path.join(req_dir, "*"))
Lightning Dataset (including optimized dataloading of s3 buckets) (#17743) * Lightning DataLoader * lightning dataloader * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * init * example * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * env var * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/lightning/pytorch/utilities/data/__init__.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * remove unused functions * extra reqs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update src/lightning/pytorch/utilities/data/fileio.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * imports work now! 
yay * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * imports * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * missing import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * error handling * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update creds for local use case * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeowners * recursive get index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * clean up get index * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update imagenet example * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstrings * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * example cleanup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * changelog * reqs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * codeowners * requirements * expose LightningDataset too * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * expost LightningDataset at top 
level * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused private methods from init * remove private imports * upper bound on extra requirements * review comments * loosen req * deps * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * test updating fabric base req * remove version pin on s3fs to test * recover missing function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * tests * update * random * torchdata >= 0.3.0 * update torchdata version * remove torchdata version to test * try rem torch version pin * req * update bucket in test * req * skips * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * import * update structure to lightning.data * base.txt for data reqs * fix imports * rename to LightningS3Dataset * new workflow * dont need to test warnings * reqs * req * revert data folder in pytorch * test import * tests * req * req * req * torch version * req * req * open dep * reformatted * pin strict * pin strict extra * req * modify workflow, no cache * try * patch * import * fix * dataset test * update getattr * pin everything to test * remove torch preinstall from workflow * workflow * req * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow * workflow * req * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow * print * skip test for now * update path join * revert app dep version bump * Update .github/workflows/ci-tests-data.yml Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> * workflow updates * app base req * req * window test failure * add data req to assistant * try * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci * add missing comma * updates * update * typo * requirements * try widening req * older torch version * update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * update * update * update * cleanup tests * typo again * update * remove unnecessary line * Update .github/CODEOWNERS * Discard changes to requirements/pytorch/base.txt * Discard changes to requirements/fabric/base.txt * Discard changes to requirements/app/base.txt * requirements * requirements * one line * app workflow pick only app reqs * rename package * undo * don't use cache * examples CI * pytorch and fabric CI * try remove cache * Apply suggestions from code review * jirka playing * jirka playing * jirka playing * blah * flatten LightningDataset * cleans up dataset class * jirka playing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * jirka playing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * extra * fix dataset test * update checkgroups * Luca's review comments * val error fix * unskip test * min * fix precommit warning * cpu * docstrings * req * 2.0.1 * add return type * typing errors * req * return types with quotations * import for type-checking * no botocore in cloudagnostic code * exit args * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * backends typing * remove oldest from data tests * typing * typing * typing * types * type * typing * typing * typing * import fix * Changelog --------- Co-authored-by: Noha Alon <nohaalon@Nohas-MacBook-Air.local> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: 
Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Jirka <jirka.borovec@seznam.cz> Co-authored-by: Justus Schock <justus.schock@posteo.de>
2023-06-13 10:44:41 +00:00
# skip empty folder (git artifacts), and resolving Will's special issue
if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0 and not os.path.basename(d).startswith("_")
]
if not requires:
2023-01-12 16:25:29 +00:00
return
# TODO: add some smarter version aggregation per each package
requires = sorted(set(chain(*requires)))
with open(os.path.join(req_dir, "base.txt"), "w") as fp:
fp.writelines([ln + os.linesep for ln in requires] + [os.linesep])
def _retrieve_files(directory: str, *ext: str) -> list[str]:
all_files = []
for root, _, files in os.walk(directory):
for fname in files:
if not ext or any(os.path.split(fname)[1].lower().endswith(e) for e in ext):
all_files.append(os.path.join(root, fname))
return all_files
def _replace_imports(lines: list[str], mapping: list[tuple[str, str]], lightning_by: str = "") -> list[str]:
"""Replace imports of standalone package to lightning.
>>> lns = [
... '"lightning_app"',
... "lightning_app",
... "lightning_app/",
... "delete_cloud_lightning_apps",
... "from lightning_app import",
... "lightning_apps = []",
... "lightning_app and pytorch_lightning are ours",
... "def _lightning_app():",
... ":class:`~lightning_app.core.flow.LightningFlow`",
... "http://pytorch_lightning.ai",
... "from lightning import __version__",
... "@lightning.ai"
... ]
>>> mapping = [("lightning_app", "lightning.app"), ("pytorch_lightning", "lightning.pytorch")]
>>> _replace_imports(lns, mapping, lightning_by="lightning_fabric") # doctest: +NORMALIZE_WHITESPACE
['"lightning.app"', \
'lightning.app', \
'lightning_app/', \
'delete_cloud_lightning_apps', \
'from lightning.app import', \
'lightning_apps = []', \
'lightning.app and lightning.pytorch are ours', \
'def _lightning_app():', \
':class:`~lightning.app.core.flow.LightningFlow`', \
'http://pytorch_lightning.ai', \
'from lightning_fabric import __version__', \
'@lightning.ai']
"""
out = lines[:]
for source_import, target_import in mapping:
for i, ln in enumerate(out):
out[i] = re.sub(
rf"([^_/@]|^){source_import}([^_\w/]|$)",
rf"\1{target_import}\2",
ln,
)
if lightning_by: # in addition, replace base package
out[i] = out[i].replace("from lightning import ", f"from {lightning_by} import ")
out[i] = out[i].replace("import lightning ", f"import {lightning_by} ")
return out
def copy_replace_imports(
    source_dir: str,
    source_imports: Sequence[str],
    target_imports: Sequence[str],
    target_dir: Optional[str] = None,
    lightning_by: str = "",
) -> None:
    """Copy package content with import adjustments.

    Every file found below ``source_dir`` is mirrored to the same relative location below ``target_dir``.
    Files ending with ``.png``, ``.jpg`` or ``.ico`` are copied as they are unless the target already exists,
    ``.pyc`` files are skipped, and all remaining files are read as UTF-8 text, passed through
    ``_replace_imports`` and written to the target. Files which cannot be decoded are reported and skipped.

    Args:
        source_dir: folder with the original files
        source_imports: import names to be replaced
        target_imports: replacements, matched by position with ``source_imports``
        target_dir: output folder; when not given, the files in ``source_dir`` are rewritten in place
        lightning_by: forwarded to ``_replace_imports`` as the replacement for the base ``lightning`` package

    """
    print(f"Replacing imports: {locals()}")
    assert len(source_imports) == len(target_imports), (
        "source and target imports must have the same length, "
        f"source: {len(source_imports)}, target: {len(target_imports)}"
    )
    if target_dir is None:
        target_dir = source_dir
    # the pairing does not depend on the processed file, so build it only once
    mapping = list(zip(source_imports, target_imports))

    for fp in _retrieve_files(source_dir):
        # re-root the file below the target folder; a plain `str.replace` would rewrite each occurrence of
        # `source_dir` in the path, which breaks e.g. for "." or for a folder name that repeats deeper in the tree
        fp_new = os.path.join(target_dir, os.path.relpath(fp, source_dir))
        ext = os.path.splitext(fp)[1]
        if ext in (".png", ".jpg", ".ico"):
            os.makedirs(os.path.dirname(fp_new), exist_ok=True)
            if not os.path.isfile(fp_new):
                shutil.copy(fp, fp_new)
            continue
        if ext in (".pyc",):
            continue
        # Try to parse everything else
        with open(fp, encoding="utf-8") as fo:
            try:
                lines = fo.readlines()
            except UnicodeDecodeError:
                # a binary file, skip
                print(f"Skipped replacing imports for {fp}")
                continue
        lines = _replace_imports(lines, mapping, lightning_by=lightning_by)
        os.makedirs(os.path.dirname(fp_new), exist_ok=True)
        with open(fp_new, "w", encoding="utf-8") as fo:
            fo.writelines(lines)
def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> None:
    """Create a mirror package with adjusted imports.

    Args:
        source_dir: folder which contains the package folders
        package_mapping: maps a sub-package name of ``lightning`` to the name of its mirror package

    """
    # the `lightning` key is left out to avoid replacing `lightning` to `lightning.lightning`
    unified_to_mirror = {
        f"lightning.{sub_pkg}": mirror_pkg for sub_pkg, mirror_pkg in package_mapping.items() if sub_pkg != "lightning"
    }
    # pytorch_lightning uses lightning_fabric, so we need to replace all imports for all directories
    all_sources = tuple(unified_to_mirror)
    all_targets = tuple(unified_to_mirror.values())

    # replace imports and copy the code
    for pkg_from, pkg_to in unified_to_mirror.items():
        from_dir = os.path.join(source_dir, pkg_from.replace(".", os.sep))
        to_dir = os.path.join(source_dir, pkg_to.replace(".", os.sep))
        copy_replace_imports(
            source_dir=from_dir,
            source_imports=all_sources,
            target_imports=all_targets,
            target_dir=to_dir,
            lightning_by=pkg_from,
        )
class AssistantCLI:
    """Collection of maintenance commands; all of them are static methods."""

    @staticmethod
    def requirements_prune_pkgs(packages: Sequence[str], req_files: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
        """Remove some packages from given requirement files.

        Args:
            packages: names of the requirements to drop
            req_files: requirement files to process; a single path given as a string is accepted as well

        """
        if isinstance(req_files, str):
            req_files = [req_files]
        for req in req_files:
            AssistantCLI._prune_packages(req, packages)

    @staticmethod
    def _prune_packages(req_file: str, packages: Sequence[str]) -> None:
        """Remove some packages from given requirement files.

        Empty lines and comment lines are preserved. The file is rewritten with ``\\n`` line endings.

        Args:
            req_file: path to an existing requirement file
            packages: names of the requirements to drop

        """
        path = Path(req_file)
        assert path.exists()
        lines = path.read_text(encoding="utf-8").splitlines()
        final = []
        for line in lines:
            ln_ = line.strip()
            if not ln_ or ln_.startswith("#"):
                final.append(line)
                continue
            # NOTE(review): assumes `_parse_requirements` yields objects exposing `.name` - confirm; a line for
            # which nothing is yielded cannot name a pruned package, so it is kept instead of failing on `[0]`
            req = next(iter(_parse_requirements([ln_])), None)
            if req is None or req.name not in packages:
                final.append(line)
        print(final)
        path.write_text("\n".join(final) + "\n", encoding="utf-8")

    @staticmethod
    def _replace_min(fname: str) -> None:
        """Replace every ``>=`` in the given file by ``==`` and write the result back."""
        with open(fname, encoding="utf-8") as fo:
            req = fo.read().replace(">=", "==")
        with open(fname, "w", encoding="utf-8") as fw:
            fw.write(req)

    @staticmethod
    def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
        """Replace the min package version by fixed one.

        Args:
            requirement_fnames: requirement files to process; a single path given as a string is accepted as well

        """
        # same convenience as in `requirements_prune_pkgs`, otherwise a string would be iterated per character
        if isinstance(requirement_fnames, str):
            requirement_fnames = [requirement_fnames]
        for fname in requirement_fnames:
            print(fname)
            AssistantCLI._replace_min(fname)

    @staticmethod
    def copy_replace_imports(
        source_dir: str,
        source_import: str,
        target_import: str,
        target_dir: Optional[str] = None,
        lightning_by: str = "",
    ) -> None:
        """Copy package content with import adjustments.

        Args:
            source_dir: folder with the original files
            source_import: comma separated import names to be replaced
            target_import: comma separated replacements, matched by position with ``source_import``
            target_dir: output folder, forwarded unchanged
            lightning_by: replacement for the base ``lightning`` package, forwarded unchanged

        """
        source_imports = source_import.strip().split(",")
        target_imports = target_import.strip().split(",")
        copy_replace_imports(
            source_dir, source_imports, target_imports, target_dir=target_dir, lightning_by=lightning_by
        )

    @staticmethod
    def pull_docs_files(
        gh_user_repo: str,
        target_dir: str = "docs/source-pytorch/XXX",
        checkout: str = "refs/tags/1.0.0",
        source_dir: str = "docs/source",
        single_page: Optional[str] = None,
        as_orphan: bool = False,
    ) -> None:
        """Pull docs pages from external source and append to local docs.

        Args:
            gh_user_repo: standard GitHub user/repo string
            target_dir: relative location inside the docs folder
            checkout: specific tag or branch to checkout
            source_dir: relative location inside the remote / external repo
            single_page: copy only single page from the remote repo and name it as the repo name
            as_orphan: append orphan statement to the page

        Raises:
            RuntimeError: if downloading the repository archive fails with an HTTP error

        """
        import zipfile

        zip_url = f"https://github.com/{gh_user_repo}/archive/{checkout}.zip"

        with tempfile.TemporaryDirectory() as tmp:
            zip_file = os.path.join(tmp, "repo.zip")
            try:
                urllib.request.urlretrieve(zip_url, zip_file)
            except urllib.error.HTTPError as err:
                # keep the HTTP error attached as the cause, it carries the status code
                raise RuntimeError(f"Requesting file '{zip_url}' does not exist or it is just unavailable.") from err

            with zipfile.ZipFile(zip_file, "r") as zip_ref:
                zip_ref.extractall(tmp)

            zip_dirs = [d for d in glob.glob(os.path.join(tmp, "*")) if os.path.isdir(d)]
            # check that the extracted archive has only repo folder
            assert len(zip_dirs) == 1
            repo_dir = zip_dirs[0]
            docs_root = os.path.join(repo_dir, source_dir)

            if single_page:  # special case for copying single page
                single_page = os.path.join(docs_root, single_page)
                assert os.path.isfile(single_page), f"File '{single_page}' does not exist."
                name = re.sub(r"lightning[-_]?", "", gh_user_repo.split("/")[-1])
                new_rst = os.path.join(_PROJECT_ROOT, target_dir, f"{name}.rst")
                # the target folder is created here as well, the same as for copying all pages below
                os.makedirs(os.path.dirname(new_rst), exist_ok=True)
                AssistantCLI._copy_rst(single_page, new_rst, as_orphan=as_orphan)
                return
            # continue with copying all pages
            ls_pages = glob.glob(os.path.join(docs_root, "*.rst"))
            # NOTE(review): without `recursive=True` the `**` acts as a plain `*`, so only pages exactly one
            # folder deep are added here - confirm whether deeper pages are expected to be pulled too
            ls_pages += glob.glob(os.path.join(docs_root, "**", "*.rst"))
            for rst in ls_pages:
                rel_rst = os.path.relpath(rst, docs_root)
                rel_dir = os.path.dirname(rel_rst)
                os.makedirs(os.path.join(_PROJECT_ROOT, target_dir, rel_dir), exist_ok=True)
                new_rst = os.path.join(_PROJECT_ROOT, target_dir, rel_rst)
                if os.path.isfile(new_rst):
                    logging.warning(f"Page {new_rst} already exists in the local tree so it will be skipped.")
                    continue
                AssistantCLI._copy_rst(rst, new_rst, as_orphan=as_orphan)

    @staticmethod
    def _copy_rst(rst_in: str, rst_out: str, as_orphan: bool = False) -> None:
        """Copy RST page with optional inserting orphan statement.

        Args:
            rst_in: path of the page to read
            rst_out: path of the page to write
            as_orphan: prepend ``:orphan:`` unless the page already contains it

        """
        with open(rst_in, encoding="utf-8") as fopen:
            page = fopen.read()
        if as_orphan and ":orphan:" not in page:
            page = ":orphan:\n\n" + page
        with open(rst_out, "w", encoding="utf-8") as fopen:
            fopen.write(page)

    @staticmethod
    def convert_version2nightly(ver_file: str = "src/version.info") -> None:
        """Load the actual version and convert it to the nightly version.

        Args:
            ver_file: path to the file holding the version; it is overwritten with ``X.Y.Z.devYYYYMMDD``

        Raises:
            ValueError: if the content of the file does not start with an ``X.Y.Z`` version

        """
        from datetime import datetime

        with open(ver_file) as fo:
            version = fo.read().strip()
        # parse X.Y.Z version and prune any suffix
        vers = re.match(r"(\d+)\.(\d+)\.(\d+).*", version)
        if vers is None:
            raise ValueError(f"Version '{version}' loaded from '{ver_file}' does not start with `X.Y.Z`.")
        # create timestamp YYYYMMDD
        timestamp = datetime.now().strftime("%Y%m%d")
        version = f"{'.'.join(vers.groups())}.dev{timestamp}"
        with open(ver_file, "w") as fo:
            # text mode already translates "\n" to the platform line ending, writing `os.linesep` would double it
            fo.write(version + "\n")

    @staticmethod
    def generate_docker_tags(
        release_version: str,
        python_version: str,
        torch_version: str,
        cuda_version: str,
        docker_project: str = "pytorchlightning/pytorch_lightning",
        add_latest: bool = False,
    ) -> None:
        """Generate docker tags for the given versions.

        The tags are prefixed with ``docker_project`` and printed as one comma separated line.

        Args:
            release_version: when not empty, a tag starting with this version is added
            python_version: Python version used in the tag
            torch_version: PyTorch version used in the tag
            cuda_version: CUDA version used in the tag
            docker_project: docker image name placed in front of each tag
            add_latest: whether to add also the plain ``latest`` tag

        """
        tags = [f"latest-py{python_version}-torch{torch_version}-cuda{cuda_version}"]
        if release_version:
            tags += [f"{release_version}-py{python_version}-torch{torch_version}-cuda{cuda_version}"]
        if add_latest:
            tags += ["latest"]

        tags = [f"{docker_project}:{tag}" for tag in tags]
        print(",".join(tags))
if __name__ == "__main__":
    # imported only when executed as a script; `jsonargparse` builds the command-line interface from the class
    from jsonargparse import CLI

    CLI(AssistantCLI, as_positional=False)