Notify the user of ignored requirements (#15799)

This commit is contained in:
Carlos Mocholí 2022-11-24 15:09:25 +01:00 committed by GitHub
parent 3a99a256d3
commit 9e43604df5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 189 additions and 111 deletions

View File

@ -135,7 +135,6 @@ module = [
"lightning_app.utilities.name_generator",
"lightning_app.utilities.network",
"lightning_app.utilities.openapi",
"lightning_app.utilities.packaging.build_config",
"lightning_app.utilities.packaging.cloud_compute",
"lightning_app.utilities.packaging.docker",
"lightning_app.utilities.packaging.lightning_utils",

View File

@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added the CLI command `lightning run model` to launch a `LightningLite` accelerated script ([#15506](https://github.com/Lightning-AI/lightning/pull/15506))
- Show a message when `BuildConfig(requirements=[...])` is passed but a `requirements.txt` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799))
- Show a message when `BuildConfig(dockerfile="...")` is passed but a `Dockerfile` file is already present in the Work ([#15799](https://github.com/Lightning-AI/lightning/pull/15799))
### Changed

View File

@ -1,6 +1,5 @@
import fnmatch
import json
import os
import random
import string
import sys
@ -50,6 +49,7 @@ from lightning_app.core.constants import (
CLOUD_UPLOAD_WARNING,
DEFAULT_NUMBER_OF_EXPOSED_PORTS,
DISABLE_DEPENDENCY_CACHE,
DOT_IGNORE_FILENAME,
ENABLE_APP_COMMENT_COMMAND_EXECUTION,
ENABLE_MULTIPLE_WORKS_IN_DEFAULT_CONTAINER,
ENABLE_MULTIPLE_WORKS_IN_NON_DEFAULT_CONTAINER,
@ -488,12 +488,12 @@ class CloudRuntime(Runtime):
largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25]
largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths)
warning_msg = (
f"Your application folder {root} is more than {CLOUD_UPLOAD_WARNING} MB. "
f"Found {app_folder_size_in_mb} MB \n"
"Here are the largest files: \n"
f"{largest_paths_msg}"
f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. "
f"The total size is {app_folder_size_in_mb} MB\n"
f"Here are the largest files: \n{largest_paths_msg}\n"
"Perhaps you should try running the app in an empty directory."
)
if not os.path.exists(os.path.join(root, ".lightningignore")):
if not (root / DOT_IGNORE_FILENAME).is_file():
warning_msg = (
warning_msg
+ "\nIn order to ignore some files or folder, "

View File

@ -1,5 +1,6 @@
import fnmatch
import os
from functools import partial
from pathlib import Path
from shutil import copy2, copystat, Error
from typing import Callable, List, Set, Union
@ -58,8 +59,10 @@ def _copytree(
_ignore_filename_spell_check(src)
src = Path(src)
dst = Path(dst)
if src.joinpath(DOT_IGNORE_FILENAME).exists():
ignore_fn = _get_ignore_function(src)
ignore_filepath = src / DOT_IGNORE_FILENAME
if ignore_filepath.is_file():
patterns = _read_lightningignore(ignore_filepath)
ignore_fn = partial(_filter_ignored, src, patterns)
# creating new list so we won't modify the original
ignore_functions = [*ignore_functions, ignore_fn]
@ -108,19 +111,22 @@ def _copytree(
return files_copied
def _get_ignore_function(src: Path) -> Callable:
patterns = _read_lightningignore(src / DOT_IGNORE_FILENAME)
def _filter_ignored(src: Path, patterns: Set[str], current_dir: Path, entries: List[Path]) -> List[Path]:
relative_dir = current_dir.relative_to(src)
names = [str(relative_dir / entry.name) for entry in entries]
ignored_names = set()
for pattern in patterns:
ignored_names.update(fnmatch.filter(names, pattern))
return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names]
def filter_ignored(current_dir: Path, entries: List[Path]) -> List[Path]:
relative_dir = current_dir.relative_to(src)
names = [str(relative_dir / entry.name) for entry in entries]
ignored_names = []
for pattern in patterns:
ignored_names.extend(fnmatch.filter(names, pattern))
ignored_names_set = set(ignored_names)
return [entry for entry in entries if str(relative_dir / entry.name) not in ignored_names_set]
return filter_ignored
def _parse_lightningignore(lines: List[str]) -> Set[str]:
"""Creates a set that removes empty lines and comments."""
lines = [ln.strip() for ln in lines]
# removes first `/` character for posix and `\\` for windows
lines = [ln.lstrip("/").lstrip("\\") for ln in lines if ln != "" and not ln.startswith("#")]
# convert to path and converting back to string to sanitize the pattern
return {str(Path(ln)) for ln in lines}
def _read_lightningignore(path: Path) -> Set[str]:
@ -137,14 +143,8 @@ def _read_lightningignore(path: Path) -> Set[str]:
Set[str]
Set of unique lines.
"""
raw_lines = [ln.strip() for ln in path.open().readlines()]
# creates a set that removes empty lines and comments
lines = {ln for ln in raw_lines if ln != "" and ln is not None and not ln.startswith("#")}
# removes first `/` character for posix and `\\` for windows
# also converting to path and converting back to string to sanitize the pattern
return {str(Path(ln.lstrip("/").lstrip("\\"))) for ln in lines}
raw_lines = path.open().readlines()
return _parse_lightningignore(raw_lines)
def _ignore_filename_spell_check(src: Path):

View File

@ -1,16 +1,16 @@
import inspect
import os
import re
from dataclasses import asdict, dataclass
from types import FrameType
from typing import cast, List, Optional, TYPE_CHECKING, Union
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing_extensions import Self
import lightning_app as L
from lightning_app.utilities.app_helpers import Logger
from lightning_app.utilities.packaging.cloud_compute import CloudCompute
if TYPE_CHECKING:
from lightning_app import LightningWork
logger = Logger(__name__)
@ -19,11 +19,10 @@ def load_requirements(
) -> List[str]:
"""Load requirements from a file.
.. code-block:: python
path_req = os.path.join(_PROJECT_ROOT, "requirements")
requirements = load_requirements(path_req)
print(requirements) # ['numpy...', 'torch...', ...]
>>> from lightning_app import _PROJECT_ROOT
>>> path_req = os.path.join(_PROJECT_ROOT, "requirements")
>>> load_requirements(path_req, "docs.txt") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE +SKIP
['sphinx>=4.0', ...] # TODO: remove SKIP, fails on python 3.7
"""
path = os.path.join(path_dir, file_name)
if not os.path.isfile(path):
@ -49,12 +48,19 @@ def load_requirements(
return reqs
@dataclass
class _Dockerfile:
path: str
data: List[str]
@dataclass
class BuildConfig:
"""The Build Configuration describes how the environment a LightningWork runs in should be set up.
Arguments:
requirements: List of requirements or paths to requirement files.
requirements: List of requirements or list of paths to requirement files. If not passed, they will be
automatically extracted from a `requirements.txt` if it exists.
dockerfile: The path to a dockerfile to be used to build your container.
You need to add those lines to ensure your container works in the cloud.
@ -64,20 +70,19 @@ class BuildConfig:
WORKDIR /gridai/project
COPY . .
Learn more by checking out:
https://docs.grid.ai/features/runs/running-experiments-with-a-dockerfile
image: The base image that the work runs on. This should be a publicly accessible image from a registry that
doesn't enforce rate limits (such as DockerHub) to pull this image, otherwise your application will not
start.
"""
requirements: Optional[Union[str, List[str]]] = None
dockerfile: Optional[str] = None
requirements: List[str] = field(default_factory=list)
dockerfile: Optional[Union[str, Path, _Dockerfile]] = None
image: Optional[str] = None
def __post_init__(self):
self._call_dir = os.path.dirname(cast(FrameType, inspect.currentframe()).f_back.f_back.f_code.co_filename)
def __post_init__(self) -> None:
current_frame = inspect.currentframe()
co_filename = current_frame.f_back.f_back.f_code.co_filename # type: ignore[union-attr]
self._call_dir = os.path.dirname(co_filename)
self._prepare_requirements()
self._prepare_dockerfile()
@ -101,76 +106,90 @@ class BuildConfig:
"""
return []
def on_work_init(self, work, cloud_compute: Optional["CloudCompute"] = None):
def on_work_init(self, work: "L.LightningWork", cloud_compute: Optional["CloudCompute"] = None) -> None:
"""Override with your own logic to load the requirements or dockerfile."""
try:
self.requirements = sorted(self.requirements or self._find_requirements(work) or [])
self.dockerfile = self.dockerfile or self._find_dockerfile(work)
except TypeError:
logger.debug("The provided work couldn't be found.")
found_requirements = self._find_requirements(work)
if self.requirements:
if found_requirements and self.requirements != found_requirements:
# notify the user of this silent behaviour
logger.info(
f"A 'requirements.txt' exists with {found_requirements} but {self.requirements} was passed to"
f" the `{type(self).__name__}` in {work.name!r}. The `requirements.txt` file will be ignored."
)
else:
self.requirements = found_requirements
self._prepare_requirements()
def _find_requirements(self, work: "LightningWork") -> List[str]:
found_dockerfile = self._find_dockerfile(work)
if self.dockerfile:
if found_dockerfile and self.dockerfile != found_dockerfile:
# notify the user of this silent behaviour
logger.info(
f"A Dockerfile exists at {found_dockerfile!r} but {self.dockerfile!r} was passed to"
f" the `{type(self).__name__}` in {work.name!r}. {found_dockerfile!r}` will be ignored."
)
else:
self.dockerfile = found_dockerfile
self._prepare_dockerfile()
def _find_requirements(self, work: "L.LightningWork", filename: str = "requirements.txt") -> List[str]:
# 1. Get work file
file = inspect.getfile(work.__class__)
# 2. Try to find a requirement file associated the file.
dirname = os.path.dirname(file) or "."
requirement_files = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "requirements.txt"]
if not requirement_files:
file = _get_work_file(work)
if file is None:
return []
dirname, basename = os.path.dirname(requirement_files[0]), os.path.basename(requirement_files[0])
# 2. Try to find a requirement file associated the file.
dirname = os.path.dirname(file)
try:
requirements = load_requirements(dirname, basename)
requirements = load_requirements(dirname, filename)
except NotADirectoryError:
requirements = []
return []
return [r for r in requirements if r != "lightning"]
def _find_dockerfile(self, work: "LightningWork") -> List[str]:
def _find_dockerfile(self, work: "L.LightningWork", filename: str = "Dockerfile") -> Optional[str]:
# 1. Get work file
file = inspect.getfile(work.__class__)
# 2. Check for Dockerfile.
dirname = os.path.dirname(file) or "."
dockerfiles = [os.path.join(dirname, f) for f in os.listdir(dirname) if f == "Dockerfile"]
if not dockerfiles:
return []
# 3. Read the dockerfile
with open(dockerfiles[0]) as f:
dockerfile = list(f.readlines())
return dockerfile
def _prepare_requirements(self) -> Optional[Union[str, List[str]]]:
if not self.requirements:
file = _get_work_file(work)
if file is None:
return None
# 2. Check for Dockerfile.
dirname = os.path.dirname(file)
dockerfile = os.path.join(dirname, filename)
if os.path.isfile(dockerfile):
return dockerfile
def _prepare_requirements(self) -> None:
requirements = []
for req in self.requirements:
# 1. Check for relative path
path = os.path.join(self._call_dir, req)
if os.path.exists(path):
if os.path.isfile(path):
try:
requirements.extend(
load_requirements(os.path.dirname(path), os.path.basename(path)),
)
new_requirements = load_requirements(self._call_dir, req)
except NotADirectoryError:
pass
continue
requirements.extend(new_requirements)
else:
requirements.append(req)
self.requirements = requirements
def _prepare_dockerfile(self):
if self.dockerfile:
dockerfile_path = os.path.join(self._call_dir, self.dockerfile)
if os.path.exists(dockerfile_path):
with open(dockerfile_path) as f:
self.dockerfile = list(f.readlines())
def _prepare_dockerfile(self) -> None:
if isinstance(self.dockerfile, (str, Path)):
path = os.path.join(self._call_dir, self.dockerfile)
if os.path.exists(path):
with open(path) as f:
self.dockerfile = _Dockerfile(path, f.readlines())
def to_dict(self):
def to_dict(self) -> Dict:
return {"__build_config__": asdict(self)}
@classmethod
def from_dict(cls, d):
def from_dict(cls, d: Dict) -> Self: # type: ignore[valid-type]
return cls(**d["__build_config__"])
def _get_work_file(work: "L.LightningWork") -> Optional[str]:
cls = work.__class__
try:
return inspect.getfile(cls)
except TypeError:
logger.debug(f"The {cls.__name__} file couldn't be found.")
return None

View File

@ -11,11 +11,9 @@ from tests_app import _PROJECT_ROOT
from lightning_app import LightningApp
from lightning_app.cli.lightning_cli import _run_app, run_app
from lightning_app.runners.runtime_type import RuntimeType
from lightning_app.testing.helpers import _RunIf
from lightning_app.utilities.app_helpers import convert_print_to_logger_info
@_RunIf(skip_linux=True)
@mock.patch("click.launch")
@pytest.mark.parametrize("open_ui", (True, False))
def test_lightning_run_app(lauch_mock: mock.MagicMock, open_ui, caplog, monkeypatch):
@ -51,7 +49,7 @@ def test_lightning_run_app(lauch_mock: mock.MagicMock, open_ui, caplog, monkeypa
else:
lauch_mock.assert_not_called()
assert result.exit_code == 0
assert len(caplog.messages) == 2
assert len(caplog.messages) == 4
assert bool(int(caplog.messages[0])) is open_ui

View File

@ -73,3 +73,30 @@ def another_tmpdir(tmp_path: Path) -> py.path.local:
random_dir = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
tmp_path = os.path.join(tmp_path, random_dir)
return py.path.local(tmp_path)
@pytest.fixture
def caplog(caplog):
"""Workaround for https://github.com/pytest-dev/pytest/issues/3697.
Setting ``filterwarnings`` with pytest breaks ``caplog`` when ``not logger.propagate``.
"""
import logging
root_logger = logging.getLogger()
root_propagate = root_logger.propagate
root_logger.propagate = True
propagation_dict = {
name: logging.getLogger(name).propagate
for name in logging.root.manager.loggerDict
if name.startswith("lightning_app")
}
for name in propagation_dict.keys():
logging.getLogger(name).propagate = True
yield caplog
root_logger.propagate = root_propagate
for name, propagate in propagation_dict.items():
logging.getLogger(name).propagate = propagate

View File

@ -974,11 +974,16 @@ class NonUpdatedLightningTestApp(LightningTestApp):
def test_non_updated_flow(caplog):
"""This tests validate the app can run 3 times and call the flow only once."""
"""Validate that the app can run 3 times and calls the flow only once."""
app = NonUpdatedLightningTestApp(FlowUpdated())
runtime = MultiProcessRuntime(app, start_server=False)
with caplog.at_level(logging.INFO):
app = NonUpdatedLightningTestApp(FlowUpdated())
MultiProcessRuntime(app, start_server=False).dispatch()
assert caplog.messages == ["Hello World"]
runtime.dispatch()
assert caplog.messages == [
"Hello World",
"Your Lightning App is being stopped. This won't take long.",
"Your Lightning App has been stopped successfully!",
]
assert app.counter == 3

View File

@ -1198,13 +1198,17 @@ def test_check_uploaded_folder(monkeypatch, tmpdir, caplog):
assert caplog.messages == []
mock = MagicMock()
mock.st_mode = 33188
mock.st_size = 5 * 1000 * 1000
repo.files = [str(Path("./a.png"))]
monkeypatch.setattr(Path, "stat", MagicMock(return_value=mock))
path = Path(".")
with caplog.at_level(logging.WARN):
backend._check_uploaded_folder(Path("."), repo)
assert caplog.messages[0].startswith("Your application folder . is more than 2 MB. Found 5.0 MB")
backend._check_uploaded_folder(path, repo)
assert caplog.messages[0].startswith(
f"Your application folder '{path.absolute()}' is more than 2 MB. The total size is 5.0 MB"
)
@mock.patch("lightning_app.core.queues.QueuingSystem", MagicMock())

View File

@ -1,5 +1,7 @@
import logging
import os
import sys
from unittest.mock import Mock
from tests_app import _TESTS_ROOT
@ -48,14 +50,14 @@ def test_build_config_dockerfile_provided():
spec = BuildConfig(dockerfile="./projects/Dockerfile.cpu")
assert not spec.requirements
# ugly hack due to replacing `pytorch_lightning string
assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile[0]
assert "pytorchlightning/pytorch_" + "lightning" in spec.dockerfile.data[0]
class DockerfileLightningTestApp(LightningTestApp):
def on_after_run_once(self):
print(self.root.work.local_build_config.dockerfile)
# ugly hack due to replacing `pytorch_lightning string
assert "pytorchlightning/pytorch_" + "lightning" in self.root.work.local_build_config.dockerfile[0]
assert "pytorchlightning/pytorch_" + "lightning" in self.root.work.local_build_config.dockerfile.data[0]
return super().on_after_run_once()
@ -79,3 +81,25 @@ def test_build_config_requirements():
command_line = [os.path.join(_TESTS_ROOT, "utilities/packaging/projects/req/app.py")]
application_testing(RequirementsLightningTestApp, command_line + EXTRAS_ARGS)
sys.path = sys.path[:-1]
def test_build_config_requirements_warns(monkeypatch, caplog):
requirements = ["foo", "bar"]
bc = BuildConfig(requirements=requirements)
monkeypatch.setattr(bc, "_find_requirements", lambda *_, **__: ["baz"])
work = Mock()
with caplog.at_level(logging.INFO):
bc.on_work_init(work)
assert "requirements.txt' exists with ['baz'] but ['foo', 'bar']" in caplog.text
assert bc.requirements == requirements # they are not merged or replaced
def test_build_config_dockerfile_warns(monkeypatch, caplog):
dockerfile = "foo"
bc = BuildConfig(dockerfile=dockerfile)
monkeypatch.setattr(bc, "_find_dockerfile", lambda *_, **__: "bar")
work = Mock()
with caplog.at_level(logging.INFO):
bc.on_work_init(work)
assert "exists at 'bar' but 'foo' was passed" in caplog.text
assert bc.dockerfile == dockerfile # they are not merged or replaced

View File

@ -37,8 +37,8 @@ def test_extract_metadata_from_component():
"cls_name": "WorkA",
"module": "__main__",
"docstring": "WorkA.",
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_compute": {
"type": "__cloud_compute__",
"name": "default",
@ -60,8 +60,8 @@ def test_extract_metadata_from_component():
"cls_name": "WorkA",
"module": "__main__",
"docstring": "WorkA.",
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_compute": {
"type": "__cloud_compute__",
"name": "default",
@ -78,8 +78,8 @@ def test_extract_metadata_from_component():
"cls_name": "WorkB",
"module": "__main__",
"docstring": "WorkB.",
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": [], "image": None}},
"local_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_build_config": {"__build_config__": {"requirements": [], "dockerfile": None, "image": None}},
"cloud_compute": {
"type": "__cloud_compute__",
"name": "gpu",