Revert "Update Lightning App docs (#13537)" (#13655)

This reverts commit cd31ba3f87.
This commit is contained in:
Carlos Mocholí 2022-07-14 15:45:46 +02:00 committed by GitHub
parent cd31ba3f87
commit eec862ef2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
214 changed files with 2557 additions and 7756 deletions

View File

@ -1,34 +0,0 @@
import os
from lightning_cloud.openapi.rest import ApiException
from lightning_app.utilities.cloud import _get_project
from lightning_app.utilities.network import LightningClient
client = LightningClient()
try:
PR_NUMBER = int(os.getenv("PR_NUMBER", None))
except (TypeError, ValueError):
# Failed when the PR is running master or 'PR_NUMBER' isn't defined.
PR_NUMBER = ""
APP_NAME = os.getenv("TEST_APP_NAME", "")
project = _get_project(client)
list_lightningapps = client.lightningapp_instance_service_list_lightningapp_instances(project.project_id)
print([lightningapp.name for lightningapp in list_lightningapps.lightningapps])
for lightningapp in list_lightningapps.lightningapps:
if PR_NUMBER and APP_NAME and not lightningapp.name.startswith(f"test-{PR_NUMBER}-{APP_NAME}-"):
continue
print(f"Deleting {lightningapp.name}")
try:
res = client.lightningapp_instance_service_delete_lightningapp_instance(
project_id=project.project_id,
id=lightningapp.id,
)
assert res == {}
except ApiException as e:
print(f"Failed to delete {lightningapp.name}. Exception {e}")

View File

@ -1,5 +0,0 @@
import lightning_app
from lightning_app.utilities.packaging.lightning_utils import download_frontend
if __name__ == "__main__":
download_frontend(lightning_app._PROJECT_ROOT)

View File

@ -12,7 +12,6 @@ trigger:
- "master"
- "release/*"
- "refs/tags/*"
pr:
- "master"
- "release/*"
@ -38,19 +37,6 @@ jobs:
steps:
- bash: |
CHANGED_FILES=$(git diff --name-status master | awk '{print $2}')
echo $CHANGED_FILES > changed_files.txt
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
echo $MATCHES
if [ -z "$MATCHES" ]; then
echo "Skip"
else
echo "Continue"
fi
displayName: Decide if skipping should be done.
- bash: |
lspci | egrep 'VGA|3D'
whereis nvidia

View File

@ -9,7 +9,6 @@ trigger:
- "master"
- "release/*"
- "refs/tags/*"
pr:
- "master"
- "release/*"

View File

@ -7,7 +7,6 @@ trigger:
- master
- release/*
- refs/tags/*
pr:
- master
- release/*

View File

@ -14,7 +14,6 @@ trigger:
- "master"
- "release/*"
- "refs/tags/*"
pr:
- "master"
- "release/*"

View File

@ -13,9 +13,12 @@ jobs:
- name: Get changed files using defaults
id: changed-files
uses: tj-actions/changed-files@v23
- name: List all added files
run: |
for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do
echo "$file"
done
- name: Block edits in docs/source-app
if: contains(steps.changed-files.outputs.all_changed_and_modified_files, 'docs/source-app')
run: exit 1

View File

@ -6,6 +6,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
branches: [master, "release/*"]
pull_request:
branches: [master, "release/*"]
paths-ignore:
- "src/lightning_app/**" # todo: implement job skip
- "tests/tests_app/**" # todo: implement job skip
- "tests/tests_app_examples/**" # todo: implement job skip
- "examples/app_*" # todo: implement job skip
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
@ -29,34 +34,13 @@ jobs:
- {python-version: "3.9", pytorch-version: "1.11"}
timeout-minutes: 30
steps:
- name: Workaround for https://github.com/actions/checkout/issues/760
run: git config --global --add safe.directory /__w/lightning/lightning
- uses: actions/checkout@v2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v23.1
- name: Decide if the test should be skipped
id: skip
run: |
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
echo $MATCHES
if [ -z "$MATCHES" ]; then
echo "Skip"
echo "::set-output name=continue::'0'"
else
echo "Continue"
echo "::set-output name=continue::'1'"
fi
- name: Update base dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
@ -66,12 +50,10 @@ jobs:
pip install -e .[test]
- name: DocTests
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: ./src
run: pytest pytorch_lightning --cov=pytorch_lightning
- name: Update all dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
env:
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
@ -90,11 +72,9 @@ jobs:
python requirements/pytorch/check-avail-extras.py
- name: Pull legacy checkpoints
if: ${{ (steps.skip.outputs.continue == '1') }}
run: bash .actions/pull_legacy_checkpoints.sh
- name: Testing PyTorch
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: tests/tests_pytorch
run: coverage run --source pytorch_lightning -m pytest -v --timeout 150 --durations=50 --junitxml=results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
@ -106,7 +86,7 @@ jobs:
if: failure()
- name: Statistics
if: ${{ success() && (steps.skip.outputs.continue == '1') }}
if: success()
working-directory: tests/tests_pytorch
run: |
coverage report
@ -114,7 +94,7 @@ jobs:
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
if: ${{ success() && (steps.skip.outputs.continue == '1') }}
if: always()
# see: https://github.com/actions/toolkit/issues/399
continue-on-error: true
with:

View File

@ -1,4 +1,4 @@
name: Test PyTorch full
name: Test PyTorch full
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on: # Trigger the workflow on push or pull request, but only for the master branch
@ -7,6 +7,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
pull_request:
branches: [master, "release/*"]
types: [opened, reopened, ready_for_review, synchronize]
paths-ignore:
- "src/lightning_app/**" # todo: implement job skip
- "tests/tests_app/**" # todo: implement job skip
- "tests/tests_app_examples/**" # todo: implement job skip
- "examples/app_*" # todo: implement job skip
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
@ -32,67 +37,40 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v23.1
- name: Decide if the test should be skipped
id: skip
run: |
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
echo $MATCHES
if [ -z "$MATCHES" ]; then
echo "Skip"
echo "::set-output name=continue::'0'"
else
echo "Continue"
echo "::set-output name=continue::'1'"
fi
- name: Set up Python ${{ matrix.python-version }}
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Reset caching
if: ${{ (steps.skip.outputs.continue == '1') }}
run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV
- name: basic setup
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
pip --version
pip install -q fire
# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
- name: Setup macOS
if: ${{ (runner.os == 'macOS') && (steps.skip.outputs.continue == '1') }}
if: runner.os == 'macOS'
run: |
brew install openmpi libuv # Horovod on macOS requires OpenMPI, Gloo not currently supported
- name: Setup Windows
if: ${{ (runner.os == 'windows') && (steps.skip.outputs.continue == '1') }}
if: runner.os == 'windows'
run: |
python .actions/assistant.py requirements_prune_pkgs horovod
- name: Set min. dependencies
if: ${{ (matrix.requires == 'oldest') && (steps.skip.outputs.continue == '1') }}
if: matrix.requires == 'oldest'
run: |
python .actions/assistant.py replace_oldest_ver
# Note: This uses an internal pip API and may not always work
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
- name: Get pip cache dir
if: ${{ (steps.skip.outputs.continue == '1') }}
id: pip-cache
run: echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
@ -101,11 +79,9 @@ jobs:
${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-
- name: Pull legacy checkpoints
if: ${{ (steps.skip.outputs.continue == '1') }}
run: bash .actions/pull_legacy_checkpoints.sh
- name: Install dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
@ -117,12 +93,10 @@ jobs:
shell: bash
- name: DocTests
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: ./src
run: pytest pytorch_lightning --cov=pytorch_lightning
- name: Install extra dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
# adjust versions according installed Torch version
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt
@ -131,7 +105,7 @@ jobs:
shell: bash
- name: Reinstall Horovod if necessary
if: ${{ (runner.os != 'windows') && (steps.skip.outputs.continue == '1') }}
if: runner.os != 'windows'
env:
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
@ -148,43 +122,38 @@ jobs:
shell: bash
- name: Cache datasets
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/cache@v2
with:
path: Datasets
key: pl-dataset
- name: Sanity check
if: ${{ (steps.skip.outputs.continue == '1') }}
run: python requirements/pytorch/check-avail-extras.py
- name: Testing PyTorch
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: tests/tests_pytorch
# NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
run: coverage run --source pytorch_lightning -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
- name: Upload pytest results
if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }}
uses: actions/upload-artifact@v3
with:
name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}
path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
if: failure()
- name: Prepare Examples
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
# adjust versions according installed Torch version
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt
pip install -r requirements/pytorch/examples.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
- name: Run Examples
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: ./examples
run: python -m pytest test_pl_examples.py -v --durations=10
- name: Statistics
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
if: success()
working-directory: tests/tests_pytorch
run: |
coverage report
@ -192,7 +161,7 @@ jobs:
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
if: ${{ (always()) && (steps.skip.outputs.continue == '1') }}
if: always()
# see: https://github.com/actions/toolkit/issues/399
continue-on-error: true
with:

View File

@ -7,6 +7,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
pull_request:
branches: [master, "release/*"]
types: [opened, reopened, ready_for_review, synchronize]
paths-ignore:
- "src/lightning_app/**" # todo: implement job skip
- "tests/tests_app/**" # todo: implement job skip
- "tests/tests_app_examples/**" # todo: implement job skip
- "examples/app_*" # todo: implement job skip
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
@ -27,43 +32,19 @@ jobs:
timeout-minutes: 20
steps:
- uses: actions/checkout@v2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v23.1
- name: Decide if the test should be skipped
id: skip
run: |
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
echo $MATCHES
if [ -z "$MATCHES" ]; then
echo "Skip"
echo "::set-output name=continue::'0'"
else
echo "Continue"
echo "::set-output name=continue::'1'"
fi
- uses: actions/setup-python@v2
if: ${{ (steps.skip.outputs.continue == '1') }}
with:
python-version: ${{ matrix.python-version }}
- name: Reset caching
if: ${{ (steps.skip.outputs.continue == '1') }}
run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV
- name: Get pip cache
if: ${{ (steps.skip.outputs.continue == '1') }}
id: pip-cache
run: |
python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)"
- name: Cache pip
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
@ -72,7 +53,6 @@ jobs:
${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-
- name: Install dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
@ -84,21 +64,20 @@ jobs:
shell: bash
- name: Testing PyTorch
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: tests/tests_pytorch
run: coverage run --source pytorch_lightning -m pytest -v --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}.xml
env:
PL_RUN_SLOW_TESTS: 1
- name: Upload pytest test results
if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }}
uses: actions/upload-artifact@v3
with:
name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}
path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}.xml
if: failure()
- name: Statistics
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
if: success()
working-directory: tests/tests_pytorch
run: |
coverage report
@ -106,7 +85,7 @@ jobs:
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
if: success()
# see: https://github.com/actions/toolkit/issues/399
continue-on-error: true
with:

View File

@ -14,10 +14,6 @@ concurrency:
jobs:
doctest:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
pkg: ["app", "pytorch"]
steps:
- uses: actions/checkout@v2
with:
@ -35,12 +31,13 @@ jobs:
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-${{ hashFiles('requirements/${{ matrix.pkg }}/*.txt') }}
key: ${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-${{ hashFiles('requirements/pytorch/*.txt') }}
restore-keys: |
${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-
- name: Install dependencies
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
run: |
sudo apt-get update
@ -48,27 +45,22 @@ jobs:
pip --version
pip install -q fire
# python -m pip install --upgrade --user pip
pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -r requirements/${{ matrix.pkg }}/devel.txt
pip install -e . --quiet -r requirements/pytorch/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -r requirements/pytorch/devel.txt
pip list
shell: bash
- name: Test Documentation
env:
SPHINX_MOCK_REQUIREMENTS: 0
working-directory: ./docs/source-${{ matrix.pkg }}
working-directory: ./docs
run: |
# ToDo: proper parametrize
# First run the same pipeline as Read-The-Docs
make doctest
make coverage
make-docs:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
pkg: ["app", "pytorch"]
steps:
- uses: actions/checkout@v2
with:
@ -84,27 +76,27 @@ jobs:
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-docs-make-pip-${{ hashFiles('requirements/${{ matrix.pkg }}/*.txt') }}
key: ${{ runner.os }}-docs-make-pip-${{ hashFiles('requirements/pytorch/base.txt') }}
restore-keys: |
${{ runner.os }}-docs-make-pip-
- name: Install dependencies
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
run: |
sudo apt-get update
sudo apt-get install -y cmake pandoc
pip --version
pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
pip install -e . --quiet -r requirements/pytorch/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
# install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux
sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures
pip list
shell: bash
- name: Make Documentation
working-directory: ./docs/source-${{ matrix.pkg }}
working-directory: ./docs
run: |
# ToDo: rather use python cmd
# First run the same pipeline as Read-The-Docs
make html --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going"

View File

@ -1,75 +0,0 @@
name: "Deploy Docs"
on:
push:
branches: [master]
jobs:
# https://github.com/marketplace/actions/deploy-to-github-pages
build-docs-deploy:
runs-on: ubuntu-20.04
steps:
- name: Checkout 🛎️
uses: actions/checkout@v2
# If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
with:
persist-credentials: false
- uses: actions/setup-python@v2
with:
python-version: 3.8
- id: 'auth'
name: 'Authenticate to Google Cloud'
uses: 'google-github-actions/auth@v0'
with:
credentials_json: ${{ secrets.GCS_SA_KEY }}
- name: Setup gcloud
uses: 'google-github-actions/setup-gcloud@v0'
with:
project_id: ${{ secrets.GCS_PROJECT }}
# Note: This uses an internal pip API and may not always work
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
- name: Cache pip
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-deploy-docs-pip-${{ hashFiles('requirements/app/*.txt') }}
restore-keys: |
${{ runner.os }}-deploy-docs-pip-
- name: Install dependencies
env:
FREEZE_REQUIREMENTS: 1
run: |
sudo apt-get update
sudo apt-get install -y cmake pandoc
pip --version
pip install -e . --quiet -r requirements/app/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures
pip list
shell: bash
- name: Make Documentation
working-directory: ./docs/source-app
run: |
# First run the same pipeline as Read-The-Docs
make clean
make html --jobs 2
- name: Deploy 🚀
uses: JamesIves/github-pages-deploy-action@4.1.4
with:
token: ${{ secrets.GITHUB_TOKEN }}
branch: gh-pages # The branch the action should deploy to.
folder: docs/build/html # The folder the action should deploy.
clean: true # Automatically remove deleted files from the deploy branch
target-folder: docs # If you'd like to push the contents of the deployment folder into a specific directory
single-commit: true # you'd prefer to have a single commit on the deployment branch instead of full history
if: success()
# Uploading docs to GCS so they can be served on lightning.ai
- name: Upload to GCS 🪣
run: |-
gsutil -m rsync -d -R docs/build/html/ gs://${{ secrets.GCS_BUCKET }}
if: success()

View File

@ -77,7 +77,6 @@ repos:
hooks:
- id: black
name: Format code
exclude: docs/source-app
- repo: https://github.com/asottile/blacken-docs
rev: v1.12.1
@ -85,7 +84,6 @@ repos:
- id: blacken-docs
args: [--line-length=120]
additional_dependencies: [black==21.12b0]
exclude: docs/source-app
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.14
@ -102,4 +100,3 @@ repos:
hooks:
- id: flake8
name: Check PEP8
exclude: docs/source-app

View File

@ -4,8 +4,8 @@
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SOURCEDIR = .
BUILDDIR = ../build
SOURCEDIR = source-pytorch
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:

View File

@ -7,8 +7,8 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=../build
set SOURCEDIR=source-pytorch
set BUILDDIR=build
if "%1" == "" goto help

View File

@ -1,19 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SOURCEDIR = .
BUILDDIR = ../build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -7,3 +7,7 @@
.. autoclass:: {{ name }}
:members:
..
autogenerated from source/_templates/classtemplate.rst
note it does not have :inherited-members:

View File

@ -1,12 +0,0 @@
:orphan:
.. role:: hidden
:class: hidden-section
.. currentmodule:: {{ module }}
{{ name | underline }}
.. autoclass:: {{ name }}
:members:
:noindex:

View File

@ -1,10 +0,0 @@
{% extends "!layout.html" %}
<link rel="canonical" href="{{ theme_canonical_url }}{{ pagename }}.html" />
{% block footer %}
{{ super() }}
<script script type="text/javascript">
var collapsedSections = ['App Building Skills', 'Practical Examples', 'Common Workflows', 'Core API Reference', 'Addons API Reference', 'Glossary'];
</script>
{% endblock %}

View File

@ -1,8 +1,8 @@
{%- set external_urls = {
'github': 'https://github.com/Lightning-AI/lightning',
'github_issues': 'https://github.com/Lightning-AI/lightning/issues',
'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/.github/CONTRIBUTING.md',
'governance': 'https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/governance.rst',
'github': 'https://github.com/PytorchLightning/lightning',
'github_issues': 'https://github.com/PytorchLightning/lightning/issues',
'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md',
'governance': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/governance.md',
'docs': 'https://lightning.rtfd.io/en/latest',
'twitter': 'https://twitter.com/PyTorchLightnin',
'discuss': 'https://pytorch-lightning.slack.com',

View File

@ -1,90 +0,0 @@
:orphan:
##############################
Lightning App - API References
##############################
Core
----
.. currentmodule:: lightning_app.core
.. autosummary::
:toctree: api
:nosignatures:
:template: classtemplate_no_index.rst
LightningApp
LightningFlow
LightningWork
Learn more about :ref:`Lightning Core <core_api>`.
----
Built-in Components
___________________
.. currentmodule:: lightning_app.components
.. autosummary::
:toctree: generated/
:nosignatures:
:template: classtemplate_no_index.rst
~serve.serve.ModelInferenceAPI
~python.popen.PopenPythonScript
~serve.gradio.ServeGradio
~python.tracer.TracerPythonScript
----
Frontend's
__________
.. currentmodule:: lightning_app.frontend
.. autosummary::
:toctree: generated/
:nosignatures:
:template: classtemplate_no_index.rst
~frontend.Frontend
~web.StaticWebFrontend
~stream_lit.StreamlitFrontend
Learn more about :ref:`Frontend's <ui_and_frontends>`.
----
Storage
_______
.. currentmodule:: lightning_app.storage
.. autosummary::
:toctree: generated/
:nosignatures:
:template: classtemplate_no_index.rst
~drive.Drive
~path.Path
~payload.Payload
Learn more about :ref:`Storage <storage>`.
----
Runners
_______
.. currentmodule:: lightning_app.runners
.. autosummary::
:toctree: generated/
:nosignatures:
:template: classtemplate_no_index.rst
~cloud.CloudRuntime
~multiprocess.MultiProcessRuntime
~singleprocess.SingleProcessRuntime

View File

@ -1,7 +1,7 @@
import lightning as L
from lightning_app import LightningWork
class ExampleWork(L.LightningWork):
class ExampleWork(LightningWork):
def run(self, *args, **kwargs):
print(f"I received the following props: args: {args} kwargs: {kwargs}")

View File

@ -1,7 +1,7 @@
import lightning as L
from lightning_app import LightningWork
class ExampleWork(L.LightningWork):
class ExampleWork(LightningWork):
def __init__(self):
super().__init__(cache_calls=False)

View File

@ -1,17 +0,0 @@
import lightning as L
from lightning.app.components.python import TracerPythonScript
class RootFlow(L.LightningFlow):
def __init__(self):
super().__init__()
self.runner = TracerPythonScript(
"train.py",
cloud_compute=L.CloudCompute("gpu"),
)
def run(self):
self.runner.run()
app = L.LightningApp(RootFlow())

View File

@ -1,3 +0,0 @@
torch
torchvision
pytorch_lightning

View File

@ -1,46 +0,0 @@
import os
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchvision import transforms as T
from torchvision.datasets import MNIST
import pytorch_lightning as pl
class LitAutoEncoder(pl.LightningModule):
def __init__(self):
super().__init__()
self.encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 3))
self.decoder = nn.Sequential(nn.Linear(3, 128), nn.ReLU(), nn.Linear(128, 28 * 28))
def forward(self, x):
# in lightning,
# forward defines the prediction/inference actions
embedding = self.encoder(x)
return embedding
def training_step(self, batch, batch_idx):
# training_step defines the train loop.
# It is independent of forward
x, y = batch
x = x.view(x.size(0), -1)
z = self.encoder(x)
x_hat = self.decoder(z)
loss = F.mse_loss(x_hat, x)
self.log("train_loss", loss)
return loss
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
return optimizer
dataset = MNIST(os.getcwd(), download=True, transform=T.ToTensor())
train, val = random_split(dataset, [55000, 5000])
autoencoder = LitAutoEncoder()
trainer = pl.Trainer(accelerator="auto")
trainer.fit(autoencoder, DataLoader(train), DataLoader(val))

View File

@ -1,10 +1,10 @@
from docs.quickstart.app_02 import HourLongWork
import lightning as L
from lightning_app import LightningApp, LightningFlow, LightningWork
class RootFlow(L.LightningFlow):
def __init__(self, child_work_1: L.LightningWork, child_work_2: L.LightningWork):
class RootFlow(LightningFlow):
def __init__(self, child_work_1: LightningWork, child_work_2: LightningWork):
super().__init__()
self.child_work_1 = child_work_1
self.child_work_2 = child_work_2
@ -19,4 +19,4 @@ class RootFlow(L.LightningFlow):
print("1 hour later `child_work_2` started!")
app = L.LightningApp(RootFlow(HourLongWork(parallel=True), HourLongWork(parallel=True)))
app = LightningApp(RootFlow(HourLongWork(parallel=True), HourLongWork(parallel=True)))

View File

@ -2,14 +2,14 @@ import flash
from flash.core.data.utils import download_data
from flash.image import ImageClassificationData, ImageClassifier
import lightning as L
from lightning_app import CloudCompute, LightningApp, LightningFlow, LightningWork
from pytorch_lightning.callbacks import ModelCheckpoint
# Step 1: Create a training LightningWork component that gets a backbone as input
# and saves the best model and its score
class ImageClassifierTrainWork(L.LightningWork):
def __init__(self, max_epochs: int, backbone: str, cloud_compute: L.CloudCompute):
class ImageClassifierTrainWork(LightningWork):
def __init__(self, max_epochs: int, backbone: str, cloud_compute: CloudCompute):
# parallel is set to True to run asynchronously
super().__init__(parallel=True, cloud_compute=cloud_compute)
# Number of epochs to run
@ -44,7 +44,7 @@ class ImageClassifierTrainWork(L.LightningWork):
# Step 2: Create a serving LightningWork component that gets a model input and serves it
class ImageClassifierServeWork(L.LightningWork):
class ImageClassifierServeWork(LightningWork):
def run(self, best_model_path: str):
# Load the model from the model path
model = ImageClassifier.load_from_checkpoint(best_model_path)
@ -53,7 +53,7 @@ class ImageClassifierServeWork(L.LightningWork):
# Step 3: Create a root LightningFlow component that gets number of epochs and a path to
# a dataset as inputs, initialize 2 training components and serves the best model
class RootFlow(L.LightningFlow):
class RootFlow(LightningFlow):
def __init__(self, max_epochs: int, data_dir: str):
super().__init__()
self.data_dir = data_dir
@ -89,4 +89,4 @@ class RootFlow(L.LightningFlow):
download_data("https://pl-flash-data.s3.amazonaws.com/hymenoptera_data.zip", "./data")
# Initalize your Lightning app with 5 epochs
app = L.LightningApp(RootFlow(5, "./data/hymenoptera_data"))
app = LightningApp(RootFlow(5, "./data/hymenoptera_data"))

View File

@ -1,19 +1,19 @@
import lightning as L
from lightning.app.utilities.app_helpers import pretty_state
from lightning_app import LightningApp, LightningFlow, LightningWork
from lightning_app.utilities.app_helpers import pretty_state
class Work(L.LightningWork):
class Work(LightningWork):
def __init__(self):
super().__init__(cache_calls=False)
# Attributes are registered automatically in the state.
self.counter = 0
def run(self):
# Incrementing an attribute gets reflected in the `Flow` state.
# Incrementing an attribute gets reflected in the `RootFlow` state.
self.counter += 1
class Flow(L.LightningFlow):
class Flow(LightningFlow):
def __init__(self):
super().__init__()
self.w = Work()
@ -24,4 +24,4 @@ class Flow(L.LightningFlow):
self.w.run()
app = L.LightningApp(Flow())
app = LightningApp(Flow())

View File

@ -1,10 +1,10 @@
from time import sleep
import lightning as L
from lightning_app import LightningApp, LightningFlow, LightningWork
# This work takes an hour to run
class HourLongWork(L.LightningWork):
class HourLongWork(LightningWork):
def __init__(self, parallel: bool = False):
super().__init__(parallel=parallel)
self.progress = 0.0
@ -16,8 +16,8 @@ class HourLongWork(L.LightningWork):
sleep(1)
class RootFlow(L.LightningFlow):
def __init__(self, child_work: L.LightningWork):
class RootFlow(LightningFlow):
def __init__(self, child_work: LightningWork):
super().__init__()
self.child_work = child_work
@ -29,4 +29,4 @@ class RootFlow(L.LightningFlow):
print("1 hour later!")
app = L.LightningApp(RootFlow(HourLongWork()))
app = LightningApp(RootFlow(HourLongWork()))

View File

@ -1,9 +1,9 @@
from time import sleep
import lightning as L
from lightning_app import LightningApp, LightningFlow, LightningWork
class HourLongWork(L.LightningWork):
class HourLongWork(LightningWork):
def __init__(self):
super().__init__(cache_calls=False)
self.progress = 0.0
@ -15,8 +15,8 @@ class HourLongWork(L.LightningWork):
sleep(1)
class RootFlow(L.LightningFlow):
def __init__(self, child_work: L.LightningWork):
class RootFlow(LightningFlow):
def __init__(self, child_work: LightningWork):
super().__init__()
self.child_work = child_work
@ -28,4 +28,4 @@ class RootFlow(L.LightningFlow):
print("1 hour later!")
app = L.LightningApp(RootFlow(HourLongWork()))
app = LightningApp(RootFlow(HourLongWork()))

View File

@ -1,8 +1,8 @@
import lightning as L
from lightning.app.testing.helpers import EmptyFlow, EmptyWork
from lightning_app import LightningApp, LightningFlow
from lightning_app.testing.helpers import EmptyFlow, EmptyWork
class FlowB(L.LightningFlow):
class FlowB(LightningFlow):
def __init__(self):
super().__init__()
self.flow_d = EmptyFlow()
@ -12,7 +12,7 @@ class FlowB(L.LightningFlow):
...
class FlowA(L.LightningFlow):
class FlowA(LightningFlow):
def __init__(self):
super().__init__()
self.flow_b = FlowB()
@ -23,4 +23,4 @@ class FlowA(L.LightningFlow):
...
app = L.LightningApp(FlowA())
app = LightningApp(FlowA())

View File

@ -1,8 +1,8 @@
import lightning as L
from lightning_app import LightningApp, LightningFlow
# Step 1: Subclass LightningFlow component to define the app flow.
class HelloWorld(L.LightningFlow):
class HelloWorld(LightningFlow):
# Step 2: Add the app logic to the LightningFlow run method to
# ``print("Hello World!")`.
@ -13,4 +13,4 @@ class HelloWorld(L.LightningFlow):
# Step 3: Initalize a LightningApp with the LightningFlow you defined (in step 1)
app = L.LightningApp(HelloWorld())
app = LightningApp(HelloWorld())

View File

@ -1,12 +1,12 @@
import os
import lightning as L
from lightning.app.frontend import StaticWebFrontend, StreamlitFrontend
from lightning.app.utilities.state import AppState
from lightning_app import LightningApp, LightningFlow
from lightning_app.frontend import StaticWebFrontend, StreamlitFrontend
from lightning_app.utilities.state import AppState
# Step 1: Define your LightningFlow component with the app UI
class UIStreamLit(L.LightningFlow):
class UIStreamLit(LightningFlow):
def __init__(self):
super().__init__()
self.should_print = False
@ -31,7 +31,7 @@ def render_fn(state: AppState):
# Step 4: Implement a Static Web Frontend. This could be react, vue, etc.
class UIStatic(L.LightningFlow):
class UIStatic(LightningFlow):
# Step 5:
def configure_layout(self):
@ -39,7 +39,7 @@ class UIStatic(L.LightningFlow):
# Step 6: Implement the root flow.
class HelloWorld(L.LightningFlow):
class HelloWorld(LightningFlow):
def __init__(self):
super().__init__()
self.static_ui = UIStatic()
@ -55,4 +55,4 @@ class HelloWorld(L.LightningFlow):
]
app = L.LightningApp(HelloWorld())
app = LightningApp(HelloWorld())

View File

@ -15,28 +15,32 @@ import inspect
import os
import shutil
import sys
from importlib.util import module_from_spec, spec_from_file_location
import pt_lightning_sphinx_theme
import lightning_app
_PATH_HERE = os.path.abspath(os.path.dirname(__file__))
_PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", ".."))
sys.path.insert(0, os.path.abspath(_PATH_ROOT))
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))
# alternative https://stackoverflow.com/a/67692/4521646
spec = spec_from_file_location("lightning_app/__about__.py", os.path.join(_PATH_ROOT, "lightning_app", "__about__.py"))
about = module_from_spec(spec)
spec.loader.exec_module(about)
# -- Project information -----------------------------------------------------
# this name shall match the project name in Github as it is used for linking to code
project = "lightning"
copyright = lightning_app.__copyright__
author = lightning_app.__author__
copyright = about.__copyright__
author = about.__author__
# The short X.Y version
version = lightning_app.__version__
version = about.__version__
# The full version, including alpha/beta/rc tags
release = lightning_app.__version__
release = about.__version__
# Options for the linkcode extension
# ----------------------------------
@ -156,8 +160,8 @@ html_theme_path = [pt_lightning_sphinx_theme.get_html_theme_path()]
# documentation.
html_theme_options = {
"pytorch_project": lightning_app.__homepage__,
"canonical_url": lightning_app.__homepage__,
"pytorch_project": about.__homepage__,
"canonical_url": about.__homepage__,
"collapse_navigation": False,
"display_version": True,
"logo_only": False,
@ -223,7 +227,7 @@ texinfo_documents = [
project + " Documentation",
author,
project,
lightning_app.__docs__,
about.__docs__,
"Miscellaneous",
),
]
@ -277,15 +281,6 @@ for path_ipynb in glob.glob(os.path.join(_PATH_ROOT, "notebooks", "*.ipynb")):
path_ipynb2 = os.path.join(path_nbs, os.path.basename(path_ipynb))
shutil.copy(path_ipynb, path_ipynb2)
# copy all examples to local folder
path_examples = os.path.join(_PATH_HERE, "..", "examples")
if not os.path.isdir(path_examples):
os.mkdir(path_examples)
for path_app_example in glob.glob(os.path.join(_PATH_ROOT, "examples", "app_*")):
path_app_example2 = os.path.join(path_examples, os.path.basename(path_app_example))
if not os.path.isdir(path_app_example2):
shutil.copytree(path_app_example, path_app_example2, dirs_exist_ok=True)
# Ignoring Third-party packages
# https://stackoverflow.com/questions/15889621/sphinx-how-to-exclude-imports-in-automodule
@ -319,7 +314,7 @@ autodoc_mock_imports = MOCK_PACKAGES
def linkcode_resolve(domain, info):
def find_source():
# try to find the file and line number, based on code from numpy:
# https://github.com/numpy/numpy/blob/master/doc/source-app/conf.py#L286
# https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286
obj = sys.modules[info["module"]]
for part in info["fullname"].split("."):
obj = getattr(obj, part)
@ -386,6 +381,6 @@ doctest_test_doctest_blocks = ""
doctest_global_setup = """
import importlib
import os
import lightning as L
import lightning_app
"""
coverage_skip_undoc_in_source = True

View File

@ -1,40 +0,0 @@
:orphan:
.. _core_api:
###############################
Learn more about Lightning Core
###############################
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Level-up with Lightning Apps
:description: From Basics to Advanced Skills
:col_css: col-md-6
:button_link: ../levels/basic/index.html
:height: 180
.. displayitem::
:header: Understand Lightning App
:description: Detailed description
:col_css: col-md-6
:button_link: lightning_app/index.html
:height: 180
.. displayitem::
:header: Understand Lightning Flow
:description: Detailed description
:col_css: col-md-6
:button_link: lightning_flow.html
:height: 180
.. displayitem::
:header: Understand Lightning Work
:description: Detailed description
:col_css: col-md-6
:button_link: lightning_work/index.html
:height: 180

View File

@ -1,27 +0,0 @@
import lightning as L
from lightning.app.utilities.app_helpers import pretty_state
class Work(L.LightningWork):
def __init__(self):
super().__init__(cache_calls=False)
# Attributes are registered automatically in the state.
self.counter = 0
def run(self):
# Incrementing an attribute gets reflected in the `Flow` state.
self.counter += 1
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.w = Work()
def run(self):
if self.w.has_started:
print(f"State: {pretty_state(self.state)} \n")
self.w.run()
app = L.LightningApp(Flow())

View File

@ -1,15 +1,138 @@
:orphan:
##########################################
Communication between Lightning Components
##########################################
################################
Communication Between Components
################################
**Audience:** Users that want to create interactive applications.
**Level:** Intermediate
**Level:** Advanced
**Prerequisite**: Read the `Communication in Lightning Apps article <../../access_app_state.html>`_.
**Prerequisite**: Read the :ref:`access_app_state` guide.
----
.. include:: ../../core_api/lightning_app/communication_content.rst
***********************************
Why should components communicate ?
***********************************
When creating interactive apps with multiple components, you might want your components to share information with each other. You might want to rely on that information to control their execution, share progress in the UI, trigger a sequence of operations, etc.
By design, the :class:`~lightning_app.core.flow.LightningFlow` communicates to all :class:`~lightning_app.core.work.LightningWork` within the application, but :class:`~lightning_app.core.work.LightningWork` can't communicate with each other directly, they need the flow as a proxy to do so.
Once a ``LightningWork`` is running, any updates to its state are automatically communicated to the flow as a delta (using `DeepDiff <https://github.com/seperman/deepdiff>`_). The state communication isn't bi-directional, it is only done from work to flow.
Internally, the Lightning App is alternately collecting deltas sent from all the registered ``LightningWorks`` and/or UI, and running the root flow run method of the app.
*******************************
Communication From Work to Flow
*******************************
Below, find an example to better understand this behavior.
The ``WorkCounter`` increments a counter until 1 million and the ``Flow`` prints the work counter.
As the work is running into its own process, its state changes is sent to the Flow which contains the latest value of the counter.
.. code-block:: python
import lightning_app as la
class WorkCounter(lapp.LightningWork):
def __init__(self):
super().__init__(parallel=True)
self.counter = 0
def run(self):
for _ in range(int(10e6)):
self.counter += 1
class Flow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.w = WorkCounter()
def run(self):
self.w.run()
print(self.w.counter)
app = lapp.LightningApp(Flow())
A delta sent from the work to the flow looks like this:
.. code-block:: python
{"values_changed": {"root['works']['w']['vars']['counter']": {"new_value": 425}}}
Here is the associated illustration:
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/deltas.gif
:alt: Mechanism showing how delta are sent.
:width: 100 %
*******************************
Communication From Flow to Work
*******************************
Communication from the flow to the work while running isn't supported yet. If your application requires this feature, please open an issue on GitHub.
.. code-block:: python
import lightning_app as la
from time import sleep
class WorkCounter(lapp.LightningWork):
def __init__(self):
super().__init__(parallel=True)
self.counter = 0
def run(self):
while True:
sleep(1)
print(f"Work {self.counter}")
class Flow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.w = WorkCounter()
def run(self):
self.w.run()
sleep(1)
print(f"Flow {self.w.counter}")
self.w.counter += 1
app = lapp.LightningApp(Flow())
As you can observe, there is a divergence between the values within the Work and the Flow.
.. code-block:: console
Flow 0
Flow 1
Flow 2
Flow 3
Work 0
Flow 4
Work 0
Flow 5
Work 0
Flow 6
Work 0
Flow 7
Work 0
Flow 8
Work 0
Flow 9
Work 0
Flow 10
.. note:: Technically, the flow and works rely on queues to share data (multiprocessing locally and Redis lists in the cloud).

View File

@ -1,160 +0,0 @@
********************************
Communication Between Components
********************************
When creating interactive Lightning Apps (App) with multiple components, you may need your components to share information with each other and rely on that information to control their execution, share progress in the UI, trigger a sequence of operations, etc.
To accomplish that, Lightning components can communicate using the App State. The App State is composed of all attributes defined within each component's **__init__** method e.g anything attached to the component with **self.x = y**.
All attributes of all **LightningWork (Work)** components are accessible in the **LightningFlow (Flow)** components in real-time.
By design, the Flows communicate to all **Works** within the application. However, Works can't communicate with each other directly, they must use Flows as a proxy to communicate.
Once a Work is running, any updates to the Work's state is automatically communicated to the Flow, as a delta (using `DeepDiff <https://github.com/seperman/deepdiff>`_). The state communication isn't bi-directional, communication is only done from Work to Flow.
Internally, the App is alternately collecting deltas sent from all the registered Works and/or UI, and running the root Flow run method of the App.
----
*************************************************
Communication from LightningWork to LightningFlow
*************************************************
LightningFlow (Flow) can access their children's LightningWork (Work) state.
When a running Work attribute gets updated inside its method (separate process locally or remote machine), the app re-executes Flow's run method once it receives the state update from the Work.
Here's an example to better understand communication from Work to Flow.
The ``WorkCounter`` increments a counter until 1 million and the ``Flow`` prints the work counter.
As the Work is running its own process, its state changes are sent to the Flow which contains the latest value of the counter.
.. code-block:: python
import lightning as L
class WorkCounter(L.LightningWork):
def __init__(self):
super().__init__(parallel=True)
self.counter = 0
def run(self):
for _ in range(int(10e6)):
self.counter += 1
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.w = WorkCounter()
def run(self):
self.w.run()
print(self.w.counter)
app = L.LightningApp(Flow())
A delta sent from the Work to the Flow looks like this:
.. code-block:: python
{"values_changed": {"root['works']['w']['vars']['counter']": {"new_value": 425}}}
Here is the associated illustration:
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/deltas.gif
:alt: Mechanism showing how delta are sent.
:width: 100 %
Here's another example that is slightly different. Here we define a Flow and Work, where the Work increments a counter indefinitely and the Flow prints its state which contains the Work.
You can easily check the state of your entire app as follows:
.. literalinclude:: ../../core_api/lightning_app/app.py
Run the app with:
.. code-block:: bash
lightning run app docs/source-app/core_api/lightning_app/app.py
And here's the output you get when running the App using the **Lightning CLI**:
.. code-block:: console
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
State: {'works': {'w': {'vars': {'counter': 1}}}}
State: {'works': {'w': {'vars': {'counter': 2}}}}
State: {'works': {'w': {'vars': {'counter': 3}}}}
State: {'works': {'w': {'vars': {'counter': 3}}}}
State: {'works': {'w': {'vars': {'counter': 4}}}}
...
----
*************************************************
Communication from LightningFlow to LightningWork
*************************************************
Communication from the LightningFlow (Flow) to the LightningWork (Work) while running **isn't supported yet**. If your application requires this feature, please open an issue on Github.
Here's an example of what would happen if you try to have the Flow communicate with the Work:
.. code-block:: python
import lightning as L
from time import sleep
class WorkCounter(L.LightningWork):
def __init__(self):
super().__init__(parallel=True)
self.counter = 0
def run(self):
while True:
sleep(1)
print(f"Work {self.counter}")
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.w = WorkCounter()
def run(self):
self.w.run()
sleep(1)
print(f"Flow {self.w.counter}")
self.w.counter += 1
app = L.LightningApp(Flow())
As you can see, there is a divergence between the values within the Work and the Flow.
.. code-block:: console
Flow 0
Flow 1
Flow 2
Flow 3
Work 0
Flow 4
Work 0
Flow 5
Work 0
Flow 6
Work 0
Flow 7
Work 0
Flow 8
Work 0
Flow 9
Work 0
Flow 10

View File

@ -1,15 +1,187 @@
:orphan:
.. _dynamic_work:
############
Dynamic Work
############
#####################
Dynamic LightningWork
#####################
**Audience:** Users who want to learn how to create application which adapts to user demands.
**Audience:** Users who want to create applications that adapt to user demands.
**Level:** Advanced
**Level:** Intermediate
----
.. include:: dynamic_work_content.rst
***************************************************
Why should I care about creating work dynamically ?
***************************************************
Imagine you want to create a research notebook app for your team, where every member can create multiple `JupyterLab <https://jupyter.org/>`_ session on their hardware of choice.
To allow every notebook to choose hardware, it needs to be set up in its own :class:`~lightning_app.core.work.LightningWork`, but you can't know the number of notebooks users will need in advance. In this case you'll need to add ``LightningWorks`` dynamically at run time.
This is what **dynamic works** enables.
***************************
When to use dynamic works ?
***************************
Dynamic works should be used anytime you want to change the resources your application is using at runtime.
*******************
How to add a work ?
*******************
You can simply attach your components in the **run** method of a flow using python **hasattr**, **setattr** and **getattr** functions.
.. code-block:: python
class RootFlow(lapp.LightningFlow):
def run(self):
if not hasattr(self, "work"):
setattr(self, "work", Work()) # The `Work` component is created and attached here.
getattr(self, "work").run() # Run the `Work` component.
But it is usually more readable to use Lightning built-in :class:`~lightning_app.structures.Dict` or :class:`~lightning_app.structures.List` as follows:
.. code-block:: python
from lightning_app.structures import Dict
class RootFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.dict = Dict()
def run(self):
if "work" not in self.dict:
self.dict["work"] = Work() # The `Work` component is attached here.
self.dict["work"].run()
********************
How to stop a work ?
********************
In order to stop a work, simply use the work ``stop`` method as follows:
.. code-block:: python
class RootFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work()
def run(self):
self.work.stop()
**********************************
Application Example with StreamLit
**********************************
..
The entire application can be found `here <https://github.com/PyTorchLightning/lightning-template-jupyterlab>`_.
The Notebook Manager
^^^^^^^^^^^^^^^^^^^^
In the component below, we are dynamically creating ``JupyterLabWork`` every time a user clicks the ``Create Jupyter Notebook`` button.
To do so, we are iterating over the list of ``jupyter_config_requests`` infinitely.
.. code-block:: python
import lightning_app as la
class JupyterLabManager(lapp.LightningFlow):
"""This flow manages the users notebooks running within works."""
def __init__(self):
super().__init__()
self.jupyter_works = lapp.structures.Dict()
self.jupyter_config_requests = []
def run(self):
for idx, jupyter_config in enumerate(self.jupyter_config_requests):
# The Jupyter Config has this form is:
# {"use_gpu": False/True, "token": None, "username": ..., "stop": False}
# Step 1: Check if JupyterWork already exists for this username
username = jupyter_config["username"]
if username not in self.jupyter_works:
jupyter_config["ready"] = False
# Set the hardware selected by the user: GPU or CPU.
cloud_compute = lapp.CloudCompute("gpu" if jupyter_config["use_gpu"] else "cpu-small")
# Step 2: Create new JupyterWork dynamically !
self.jupyter_works[username] = JupyterLabWork(cloud_compute=cloud_compute)
# Step 3: Run the JupyterWork
self.jupyter_works[username].run()
# Step 4: Store the notebook token in the associated config.
# We are using this to know when the notebook is ready
# and display the stop button on the UI.
if self.jupyter_works[username].token:
jupyter_config["token"] = self.jupyter_works[username].token
# Step 5: Stop the work if the user requested it.
if jupyter_config["stop"]:
self.jupyter_works[username].stop()
self.jupyter_config_requests.pop(idx)
def configure_layout(self):
return StreamlitFrontend(render_fn=render_fn)
The StreamLit UI
^^^^^^^^^^^^^^^^
In the UI below, we receive the **state** of the Jupyter Manager and it can be modified directly from the UI interaction.
.. code-block:: python
def render_fn(state):
import streamlit as st
# Step 1: Enable users to select their notebooks and create them
column_1, column_2, column_3 = st.columns(3)
with column_1:
create_jupyter = st.button("Create Jupyter Notebook")
with column_2:
username = st.text_input("Enter your username", "tchaton")
assert username
with column_3:
use_gpu = st.checkbox("Use GPU")
# Step 2: If a user clicked the button, add an element to the list of configs
# Note: state.jupyter_config_requests = ... will sent the state update to the component.
if create_jupyter:
new_config = [{"use_gpu": use_gpu, "token": None, "username": username, "stop": False}]
state.jupyter_config_requests = state.jupyter_config_requests + new_config
# Step 3: List of running notebooks.
for idx, config in enumerate(state.jupyter_config_requests):
column_1, column_2, column_3 = st.columns(3)
with column_1:
if not idx:
st.write(f"Idx")
st.write(f"{idx}")
with column_2:
if not idx:
st.write(f"Use GPU")
st.write(config["use_gpu"])
with column_3:
if not idx:
st.write(f"Stop")
if config["token"]:
should_stop = st.button("Stop this notebook")
# Step 4: Change stop if the user clicked the button
if should_stop:
config["stop"] = should_stop
state.jupyter_config_requests = state.jupyter_config_requests

View File

@ -1,202 +0,0 @@
***************************************
What Dynamic LightningWork does for you
***************************************
Dynamic LightningWork (Work) changes the resources your application uses while the application is running (aka at runtime).
For example, imagine you want to create a research notebook app for your team. You want every member to be able to create multiple `JupyterLab <https://jupyter.org/>`_ sessions on their hardware of choice.
To allow every notebook to choose hardware, it needs to be set up in its own :class:`~lightning_app.core.work.LightningWork`, but you can't know the number of notebooks users will need in advance. In this case you'll need to add ``LightningWorks`` dynamically at run time.
----
*****************
Use Dynamic Works
*****************
Dynamic Works should be used anytime you want to change the resources your application is using while it is running (aka at runtime).
You're usually going to use the ``start`` and ``stop`` methods together.
----
Add a Dynamic Work
^^^^^^^^^^^^^^^^^^
There are a couple of ways you can add a dynamic Work:
- Option 1: Attach your components in the **run** method using the Python functions.
- Option 2: Use the Lightning built-in classes :class:`~lightning.structures.Dict` or :class:`~lightning.structures.List`.
.. note:: Using the Lightning built-in classes is usually easier to read.
----
**OPTION 1:** Attach your components in the run method of a flow using the Python functions **hasattr**, **setattr**, and **getattr**:
.. code-block:: python
class RootFlow(lapp.LightningFlow):
def run(self):
if not hasattr(self, "work"):
# The `Work` component is created and attached here.
setattr(self, "work", Work())
# Run the `Work` component.
getattr(self, "work").run()
**OPTION 2:** Use the built-in Lightning classes :class:`~lightning_app.structures.Dict` or :class:`~lightning_app.structures.List`
.. code-block:: python
from lightning_app.structures import Dict
class RootFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.dict = Dict()
def run(self):
if "work" not in self.dict:
# The `Work` component is attached here.
self.dict["work"] = Work()
self.dict["work"].run()
----
Stop a Work
^^^^^^^^^^^
Stop a work when you are concerned about cost.
To stop a work, use the work ``stop`` method:
.. code-block:: python
class RootFlow(L.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work()
def run(self):
self.work.stop()
----
*********************
Dynamic Work Examples
*********************
..
The entire application can be found `here <https://github.com/Lightning-AI/lightning-template-jupyterlab>`_.
----
Dynamic Work with Jupyter Notebooks
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In this example, we are dynamically creating ``JupyterLabWork`` every time a user clicks the **Create Jupyter Notebook** button.
In order to do that, we are iterating over the list of ``jupyter_config_requests`` infinitely.
.. code-block:: python
import lightning as L
class JupyterLabManager(L.LightningFlow):
"""This flow manages the users' notebooks running within works."""
def __init__(self):
super().__init__()
self.jupyter_works = L.structures.Dict()
self.jupyter_config_requests = []
def run(self):
for idx, jupyter_config in enumerate(self.jupyter_config_requests):
# The Jupyter Config has this form is:
# {"use_gpu": False/True, "token": None, "username": ..., "stop": False}
# Step 1: Check if JupyterWork already exists for this username
username = jupyter_config["username"]
if username not in self.jupyter_works:
jupyter_config["ready"] = False
# Set the hardware selected by the user: GPU or CPU.
cloud_compute = L.CloudCompute("gpu" if jupyter_config["use_gpu"] else "cpu-small")
# Step 2: Create new JupyterWork dynamically !
self.jupyter_works[username] = JupyterLabWork(cloud_compute=cloud_compute)
# Step 3: Run the JupyterWork
self.jupyter_works[username].run()
# Step 4: Store the notebook token in the associated config.
# We are using this to know when the notebook is ready
# and display the stop button on the UI.
if self.jupyter_works[username].token:
jupyter_config["token"] = self.jupyter_works[username].token
# Step 5: Stop the work if the user requested it.
if jupyter_config['stop']:
self.jupyter_works[username].stop()
self.jupyter_config_requests.pop(idx)
def configure_layout(self):
return L.app.frontend.StreamlitFrontend(render_fn=render_fn)
----
Dynamic Works with StreamLit UI
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Continuing from the Jupyter Notebook example, in the UI, we receive the **state** of the Jupyter Manager and the state can be modified directly from the UI.
.. code-block:: python
import streamlit as st
def render_fn(state):
# Step 1: Enable users to select their notebooks and create them
column_1, column_2, column_3 = st.columns(3)
with column_1:
create_jupyter = st.button("Create Jupyter Notebook")
with column_2:
username = st.text_input('Enter your username', "tchaton")
assert username
with column_3:
use_gpu = st.checkbox('Use GPU')
# Step 2: If a user clicked the button, add an element to the list of configs
# Note: state.jupyter_config_requests = ... will sent the state update to the component.
if create_jupyter:
new_config = [{"use_gpu": use_gpu, "token": None, "username": username, "stop": False}]
state.jupyter_config_requests = state.jupyter_config_requests + new_config
# Step 3: List of running notebooks.
for idx, config in enumerate(state.jupyter_config_requests):
column_1, column_2, column_3 = st.columns(3)
with column_1:
if not idx:
st.write(f"Idx")
st.write(f"{idx}")
with column_2:
if not idx:
st.write(f"Use GPU")
st.write(config['use_gpu'])
with column_3:
if not idx:
st.write(f"Stop")
if config["token"]:
should_stop = st.button("Stop this notebook")
# Step 4: Change stop if the user clicked the button
if should_stop:
config["stop"] = should_stop
state.jupyter_config_requests = state.jupyter_config_requests

View File

@ -7,6 +7,7 @@ LightningApp
############
The :class:`~lightning_app.core.app.LightningApp` runs a tree of one or more components that interact to create end-to-end applications. There are two kinds of components: :class:`~lightning_app.core.flow.LightningFlow` and :class:`~lightning_app.core.work.LightningWork`. This modular design enables you to reuse components created by other users.
.. autoclass:: lightning_app.core.app.LightningApp
:exclude-members: _run, connect, get_component_by_name, maybe_apply_changes, set_state
:noindex:

View File

@ -4,5 +4,8 @@
LightningFlow
#############
The :class:`~lightning_app.core.flow.LightningFlow` component coordinates long-running tasks :class:`~lightning_app.core.work.LightningWork` and runs its children :class:`~lightning_app.core.flow.LightningFlow` components.
.. autoclass:: lightning_app.core.flow.LightningFlow
:exclude-members: _attach_backend, _exit, _is_state_attribute, set_state
:noindex:

View File

@ -8,8 +8,103 @@ Customize your Cloud Compute
**Audience:** Users who want to select the hardware to run in the cloud.
**Level:** Intermediate
**Level:** Basic
----
.. include:: compute_content.rst
***************************************
How can I customize my Work resources ?
***************************************
In the cloud, you can simply configure which machine to run on by passing
a :class:`~lightning_app.utilities.packaging.cloud_compute.CloudCompute` to your work ``__init__`` method:
.. code-block:: python
import lightning_app as la
# Run on a free, shared CPU machine. This is the default for every LightningWork.
MyCustomWork(cloud_compute=lapp.CloudCompute())
# Run on a dedicated, medium-size CPU machine (see specs below)
MyCustomWork(cloud_compute=lapp.CloudCompute("cpu-medium"))
# Run on cheap GPU machine with a single GPU (see specs below)
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu"))
# Run on a fast multi-GPU machine (see specs below)
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu-fast-multi"))
Here is the full list of supported machine names:
.. list-table:: Hardware by Accelerator Type
:widths: 25 25 25 25
:header-rows: 1
* - Name
- # of CPUs
- GPUs
- Memory
* - default
- 2
- 0
- 3 GB
* - cpu-small
- 2
- 0
- 8 GB
* - cpu-medium
- 8
- 0
- 32 GB
* - gpu
- 4
- 1 (T4, 16 GB)
- 16 GB
* - gpu-fast
- 8
- 1 (V100, 16 GB)
- 61 GB
* - gpu-fast-multi
- 32
- 4 (V100 16 GB)
- 244 GB
The up-to-date prices for these instances can be found `here <https://lightning.ai/pricing>`_.
*******************************************
How can I run on spot/preemptible machine ?
*******************************************
Most cloud providers offer ``preemptible`` (synonym of ``spot``) machines which are usually discounted up to 90 %. Those machines are cheaper but the cloud provider can retrieve them at any time.
.. code-block:: python
import lightning_app as la
# Run on a single CPU
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu", preemptible=True))
***********************************
How can I stop my work when idle ?
***********************************
By providing **idle_timeout=X Seconds**, the work is automatically stopped **X seconds** after doing nothing.
.. code-block:: python
import lightning_app as la
# Run on a single CPU and turn down immediately when idle.
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu", idle_timeout=0))
#############
CloudCompute
#############
.. autoclass:: lightning_app.utilities.packaging.cloud_compute.CloudCompute
:noindex:

View File

@ -1,100 +0,0 @@
***************************
Customize my Work resources
***************************
In the cloud, you can simply configure which machine to run on by passing
a :class:`~lightning_app.utilities.packaging.cloud_compute.CloudCompute` to your work ``__init__`` method:
.. code-block:: python
import lightning as L
# Run on a free, shared CPU machine. This is the default for every LightningWork.
MyCustomWork(cloud_compute=L.CloudCompute())
# Run on a dedicated, medium-size CPU machine (see specs below)
MyCustomWork(cloud_compute=L.CloudCompute("cpu-medium"))
# Run on cheap GPU machine with a single GPU (see specs below)
MyCustomWork(cloud_compute=L.CloudCompute("gpu"))
# Run on a fast multi-GPU machine (see specs below)
MyCustomWork(cloud_compute=L.CloudCompute("gpu-fast-multi"))
Here is the full list of supported machine names:
.. list-table:: Hardware by Accelerator Type
:widths: 25 25 25 25
:header-rows: 1
* - Name
- # of CPUs
- GPUs
- Memory
* - default
- 2
- 0
- 3 GB
* - cpu-small
- 2
- 0
- 8 GB
* - cpu-medium
- 8
- 0
- 32 GB
* - gpu
- 4
- 1 (T4, 16 GB)
- 16 GB
* - gpu-fast
- 8
- 1 (V100, 16 GB)
- 61 GB
* - gpu-fast-multi
- 32
- 4 (V100 16 GB)
- 244 GB
The up-to-date prices for these instances can be found `here <https://lightning.ai/pages/pricing>`_.
----
*******************************
Run on spot/preemptible machine
*******************************
Most cloud providers offer ``preemptible`` (synonym of ``spot``) machines that are usually discounted by up to 90 %. Those machines are cheaper but the cloud provider can retrieve them at any time and might take longer to be ready.
.. code-block:: python
import lightning as L
# Run on a single CPU
MyCustomWork(cloud_compute=L.CloudCompute("gpu", preemptible=True))
----
**********************
Stop my work when idle
**********************
By providing **idle_timeout=X Seconds**, the work is automatically stopped **X seconds** after doing nothing.
.. code-block:: python
import lightning as L
# Run on a single CPU and turn down immediately when idle.
MyCustomWork(cloud_compute=L.CloudCompute("gpu", idle_timeout=0))
----
************
CloudCompute
************
.. autoclass:: lightning_app.utilities.packaging.cloud_compute.CloudCompute
:noindex:

View File

@ -1,13 +1,83 @@
:orphan:
###############################
Handle Lightning App exceptions
###############################
########################
Handling App Exceptions
########################
**Audience:** Users who want to make Lightning Apps more robust to potential issues.
**Audience:** Users who want to know how to implement an app where errors are handled.
**Level:** Advanced
----
.. include:: handling_app_exception_content.rst
*************************************************
Why should I care about handling app exceptions ?
*************************************************
Imagine you are creating an application where your team can launch model training by providing their own Github Repo any time they want.
As the application admin, you don't want the application to go down if their code has a bug and breaks.
Instead, you would like the work to capture the exception and surface this to the users on failures.
****************************************
How can I configure exception handling ?
****************************************
The LightningWork accepts an argument **raise_exception** which is **True** by default. This aligns with Python default behaviors.
However, for the user case stated above, we want to capture the work exceptions. This is done by providing ``raise_exception=False`` to the work ``__init__`` method.
.. code-block:: python
MyCustomWork(raise_exception=False) # <== HERE: The exception is captured.
# Default behavior
MyCustomWork(raise_exception=True) # <== HERE: The exception is raised within the flow and terminates the app
And you can customize this behavior by overriding the ``on_exception`` hook to the Lightning Work.
.. code-block:: python
import lightning as L
class MyCustomWork(L.LightningWork):
def on_exception(self, exception: Exception):
# do something when an exception is triggered.
pass
*******************
Application Example
*******************
This is the pseudo-code for the application described above.
.. code-block:: python
import lightning_app as lapp
class RootFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.user_jobs = lapp.structures.Dict()
self.requested_jobs = []
def run(self):
for request in self.requested_jobs:
job_id = request["id"]
if job_id not in self.user_jobs:
# Note: The `GithubRepoLauncher` doesn't exist yet.
self.user_jobs[job_id] = GithubRepoLauncher(
**request,
raise_exception=False, # <== HERE: The exception is captured.
)
self.user_jobs[job_id].run()
if self.user_jobs[job_id].status.stage == "failed" and "printed" not in request:
print(self.user_jobs[job_id].status) # <== HERE: Print the user exception.
request["printed"] = True

View File

@ -1,74 +0,0 @@
***************************************************
What handling Lightning App exceptions does for you
***************************************************
Imagine you are creating a Lightning App (App) where your team can launch model training by providing their own Github Repo any time they want.
As the App admin, you don't want the App to go down if their code has a bug and breaks.
Instead, you would like the LightningWork (Work) to capture the exception and present the issue to users.
----
****************************
Configure exception handling
****************************
The LightningWork (Work) accepts an argument **raise_exception** which is **True** by default. This aligns with Python default behaviors.
However, for the user case stated in the previous section, we want to capture the Work exceptions. This is done by providing ``raise_exception=False`` to the work ``__init__`` method.
.. code-block:: python
import lightning as L
MyCustomWork(raise_exception=False) # <== HERE: The exception is captured.
# Default behavior
MyCustomWork(raise_exception=True) # <== HERE: The exception is raised within the flow and terminates the app
And you can customize this behavior by overriding the ``on_exception`` hook to the Work.
.. code-block:: python
import lightning as L
class MyCustomWork(L.LightningWork):
def on_exception(self, exception: Exception):
# do something when an exception is triggered.
----
**************************
Exception handling example
**************************
This is the pseudo-code for the application described above.
.. code-block:: python
import lightning as L
class RootFlow(L.LightningFlow):
def __init__(self):
super().__init__()
self.user_jobs = L.structures.Dict()
self.requested_jobs = []
def run(self):
for request in self.requested_jobs:
job_id = request["id"]
if job_id not in self.user_jobs:
# Note: The `GithubRepoLauncher` doesn't exist yet.
self.user_jobs[job_id] = GithubRepoLauncher(
**request,
raise_exception=False, # <== HERE: The exception is captured.
)
self.user_jobs[job_id].run()
if self.user_jobs[job_id].status.stage == "failed" and "printed" not in request:
print(self.user_jobs[job_id].status) # <== HERE: Print the user exception.
request["printed"] = True

View File

@ -6,6 +6,7 @@
LightningWork
#############
The :class:`~lightning_app.core.work.LightningWork` component is a building block optimized for long-running jobs or integrating third-party services. LightningWork can be used for training large models, downloading a dataset, or any long-lasting operation.
.. autoclass:: lightning_app.core.work.LightningWork
:exclude-members: _aggregate_status_timeout, _is_state_attribute, _is_state_attribute, set_state
:noindex:

View File

@ -1,15 +1,87 @@
:orphan:
######################################
Sharing Objects between LightningWorks
######################################
#############################
Sharing Objects between Works
#############################
**Audience:** Users who want to know how to transfer Python objects between their LightningWorks.
**Audience:** Users who want to know how to transfer python objects between their works.
**Level:** Advanced
**Prerequisite**: Reach Level 16+, know about the `pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and read the `Access app state guide <../../access_app_state.html>`_.
**Prerequisite**: Know about the pandas library and read the :ref:`access_app_state` guide.
----
.. include:: payload_content.rst
************************************
When do I need to transfer objects ?
************************************
Imagine your application is processing some data using a `pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and you want to pass that data to another work. This is what the **Payload API** is meant for.
*************************************
How can I use the Lightning Payload ?
*************************************
The Payload enables non JSON-serializable attribute objects to be part of your work state and be communicated to other works.
Here is an example how to use it:
.. code-block:: python
import lightning_app as lapp
import pandas as pd
class SourceWork(lapp.LightningWork):
def __init__(self):
super().__init__()
self.df = None
def run(self):
# do some processing
df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
# The object you care about needs to be wrapped into a Payload object.
self.df = lapp.storage.Payload(df)
# You can access the original object from the payload using its value property.
print("src", self.df.value)
# src col1 col2
# 0 1 3
# 1 2 4
Once the Payload object is attached to your work state, it can be passed to another work via the flow as follows:
.. code-block:: python
import lightning_app as lapp
import pandas as pd
class DestinationWork(lapp.LightningWork):
def run(self, df: lapp.storage.Payload):
# You can access the original object from the payload using its value property.
print("dst", df.value)
# dst col1 col2
# 0 1 3
# 1 2 4
class Flow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.src = SourceWork()
self.dst = DestinationWork()
def run(self):
self.src.run()
# The pandas DataFrame created by the ``SourceWork``
# is passed to the ``DestinationWork``.
# Internally, Lightning pickles and un-pickle the python object,
# so you receive a copy of the original object.
self.dst.run(df=self.src.df)
app = lapp.LightningApp(Flow())

View File

@ -1,75 +0,0 @@
**************************************
What transferring objects does for you
**************************************
Imagine your application is processing some data using a `pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and you want to pass that data to another LightningWork (Work). This is what the **Payload API** is meant for.
----
*************************
Use the Lightning Payload
*************************
The Payload enables non JSON-serializable attribute objects to be part of your Work's state and to be communicated to other Works.
Here is an example:
.. code-block:: python
import lightning as L
import pandas as pd
class SourceWork(L.LightningWork):
def __init__(self):
super().__init__()
self.df = None
def run(self):
# do some processing
df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
# The object you care about needs to be wrapped into a Payload object.
self.df = L.storage.Payload(df)
# You can access the original object from the payload using its value property.
print("src", self.df.value)
# src col1 col2
# 0 1 3
# 1 2 4
Once the Payload object is attached to your Work's state, it can be passed to another work using the LightningFlow (Flow) as follows:
.. code-block:: python
import lightning as L
import pandas as pd
class DestinationWork(L.LightningWork):
def run(self, df: L.storage.Payload):
# You can access the original object from the payload using its value property.
print("dst", df.value)
# dst col1 col2
# 0 1 3
# 1 2 4
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.src = SourceWork()
self.dst = DestinationWork()
def run(self):
self.src.run()
# The pandas DataFrame created by the ``SourceWork``
# is passed to the ``DestinationWork``.
# Internally, Lightning pickles and un-pickle the python object,
# so you receive a copy of the original object.
self.dst.run(df=self.src.df)
app = L.LightningApp(Flow())

View File

@ -1,8 +1,9 @@
:orphan:
####################
LightningWork Status
####################
#####################
Lightning Work Status
#####################
**Audience:** Users who want to understand ``LightningWork`` under the hood.
@ -10,4 +11,199 @@ LightningWork Status
----
.. include:: status_content.rst
*******************
What are statuses ?
*******************
Statuses indicate transition points in the life of a LightningWork and contain metadata.
The different stages are:
.. code-block:: python
class WorkStageStatus:
NOT_STARTED = "not_started"
STOPPED = "stopped"
PENDING = "pending"
RUNNING = "running"
SUCCEEDED = "succeeded"
FAILED = "failed"
And a single status is as follows:
.. code-block:: python
@dataclass
class WorkStatus:
stage: WorkStageStatus
timestamp: float
reason: Optional[str] = None
message: Optional[str] = None
count: int = 1
On creation, the work's status flags all evaluate to ``False`` (in particular ``has_started``) and when calling ``work.run`` in your flow,
the work transitions from ``is_pending`` to ``is_running`` and then to ``has_succeeded`` if everything went well, or ``has_failed`` otherwise.
.. code-block:: python
from time import sleep
import lightning_app as lapp
class Work(lapp.LightningWork):
def run(self, value: int):
sleep(1)
if value == 0:
return
raise Exception(f"The provided value was {value}")
class Flow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work(raise_exception=False)
self.counter = 0
def run(self):
if not self.work.has_started:
print("NOT STARTED")
elif self.work.is_pending:
print("PENDING")
elif self.work.is_running:
print("RUNNING")
elif self.work.has_succeeded:
print("SUCCESS")
elif self.work.has_failed:
print("FAILED")
elif self.work.has_stopped:
print("STOPPED")
self._exit()
print(self.work.status)
self.work.run(self.counter)
self.counter += 1
app = lapp.LightningApp(Flow())
Run this app as follows:
.. code-block:: bash
lightning run app test.py > app_log.txt
And here is the expected output inside ``app_log.txt`` and as expected,
we are observing the following transition ``has_started``, ``is_pending``, ``is_running``, ``has_succeeded``, ``is_running`` and ``has_failed``
.. code-block:: console
NOT STARTED
WorkStatus(stage='not_started', timestamp=1653498225.18468, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
...
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498228.825194, reason=None, message=None, count=1)
...
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
RUNNING
...
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
...
In order to access all statuses, simply do:
.. code-block:: python
from time import sleep
import lightning_app as lapp
class Work(lapp.LightningWork):
def run(self, value: int):
sleep(1)
if value == 0:
return
raise Exception(f"The provided value was {value}")
class Flow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work(raise_exception=False)
self.counter = 0
def run(self):
print(self.statuses)
self.work.run(self.counter)
self.counter += 1
app = lapp.LightningApp(Flow())
Run this app as follows:
.. code-block:: bash
lightning run app test.py > app_log.txt
And here is the expected output inside ``app_log.txt``:
.. code-block:: console
# First execution with value = 0
[]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
# Second execution with value = 1
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]

View File

@ -1,197 +0,0 @@
*************************************
Everything about LightningWork Status
*************************************
Statuses indicate transition points in the life of a LightningWork (Work) and contain metadata.
The different stages are:
.. code-block:: python
class WorkStageStatus:
NOT_STARTED = "not_started"
STOPPED = "stopped"
PENDING = "pending"
RUNNING = "running"
SUCCEEDED = "succeeded"
FAILED = "failed"
And a single status is as follows:
.. code-block:: python
@dataclass
class WorkStatus:
stage: WorkStageStatus
timestamp: float
reason: Optional[str] = None
message: Optional[str] = None
count: int = 1
On creation, the Work's status flags all evaluate to ``False`` (in particular ``has_started``) and when calling ``work.run`` in your Lightning Flow (Flow),
the Work transitions from ``is_pending`` to ``is_running`` and then to ``has_succeeded`` if everything went well or ``has_failed`` otherwise.
.. code-block:: python
from time import sleep
import lightning as L
class Work(L.LightningWork):
def run(self, value: int):
sleep(1)
if value == 0:
return
raise Exception(f"The provided value was {value}")
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work(raise_exception=False)
self.counter = 0
def run(self):
if not self.work.has_started:
print("NOT STARTED")
elif self.work.is_pending:
print("PENDING")
elif self.work.is_running:
print("RUNNING")
elif self.work.has_succeeded:
print("SUCCESS")
elif self.work.has_failed:
print("FAILED")
elif self.work.has_stopped:
print("STOPPED")
self._exit()
print(self.work.status)
self.work.run(self.counter)
self.counter += 1
app = L.LightningApp(Flow())
Run this app as follows:
.. code-block:: bash
lightning run app test.py > app_log.txt
And here is the expected output inside ``app_log.txt`` and as expected,
we are observing the following transition ``has_started``, ``is_pending``, ``is_running``, ``has_succeeded``, ``is_running`` and ``has_failed``
.. code-block:: console
NOT STARTED
WorkStatus(stage='not_started', timestamp=1653498225.18468, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
...
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
PENDING
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498228.825194, reason=None, message=None, count=1)
...
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
SUCCESS
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
RUNNING
...
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
RUNNING
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
FAILED
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
...
In order to access all statuses:
.. code-block:: python
from time import sleep
import lightning as L
class Work(L.LightningWork):
def run(self, value: int):
sleep(1)
if value == 0:
return
raise Exception(f"The provided value was {value}")
class Flow(L.LightningFlow):
def __init__(self):
super().__init__()
self.work = Work(raise_exception=False)
self.counter = 0
def run(self):
print(self.statuses)
self.work.run(self.counter)
self.counter += 1
app = L.LightningApp(Flow())
Run this app as follows:
.. code-block:: bash
lightning run app test.py > app_log.txt
And here is the expected output inside ``app_log.txt``:
.. code-block:: console
# First execution with value = 0
[]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
# Second execution with value = 1
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
...
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]

View File

@ -2,8 +2,6 @@
Build a Directed Acyclic Graph (DAG)
####################################
.. _dag_example:
**Audience:** Users coming from MLOps to Lightning Apps, looking for more flexibility.
A typical ML training workflow can be implemented with a simple DAG.
@ -12,10 +10,10 @@ Below is a pseudo-code using the lightning framework that uses a LightningFlow t
.. code-block:: python
import lightning as L
import lightning_app as la
class DAGFlow(L.LightningFlow):
class DAGFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
self.processor = DataProcessorWork(...)
@ -31,19 +29,21 @@ Below is a pseudo-code to run several works in parallel using a built-in :class:
.. code-block:: python
import lightning as L
import lightning_app as la
class DAGFlow(L.LightningFlow):
class DAGFlow(lapp.LightningFlow):
def __init__(self):
super().__init__()
...
self.train_works = L.structures.Dict(**{
"1": TrainingWork(..., parallel=True),
"2": TrainingWork(..., parallel=True),
"3": TrainingWork(..., parallel=True),
...
})
self.train_works = lapp.structures.Dict(
**{
"1": TrainingWork(..., parallel=True),
"2": TrainingWork(..., parallel=True),
"3": TrainingWork(..., parallel=True),
# ...
}
)
...
def run(self):
@ -59,12 +59,13 @@ Below is a pseudo-code to run several works in parallel using a built-in :class:
self.serve_work.run(...)
----
**********
Next Steps
Next steps
**********
Depending on your use case, you might want to check one of these out next.
.. raw:: html
<div class="display-card-container">

View File

@ -15,9 +15,7 @@ In this example, you will learn how to create a simple DAG which:
and learn how to schedule this entire process.
Find the complete example `here <https://github.com/Lightning-AI/lightning/blob/master/examples/app_dag/app.py>`_.
----
Find the complete example `here <https://github.com/PyTorchLightning/lightning/blob/master/examples/dag/app.py>`_.
**************************
Step 1: Implement your DAG
@ -35,20 +33,19 @@ First, let's define the component we need:
* Processing is responsible to execute a ``processing.py`` script.
* A collection of model work to train all models in parallel.
.. literalinclude:: ../../../examples/app_dag/app.py
.. literalinclude:: ../../../../examples/dag/app.py
:lines: 55-79
And its run method executes the steps described above.
Additionally, ``work.stop`` is used to reduce cost when running in the cloud.
.. literalinclude:: ../../../examples/app_dag/app.py
.. literalinclude:: ../../../../examples/dag/app.py
:lines: 81-108
----
*****************************
Step 2: Define the scheduling
*****************************
.. literalinclude:: ../../../examples/app_dag/app.py
.. literalinclude:: ../../../../examples/dag/app.py
:lines: 109-137

View File

@ -1,5 +1,3 @@
:orphan:
##########################
Build a Data Exploring App
##########################

View File

@ -1,5 +1,3 @@
:orphan:
###############
Build a ETL App
###############

View File

@ -1,232 +0,0 @@
import json
import os
import tarfile
import uuid
import zipfile
from dataclasses import dataclass
from pathlib import Path
from typing import List
import lightning as L
from lightning.app.storage import Drive
class FileServer(L.LightningWork):
def __init__(self, drive: Drive, base_dir: str = "file_server", chunk_size=10240, **kwargs):
"""This component uploads, downloads files to your application.
Arguments:
drive: The drive can share data inside your application.
base_dir: The local directory where the data will be stored.
chunk_size: The quantity of bytes to download/upload at once.
"""
super().__init__(
cloud_build_config=L.BuildConfig(["flask, flask-cors"]),
parallel=True,
**kwargs,
)
# 1: Attach the arguments to the state.
self.drive = drive
self.base_dir = base_dir
self.chunk_size = chunk_size
# 2: Create a folder to store the data.
os.makedirs(self.base_dir, exist_ok=True)
# 3: Keep a reference to the uploaded filenames.
self.uploaded_files = dict()
def get_filepath(self, path: str) -> str:
"""Returns file path stored on the file server."""
return os.path.join(self.base_dir, path)
def get_random_filename(self) -> str:
"""Returns a random hash for the file name."""
return uuid.uuid4().hex
def upload_file(self, file):
"""Upload a file while tracking its progress."""
# 1: Track metadata about the file
filename = file.filename
uploaded_file = self.get_random_filename()
meta_file = uploaded_file + ".meta"
self.uploaded_files[filename] = {"progress": (0, None), "done": False}
# 2: Create a stream and write bytes of
# the file to the disk under `uploaded_file` path.
with open(self.get_filepath(uploaded_file), "wb") as out_file:
content = file.read(self.chunk_size)
while content:
# 2.1 Write the file bytes
size = out_file.write(content)
# 2.2 Update the progress metadata
self.uploaded_files[filename]["progress"] = (
self.uploaded_files[filename]["progress"][0] + size,
None,
)
# 4: Read next chunk of data
content = file.read(self.chunk_size)
# 3: Update metadata that the file has been uploaded.
full_size = self.uploaded_files[filename]["progress"][0]
self.drive.put(self.get_filepath(uploaded_file))
self.uploaded_files[filename] = {
"progress": (full_size, full_size),
"done": True,
"uploaded_file": uploaded_file,
}
# 4: Write down the metadata about the file to the disk
meta = {
"original_path": filename,
"display_name": os.path.splitext(filename)[0],
"size": full_size,
"drive_path": uploaded_file,
}
with open(self.get_filepath(meta_file), "wt") as f:
json.dump(meta, f)
# 5: Put the file to the drive.
# It means other components can access get or list them.
self.drive.put(self.get_filepath(meta_file))
return meta
def list_files(self, file_path: str):
    """List the uploaded files available under ``file_path``.

    For a directory, returns the original upload names of files served by
    this component.  For a zip or tar archive, returns the archive's member
    names.

    Raises:
        ValueError: If ``file_path`` points to an unsupported archive type.
    """
    # 1: Get the local file path of the file server.
    file_path = self.get_filepath(file_path)
    # 2: If the file isn't present locally, transfer it from the Drive.
    if not os.path.exists(file_path):
        self.drive.get(file_path)
    if os.path.isdir(file_path):
        # Map stored (random) filenames back to their original upload names.
        result = set()
        for _, _, files in os.walk(file_path):
            for file in files:
                if file.endswith(".meta"):
                    continue
                for filename, meta in self.uploaded_files.items():
                    if meta["uploaded_file"] == file:
                        result.add(filename)
        return {"asset_names": list(result)}
    # 3: If the filepath is a tar or zip file, list their contents.
    if zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, "r") as zf:
            result = zf.namelist()
    elif tarfile.is_tarfile(file_path):
        # `tarfile.open` (unlike the bare TarFile constructor) transparently
        # handles compressed archives such as .tar.gz.
        with tarfile.open(file_path, "r") as tf:
            result = tf.getnames()
    else:
        raise ValueError("Cannot open archive file!")
    # 4: Returns the matching files.
    return {"asset_names": result}
def run(self):
    """Start a Flask server exposing the upload and listing endpoints."""
    # 1: Imports flask requirements.
    from flask import Flask, request
    from flask_cors import CORS
    # 2: Create a flask app.  CORS is enabled so a UI served from another
    # origin can call these endpoints from the browser.
    flask_app = Flask(__name__)
    CORS(flask_app)
    # 3: Define the upload file endpoint
    @flask_app.post("/upload_file/")
    def upload_file():
        """Upload a file directly as form data."""
        f = request.files["file"]
        return self.upload_file(f)
    # 4: Define the endpoint listing the uploaded files.
    @flask_app.get("/")
    def list_files():
        return self.list_files(str(Path(self.base_dir).resolve()))
    # 5: Start the flask app while providing the `host` and `port`.
    # NOTE(review): `self.host`/`self.port` presumably come from the
    # enclosing LightningWork so the server is reachable in the cloud —
    # confirm against the class definition above this view.
    flask_app.run(host=self.host, port=self.port, load_dotenv=False)
def alive(self):
    """Hack: report whether the server has been assigned a URL (i.e. is up)."""
    is_running = self.url != ""
    return is_running
import requests
from lightning import LightningWork
class TestFileServer(LightningWork):
    """Exercises the FileServer HTTP endpoints.

    On the first run, uploads a small text file; on the second run, checks
    that the uploaded file appears in the server's listing.
    """

    def __init__(self, drive: Drive):
        super().__init__(cache_calls=True)
        # The Drive shared with the FileServer under test.
        self.drive = drive

    def run(self, file_server_url: str, first=True):
        if first:
            with open("test.txt", "w") as f:
                f.write("Some text.")
            # Use a context manager so the uploaded file handle is closed;
            # the original inline `open(...)` leaked the handle.
            with open("test.txt", "rb") as upload:
                response = requests.post(file_server_url + "/upload_file/", files={"file": upload})
            assert response.status_code == 200
        else:
            response = requests.get(file_server_url)
            assert response.status_code == 200
            assert response.json() == {"asset_names": ["test.txt"]}
from lightning import LightningApp, LightningFlow
class Flow(LightningFlow):
    """Wires the FileServer and its test client together into one app."""

    def __init__(self):
        super().__init__()
        # 1: Create a drive to share data between works
        self.drive = Drive("lit://file_server")
        # 2: Create the file server
        self.file_server = FileServer(self.drive)
        # 3: Create the file server test client
        self.test_file_server = TestFileServer(self.drive)

    def run(self):
        # 1: Start the file server.
        self.file_server.run()
        # 2: Trigger the test file server work when ready.
        if self.file_server.alive():
            # 3: Execute the test file server work (upload, then list).
            self.test_file_server.run(self.file_server.url)
            self.test_file_server.run(self.file_server.url, first=False)
        # 4: When both executions are successful, exit the app.
        if self.test_file_server.num_successes == 2:
            self._exit()

    def configure_layout(self):
        # Expose the file_server component
        # in the UI using its `/` endpoint.
        return {"name": "File Server", "content": self.file_server}
from lightning.app.runners import MultiProcessRuntime
def test_file_server():
    """Run the app end-to-end locally with the multiprocess runtime."""
    application = LightningApp(Flow())
    MultiProcessRuntime(application).dispatch()
from lightning.app.testing.testing import run_app_in_cloud
def test_file_server_in_cloud():
    """Run the app in the cloud and interact with it through playwright pages."""
    # You need to provide the directory containing the app file.
    app_dir = "docs/source-app/examples/file_server"
    with run_app_in_cloud(app_dir) as (admin_page, view_page, get_logs_fn):
        # The original used a triple-quoted string as a comment block; a
        # string literal is a real (no-op) statement, so use comments + pass.
        # 1. `admin_page` and `view_page` are playwright Page Objects.
        #    Check out https://playwright.dev/python/ doc to learn more.
        #    You can click the UI and trigger actions.
        # 2. By calling logs = get_logs_fn(),
        #    you get all the logs currently on the admin page.
        pass

View File

@ -1,12 +0,0 @@
.. _fileserver_example:
###################
Build a File Server
###################
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>` and read the `Drive article <https://lightning.ai/lightning-docs/glossary/storage/drive_content.html>`_.
----
.. include:: file_server_content.rst

View File

@ -1,82 +0,0 @@
*********
Objective
*********
Create a simple application where users can upload files and list the uploaded files.
----
*****************
Final Application
*****************
Here is a recording of the final application built in this example tested with pytest.
.. raw:: html
<iframe width="100%" height="290" src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/file_server.mp4" frameborder="0" allowfullscreen></iframe>
----
*************
System Design
*************
In order to create such application, we need to build two components and an application:
* A **File Server Component** that gives you the ability to download or list files shared with your application. This is particularly useful when you want to trigger an ML job but your users need to provide their own data or if the user wants to download the trained checkpoints.
* A **Test File Server** Component to interact with the file server.
* An application putting everything together and its associated pytest tests.
----
********
Tutorial
********
Let's dive in on how to create such application and component:
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 1. Implement the File Server general structure
:description: Put together the shape of the component
:col_css: col-md-4
:button_link: file_server_step_1.html
:height: 180
:tag: Basic
.. displayitem::
:header: 2. Implement the File Server upload and list files methods
:description: Add the core functionalities to the component
:col_css: col-md-4
:button_link: file_server_step_2.html
:height: 180
:tag: Basic
.. displayitem::
:header: 3. Implement a File Server Testing Component
:description: Create a component to test the file server
:col_css: col-md-4
:button_link: file_server_step_3.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 4. Implement tests for the File Server component with pytest
:description: Create an app to validate the upload and list files endpoints
:col_css: col-md-4
:button_link: file_server_step_4.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,11 +0,0 @@
:orphan:
*********************************************
1. Implement the FileServer general structure
*********************************************
Let's dive in on how to create such a component with the code below.
.. literalinclude:: ./app.py
:lines: 1-44, 132-158
:emphasize-lines: 16, 51-

View File

@ -1,37 +0,0 @@
:orphan:
**********************************************************
2. Implement the File Server upload and list_files methods
**********************************************************
Let's dive in on how to implement such methods.
***************************
Implement the upload method
***************************
In this method, we are creating a stream between the uploaded file and the uploaded file stored on the file server disk.
Once the file is uploaded, we are putting the file into the :class:`~lightning_app.storage.drive.Drive`, so it becomes persistent and accessible to all components.
.. literalinclude:: ./app.py
:lines: 13, 52-100
:emphasize-lines: 49
*******************************
Implement the list_files method
*******************************
First, in this method, we get the file in the file server filesystem, if available in the Drive. Once done, we list the files under the provided paths and return the results.
.. literalinclude:: ./app.py
:lines: 13, 101-131
:emphasize-lines: 9
*******************
Implement utilities
*******************
.. literalinclude:: ./app.py
:lines: 13, 46-51

View File

@ -1,16 +0,0 @@
:orphan:
********************************************
3. Implement a File Server Testing Component
********************************************
Let's dive in on how to implement a testing component for a server.
This component needs to test two things:
* The **/upload_file/** endpoint by creating a file and sending its content to it.
* The **/** endpoint listing files, by validating that the previously uploaded file is present in the response.
.. literalinclude:: ./app.py
:lines: 161-183

View File

@ -1,86 +0,0 @@
:orphan:
************************************************************
4. Implement tests for the File Server component with pytest
************************************************************
Let's create a simple Lightning App (App) with our **File Server** and the **File Server Test** components.
Once the File Server is up and running, we'll execute the **test_file_server** LightningWork and when both calls are successful, we exit the App using ``self._exit``.
.. literalinclude:: ./app.py
:lines: 186-216
Simply create a ``test.py`` file with the following code and run ``pytest tests.py``
.. literalinclude:: ./app.py
:lines: 218-222
To test the App in the cloud, create a ``cloud_test.py`` file with the following code and run ``pytest cloud_test.py``. Under the hood, we are using the end-to-end testing `playwright <https://playwright.dev/python/>`_ library so you can interact with the UI.
.. literalinclude:: ./app.py
:lines: 224-
----
********************
Test the application
********************
Clone the lightning repo and run the following command:
.. code-block:: bash
pytest docs/source-app/examples/file_server/app.py --capture=no -v
----
******************
Find more examples
******************
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: Build a DAG
:description: Create a dag pipeline
:col_css: col-md-4
:button_link: ../dag/dag.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a Github Repo Script Runner
:description: Run any script on github in the cloud
:col_css: col-md-4
:button_link: ../github_repo_runner/github_repo_runner.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a HPO Sweeper
:description: Train multiple models with different parameters
:col_css: col-md-4
:button_link: ../hpo/hpo.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a Model Server
:description: Serve multiple models with different parameters
:col_css: col-md-4
:button_link: ../model_server/model_server.html
:height: 150
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1 +0,0 @@
name: github_repo_runner

View File

@ -1,299 +0,0 @@
import io
import os
import subprocess
import sys
from copy import deepcopy
from functools import partial
from subprocess import Popen
from typing import Any, Dict, List, Optional
from lightning import BuildConfig, CloudCompute, LightningApp, LightningFlow
from lightning.app import structures
from lightning.app.components.python import TracerPythonScript
from lightning.app.frontend import StreamlitFrontend
from lightning.app.storage.path import Path
from lightning.app.utilities.state import AppState
class GithubRepoRunner(TracerPythonScript):
    def __init__(
        self,
        id: str,
        github_repo: str,
        script_path: str,
        script_args: List[str],
        requirements: List[str],
        cloud_compute: Optional[CloudCompute] = None,
        **kwargs: Any,
    ):
        """The GithubRepoRunner Component clones a repo, runs a specific script with provided arguments and collects
        logs.

        Arguments:
            id: Identifier of the component.
            github_repo: The Github Repo URL to clone.
            script_path: The path to the script to execute.
            script_args: The arguments to be provided to the script.
            requirements: The python requirements to run the script.
            cloud_compute: The object to select the cloud instance.
        """
        super().__init__(
            script_path=script_path,
            script_args=script_args,
            cloud_compute=cloud_compute,
            cloud_build_config=BuildConfig(requirements=requirements),
            **kwargs,
        )
        self.id = id
        self.github_repo = github_repo
        # Captured stdout of the executed script, stored in the state so the
        # UI can display it.
        self.logs = []

    def run(self, *args, **kwargs):
        # 1. Hack: Patch stdout so we can capture the logs.
        string_io = io.StringIO()
        sys.stdout = string_io
        # 2: Use git command line to clone the repo.
        repo_name = self.github_repo.split("/")[-1].replace(".git", "")
        cwd = os.path.dirname(__file__)
        subprocess.Popen(f"git clone {self.github_repo}", cwd=cwd, shell=True).wait()
        # 3: Execute the parent run method of the TracerPythonScript class.
        os.chdir(os.path.join(cwd, repo_name))
        super().run(*args, **kwargs)
        # 4: Get all the collected logs and add them to the state.
        # This isn't optimal as heavy, but works for this demo purpose.
        self.logs = string_io.getvalue()
        string_io.close()

    def configure_layout(self):
        # Expose this runner as its own UI tab, named after its id.
        return {"name": self.id, "content": self}
class PyTorchLightningGithubRepoRunner(GithubRepoRunner):
    """Specializes GithubRepoRunner for PyTorch Lightning scripts.

    Injects a tensorboard-launching callback into the traced Trainer and
    collects the best checkpoint/score once the script has finished.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filled in by `on_after_run` once the script has finished.
        self.best_model_path = None
        self.best_model_score = None

    def configure_tracer(self):
        from pytorch_lightning import Trainer
        from pytorch_lightning.callbacks import Callback

        tracer = super().configure_tracer()

        class TensorboardServerLauncher(Callback):
            def __init__(self, work):
                # The provided `work` is the
                # current ``PyTorchLightningScript`` work.
                self.w = work

            def on_train_start(self, trainer, *_):
                # Add `host` and `port` for tensorboard to work in the cloud.
                cmd = f"tensorboard --logdir='{trainer.logger.log_dir}'"
                server_args = f"--host {self.w.host} --port {self.w.port}"
                Popen(cmd + " " + server_args, shell=True)

        def trainer_pre_fn(self, *args, work=None, **kwargs):
            # Intercept Trainer __init__ call
            # and inject a ``TensorboardServerLauncher`` component.
            kwargs["callbacks"].append(TensorboardServerLauncher(work))
            return {}, args, kwargs

        # 5. Patch the `__init__` method of the Trainer
        # to inject our callback with a reference to the work.
        tracer.add_traced(Trainer, "__init__", pre_fn=partial(trainer_pre_fn, work=self))
        return tracer

    def on_after_run(self, end_script_globals):
        import torch

        # 1. Once the script has finished to execute,
        # we can collect its globals and access any objects.
        trainer = end_script_globals["cli"].trainer
        checkpoint_callback = trainer.checkpoint_callback
        lightning_module = trainer.lightning_module
        # 2. From the checkpoint_callback,
        # we are accessing the best model weights
        checkpoint = torch.load(checkpoint_callback.best_model_path)
        # 3. Load the best weights and torchscript the model.
        lightning_module.load_state_dict(checkpoint["state_dict"])
        lightning_module.to_torchscript(f"{self.name}.pt")
        # 4. Use `lightning.app.storage.Path` to create a reference to the
        # torch scripted model. In the cloud with multiple machines,
        # by simply passing this reference to another work,
        # it triggers automatically a file transfer.
        self.best_model_path = Path(f"{self.name}.pt")
        # 5. Keep track of the metrics.
        self.best_model_score = float(checkpoint_callback.best_model_score)
class KerasGithubRepoRunner(GithubRepoRunner):
    """Keras specialization stub. Left to the users to implement."""
class TensorflowGithubRepoRunner(GithubRepoRunner):
    """Tensorflow specialization stub. Left to the users to implement."""
# Maps the framework name shown in the UI to the runner class to instantiate.
GITHUB_REPO_RUNNERS = {
    "PyTorch Lightning": PyTorchLightningGithubRepoRunner,
    "Keras": KerasGithubRepoRunner,
    "Tensorflow": TensorflowGithubRepoRunner,
}
class Flow(LightningFlow):
    """Turns user requests submitted from the UI into dynamically-created runner works."""

    def __init__(self):
        super().__init__()
        # 1: Keep track of the requests within the state
        self.requests = []
        # 2: Create a dictionary of components.
        self.ws = structures.Dict()

    def run(self):
        # Iterate continuously over all requests
        for request_id, request in enumerate(self.requests):
            self._handle_request(request_id, deepcopy(request))

    def _handle_request(self, request_id: int, request: Dict):
        # 1: Create a name and find selected framework
        name = f"w_{request_id}"
        ml_framework = request["train"].pop("ml_framework")
        # 2: If the component hasn't been created yet, create it.
        if name not in self.ws:
            work_cls = GITHUB_REPO_RUNNERS[ml_framework]
            work = work_cls(id=request["id"], **request["train"])
            self.ws[name] = work
        # 3: Run the component
        self.ws[name].run()
        # 4: Once the component has finished,
        # add metadata to the original request for the UI.
        # NOTE(review): assumes the runner class defines `best_model_path`
        # (only the PyTorch Lightning runner sets it) — confirm for others.
        if self.ws[name].best_model_path:
            request = self.requests[request_id]
            request["best_model_score"] = self.ws[name].best_model_score
            request["best_model_path"] = self.ws[name].best_model_path

    def configure_layout(self):
        # Create a StreamLit UI for the user to run his Github Repo.
        return StreamlitFrontend(render_fn=render_fn)
def page_1__create_new_run(state):
    """Render the form page used to submit a new run request."""
    import streamlit as st

    st.markdown("# Create a new Run 🎈")

    # 1: Collect arguments from the users
    id = st.text_input("Name your run", value="my_first_run")
    github_repo = st.text_input(
        "Enter a Github Repo URL", value="https://github.com/Lightning-AI/lightning-quick-start.git"
    )

    default_script_args = "--trainer.max_epochs=5 --trainer.limit_train_batches=4 --trainer.limit_val_batches=4 --trainer.callbacks=ModelCheckpoint --trainer.callbacks.monitor=val_acc"
    default_requirements = "torchvision, pytorch_lightning, jsonargparse[signatures]"

    script_path = st.text_input("Enter your script to run", value="train_script.py")
    script_args = st.text_input("Enter your base script arguments", value=default_script_args)
    requirements = st.text_input("Enter your requirements", value=default_requirements)
    ml_framework = st.radio("Select your ML Training Frameworks", options=["PyTorch Lightning", "Keras", "Tensorflow"])

    # Bug fix: `("PyTorch Lightning")` is a plain string, so the original
    # `not in` performed a substring check; use a real tuple for membership.
    if ml_framework not in ("PyTorch Lightning",):
        st.write(f"{ml_framework} isn't supported yet.")
        return

    clicked = st.button("Submit")

    # 2: If clicked, create a new request.
    if clicked:
        new_request = {
            "id": id,
            "train": {
                "github_repo": github_repo,
                "script_path": script_path,
                "script_args": script_args.split(" "),
                "requirements": requirements.split(" "),
                "ml_framework": ml_framework,
            },
        }
        # 3: IMPORTANT: Add a new request to the state in-place.
        # The flow receives the UI request and dynamically create
        # and run the associated work from the request information.
        state.requests = state.requests + [new_request]
def page_2__view_run_lists(state):
    """Render one expandable panel per submitted run with its details."""
    import streamlit as st

    st.markdown("# Run Lists 🎈")

    # 1: Iterate through all the requests in the state.
    for i, r in enumerate(state.requests):
        i = str(i)
        # 2: Display information such as request, logs, work state, model score.
        work = state._state["structures"]["ws"]["works"][f"w_{i}"]
        with st.expander(f"Expand to view Run {i}", expanded=False):
            # Streamlit widget keys must be unique app-wide: the original
            # reused `key=i` for all four checkboxes, which raises
            # DuplicateWidgetID once a panel is rendered.
            if st.checkbox("Expand to view your configuration", key=f"config_{i}"):
                st.json(r)
            if st.checkbox("Expand to view logs", key=f"logs_{i}"):
                st.code(body=work["vars"]["logs"])
            if st.checkbox("Expand to view your work state", key=f"work_state_{i}"):
                work["vars"].pop("logs")
                st.json(work)
            best_model_score = r.get("best_model_score", None)
            if best_model_score:
                if st.checkbox("Expand to view your run performance", key=f"performance_{i}"):
                    st.json({"best_model_score": best_model_score, "best_model_path": r.get("best_model_path")})
def page_3__view_app_state(state):
    """Debug page: dump the entire raw application state."""
    import streamlit as st

    st.markdown("# App State 🎈")
    st.write(state._state)
def render_fn(state: AppState):
    """Entry point of the StreamlitFrontend: a sidebar-driven page router."""
    import streamlit as st

    pages = {
        "Create a new Run": partial(page_1__create_new_run, state=state),
        "View your Runs": partial(page_2__view_run_lists, state=state),
        "View the App state": partial(page_3__view_app_state, state=state),
    }
    choice = st.sidebar.selectbox("Select a page", pages.keys())
    pages[choice]()
class RootFlow(LightningFlow):
    """Top-level flow: hosts the request-handling flow and assembles the UI tabs."""

    def __init__(self):
        super().__init__()
        # The flow that manages user requests and its dynamically-created works.
        self.flow = Flow()

    def run(self):
        # Delegate all orchestration to the child flow.
        self.flow.run()

    def configure_layout(self):
        # The main StreamLit tab, followed by one tab per dynamically
        # created work.
        main_tab = [{"name": "Run your Github Repo", "content": self.flow}]
        work_tabs = [work.configure_layout() for work in self.flow.ws.values()]
        return main_tab + work_tabs
# Entry point: wrap the root flow into a Lightning App.
app = LightningApp(RootFlow())

View File

@ -1,13 +0,0 @@
.. _github_repo_script_runner_example:
#################################
Build a Github Repo Script Runner
#################################
**Audience:** Users that want to create interactive applications which runs Github Repo in the cloud at any scale for multiple users.
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>` and read the docstring of the :class:`~lightning_app.components.python.tracer.TracerPythonScript` component.
----
.. include:: github_repo_runner_content.rst

View File

@ -1,98 +0,0 @@
*********
Objective
*********
Create a simple application where users can enter information in a UI to run a given PyTorch Lightning Script from a given Github Repo with optionally some extra python requirements and arguments.
Furthermore, the users should be able to monitor their training progress in real-time, view the logs, and get the best-monitored metric and associated checkpoint for their models.
----
*****************
Final Application
*****************
Here is a recording of the final application built in this example. The example is around 200 lines in total and should give you a great foundation to build your own Lightning App.
.. raw:: html
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/github_app.png" width="100%">
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/github_app.mp4" type="video/mp4" width="100%">
</video>
----
*************
System Design
*************
In order to create such application, we need to build several components:
* A GithubRepoRunner Component that clones a repo, runs a specific script with provided arguments and collect logs.
* A PyTorch Lightning GithubRepoRunner Component that augments the GithubRepoRunner component to track PyTorch Lightning Trainer.
* A UI allowing users to dynamically trigger a new execution.
* A Flow to dynamically create GithubRepoRunner once a user submits information from the UI.
Let's dive in on how to create such a component.
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 1. Implement the GithubRepoRunner Component
:description: Clone and execute script from a GitHub Repo.
:col_css: col-md-4
:button_link: github_repo_runner_step_1.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
:description: Automate PyTorch Lightning execution
:col_css: col-md-4
:button_link: github_repo_runner_step_2.html
:height: 180
:tag: Advanced
.. displayitem::
:header: 3. Implement the Flow to manage user requests
:description: Dynamically create GithubRepoRunner
:col_css: col-md-4
:button_link: github_repo_runner_step_3.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 4. Implement the UI with StreamLit
:description: Several pages application
:col_css: col-md-4
:button_link: github_repo_runner_step_4.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 5. Putting everything together
:description:
:col_css: col-md-4
:button_link: github_repo_runner_step_5.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,62 +0,0 @@
:orphan:
*******************************************
1. Implement the GithubRepoRunner Component
*******************************************
The GithubRepoRunner Component clones a repo, runs a specific script with provided arguments and collect logs.
Let's dive in on how to create such a component with the code below.
.. literalinclude:: ./app.py
:lines: -72
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
:description: Automate PyTorch Lightning execution
:col_css: col-md-4
:button_link: github_repo_runner_step_2.html
:height: 180
:tag: Advanced
.. displayitem::
:header: 3. Implement the Flow to manage user requests
:description: Dynamically create GithubRepoRunner
:col_css: col-md-4
:button_link: github_repo_runner_step_3.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 4. Implement the UI with StreamLit
:description: Several pages application
:col_css: col-md-4
:button_link: github_repo_runner_step_4.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 5. Putting everything together
:description:
:col_css: col-md-4
:button_link: github_repo_runner_step_5.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,68 +0,0 @@
:orphan:
*************************************************************
2. Implement the PyTorch Lightning GithubRepoRunner Component
*************************************************************
The PyTorch Lightning GithubRepoRunner Component subclasses the GithubRepoRunner but tailors the execution experience to PyTorch Lightning.
As a matter of fact, this component adds two primary tailored features for PyTorch Lightning users:
* It injects dynamically a custom callback ``TensorboardServerLauncher`` in the PyTorch Lightning Trainer to start a tensorboard server so it can be exposed in Lightning App UI.
* Once the script has run, the ``on_after_run`` hook of the :class:`~lightning_app.components.python.tracer.TracerPythonScript` is invoked with the script globals, meaning we can collect anything we need. In particular, we are reloading the best model, torch scripting it, and storing its path in the state alongside the best metric score.
Let's dive in on how to create such a component with the code below.
.. literalinclude:: ./app.py
:lines: 75-136
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 1. Implement the GithubRepoRunner Component
:description: Clone and execute script from a GitHub Repo.
:col_css: col-md-4
:button_link: github_repo_runner_step_1.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 3. Implement the Flow to manage user requests
:description: Dynamically create GithubRepoRunner
:col_css: col-md-4
:button_link: github_repo_runner_step_3.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 4. Implement the UI with StreamLit
:description: Several pages application
:col_css: col-md-4
:button_link: github_repo_runner_step_4.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 5. Putting everything together
:description:
:col_css: col-md-4
:button_link: github_repo_runner_step_5.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,62 +0,0 @@
:orphan:
*********************************************
3. Implement the Flow to manage user requests
*********************************************
In step 1 and 2, we have implemented ``GithubRepoRunner`` and ``PyTorchLightningGithubRepoRunner`` components.
Now, we are going to create a component to dynamically handle user requests.
Let's dive in on how to create such a component with the code below.
.. literalinclude:: ./app.py
:lines: 138-187
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 1. Implement the GithubRepoRunner Component
:description: Clone and execute script from a GitHub Repo.
:col_css: col-md-4
:button_link: github_repo_runner_step_1.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
:description: Automate PyTorch Lightning execution
:col_css: col-md-4
:button_link: github_repo_runner_step_2.html
:height: 180
:tag: Advanced
.. displayitem::
:header: 4. Implement the UI with StreamLit
:description: Several pages application
:col_css: col-md-4
:button_link: github_repo_runner_step_4.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 5. Putting everything together
:description:
:col_css: col-md-4
:button_link: github_repo_runner_step_5.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,93 +0,0 @@
:orphan:
**********************************
4. Implement the UI with StreamLit
**********************************
In step 3, we have implemented a flow that dynamically creates a Work when a new request is added to the requests list.
From the UI, we create 3 pages with `StreamLit <https://streamlit.io/>`_:
* **Page 1**: Create a form to add a new request to the flow state **requests**.
* **Page 2**: Iterate through all the requests and display associated information.
* **Page 3**: Display the entire App State.
****************
Render All Pages
****************
.. literalinclude:: ./app.py
:lines: 263-274
******
Page 1
******
.. literalinclude:: ./app.py
:lines: 189-231
:emphasize-lines: 43
******
Page 2
******
.. literalinclude:: ./app.py
:lines: 233-255
******
Page 3
******
.. literalinclude:: ./app.py
:lines: 257-261
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: 1. Implement the GithubRepoRunner Component
:description: Clone and execute script from a GitHub Repo.
:col_css: col-md-4
:button_link: github_repo_runner_step_1.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
:description: Automate PyTorch Lightning execution
:col_css: col-md-4
:button_link: github_repo_runner_step_2.html
:height: 180
:tag: Advanced
.. displayitem::
:header: 3. Implement the Flow to manage user requests
:description: Dynamically create GithubRepoRunner
:col_css: col-md-4
:button_link: github_repo_runner_step_3.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: 5. Putting everything together
:description:
:col_css: col-md-4
:button_link: github_repo_runner_step_5.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,77 +0,0 @@
:orphan:
******************************
5. Putting everything together
******************************
Let's dive in on how to create such a component with the code below.
.. literalinclude:: ./app.py
:lines: 277-
*******************
Run the application
*******************
Clone the lightning repo and run the following command:
.. code-block:: bash
lightning run app docs/source-app/examples/github_repo_runner/app.py
Add **--cloud** to run this application in the cloud.
.. code-block:: bash
lightning run app docs/source-app/examples/github_repo_runner/app.py --cloud
----
******************
Find more examples
******************
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: Build a DAG
:description: Create a dag pipeline
:col_css: col-md-4
:button_link: ../dag/dag.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a File Server
:description: Train multiple models with different parameters
:col_css: col-md-4
:button_link: ../file_server/file_server.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a HPO Sweeper
:description: Train multiple models with different parameters
:col_css: col-md-4
:button_link: ../hpo/hpo.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a Model Server
:description: Serve multiple models with different parameters
:col_css: col-md-4
:button_link: ../model_server/model_server.html
:height: 150
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,50 +0,0 @@
:orphan:
#################
Hands-on Examples
#################
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Build a DAG
:description: Learn how to orchestrate workflows
:col_css: col-md-6
:button_link: dag/dag.html
:height: 180
.. displayitem::
:header: Build a File Server
:description: Learn how to upload and download files
:col_css: col-md-6
:button_link: file_server/file_server.html
:height: 180
.. displayitem::
:header: Build a Github Repo Script Runner
:description: Learn how to configure dynamic execution from the UI
:col_css: col-md-6
:button_link: github_repo_runner/github_repo_runner.html
:height: 180
.. displayitem::
:header: Build a HPO Sweeper
:description: Learn how to scale your training
:col_css: col-md-6
:button_link: hpo/hpo.html
:height: 180
.. displayitem::
:header: Build a Model Server
:description: Learn how to server your models
:col_css: col-md-6
:button_link: model_server_app_content.html
:height: 180
.. raw:: html
</div>
</div>

View File

@ -1,41 +0,0 @@
:orphan:
#######################################
Implement an HPO component from scratch
#######################################
**Audience:** Users who want to understand how to implement sweep training from scratch.
**Prereqs:** Finish Intermediate Level.
----
********
Examples
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Step 1: Implement an HPO component with the Lightning Works.
:description: Learn how it works under the hood
:col_css: col-md-4
:button_link: hpo_wo.html
:height: 180
:tag: Intermediate
.. displayitem::
:header: Step 2: Add the flow to your HPO component
:description: Learn how it works under the hood
:col_css: col-md-4
:button_link: hpo_wi.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,80 +0,0 @@
.. hpo:
.. _hpo_example:
#######################################################
Build a Lightning Hyperparameter Optimization (HPO) App
#######################################################
*******************
A bit of background
*******************
Traditionally, developing machine learning (ML) products requires choosing among a large space of
hyperparameters while creating and training the ML models. Hyperparameter optimization
(HPO) aims to find a well-performing hyperparameter configuration for a given ML model
on a dataset at hand, including the ML model,
its hyperparameters, and other data processing steps.
HPOs free the human expert from a tedious and error-prone, manual hyperparameter tuning process.
As an example, in the famous `scikit-learn <https://scikit-learn.org/stable/>`_ library,
hyperparameters are passed as arguments to the constructor of
the estimator classes such as ``C`` kernel for
`Support Vector Classifier <https://scikit-learn.org/stable/modules/classes.html?highlight=svm#module-sklearn.svm>`_, etc.
It is possible and recommended to search the hyperparameter space for the best validation score.
An HPO search consists of:
* an objective method
* a defined parameter space
* a method for searching or sampling candidates
A naive method for sampling candidates is grid search, which exhaustively considers all
hyperparameter combinations from a user-specified grid.
Fortunately, HPO is an active area of research, and many methods have been developed to
optimize the time required to get strong candidates.
In the following tutorial, you will learn how to use Lightning together with `Optuna <https://optuna.org/>`_.
`Optuna <https://optuna.org/>`_ is an open source HPO framework to automate hyperparameter search.
Out-of-the-box, it provides efficient algorithms to search large spaces and prune unpromising trials for faster results.
First, you will learn about the best practices on how to implement HPO without the Lightning Framework.
Secondly, we will dive into a working HPO application with Lightning, and finally create a neat
`HiPlot UI <https://facebookresearch.github.io/hiplot/_static/demo/demo_basic_usage.html?hip.filters=%5B%5D&hip.color_by=%22dropout%22&hip.PARALLEL_PLOT.order=%5B%22uid%22%2C%22dropout%22%2C%22lr%22%2C%22loss%22%2C%22optimizer%22%5D>`_
for our application.
----
********
Examples
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Re-use an existing HPO component
:description: Learn how to use Lightning HPO with your app.
:col_css: col-md-4
:button_link: lightning_hpo.html
:height: 180
:tag: Basic
.. displayitem::
:header: Implement an HPO component from scratch
:description: Learn how it works under the hood
:col_css: col-md-4
:button_link: build_from_scratch.html
:height: 180
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,57 +0,0 @@
:orphan:
##########################################
Step 2: Add the flow to your HPO component
##########################################
**Audience:** Users who want to understand how to implement HPO training from scratch with Lightning.
**Prereqs:** Level 17+
----
Thanks to the simplified version, you should have a good grasp on how to implement HPO with Optuna.
As the :class:`~lightning_app.core.app.LightningApp` handles the Infinite Loop,
it has been removed from within the run method of the HPORootFlow.
However, the ``run`` method code is the same as the one defined above.
.. literalinclude:: ../../../examples/app_hpo/app_wo_ui.py
:language: python
The ``ObjectiveWork`` is sub-classing
the built-in :class:`~lightning_app.components.python.TracerPythonScript`
which enables launching scripts and more.
.. literalinclude:: ../../../examples/app_hpo/objective.py
:language: python
Finally, let's add the ``HiPlotFlow`` component to visualize our hyperparameter optimization.
The metric and sampled parameters are added to the ``self.hi_plot.data`` list, enabling
updates to the dashboard in near-realtime.
.. literalinclude:: ../../../examples/app_hpo/app_wi_ui.py
:diff: ../../../examples/app_hpo/app_wo_ui.py
Here is the associated code with the ``HiPlotFlow`` component.
In the ``render_fn`` method, the state of the ``HiPlotFlow`` is passed.
The ``state.data`` is accessed as it contains the metric and sampled parameters.
.. literalinclude:: ../../../examples/app_hpo/hyperplot.py
Run the HPO application with the following command:
.. code-block:: console
$ lightning run app examples/app_hpo/app_wi_ui.py
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
{0: ..., 1: ..., ..., 5: ...}
Here is what the UI looks like when launched:
.. image:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/hpo_ui_2.gif
:width: 100 %
:alt: Alternative text

View File

@ -1,57 +0,0 @@
:orphan:
###########################################################
Step 1: Implement an HPO component with the Lightning Works
###########################################################
**Audience:** Users who want to understand how to implement HPO training from scratch.
**Prereqs:** Level 17+
----
In the example below, we are emulating the Lightning Infinite Loop.
We are assuming we have already defined an ``ObjectiveWork`` component which is responsible to run the objective method and track the metric through its state.
.. literalinclude:: ./hpo.py
:language: python
We are running ``TOTAL_TRIALS`` trials by series of ``SIMULTANEOUS_TRIALS`` trials.
When starting, ``TOTAL_TRIALS`` ``ObjectiveWork`` are created.
The entire code runs within an infinite loop as it would within Lightning.
When iterating through the Works, if the current ``objective_work`` hasn't started,
some new parameters are sampled from the Optuna Study with our custom distributions
and then passed to run method of the ``objective_work``.
The condition ``not objective_work.has_started`` will be ``False`` once ``objective_work.run()`` starts.
Also, the second condition ``objective_work.has_told_study`` will be ``True`` when the metric
is defined within the state of the Work and has been shared with the study.
Finally, once the current ``SIMULTANEOUS_TRIALS`` have both registered their
metric to the Optuna Study, simply increment ``NUM_TRIALS`` by ``SIMULTANEOUS_TRIALS`` to launch the next trials.
Below, you can find the simplified version of the ``ObjectiveWork`` where the metric is randomly sampled using NumPy.
In a realistic use case, the Work executes some user-defined code.
.. literalinclude:: ./objective.py
:language: python
Here are the logs produced when running the application above:
.. code-block:: console
$ python docs/source-app/tutorials/hpo/hpo.py
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
# After you have clicked `run` on the UI.
[I 2022-03-01 12:32:50,050] A new study created in memory with name: ...
{0: 13.994859806481264, 1: 59.866743330127825, ..., 5: 94.65919769609225}
The following animation shows how this application works in the cloud:
.. image:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/hpo.gif
:alt: Animation showing how to HPO works UI in a distributed manner.

View File

@ -1,99 +0,0 @@
:orphan:
################################
Re-use an existing HPO component
################################
**Audience:** Users who want to easily get started with HPO training.
**Prereqs:** Level 8+
----
*********************
Install Lightning HPO
*********************
Lightning HPO provides a Pythonic implementation for Scalable Hyperparameter Tuning
and relies on Optuna for providing state-of-the-art sampling hyper-parameters algorithms and efficient trial pruning strategies.
Find the `Lightning Sweeper App <https://lightning.ai/app/8FOWcOVsdf-Lightning%20Sweeper>`_ on `lightning.ai <https://lightning.ai/>`_ and its associated `Github repo <https://github.com/Lightning-AI/LAI-lightning-hpo-App>`_.
.. code-block:: bash
lightning install app lightning/hpo
*********************
Lightning HPO Example
*********************
In this tutorial, we are going to convert `Optuna Efficient Optimization Algorithms <https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/003_efficient_optimization_algorithms.html#sphx-glr-tutorial-10-key-features-003-efficient-optimization-algorithms-py>`_ into a Lightning App.
The Optuna example optimizes the value (example: learning-rate) of a ``SGDClassifier`` from ``sklearn`` trained over the `Iris Dataset <https://archive.ics.uci.edu/ml/datasets/iris>`_.
.. literalinclude:: ./optuna_reference.py
:language: python
As you can see, several trials were pruned (stopped) before they finished all of the iterations.
.. code-block:: console
A new study created in memory with name: no-name-4423c12c-22e1-4eaf-ba60-caf0020403c6
Trial 0 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.00020629773477269024}. Best is trial 0 with value: 0.07894736842105265.
Trial 1 finished with value: 0.368421052631579 and parameters: {'alpha': 0.0005250149151047217}. Best is trial 0 with value: 0.07894736842105265.
Trial 2 finished with value: 0.052631578947368474 and parameters: {'alpha': 5.9086862655635784e-05}. Best is trial 2 with value: 0.052631578947368474.
Trial 3 finished with value: 0.3421052631578947 and parameters: {'alpha': 0.07177263583415294}. Best is trial 2 with value: 0.052631578947368474.
Trial 4 finished with value: 0.23684210526315785 and parameters: {'alpha': 1.7451874636151302e-05}. Best is trial 2 with value: 0.052631578947368474.
Trial 5 pruned.
Trial 6 finished with value: 0.10526315789473684 and parameters: {'alpha': 1.4943994864178649e-05}. Best is trial 2 with value: 0.052631578947368474.
Trial 7 pruned.
Trial 8 pruned.
Trial 9 pruned.
Trial 10 pruned.
Trial 11 pruned.
Trial 12 pruned.
Trial 13 pruned.
Trial 14 pruned.
Trial 15 pruned.
Trial 16 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.006166329613687364}. Best is trial 2 with value: 0.052631578947368474.
Trial 17 pruned.
Trial 18 pruned.
Trial 19 pruned.
The example above has been re-organized in order to run as Lightning App.
.. literalinclude:: ./lightning_hpo_target.py
:language: python
Now, your code can run at scale in the cloud, if needed, and it has a simple neat UI.
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/lightning_hpo_optimizer.png
:alt: Lightning App UI
:width: 100 %
As you can see, several trials were pruned (stopped) before they finished all of the iterations. Same as when using pure optuna.
.. code-block:: console
A new study created in memory with name: no-name-a93d848e-a225-4df3-a9c3-5f86680e295d
Trial 0 finished with value: 0.23684210526315785 and parameters: {'alpha': 0.006779437004523296}. Best is trial 0 with value: 0.23684210526315785.
Trial 1 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.008936151407006062}. Best is trial 1 with value: 0.07894736842105265.
Trial 2 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.0035836511240528008}. Best is trial 2 with value: 0.052631578947368474.
Trial 3 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.0005393218926409795}. Best is trial 2 with value: 0.052631578947368474.
Trial 4 finished with value: 0.1578947368421053 and parameters: {'alpha': 6.572557493358585e-05}. Best is trial 2 with value: 0.052631578947368474.
Trial 5 finished with value: 0.02631578947368418 and parameters: {'alpha': 0.0013953760106345603}. Best is trial 5 with value: 0.02631578947368418.
Trial 6 pruned.
Trial 7 pruned.
Trial 8 pruned.
Trial 9 pruned.
Trial 10 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.00555435554783454}. Best is trial 5 with value: 0.02631578947368418.
Trial 11 pruned.
Trial 12 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.025624276147153992}. Best is trial 5 with value: 0.02631578947368418.
Trial 13 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.014613957457075546}. Best is trial 5 with value: 0.02631578947368418.
Trial 14 pruned.
Trial 15 pruned.
Trial 16 pruned.
Trial 17 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.01028208215647372}. Best is trial 5 with value: 0.02631578947368418.
Trial 18 pruned.
Trial 19 pruned.

View File

@ -1,53 +0,0 @@
import optuna
from lightning_hpo import BaseObjective, Optimizer
from optuna.distributions import LogUniformDistribution
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from lightning import LightningApp, LightningFlow
# NOTE(review): indentation was lost in this rendering; the comments below follow
# the apparent logical structure -- confirm against the original file.
class Objective(BaseObjective):
# Per-trial objective: trains an SGDClassifier on the Iris dataset and
# exposes 1 - validation accuracy (lower is better) through its state.
def run(self, params):
# WARNING: Don't forget to assign `params` to self,
# so they get tracked in the state.
self.params = params
iris = datasets.load_iris()
classes = list(set(iris.target))
# 75/25 train/validation split; fixed seed for reproducibility.
train_x, valid_x, train_y, valid_y = train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
clf = SGDClassifier(alpha=params["alpha"])
# Incremental training: each step's validation error is appended to
# `reports` so the study's pruner can stop unpromising trials early.
for step in range(100):
clf.partial_fit(train_x, train_y, classes=classes)
intermediate_value = 1.0 - clf.score(valid_x, valid_y)
# WARNING: Assign to reports,
# so the state is instantly sent to the flow.
self.reports = self.reports + [[intermediate_value, step]]
# Final objective value shared with the optimizer once training completes.
self.best_model_score = 1.0 - clf.score(valid_x, valid_y)
def distributions(self):
# Search space: `alpha` sampled log-uniformly over [1e-5, 1e-1].
return {"alpha": LogUniformDistribution(1e-5, 1e-1)}
class RootFlow(LightningFlow):
# Root flow wiring the HPO Optimizer: 20 trials with median pruning.
def __init__(self):
super().__init__()
self.optimizer = Optimizer(
objective_cls=Objective,
n_trials=20,
study=optuna.create_study(pruner=optuna.pruners.MedianPruner()),
)
def run(self):
self.optimizer.run()
def configure_layout(self):
# Expose the optimizer's HiPlot dashboard as the app's UI tab.
return {"name": "HyperPlot", "content": self.optimizer.hi_plot}
app = LightningApp(RootFlow())

View File

@ -1,36 +0,0 @@
import logging
import sys
import optuna
from sklearn import datasets
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
# Objective minimized by Optuna: 1 - validation accuracy of an SGDClassifier
# trained incrementally on the Iris dataset.
def objective(trial):
iris = datasets.load_iris()
classes = list(set(iris.target))
# 75/25 train/validation split; fixed seed for reproducibility.
train_x, valid_x, train_y, valid_y = train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
# `alpha` is sampled log-uniformly over [1e-5, 1e-1] for this trial.
alpha = trial.suggest_float("alpha", 1e-5, 1e-1, log=True)
clf = SGDClassifier(alpha=alpha)
for step in range(100):
clf.partial_fit(train_x, train_y, classes=classes)
# Report intermediate objective value.
intermediate_value = 1.0 - clf.score(valid_x, valid_y)
trial.report(intermediate_value, step)
# Handle pruning based on the intermediate value.
if trial.should_prune():
raise optuna.TrialPruned()
return 1.0 - clf.score(valid_x, valid_y)
# Add stream handler of stdout to show the messages
logger = optuna.logging.get_logger("optuna")
logger.addHandler(logging.StreamHandler(sys.stdout))
# Median pruning stops trials whose intermediate value is worse than the
# running median of previous trials at the same step.
study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)

View File

@ -0,0 +1,3 @@
############################
Build a Model Deployment App
############################

View File

@ -1,34 +0,0 @@
from locust_component import Locust
from model_server import MLServer
from train import TrainModel
from lightning import LightningApp, LightningFlow
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
class TrainAndServe(LightningFlow):
# Orchestrates the three components: train a model, serve it with MLServer,
# then load-test the running server with Locust.
def __init__(self):
super().__init__()
self.train_model = TrainModel()
self.model_server = MLServer(
name="mnist-svm",
implementation="mlserver_sklearn.SKLearnModel",
workers=8,
)
self.performance_tester = Locust(num_users=100)
def run(self):
# Sequential pipeline: the server consumes the trained model's path,
# and the tester only starts once the server reports itself alive.
self.train_model.run()
self.model_server.run(self.train_model.best_model_path)
if self.model_server.alive():
# The performance tester needs the model server to be up
# and running to be started, so the URL is added in the UI.
self.performance_tester.run(self.model_server.url)
def configure_layout(self):
# Two UI tabs: the server's docs page and the Locust testing UI.
return [
{"name": "Server", "content": self.model_server.url + "/docs"},
{"name": "Server Testing", "content": self.performance_tester},
]
app = LightningApp(TrainAndServe())

View File

@ -1,57 +0,0 @@
:orphan:
***********************************
3. Build the Load Testing Component
***********************************
Now, we are going to create a component to test the performance of your model server.
We are going to use a python performance testing tool called `Locust <https://github.com/locustio/locust>`_.
.. literalinclude:: ./locust_component.py
Finally, once the component is done, we need to create a ``locustfile.py`` file which defines the format of the request to send to your model server.
The endpoint to hit has the following format: ``/v2/models/{MODEL_NAME}/versions/{VERSION}/infer``.
.. literalinclude:: ./locustfile.py
----
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: 1. Build a Train Component
:description: Train a model and store its checkpoints with SKlearn
:col_css: col-md-4
:button_link: train.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 2. Build a Model Server Component
:description: Use MLServer to serve your models
:col_css: col-md-4
:button_link: model_server.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 4. Putting everything together.
:description: Ensemble the components together and run the app
:col_css: col-md-4
:button_link: putting_everything_together.html
:height: 150
:tag: basic
.. raw:: html
</div>
</div>

View File

@ -1,43 +0,0 @@
import os
import subprocess
from lightning import LightningWork
from lightning.app.utilities.packaging.build_config import BuildConfig
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
class Locust(LightningWork):
def __init__(self, num_users: int = 100):
"""This component checks the performance of a server. The server url is passed to its run method.

Arguments:
num_users: Number of users emulated by Locust
"""
# Note: Using the default port 8089 of Locust.
super().__init__(
port=8089,
parallel=True,
cloud_build_config=BuildConfig(requirements=["locust"]),
)
self.num_users = num_users
def run(self, load_tested_url: str):
# 1: Create the locust command line.
# `--host` is the server under test; `-u` is the number of simulated users.
cmd = " ".join(
[
"locust",
"--master-host",
str(self.host),
"--master-port",
str(self.port),
"--host",
str(load_tested_url),
"-u",
str(self.num_users),
]
)
# 2: Create another process with locust
# (cwd is this file's directory so locust picks up the local locustfile.py).
process = subprocess.Popen(cmd, cwd=os.path.dirname(__file__), shell=True)
# 3: Wait for the process to finish. As locust is a server,
# this waits infinitely or if killed.
process.wait()

View File

@ -1,41 +0,0 @@
from locust import FastHttpUser, task
from sklearn import datasets
from sklearn.model_selection import train_test_split
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
class HelloWorldUser(FastHttpUser):
# Locust user that repeatedly POSTs one digits sample to the model server's
# V2 inference endpoint.
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Build the request payload once per user instead of on every task call.
self._prepare_inference_request()
@task
def predict(self):
# Endpoint format: /v2/models/{MODEL_NAME}/versions/{VERSION}/infer
self.client.post(
"/v2/models/mnist-svm/versions/v0.0.1/infer",
json=self.inference_request,
)
def _prepare_inference_request(self):
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data,
# we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Split data into train and test subsets
# (same split as training, so this sample was never seen during fit).
_, X_test, _, _ = train_test_split(data, digits.target, test_size=0.5, shuffle=False)
x_0 = X_test[0:1]
# KFServing V2 dataplane request body with a single FP32 input tensor.
self.inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist(),
}
]
}

View File

@ -1,90 +0,0 @@
import json
import subprocess
from lightning import LightningWork
from lightning.app.storage import Path
from lightning.app.utilities.packaging.build_config import BuildConfig
# ML_SERVER_URL = https://github.com/SeldonIO/MLServer
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
class MLServer(LightningWork):
"""This component uses the SeldonIO MLServer library.

The model endpoint: /v2/models/{MODEL_NAME}/versions/{VERSION}/infer.

Arguments:
name: The name of the model for the endpoint.
implementation: The model loader class.
Example: "mlserver_sklearn.SKLearnModel".
Learn more here: $ML_SERVER_URL/tree/master/runtimes
workers: Number of server workers.
"""
def __init__(
self,
name: str,
implementation: str,
workers: int = 1,
**kwargs,
):
super().__init__(
parallel=True,
cloud_build_config=BuildConfig(
requirements=["mlserver", "mlserver-sklearn"],
),
**kwargs,
)
# 1: Collect the config's.
# `settings` maps to MLServer's settings.json; `model_settings` to
# model-settings.json (both written out in `run`).
self.settings = {
"debug": True,
"parallel_workers": workers,
}
self.model_settings = {
"name": name,
"implementation": implementation,
}
# 2: Keep track of latest version
self.version = 1
def run(self, model_path: Path):
"""The model is downloaded when the run method is invoked.

Arguments:
model_path: The path to the trained model.
"""
# 1: Use the host and port at runtime so it works in the cloud.
# $ML_SERVER_URL/blob/master/mlserver/settings.py#L50
if self.version == 1:
# TODO: Reload the next version model of the model.
self.settings.update({"host": self.host, "http_port": self.port})
with open("settings.json", "w") as f:
json.dump(self.settings, f)
# 2. Store the model-settings
# $ML_SERVER_URL/blob/master/mlserver/settings.py#L120
self.model_settings["parameters"] = {
"version": f"v0.0.{self.version}",
"uri": str(model_path.absolute()),
}
with open("model-settings.json", "w") as f:
json.dump(self.model_settings, f)
# 3. Launch the Model Server
# (non-blocking: the server keeps running while this Work continues).
subprocess.Popen("mlserver start .", shell=True)
# 4. Increment the version for the next time run is called.
self.version += 1
else:
# TODO: Load the next model and unload the previous one.
pass
def alive(self):
# Current hack, when the url is available,
# the server is up and running.
# This would be cleaned out and automated.
return self.url != ""

View File

@ -1,48 +0,0 @@
:orphan:
***********************************
2. Build the Model Server Component
***********************************
In the code below, we use `MLServer <https://github.com/SeldonIO/MLServer>`_ which aims to provide an easy way to start serving your machine learning models through a REST and gRPC interface,
fully compliant with KFServing's V2 Dataplane spec.
.. literalinclude:: ./model_server.py
----
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: 1. Build a Train Component
:description: Train a model and store its checkpoints with SKlearn
:col_css: col-md-4
:button_link: train.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 3. Build a Load Testing Component
:description: Use Locust to test your model servers
:col_css: col-md-4
:button_link: load_testing.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 4. Putting everything together.
:description: Ensemble the components together and run the app
:col_css: col-md-4
:button_link: putting_everything_together.html
:height: 150
:tag: basic
.. raw:: html
</div>
</div>

View File

@ -1,15 +0,0 @@
:orphan:
.. _model_server_example:
####################
Build a Model Server
####################
**Audience:** Users who want to serve their trained models.
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>`.
----
.. include:: model_server_app_content.rst

View File

@ -1,84 +0,0 @@
*********
Objective
*********
Create a simple application that trains and serves a `Sklearn <https://scikit-learn.org/stable/>`_ machine learning model with `MLServer from SeldonIO <https://github.com/SeldonIO/MLServer>`_
----
*****************
Final Application
*****************
Here is a gif of the final application built in this example.
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/ml_server_2.gif
----
*************
System Design
*************
In order to create such application, we need to build several components:
* A Model Train Component that trains a model and provides its trained weights
* A Model Server Component that serves as an API endpoint for the model generated by the **Model Train Component**.
* A Load Testing Component that tests the model server works as expected. This could be used to CI/CD the performance of newly generated models (left to the users).
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/model_server_app_2.png
Let's dive into the tutorial.
----
********
Tutorial
********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: 1. Build a Train Component
:description: Train a model and store its checkpoints with SKlearn
:col_css: col-md-4
:button_link: train.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 2. Build a Model Server Component
:description: Use MLServer to serve your models
:col_css: col-md-4
:button_link: model_server.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 3. Build a Load Testing Component
:description: Use Locust to test your model servers
:col_css: col-md-4
:button_link: load_testing.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 4. Putting everything together.
:description: Ensemble the components together and run the app
:col_css: col-md-4
:button_link: putting_everything_together.html
:height: 150
:tag: basic
.. raw:: html
</div>
</div>

View File

@ -1,80 +0,0 @@
:orphan:
******************************
4. Putting everything together
******************************
In the code below, we put together the **TrainWork**, the **MLServer** and the **Locust** components in an ``app.py`` file.
.. literalinclude:: ./app.py
***********
Run the App
***********
To run the app, simply open a terminal and execute this command:
.. code-block:: bash
lightning run app docs/source-app/examples/model_deploy_app/app.py
Here is a gif of the UI.
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/ml_server_2.gif
.. raw:: html
<br />
Congrats, you have finished the **Build a Model Server** example !
----
******************
Find more examples
******************
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: Build a DAG
:description: Create a dag pipeline
:col_css: col-md-4
:button_link: ../dag/dag.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a File Server
:description: Learn how to upload and download files
:col_css: col-md-4
:button_link: ../file_server/file_server.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a Github Repo Script Runner
:description: Run code from the internet in the cloud
:col_css: col-md-4
:button_link: ../github_repo_runner/github_repo_runner.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: Build a HPO Sweeper
:description: Train multiple models with different parameters
:col_css: col-md-4
:button_link: ../hpo/hpo.html
:height: 150
:tag: Intermediate
.. raw:: html
</div>
</div>

View File

@ -1,42 +0,0 @@
import joblib
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
from lightning import LightningWork
from lightning.app.storage import Path
# NOTE(review): indentation was lost in this rendering; comments follow the
# apparent logical structure -- confirm against the original file.
class TrainModel(LightningWork):
"""This component trains a Sklearn SVC model on digits dataset."""
def __init__(self):
super().__init__()
# 1: Add element to the state.
self.best_model_path = None
def run(self):
# 2: Load the Digits
digits = datasets.load_digits()
# 3: To apply a classifier on this data,
# we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# 4: Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# 5: Split data into train and test subsets
X_train, _, y_train, _ = train_test_split(data, digits.target, test_size=0.5, shuffle=False)
# 6: We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)
# 7: Save the Sklearn model with `joblib`.
model_file_name = "mnist-svm.joblib"
joblib.dump(classifier, model_file_name)
# 8: Keep a reference to the generated model.
self.best_model_path = Path("mnist-svm.joblib")

View File

@ -1,49 +0,0 @@
:orphan:
****************************
1. Build the Train Component
****************************
In the code below, we create a work which trains a simple `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ model on the digits dataset (classification).
Once the model is trained, it is saved and a :class:`~lightning_app.storage.path.Path` reference to it is kept in the ``best_model_path`` state attribute.
.. literalinclude:: ./train.py
----
.. raw:: html
<div class="display-card-container">
<div class="row">
.. Add callout items below this line
.. displayitem::
:header: 2. Build a Model Server Component
:description: Use MLServer to serve your models
:col_css: col-md-4
:button_link: model_server.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 3. Build a Load Testing Component
:description: Use Locust to test your model servers
:col_css: col-md-4
:button_link: load_testing.html
:height: 150
:tag: Intermediate
.. displayitem::
:header: 4. Putting everything together.
:description: Ensemble the components together and run the app
:col_css: col-md-4
:button_link: putting_everything_together.html
:height: 150
:tag: basic
.. raw:: html
</div>
</div>

View File

@ -1,5 +1,3 @@
:orphan:
#########################
Build a Research Demo App
#########################

View File

@ -1,18 +0,0 @@
:orphan:
#######################
Add an Interactive Demo
#######################
.. _add_an_interactive_Demo:
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
**Goal:** We'll walk you through the 4 key steps to run a Lightning App that trains and demos a model.
.. join_slack::
:align: left
----
.. include:: go_beyond_training_content.rst

View File

@ -1,76 +0,0 @@
:orphan:
.. _build_model:
#######################
Build and Train a Model
#######################
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
**Goal:** We'll walk you through the creation of a model using PyTorch Lightning.
.. join_slack::
:align: left
----
*********************************
A simple PyTorch Lightning script
*********************************
Let's assume you already have a folder with those two files.
.. code-block:: bash
pl_project/
train.py # your own script to train your models
requirements.txt # your python requirements.
If you don't, simply create a ``pl_project`` folder with those two files and add the following `PyTorch Lightning <https://pytorch-lightning.readthedocs.io/en/latest/>`_ code in the ``train.py`` file. This code trains a simple ``AutoEncoder`` on `MNIST Dataset <https://en.wikipedia.org/wiki/MNIST_database>`_.
.. literalinclude:: ../code_samples/convert_pl_to_app/train.py
Add the following to the ``requirements.txt`` file.
.. literalinclude:: ../code_samples/convert_pl_to_app/requirements.py
Simply run the following commands in your terminal to install the requirements and train the model.
.. code-block:: bash
pip install -r requirements.txt
python train.py
Get through `PyTorch Lightning Introduction <https://pytorch-lightning.readthedocs.io/en/stable/starter/introduction.html#step-1-define-lightningmodule>`_ to learn more.
----
**********
Next Steps
**********
.. raw:: html
<br />
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Evolve a Model into an ML System
:description: Develop an App to train a model in the cloud
:col_css: col-md-6
:button_link: training_with_apps.html
:height: 180
.. displayitem::
:header: Start from a Template ML System
:description: Learn about Apps, from a template.
:col_css: col-md-6
:button_link: go_beyond_training.html
:height: 180
.. raw:: html
</div>
</div>

View File

@ -1,18 +0,0 @@
:orphan:
################################
Start from an ML system template
################################
.. _go_beyond_training:
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
**Goal:** We'll walk you through the 4 key steps to run a Lightning App that trains and demos a model.
.. join_slack::
:align: left
----
.. include:: go_beyond_training_content.rst

View File

@ -1,123 +0,0 @@
:orphan:
#####################################
Start from Ready-to-Run Template Apps
#####################################
.. _jumpstart_from_app_gallery:
Anyone can build Apps for their own use cases and promote them on the `App Gallery <https://lightning.ai/apps>`_.
In return, you can benefit from the work of others and get started faster by re-using a ready-to-run App close to your own use case.
.. join_slack::
:align: left
----
*************
User Workflow
*************
#. Visit the `App Gallery <https://lightning.ai/apps>`_ and look for an App close to your own use case.
.. raw:: html
<br />
#. If **Launch** is available, it means the App is live and ready to be used! Take it for a spin.
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/launch_button.png
:alt: Launch Button on lightning.ai
:width: 100 %
#. By clicking **Clone & Run**, a copy of the App is added to your account and an instance starts running.
.. raw:: html
<br />
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/clone_and_run.png" width="100%">
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/clone_and_run.mp4" type="video/mp4" width="100%">
</video>
#. If you found an App that matches what you need, move to **step 5**! Otherwise, go back to **step 1**.
.. raw:: html
<br />
#. Copy the installation command (optionally from the clipboard on the right).
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_command.png
:alt: Install command on lightning.ai
:width: 100 %
#. Copy the command to your local terminal.
.. code-block:: bash
lightning install app lightning/hackernews-app
#. Go through the installation steps.
.. raw:: html
<br />
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_an_app.png" width="100%">
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_an_app.mp4" type="video/mp4" width="100%">
</video>
#. Run the App locally.
.. code-block:: bash
cd LAI-Hackernews-App
lightning run app app.py
.. raw:: html
<br />
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews.png" width="100%">
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews.mp4" type="video/mp4" width="100%">
</video>
#. Open the code with your favorite IDE, modify it, and run it back in the cloud.
.. raw:: html
<br />
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews_modified.png" width="100%">
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews_modified.mp4" type="video/mp4" width="100%">
</video>
<br />
----
**********
Next Steps
**********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Add Component made by others to your App
:description: Add more functionality to your projects
:col_css: col-md-6
:button_link: jumpstart_from_component_gallery.html
:height: 180
.. displayitem::
:header: Level-up your skills with Lightning Apps
:description: From Basic to Advanced Skills
:col_css: col-md-6
:button_link: ../levels/basic/index.html
:height: 180
.. raw:: html
</div>
</div>
<br />

View File

@ -1,155 +0,0 @@
:orphan:
########################################
Add Component made by others to your App
########################################
.. _jumpstart_from_component_gallery:
Anyone can build components for their own use case and promote them on the `Component Gallery <https://lightning.ai/components>`_.
In return, you can benefit from the work of others and add new functionalities to your Apps with minimal effort.
.. join_slack::
:align: left
----
*************
User Workflow
*************
#. Visit the `Component Gallery <https://lightning.ai/components>`_ and look for a Component close to something you want to do.
.. raw:: html
<br />
#. Check out the code for inspiration or simply install the component from PyPi and use it.
----
*************
Success Story
*************
The default `Train and Demo Application <https://github.com/Lightning-AI/lightning-quick-start>`_ trains a PyTorch Lightning
model and then starts a demo with `Gradio <https://gradio.app/>`_.
.. code-block:: python
import os.path as ops
import lightning as L
from quick_start.components import PyTorchLightningScript, ImageServeGradio
class TrainDeploy(L.LightningFlow):
def __init__(self):
super().__init__()
self.train_work = PyTorchLightningScript(
script_path=ops.join(ops.dirname(__file__), "./train_script.py"),
script_args=["--trainer.max_epochs=5"],
)
self.serve_work = ImageServeGradio(L.CloudCompute("cpu"))
def run(self):
# 1. Run the python script that trains the model
self.train_work.run()
# 2. when a checkpoint is available, deploy
if self.train_work.best_model_path:
self.serve_work.run(self.train_work.best_model_path)
def configure_layout(self):
tab_1 = {"name": "Model training", "content": self.train_work}
tab_2 = {"name": "Interactive demo", "content": self.serve_work}
return [tab_1, tab_2]
app = L.LightningApp(TrainDeploy())
However, someone who wants to use this App (maybe you) found `Lightning HPO <https://lightning.ai/component/BA2slXI093-Lightning%20HPO>`_
from browsing the `Component Gallery <https://lightning.ai/components>`_ and decided to give it a spin after checking the associated
`Github Repository <https://github.com/Lightning-AI/LAI-lightning-hpo-App>`_.
Once ``lightning_hpo`` is installed, they improved the default App by easily adding HPO support to their project.
Here is the resulting App. It is almost the same code, but it's way more powerful now!
This is the power of the `lightning.ai <https://lightning.ai/>`_ ecosystem 🔥⚡🔥
.. code-block:: python
import os.path as ops
import lightning as L
from quick_start.components import PyTorchLightningScript, ImageServeGradio
import optuna
from optuna.distributions import LogUniformDistribution
from lightning_hpo import Optimizer, BaseObjective
class HPOPyTorchLightningScript(PyTorchLightningScript, BaseObjective):
@staticmethod
def distributions():
return {"model.lr": LogUniformDistribution(0.0001, 0.1)}
class TrainDeploy(L.LightningFlow):
def __init__(self):
super().__init__()
self.train_work = Optimizer(
script_path=ops.join(ops.dirname(__file__), "./train_script.py"),
script_args=["--trainer.max_epochs=5"],
objective_cls=HPOPyTorchLightningScript,
n_trials=4,
)
self.serve_work = ImageServeGradio(L.CloudCompute("cpu"))
def run(self):
# 1. Run the python script that trains the model
self.train_work.run()
# 2. when a checkpoint is available, deploy
if self.train_work.best_model_path:
self.serve_work.run(self.train_work.best_model_path)
def configure_layout(self):
tab_1 = {"name": "Model training", "content": self.train_work.hi_plot}
tab_2 = {"name": "Interactive demo", "content": self.serve_work}
return [tab_1, tab_2]
app = L.LightningApp(TrainDeploy())
----
**********
Next Steps
**********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Start from Ready-to-Run Template Apps
:description: Jump-start your project's development
:col_css: col-md-6
:button_link: jumpstart_from_app_gallery.html
:height: 180
.. displayitem::
:header: Level-up your skills with Lightning Apps
:description: From Basic to Advanced Skills
:col_css: col-md-6
:button_link: ../levels/basic/index.html
:height: 180
.. raw:: html
</div>
</div>
<br />

View File

@ -1,14 +0,0 @@
############################
Lightning Apps in 15 minutes
############################
**Required background:** Basic Python familiarity.
**Goal:** Guide you to develop your first Lightning App or use an existing App from the `Apps Gallery <https://lightning.ai/apps>`_.
.. join_slack::
:align: left
----
.. include:: go_beyond_training_content.rst

View File

@ -1,136 +0,0 @@
:orphan:
################################
Evolve a model into an ML system
################################
.. _convert_pl_to_app:
**Required background:** Basic Python familiarity and complete the :ref:`build_model` guide.
**Goal:** We'll walk you through the two key steps to build your first Lightning App from your existing Pytorch Lightning scripts.
.. join_slack::
:align: left
----
*******************
Training and beyond
*******************
With `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_, we abstracted distributed training and hardware, by organizing PyTorch code.
With `Lightning Apps <https://github.com/Lightning-AI/lightning/tree/master/src/lightning_app>`_, we unified the local and cloud experience while abstracting infrastructure.
By using `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_ and `Lightning Apps <https://github.com/Lightning-AI/lightning/tree/master/src/lightning_app>`_
together, a completely new world of possibilities emerges.
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/pl_to_app_4.png
:alt: From PyTorch Lightning to Lightning App
:width: 100 %
----
******************************************
1. Write an App to run the train.py script
******************************************
This article continues where the :ref:`build_model` guide finished.
Create an additional file ``app.py`` in the ``pl_project`` folder as follows:
.. code-block:: bash
pl_project/
app.py
train.py
requirements.txt
Inside the ``app.py`` file, add the following code.
.. literalinclude:: ../code_samples/convert_pl_to_app/app.py
This App runs the Pytorch Lightning script contained in the ``train.py`` file using the powerful :class:`~lightning_app.components.python.tracer.TracerPythonScript` component. This is really worth checking out!
----
************************************************
2. Run the train.py file locally or in the cloud
************************************************
First, go to the ``pl_project`` folder from the local terminal and install the requirements.
.. code-block:: bash
cd pl_project
pip install -r requirements.txt
To run your app, copy the following command to your local terminal:
.. code-block:: bash
lightning run app app.py
Simply add ``--cloud`` to run this application in the cloud with a GPU machine 🤯
.. code-block:: bash
lightning run app app.py --cloud
Congratulations! Now, you know how to run a `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_ script with Lightning Apps.
Lightning Apps can make your ML system way more powerful, keep reading to learn how.
----
**********
Next Steps
**********
.. raw:: html
<div class="display-card-container">
<div class="row">
.. displayitem::
:header: Level-up with Lightning Apps
:description: From Basics to Advanced Skills
:col_css: col-md-4
:button_link: ../levels/basic/index.html
:height: 180
.. displayitem::
:header: Add an Interactive Demo
:description: Add a Gradio Demo once the training is finished
:col_css: col-md-4
:button_link: add_an_interactive_demo.html
:height: 180
.. displayitem::
:header: Add Hyper Parameter Optimization
:description: Add a HPO to optimize your models
:col_css: col-md-4
:button_link: ../examples/hpo/hpo.html
:height: 180
.. displayitem::
:header: Add Model Serving
:description: Serve and load testing with MLServer and Locust
:col_css: col-md-4
:button_link: ../examples/model_server_app/model_server_app.html
:height: 180
.. displayitem::
:header: Add DAG Orchestration
:description: Organize your processing, training and metrics collection
:col_css: col-md-4
:button_link: ../examples/dag/dag.html
:height: 180
.. displayitem::
:header: Add Team Collaboration
:description: Create an app to run any PyTorch Lightning Script from Github
:col_css: col-md-4
:button_link: ../examples/github_repo_runner/github_repo_runner.html
:height: 180

Some files were not shown because too many files have changed in this diff Show More