This reverts commit cd31ba3f87
.
This commit is contained in:
parent
cd31ba3f87
commit
eec862ef2f
|
@ -1,34 +0,0 @@
|
|||
import os
|
||||
|
||||
from lightning_cloud.openapi.rest import ApiException
|
||||
|
||||
from lightning_app.utilities.cloud import _get_project
|
||||
from lightning_app.utilities.network import LightningClient
|
||||
|
||||
client = LightningClient()
|
||||
|
||||
try:
|
||||
PR_NUMBER = int(os.getenv("PR_NUMBER", None))
|
||||
except (TypeError, ValueError):
|
||||
# Failed when the PR is running master or 'PR_NUMBER' isn't defined.
|
||||
PR_NUMBER = ""
|
||||
|
||||
APP_NAME = os.getenv("TEST_APP_NAME", "")
|
||||
|
||||
project = _get_project(client)
|
||||
list_lightningapps = client.lightningapp_instance_service_list_lightningapp_instances(project.project_id)
|
||||
|
||||
print([lightningapp.name for lightningapp in list_lightningapps.lightningapps])
|
||||
|
||||
for lightningapp in list_lightningapps.lightningapps:
|
||||
if PR_NUMBER and APP_NAME and not lightningapp.name.startswith(f"test-{PR_NUMBER}-{APP_NAME}-"):
|
||||
continue
|
||||
print(f"Deleting {lightningapp.name}")
|
||||
try:
|
||||
res = client.lightningapp_instance_service_delete_lightningapp_instance(
|
||||
project_id=project.project_id,
|
||||
id=lightningapp.id,
|
||||
)
|
||||
assert res == {}
|
||||
except ApiException as e:
|
||||
print(f"Failed to delete {lightningapp.name}. Exception {e}")
|
|
@ -1,5 +0,0 @@
|
|||
import lightning_app
|
||||
from lightning_app.utilities.packaging.lightning_utils import download_frontend
|
||||
|
||||
if __name__ == "__main__":
|
||||
download_frontend(lightning_app._PROJECT_ROOT)
|
|
@ -12,7 +12,6 @@ trigger:
|
|||
- "master"
|
||||
- "release/*"
|
||||
- "refs/tags/*"
|
||||
|
||||
pr:
|
||||
- "master"
|
||||
- "release/*"
|
||||
|
@ -38,19 +37,6 @@ jobs:
|
|||
|
||||
steps:
|
||||
|
||||
- bash: |
|
||||
CHANGED_FILES=$(git diff --name-status master | awk '{print $2}')
|
||||
echo $CHANGED_FILES > changed_files.txt
|
||||
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
|
||||
echo $MATCHES
|
||||
if [ -z "$MATCHES" ]; then
|
||||
echo "Skip"
|
||||
else
|
||||
echo "Continue"
|
||||
fi
|
||||
|
||||
displayName: Decide if skipping should be done.
|
||||
|
||||
- bash: |
|
||||
lspci | egrep 'VGA|3D'
|
||||
whereis nvidia
|
||||
|
|
|
@ -9,7 +9,6 @@ trigger:
|
|||
- "master"
|
||||
- "release/*"
|
||||
- "refs/tags/*"
|
||||
|
||||
pr:
|
||||
- "master"
|
||||
- "release/*"
|
||||
|
|
|
@ -7,7 +7,6 @@ trigger:
|
|||
- master
|
||||
- release/*
|
||||
- refs/tags/*
|
||||
|
||||
pr:
|
||||
- master
|
||||
- release/*
|
||||
|
|
|
@ -14,7 +14,6 @@ trigger:
|
|||
- "master"
|
||||
- "release/*"
|
||||
- "refs/tags/*"
|
||||
|
||||
pr:
|
||||
- "master"
|
||||
- "release/*"
|
||||
|
|
|
@ -13,9 +13,12 @@ jobs:
|
|||
- name: Get changed files using defaults
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v23
|
||||
|
||||
- name: List all added files
|
||||
run: |
|
||||
for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do
|
||||
echo "$file"
|
||||
done
|
||||
|
||||
- name: Block edits in docs/source-app
|
||||
if: contains(steps.changed-files.outputs.all_changed_and_modified_files, 'docs/source-app')
|
||||
run: exit 1
|
||||
|
|
|
@ -6,6 +6,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
|
|||
branches: [master, "release/*"]
|
||||
pull_request:
|
||||
branches: [master, "release/*"]
|
||||
paths-ignore:
|
||||
- "src/lightning_app/**" # todo: implement job skip
|
||||
- "tests/tests_app/**" # todo: implement job skip
|
||||
- "tests/tests_app_examples/**" # todo: implement job skip
|
||||
- "examples/app_*" # todo: implement job skip
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
|
||||
|
@ -29,34 +34,13 @@ jobs:
|
|||
- {python-version: "3.9", pytorch-version: "1.11"}
|
||||
|
||||
timeout-minutes: 30
|
||||
|
||||
steps:
|
||||
- name: Workaround for https://github.com/actions/checkout/issues/760
|
||||
run: git config --global --add safe.directory /__w/lightning/lightning
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v23.1
|
||||
|
||||
- name: Decide if the test should be skipped
|
||||
id: skip
|
||||
run: |
|
||||
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
|
||||
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
|
||||
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
|
||||
echo $MATCHES
|
||||
if [ -z "$MATCHES" ]; then
|
||||
echo "Skip"
|
||||
echo "::set-output name=continue::'0'"
|
||||
else
|
||||
echo "Continue"
|
||||
echo "::set-output name=continue::'1'"
|
||||
fi
|
||||
|
||||
- name: Update base dependencies
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
env:
|
||||
PACKAGE_NAME: pytorch
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
|
@ -66,12 +50,10 @@ jobs:
|
|||
pip install -e .[test]
|
||||
|
||||
- name: DocTests
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: ./src
|
||||
run: pytest pytorch_lightning --cov=pytorch_lightning
|
||||
|
||||
- name: Update all dependencies
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
env:
|
||||
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
|
||||
HOROVOD_WITHOUT_MXNET: 1
|
||||
|
@ -90,11 +72,9 @@ jobs:
|
|||
python requirements/pytorch/check-avail-extras.py
|
||||
|
||||
- name: Pull legacy checkpoints
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: bash .actions/pull_legacy_checkpoints.sh
|
||||
|
||||
- name: Testing PyTorch
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: tests/tests_pytorch
|
||||
run: coverage run --source pytorch_lightning -m pytest -v --timeout 150 --durations=50 --junitxml=results-${{ runner.os }}-torch${{ matrix.pytorch-version }}.xml
|
||||
|
||||
|
@ -106,7 +86,7 @@ jobs:
|
|||
if: failure()
|
||||
|
||||
- name: Statistics
|
||||
if: ${{ success() && (steps.skip.outputs.continue == '1') }}
|
||||
if: success()
|
||||
working-directory: tests/tests_pytorch
|
||||
run: |
|
||||
coverage report
|
||||
|
@ -114,7 +94,7 @@ jobs:
|
|||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
if: ${{ success() && (steps.skip.outputs.continue == '1') }}
|
||||
if: always()
|
||||
# see: https://github.com/actions/toolkit/issues/399
|
||||
continue-on-error: true
|
||||
with:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
name: Test PyTorch full
|
||||
name: Test PyTorch full
|
||||
|
||||
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
|
||||
on: # Trigger the workflow on push or pull request, but only for the master branch
|
||||
|
@ -7,6 +7,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
|
|||
pull_request:
|
||||
branches: [master, "release/*"]
|
||||
types: [opened, reopened, ready_for_review, synchronize]
|
||||
paths-ignore:
|
||||
- "src/lightning_app/**" # todo: implement job skip
|
||||
- "tests/tests_app/**" # todo: implement job skip
|
||||
- "tests/tests_app_examples/**" # todo: implement job skip
|
||||
- "examples/app_*" # todo: implement job skip
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
|
||||
|
@ -32,67 +37,40 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v23.1
|
||||
|
||||
- name: Decide if the test should be skipped
|
||||
id: skip
|
||||
run: |
|
||||
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
|
||||
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
|
||||
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
|
||||
echo $MATCHES
|
||||
if [ -z "$MATCHES" ]; then
|
||||
echo "Skip"
|
||||
echo "::set-output name=continue::'0'"
|
||||
else
|
||||
echo "Continue"
|
||||
echo "::set-output name=continue::'1'"
|
||||
fi
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Reset caching
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV
|
||||
|
||||
- name: basic setup
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: |
|
||||
pip --version
|
||||
pip install -q fire
|
||||
|
||||
# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
|
||||
- name: Setup macOS
|
||||
if: ${{ (runner.os == 'macOS') && (steps.skip.outputs.continue == '1') }}
|
||||
if: runner.os == 'macOS'
|
||||
run: |
|
||||
brew install openmpi libuv # Horovod on macOS requires OpenMPI, Gloo not currently supported
|
||||
|
||||
- name: Setup Windows
|
||||
if: ${{ (runner.os == 'windows') && (steps.skip.outputs.continue == '1') }}
|
||||
if: runner.os == 'windows'
|
||||
run: |
|
||||
python .actions/assistant.py requirements_prune_pkgs horovod
|
||||
|
||||
- name: Set min. dependencies
|
||||
if: ${{ (matrix.requires == 'oldest') && (steps.skip.outputs.continue == '1') }}
|
||||
if: matrix.requires == 'oldest'
|
||||
run: |
|
||||
python .actions/assistant.py replace_oldest_ver
|
||||
|
||||
# Note: This uses an internal pip API and may not always work
|
||||
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
|
||||
- name: Get pip cache dir
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
id: pip-cache
|
||||
run: echo "::set-output name=dir::$(pip cache dir)"
|
||||
|
||||
- name: pip cache
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ${{ steps.pip-cache.outputs.dir }}
|
||||
|
@ -101,11 +79,9 @@ jobs:
|
|||
${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-
|
||||
|
||||
- name: Pull legacy checkpoints
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: bash .actions/pull_legacy_checkpoints.sh
|
||||
|
||||
- name: Install dependencies
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
env:
|
||||
PACKAGE_NAME: pytorch
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
|
@ -117,12 +93,10 @@ jobs:
|
|||
shell: bash
|
||||
|
||||
- name: DocTests
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: ./src
|
||||
run: pytest pytorch_lightning --cov=pytorch_lightning
|
||||
|
||||
- name: Install extra dependencies
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: |
|
||||
# adjust versions according installed Torch version
|
||||
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt
|
||||
|
@ -131,7 +105,7 @@ jobs:
|
|||
shell: bash
|
||||
|
||||
- name: Reinstall Horovod if necessary
|
||||
if: ${{ (runner.os != 'windows') && (steps.skip.outputs.continue == '1') }}
|
||||
if: runner.os != 'windows'
|
||||
env:
|
||||
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
|
||||
HOROVOD_WITHOUT_MXNET: 1
|
||||
|
@ -148,43 +122,38 @@ jobs:
|
|||
shell: bash
|
||||
|
||||
- name: Cache datasets
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: Datasets
|
||||
key: pl-dataset
|
||||
|
||||
- name: Sanity check
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: python requirements/pytorch/check-avail-extras.py
|
||||
|
||||
- name: Testing PyTorch
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: tests/tests_pytorch
|
||||
# NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
|
||||
run: coverage run --source pytorch_lightning -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
|
||||
|
||||
- name: Upload pytest results
|
||||
if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}
|
||||
path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
|
||||
if: failure()
|
||||
|
||||
- name: Prepare Examples
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: |
|
||||
# adjust versions according installed Torch version
|
||||
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt
|
||||
pip install -r requirements/pytorch/examples.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
|
||||
|
||||
- name: Run Examples
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: ./examples
|
||||
run: python -m pytest test_pl_examples.py -v --durations=10
|
||||
|
||||
- name: Statistics
|
||||
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
|
||||
if: success()
|
||||
working-directory: tests/tests_pytorch
|
||||
run: |
|
||||
coverage report
|
||||
|
@ -192,7 +161,7 @@ jobs:
|
|||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
if: ${{ (always()) && (steps.skip.outputs.continue == '1') }}
|
||||
if: always()
|
||||
# see: https://github.com/actions/toolkit/issues/399
|
||||
continue-on-error: true
|
||||
with:
|
||||
|
|
|
@ -7,6 +7,11 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
|
|||
pull_request:
|
||||
branches: [master, "release/*"]
|
||||
types: [opened, reopened, ready_for_review, synchronize]
|
||||
paths-ignore:
|
||||
- "src/lightning_app/**" # todo: implement job skip
|
||||
- "tests/tests_app/**" # todo: implement job skip
|
||||
- "tests/tests_app_examples/**" # todo: implement job skip
|
||||
- "examples/app_*" # todo: implement job skip
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
|
||||
|
@ -27,43 +32,19 @@ jobs:
|
|||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Get changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v23.1
|
||||
|
||||
- name: Decide if the test should be skipped
|
||||
id: skip
|
||||
run: |
|
||||
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_'
|
||||
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
|
||||
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
|
||||
echo $MATCHES
|
||||
if [ -z "$MATCHES" ]; then
|
||||
echo "Skip"
|
||||
echo "::set-output name=continue::'0'"
|
||||
else
|
||||
echo "Continue"
|
||||
echo "::set-output name=continue::'1'"
|
||||
fi
|
||||
|
||||
- uses: actions/setup-python@v2
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Reset caching
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV
|
||||
|
||||
- name: Get pip cache
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
id: pip-cache
|
||||
run: |
|
||||
python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)"
|
||||
|
||||
- name: Cache pip
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ${{ steps.pip-cache.outputs.dir }}
|
||||
|
@ -72,7 +53,6 @@ jobs:
|
|||
${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-
|
||||
|
||||
- name: Install dependencies
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
env:
|
||||
PACKAGE_NAME: pytorch
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
|
@ -84,21 +64,20 @@ jobs:
|
|||
shell: bash
|
||||
|
||||
- name: Testing PyTorch
|
||||
if: ${{ (steps.skip.outputs.continue == '1') }}
|
||||
working-directory: tests/tests_pytorch
|
||||
run: coverage run --source pytorch_lightning -m pytest -v --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}.xml
|
||||
env:
|
||||
PL_RUN_SLOW_TESTS: 1
|
||||
|
||||
- name: Upload pytest test results
|
||||
if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}
|
||||
path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}.xml
|
||||
if: failure()
|
||||
|
||||
- name: Statistics
|
||||
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
|
||||
if: success()
|
||||
working-directory: tests/tests_pytorch
|
||||
run: |
|
||||
coverage report
|
||||
|
@ -106,7 +85,7 @@ jobs:
|
|||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
|
||||
if: success()
|
||||
# see: https://github.com/actions/toolkit/issues/399
|
||||
continue-on-error: true
|
||||
with:
|
||||
|
|
|
@ -14,10 +14,6 @@ concurrency:
|
|||
jobs:
|
||||
doctest:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pkg: ["app", "pytorch"]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
|
@ -35,12 +31,13 @@ jobs:
|
|||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-${{ hashFiles('requirements/${{ matrix.pkg }}/*.txt') }}
|
||||
key: ${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-${{ hashFiles('requirements/pytorch/*.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-docs-test-pip-td${{ env.TIME_PERIOD }}-
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
PACKAGE_NAME: pytorch
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
run: |
|
||||
sudo apt-get update
|
||||
|
@ -48,27 +45,22 @@ jobs:
|
|||
pip --version
|
||||
pip install -q fire
|
||||
# python -m pip install --upgrade --user pip
|
||||
pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
pip install -r requirements/${{ matrix.pkg }}/devel.txt
|
||||
pip install -e . --quiet -r requirements/pytorch/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
pip install -r requirements/pytorch/devel.txt
|
||||
pip list
|
||||
shell: bash
|
||||
|
||||
- name: Test Documentation
|
||||
env:
|
||||
SPHINX_MOCK_REQUIREMENTS: 0
|
||||
working-directory: ./docs/source-${{ matrix.pkg }}
|
||||
working-directory: ./docs
|
||||
run: |
|
||||
# ToDo: proper parametrize
|
||||
# First run the same pipeline as Read-The-Docs
|
||||
make doctest
|
||||
make coverage
|
||||
|
||||
make-docs:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pkg: ["app", "pytorch"]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
|
@ -84,27 +76,27 @@ jobs:
|
|||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-docs-make-pip-${{ hashFiles('requirements/${{ matrix.pkg }}/*.txt') }}
|
||||
key: ${{ runner.os }}-docs-make-pip-${{ hashFiles('requirements/pytorch/base.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-docs-make-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
PACKAGE_NAME: pytorch
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake pandoc
|
||||
pip --version
|
||||
pip install -e . --quiet -r requirements/${{ matrix.pkg }}/base.txt -r requirements/${{ matrix.pkg }}/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
pip install -e . --quiet -r requirements/pytorch/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
# install Texlive, see https://linuxconfig.org/how-to-install-latex-on-ubuntu-20-04-focal-fossa-linux
|
||||
sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures
|
||||
pip list
|
||||
shell: bash
|
||||
|
||||
- name: Make Documentation
|
||||
working-directory: ./docs/source-${{ matrix.pkg }}
|
||||
working-directory: ./docs
|
||||
run: |
|
||||
# ToDo: rather use python cmd
|
||||
# First run the same pipeline as Read-The-Docs
|
||||
make html --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going"
|
||||
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
name: "Deploy Docs"
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
# https://github.com/marketplace/actions/deploy-to-github-pages
|
||||
build-docs-deploy:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout 🛎️
|
||||
uses: actions/checkout@v2
|
||||
# If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly.
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
|
||||
- id: 'auth'
|
||||
name: 'Authenticate to Google Cloud'
|
||||
uses: 'google-github-actions/auth@v0'
|
||||
with:
|
||||
credentials_json: ${{ secrets.GCS_SA_KEY }}
|
||||
|
||||
- name: Setup gcloud
|
||||
uses: 'google-github-actions/setup-gcloud@v0'
|
||||
with:
|
||||
project_id: ${{ secrets.GCS_PROJECT }}
|
||||
|
||||
# Note: This uses an internal pip API and may not always work
|
||||
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
|
||||
- name: Cache pip
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-deploy-docs-pip-${{ hashFiles('requirements/app/*.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-deploy-docs-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
FREEZE_REQUIREMENTS: 1
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake pandoc
|
||||
pip --version
|
||||
pip install -e . --quiet -r requirements/app/docs.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
sudo apt-get update && sudo apt-get install -y texlive-latex-extra dvipng texlive-pictures
|
||||
pip list
|
||||
shell: bash
|
||||
|
||||
- name: Make Documentation
|
||||
working-directory: ./docs/source-app
|
||||
run: |
|
||||
# First run the same pipeline as Read-The-Docs
|
||||
make clean
|
||||
make html --jobs 2
|
||||
|
||||
- name: Deploy 🚀
|
||||
uses: JamesIves/github-pages-deploy-action@4.1.4
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
branch: gh-pages # The branch the action should deploy to.
|
||||
folder: docs/build/html # The folder the action should deploy.
|
||||
clean: true # Automatically remove deleted files from the deploy branch
|
||||
target-folder: docs # If you'd like to push the contents of the deployment folder into a specific directory
|
||||
single-commit: true # you'd prefer to have a single commit on the deployment branch instead of full history
|
||||
if: success()
|
||||
|
||||
# Uploading docs to GCS so they can be served on lightning.ai
|
||||
- name: Upload to GCS 🪣
|
||||
run: |-
|
||||
gsutil -m rsync -d -R docs/build/html/ gs://${{ secrets.GCS_BUCKET }}
|
||||
if: success()
|
|
@ -77,7 +77,6 @@ repos:
|
|||
hooks:
|
||||
- id: black
|
||||
name: Format code
|
||||
exclude: docs/source-app
|
||||
|
||||
- repo: https://github.com/asottile/blacken-docs
|
||||
rev: v1.12.1
|
||||
|
@ -85,7 +84,6 @@ repos:
|
|||
- id: blacken-docs
|
||||
args: [--line-length=120]
|
||||
additional_dependencies: [black==21.12b0]
|
||||
exclude: docs/source-app
|
||||
|
||||
- repo: https://github.com/executablebooks/mdformat
|
||||
rev: 0.7.14
|
||||
|
@ -102,4 +100,3 @@ repos:
|
|||
hooks:
|
||||
- id: flake8
|
||||
name: Check PEP8
|
||||
exclude: docs/source-app
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = ../build
|
||||
SOURCEDIR = source-pytorch
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
|
@ -7,8 +7,8 @@ REM Command file for Sphinx documentation
|
|||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=.
|
||||
set BUILDDIR=../build
|
||||
set SOURCEDIR=source-pytorch
|
||||
set BUILDDIR=build
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = ../build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
@ -7,3 +7,7 @@
|
|||
|
||||
.. autoclass:: {{ name }}
|
||||
:members:
|
||||
|
||||
..
|
||||
autogenerated from source/_templates/classtemplate.rst
|
||||
note it does not have :inherited-members:
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
.. role:: hidden
|
||||
:class: hidden-section
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
|
||||
{{ name | underline }}
|
||||
|
||||
.. autoclass:: {{ name }}
|
||||
:members:
|
||||
:noindex:
|
|
@ -1,10 +0,0 @@
|
|||
{% extends "!layout.html" %}
|
||||
<link rel="canonical" href="{{ theme_canonical_url }}{{ pagename }}.html" />
|
||||
|
||||
{% block footer %}
|
||||
{{ super() }}
|
||||
<script script type="text/javascript">
|
||||
var collapsedSections = ['App Building Skills', 'Practical Examples', 'Common Workflows', 'Core API Reference', 'Addons API Reference', 'Glossary'];
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
|
@ -1,8 +1,8 @@
|
|||
{%- set external_urls = {
|
||||
'github': 'https://github.com/Lightning-AI/lightning',
|
||||
'github_issues': 'https://github.com/Lightning-AI/lightning/issues',
|
||||
'contributing': 'https://github.com/Lightning-AI/lightning/blob/master/.github/CONTRIBUTING.md',
|
||||
'governance': 'https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/governance.rst',
|
||||
'github': 'https://github.com/PytorchLightning/lightning',
|
||||
'github_issues': 'https://github.com/PytorchLightning/lightning/issues',
|
||||
'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md',
|
||||
'governance': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/governance.md',
|
||||
'docs': 'https://lightning.rtfd.io/en/latest',
|
||||
'twitter': 'https://twitter.com/PyTorchLightnin',
|
||||
'discuss': 'https://pytorch-lightning.slack.com',
|
||||
|
|
|
@ -1,90 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
##############################
|
||||
Lightning App - API References
|
||||
##############################
|
||||
|
||||
Core
|
||||
----
|
||||
|
||||
.. currentmodule:: lightning_app.core
|
||||
|
||||
.. autosummary::
|
||||
:toctree: api
|
||||
:nosignatures:
|
||||
:template: classtemplate_no_index.rst
|
||||
|
||||
LightningApp
|
||||
LightningFlow
|
||||
LightningWork
|
||||
|
||||
Learn more about :ref:`Lightning Core <core_api>`.
|
||||
|
||||
----
|
||||
|
||||
Built-in Components
|
||||
___________________
|
||||
|
||||
.. currentmodule:: lightning_app.components
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
:nosignatures:
|
||||
:template: classtemplate_no_index.rst
|
||||
|
||||
~serve.serve.ModelInferenceAPI
|
||||
~python.popen.PopenPythonScript
|
||||
~serve.gradio.ServeGradio
|
||||
~python.tracer.TracerPythonScript
|
||||
|
||||
----
|
||||
|
||||
Frontend's
|
||||
__________
|
||||
|
||||
.. currentmodule:: lightning_app.frontend
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
:nosignatures:
|
||||
:template: classtemplate_no_index.rst
|
||||
|
||||
~frontend.Frontend
|
||||
~web.StaticWebFrontend
|
||||
~stream_lit.StreamlitFrontend
|
||||
|
||||
Learn more about :ref:`Frontend's <ui_and_frontends>`.
|
||||
|
||||
----
|
||||
|
||||
Storage
|
||||
_______
|
||||
|
||||
.. currentmodule:: lightning_app.storage
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
:nosignatures:
|
||||
:template: classtemplate_no_index.rst
|
||||
|
||||
~drive.Drive
|
||||
~path.Path
|
||||
~payload.Payload
|
||||
|
||||
Learn more about :ref:`Storage <storage>`.
|
||||
|
||||
----
|
||||
|
||||
Runners
|
||||
_______
|
||||
|
||||
.. currentmodule:: lightning_app.runners
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
:nosignatures:
|
||||
:template: classtemplate_no_index.rst
|
||||
|
||||
~cloud.CloudRuntime
|
||||
~multiprocess.MultiProcessRuntime
|
||||
~singleprocess.SingleProcessRuntime
|
|
@ -1,7 +1,7 @@
|
|||
import lightning as L
|
||||
from lightning_app import LightningWork
|
||||
|
||||
|
||||
class ExampleWork(L.LightningWork):
|
||||
class ExampleWork(LightningWork):
|
||||
def run(self, *args, **kwargs):
|
||||
print(f"I received the following props: args: {args} kwargs: {kwargs}")
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import lightning as L
|
||||
from lightning_app import LightningWork
|
||||
|
||||
|
||||
class ExampleWork(L.LightningWork):
|
||||
class ExampleWork(LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(cache_calls=False)
|
||||
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
import lightning as L
|
||||
from lightning.app.components.python import TracerPythonScript
|
||||
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.runner = TracerPythonScript(
|
||||
"train.py",
|
||||
cloud_compute=L.CloudCompute("gpu"),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.runner.run()
|
||||
|
||||
|
||||
app = L.LightningApp(RootFlow())
|
|
@ -1,3 +0,0 @@
|
|||
torch
|
||||
torchvision
|
||||
pytorch_lightning
|
|
@ -1,46 +0,0 @@
|
|||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from torch.utils.data import DataLoader, random_split
|
||||
from torchvision import transforms as T
|
||||
from torchvision.datasets import MNIST
|
||||
|
||||
import pytorch_lightning as pl
|
||||
|
||||
|
||||
class LitAutoEncoder(pl.LightningModule):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 3))
|
||||
self.decoder = nn.Sequential(nn.Linear(3, 128), nn.ReLU(), nn.Linear(128, 28 * 28))
|
||||
|
||||
def forward(self, x):
|
||||
# in lightning,
|
||||
# forward defines the prediction/inference actions
|
||||
embedding = self.encoder(x)
|
||||
return embedding
|
||||
|
||||
def training_step(self, batch, batch_idx):
|
||||
# training_step defines the train loop.
|
||||
# It is independent of forward
|
||||
x, y = batch
|
||||
x = x.view(x.size(0), -1)
|
||||
z = self.encoder(x)
|
||||
x_hat = self.decoder(z)
|
||||
loss = F.mse_loss(x_hat, x)
|
||||
self.log("train_loss", loss)
|
||||
return loss
|
||||
|
||||
def configure_optimizers(self):
|
||||
optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
|
||||
return optimizer
|
||||
|
||||
|
||||
dataset = MNIST(os.getcwd(), download=True, transform=T.ToTensor())
|
||||
train, val = random_split(dataset, [55000, 5000])
|
||||
|
||||
autoencoder = LitAutoEncoder()
|
||||
trainer = pl.Trainer(accelerator="auto")
|
||||
trainer.fit(autoencoder, DataLoader(train), DataLoader(val))
|
|
@ -1,10 +1,10 @@
|
|||
from docs.quickstart.app_02 import HourLongWork
|
||||
|
||||
import lightning as L
|
||||
from lightning_app import LightningApp, LightningFlow, LightningWork
|
||||
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
def __init__(self, child_work_1: L.LightningWork, child_work_2: L.LightningWork):
|
||||
class RootFlow(LightningFlow):
|
||||
def __init__(self, child_work_1: LightningWork, child_work_2: LightningWork):
|
||||
super().__init__()
|
||||
self.child_work_1 = child_work_1
|
||||
self.child_work_2 = child_work_2
|
||||
|
@ -19,4 +19,4 @@ class RootFlow(L.LightningFlow):
|
|||
print("1 hour later `child_work_2` started!")
|
||||
|
||||
|
||||
app = L.LightningApp(RootFlow(HourLongWork(parallel=True), HourLongWork(parallel=True)))
|
||||
app = LightningApp(RootFlow(HourLongWork(parallel=True), HourLongWork(parallel=True)))
|
||||
|
|
|
@ -2,14 +2,14 @@ import flash
|
|||
from flash.core.data.utils import download_data
|
||||
from flash.image import ImageClassificationData, ImageClassifier
|
||||
|
||||
import lightning as L
|
||||
from lightning_app import CloudCompute, LightningApp, LightningFlow, LightningWork
|
||||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
|
||||
|
||||
# Step 1: Create a training LightningWork component that gets a backbone as input
|
||||
# and saves the best model and its score
|
||||
class ImageClassifierTrainWork(L.LightningWork):
|
||||
def __init__(self, max_epochs: int, backbone: str, cloud_compute: L.CloudCompute):
|
||||
class ImageClassifierTrainWork(LightningWork):
|
||||
def __init__(self, max_epochs: int, backbone: str, cloud_compute: CloudCompute):
|
||||
# parallel is set to True to run asynchronously
|
||||
super().__init__(parallel=True, cloud_compute=cloud_compute)
|
||||
# Number of epochs to run
|
||||
|
@ -44,7 +44,7 @@ class ImageClassifierTrainWork(L.LightningWork):
|
|||
|
||||
|
||||
# Step 2: Create a serving LightningWork component that gets a model input and serves it
|
||||
class ImageClassifierServeWork(L.LightningWork):
|
||||
class ImageClassifierServeWork(LightningWork):
|
||||
def run(self, best_model_path: str):
|
||||
# Load the model from the model path
|
||||
model = ImageClassifier.load_from_checkpoint(best_model_path)
|
||||
|
@ -53,7 +53,7 @@ class ImageClassifierServeWork(L.LightningWork):
|
|||
|
||||
# Step 3: Create a root LightningFlow component that gets number of epochs and a path to
|
||||
# a dataset as inputs, initialize 2 training components and serves the best model
|
||||
class RootFlow(L.LightningFlow):
|
||||
class RootFlow(LightningFlow):
|
||||
def __init__(self, max_epochs: int, data_dir: str):
|
||||
super().__init__()
|
||||
self.data_dir = data_dir
|
||||
|
@ -89,4 +89,4 @@ class RootFlow(L.LightningFlow):
|
|||
download_data("https://pl-flash-data.s3.amazonaws.com/hymenoptera_data.zip", "./data")
|
||||
|
||||
# Initalize your Lightning app with 5 epochs
|
||||
app = L.LightningApp(RootFlow(5, "./data/hymenoptera_data"))
|
||||
app = LightningApp(RootFlow(5, "./data/hymenoptera_data"))
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
import lightning as L
|
||||
from lightning.app.utilities.app_helpers import pretty_state
|
||||
from lightning_app import LightningApp, LightningFlow, LightningWork
|
||||
from lightning_app.utilities.app_helpers import pretty_state
|
||||
|
||||
|
||||
class Work(L.LightningWork):
|
||||
class Work(LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(cache_calls=False)
|
||||
# Attributes are registered automatically in the state.
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
# Incrementing an attribute gets reflected in the `Flow` state.
|
||||
# Incrementing an attribute gets reflected in the `RootFlow` state.
|
||||
self.counter += 1
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
class Flow(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = Work()
|
||||
|
@ -24,4 +24,4 @@ class Flow(L.LightningFlow):
|
|||
self.w.run()
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
||||
app = LightningApp(Flow())
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from time import sleep
|
||||
|
||||
import lightning as L
|
||||
from lightning_app import LightningApp, LightningFlow, LightningWork
|
||||
|
||||
|
||||
# This work takes an hour to run
|
||||
class HourLongWork(L.LightningWork):
|
||||
class HourLongWork(LightningWork):
|
||||
def __init__(self, parallel: bool = False):
|
||||
super().__init__(parallel=parallel)
|
||||
self.progress = 0.0
|
||||
|
@ -16,8 +16,8 @@ class HourLongWork(L.LightningWork):
|
|||
sleep(1)
|
||||
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
def __init__(self, child_work: L.LightningWork):
|
||||
class RootFlow(LightningFlow):
|
||||
def __init__(self, child_work: LightningWork):
|
||||
super().__init__()
|
||||
self.child_work = child_work
|
||||
|
||||
|
@ -29,4 +29,4 @@ class RootFlow(L.LightningFlow):
|
|||
print("1 hour later!")
|
||||
|
||||
|
||||
app = L.LightningApp(RootFlow(HourLongWork()))
|
||||
app = LightningApp(RootFlow(HourLongWork()))
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
from time import sleep
|
||||
|
||||
import lightning as L
|
||||
from lightning_app import LightningApp, LightningFlow, LightningWork
|
||||
|
||||
|
||||
class HourLongWork(L.LightningWork):
|
||||
class HourLongWork(LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(cache_calls=False)
|
||||
self.progress = 0.0
|
||||
|
@ -15,8 +15,8 @@ class HourLongWork(L.LightningWork):
|
|||
sleep(1)
|
||||
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
def __init__(self, child_work: L.LightningWork):
|
||||
class RootFlow(LightningFlow):
|
||||
def __init__(self, child_work: LightningWork):
|
||||
super().__init__()
|
||||
self.child_work = child_work
|
||||
|
||||
|
@ -28,4 +28,4 @@ class RootFlow(L.LightningFlow):
|
|||
print("1 hour later!")
|
||||
|
||||
|
||||
app = L.LightningApp(RootFlow(HourLongWork()))
|
||||
app = LightningApp(RootFlow(HourLongWork()))
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import lightning as L
|
||||
from lightning.app.testing.helpers import EmptyFlow, EmptyWork
|
||||
from lightning_app import LightningApp, LightningFlow
|
||||
from lightning_app.testing.helpers import EmptyFlow, EmptyWork
|
||||
|
||||
|
||||
class FlowB(L.LightningFlow):
|
||||
class FlowB(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.flow_d = EmptyFlow()
|
||||
|
@ -12,7 +12,7 @@ class FlowB(L.LightningFlow):
|
|||
...
|
||||
|
||||
|
||||
class FlowA(L.LightningFlow):
|
||||
class FlowA(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.flow_b = FlowB()
|
||||
|
@ -23,4 +23,4 @@ class FlowA(L.LightningFlow):
|
|||
...
|
||||
|
||||
|
||||
app = L.LightningApp(FlowA())
|
||||
app = LightningApp(FlowA())
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import lightning as L
|
||||
from lightning_app import LightningApp, LightningFlow
|
||||
|
||||
|
||||
# Step 1: Subclass LightningFlow component to define the app flow.
|
||||
class HelloWorld(L.LightningFlow):
|
||||
class HelloWorld(LightningFlow):
|
||||
|
||||
# Step 2: Add the app logic to the LightningFlow run method to
|
||||
# ``print("Hello World!")`.
|
||||
|
@ -13,4 +13,4 @@ class HelloWorld(L.LightningFlow):
|
|||
|
||||
|
||||
# Step 3: Initalize a LightningApp with the LightningFlow you defined (in step 1)
|
||||
app = L.LightningApp(HelloWorld())
|
||||
app = LightningApp(HelloWorld())
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
import os
|
||||
|
||||
import lightning as L
|
||||
from lightning.app.frontend import StaticWebFrontend, StreamlitFrontend
|
||||
from lightning.app.utilities.state import AppState
|
||||
from lightning_app import LightningApp, LightningFlow
|
||||
from lightning_app.frontend import StaticWebFrontend, StreamlitFrontend
|
||||
from lightning_app.utilities.state import AppState
|
||||
|
||||
|
||||
# Step 1: Define your LightningFlow component with the app UI
|
||||
class UIStreamLit(L.LightningFlow):
|
||||
class UIStreamLit(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.should_print = False
|
||||
|
@ -31,7 +31,7 @@ def render_fn(state: AppState):
|
|||
|
||||
|
||||
# Step 4: Implement a Static Web Frontend. This could be react, vue, etc.
|
||||
class UIStatic(L.LightningFlow):
|
||||
class UIStatic(LightningFlow):
|
||||
|
||||
# Step 5:
|
||||
def configure_layout(self):
|
||||
|
@ -39,7 +39,7 @@ class UIStatic(L.LightningFlow):
|
|||
|
||||
|
||||
# Step 6: Implement the root flow.
|
||||
class HelloWorld(L.LightningFlow):
|
||||
class HelloWorld(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.static_ui = UIStatic()
|
||||
|
@ -55,4 +55,4 @@ class HelloWorld(L.LightningFlow):
|
|||
]
|
||||
|
||||
|
||||
app = L.LightningApp(HelloWorld())
|
||||
app = LightningApp(HelloWorld())
|
||||
|
|
|
@ -15,28 +15,32 @@ import inspect
|
|||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from importlib.util import module_from_spec, spec_from_file_location
|
||||
|
||||
import pt_lightning_sphinx_theme
|
||||
|
||||
import lightning_app
|
||||
|
||||
_PATH_HERE = os.path.abspath(os.path.dirname(__file__))
|
||||
_PATH_ROOT = os.path.realpath(os.path.join(_PATH_HERE, "..", ".."))
|
||||
sys.path.insert(0, os.path.abspath(_PATH_ROOT))
|
||||
|
||||
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get("SPHINX_MOCK_REQUIREMENTS", True))
|
||||
|
||||
# alternative https://stackoverflow.com/a/67692/4521646
|
||||
spec = spec_from_file_location("lightning_app/__about__.py", os.path.join(_PATH_ROOT, "lightning_app", "__about__.py"))
|
||||
about = module_from_spec(spec)
|
||||
spec.loader.exec_module(about)
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
# this name shall match the project name in Github as it is used for linking to code
|
||||
project = "lightning"
|
||||
copyright = lightning_app.__copyright__
|
||||
author = lightning_app.__author__
|
||||
copyright = about.__copyright__
|
||||
author = about.__author__
|
||||
|
||||
# The short X.Y version
|
||||
version = lightning_app.__version__
|
||||
version = about.__version__
|
||||
# The full version, including alpha/beta/rc tags
|
||||
release = lightning_app.__version__
|
||||
release = about.__version__
|
||||
|
||||
# Options for the linkcode extension
|
||||
# ----------------------------------
|
||||
|
@ -156,8 +160,8 @@ html_theme_path = [pt_lightning_sphinx_theme.get_html_theme_path()]
|
|||
# documentation.
|
||||
|
||||
html_theme_options = {
|
||||
"pytorch_project": lightning_app.__homepage__,
|
||||
"canonical_url": lightning_app.__homepage__,
|
||||
"pytorch_project": about.__homepage__,
|
||||
"canonical_url": about.__homepage__,
|
||||
"collapse_navigation": False,
|
||||
"display_version": True,
|
||||
"logo_only": False,
|
||||
|
@ -223,7 +227,7 @@ texinfo_documents = [
|
|||
project + " Documentation",
|
||||
author,
|
||||
project,
|
||||
lightning_app.__docs__,
|
||||
about.__docs__,
|
||||
"Miscellaneous",
|
||||
),
|
||||
]
|
||||
|
@ -277,15 +281,6 @@ for path_ipynb in glob.glob(os.path.join(_PATH_ROOT, "notebooks", "*.ipynb")):
|
|||
path_ipynb2 = os.path.join(path_nbs, os.path.basename(path_ipynb))
|
||||
shutil.copy(path_ipynb, path_ipynb2)
|
||||
|
||||
# copy all examples to local folder
|
||||
path_examples = os.path.join(_PATH_HERE, "..", "examples")
|
||||
if not os.path.isdir(path_examples):
|
||||
os.mkdir(path_examples)
|
||||
for path_app_example in glob.glob(os.path.join(_PATH_ROOT, "examples", "app_*")):
|
||||
path_app_example2 = os.path.join(path_examples, os.path.basename(path_app_example))
|
||||
if not os.path.isdir(path_app_example2):
|
||||
shutil.copytree(path_app_example, path_app_example2, dirs_exist_ok=True)
|
||||
|
||||
|
||||
# Ignoring Third-party packages
|
||||
# https://stackoverflow.com/questions/15889621/sphinx-how-to-exclude-imports-in-automodule
|
||||
|
@ -319,7 +314,7 @@ autodoc_mock_imports = MOCK_PACKAGES
|
|||
def linkcode_resolve(domain, info):
|
||||
def find_source():
|
||||
# try to find the file and line number, based on code from numpy:
|
||||
# https://github.com/numpy/numpy/blob/master/doc/source-app/conf.py#L286
|
||||
# https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286
|
||||
obj = sys.modules[info["module"]]
|
||||
for part in info["fullname"].split("."):
|
||||
obj = getattr(obj, part)
|
||||
|
@ -386,6 +381,6 @@ doctest_test_doctest_blocks = ""
|
|||
doctest_global_setup = """
|
||||
import importlib
|
||||
import os
|
||||
import lightning as L
|
||||
import lightning_app
|
||||
"""
|
||||
coverage_skip_undoc_in_source = True
|
||||
|
|
|
@ -1,40 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
.. _core_api:
|
||||
|
||||
###############################
|
||||
Learn more about Lightning Core
|
||||
###############################
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Level-up with Lightning Apps
|
||||
:description: From Basics to Advanced Skills
|
||||
:col_css: col-md-6
|
||||
:button_link: ../levels/basic/index.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Understand Lightning App
|
||||
:description: Detailed description
|
||||
:col_css: col-md-6
|
||||
:button_link: lightning_app/index.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Understand Lightning Flow
|
||||
:description: Detailed description
|
||||
:col_css: col-md-6
|
||||
:button_link: lightning_flow.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Understand Lightning Work
|
||||
:description: Detailed description
|
||||
:col_css: col-md-6
|
||||
:button_link: lightning_work/index.html
|
||||
:height: 180
|
|
@ -1,27 +0,0 @@
|
|||
import lightning as L
|
||||
from lightning.app.utilities.app_helpers import pretty_state
|
||||
|
||||
|
||||
class Work(L.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(cache_calls=False)
|
||||
# Attributes are registered automatically in the state.
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
# Incrementing an attribute gets reflected in the `Flow` state.
|
||||
self.counter += 1
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = Work()
|
||||
|
||||
def run(self):
|
||||
if self.w.has_started:
|
||||
print(f"State: {pretty_state(self.state)} \n")
|
||||
self.w.run()
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
|
@ -1,15 +1,138 @@
|
|||
:orphan:
|
||||
|
||||
##########################################
|
||||
Communication between Lightning Components
|
||||
##########################################
|
||||
################################
|
||||
Communication Between Components
|
||||
################################
|
||||
|
||||
**Audience:** Users that want to create interactive applications.
|
||||
|
||||
**Level:** Intermediate
|
||||
**Level:** Advanced
|
||||
|
||||
**Prerequisite**: Read the `Communication in Lightning Apps article <../../access_app_state.html>`_.
|
||||
**Prerequisite**: Read the :ref:`access_app_state` guide.
|
||||
|
||||
----
|
||||
|
||||
.. include:: ../../core_api/lightning_app/communication_content.rst
|
||||
***********************************
|
||||
Why should components communicate ?
|
||||
***********************************
|
||||
|
||||
When creating interactive apps with multiple components, you might want your components to share information with each other. You might to rely on that information to control their execution, share progress in the UI, trigger a sequence of operations, etc.
|
||||
|
||||
By design, the :class:`~lightning_app.core.flow.LightningFlow` communicates to all :class:`~lightning_app.core.flow.LightningWork` within the application, but :class:`~lightning_app.core.flow.LightningWork` can't communicate between each other directly, they need the flow as a proxy to do so.
|
||||
|
||||
Once a ``LightningWork`` is running, any updates to its state is automatically communicated to the flow as a delta (using `DeepDiff <https://github.com/seperman/deepdiff>`_). The state communication isn't bi-directional, it is only done from work to flow.
|
||||
|
||||
Internally, the Lightning App is alternatively collecting deltas sent from all the registered ``LightningWorks`` and/or UI, and running the root flow run method of the app.
|
||||
|
||||
*******************************
|
||||
Communication From Work to Flow
|
||||
*******************************
|
||||
|
||||
Below, find an example to better understand this behavior.
|
||||
|
||||
The ``WorkCounter`` increments a counter until 1 million and the ``Flow`` prints the work counter.
|
||||
|
||||
As the work is running into its own process, its state changes is sent to the Flow which contains the latest value of the counter.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
|
||||
|
||||
class WorkCounter(lapp.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(parallel=True)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
for _ in range(int(10e6)):
|
||||
self.counter += 1
|
||||
|
||||
|
||||
class Flow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = WorkCounter()
|
||||
|
||||
def run(self):
|
||||
self.w.run()
|
||||
print(self.w.counter)
|
||||
|
||||
|
||||
app = lapp.LightningApp(Flow())
|
||||
|
||||
|
||||
A delta sent from the work to the flow looks like this:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"values_changed": {"root['works']['w']['vars']['counter']": {"new_value": 425}}}
|
||||
|
||||
Here is the associated illustration:
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/deltas.gif
|
||||
:alt: Mechanism showing how delta are sent.
|
||||
:width: 100 %
|
||||
|
||||
|
||||
*******************************
|
||||
Communication From From to Work
|
||||
*******************************
|
||||
|
||||
Communication from the flow to the work while running isn't support yet. If your application requires this feature, please open an issue on Github.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
from time import sleep
|
||||
|
||||
|
||||
class WorkCounter(lapp.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(parallel=True)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
sleep(1)
|
||||
print(f"Work {self.counter}")
|
||||
|
||||
|
||||
class Flow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = WorkCounter()
|
||||
|
||||
def run(self):
|
||||
self.w.run()
|
||||
sleep(1)
|
||||
print(f"Flow {self.w.counter}")
|
||||
self.w.counter += 1
|
||||
|
||||
|
||||
app = lapp.LightningApp(Flow())
|
||||
|
||||
As you can observe, there is a divergence between the value within the Work and the Flow.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
Flow 0
|
||||
Flow 1
|
||||
Flow 2
|
||||
Flow 3
|
||||
Work 0
|
||||
Flow 4
|
||||
Work 0
|
||||
Flow 5
|
||||
Work 0
|
||||
Flow 6
|
||||
Work 0
|
||||
Flow 7
|
||||
Work 0
|
||||
Flow 8
|
||||
Work 0
|
||||
Flow 9
|
||||
Work 0
|
||||
Flow 10
|
||||
|
||||
.. note:: Technically, the flow and works relies on queues to share data (multiprocessing locally and redis lists in the cloud).
|
||||
|
|
|
@ -1,160 +0,0 @@
|
|||
|
||||
********************************
|
||||
Communication Between Components
|
||||
********************************
|
||||
|
||||
When creating interactive Lightning Apps (App) with multiple components, you may need your components to share information with each other and rely on that information to control their execution, share progress in the UI, trigger a sequence of operations, etc.
|
||||
|
||||
To accomplish that, Lightning components can communicate using the App State. The App State is composed of all attributes defined within each component's **__init__** method e.g anything attached to the component with **self.x = y**.
|
||||
|
||||
All attributes of all **LightningWork (Work)** components are accessible in the **LightningFlow (Flow)** components in real-time.
|
||||
|
||||
By design, the Flows communicate to all **Works** within the application. However, Works can't communicate with each other directly, they must use Flows as a proxy to communicate.
|
||||
|
||||
Once a Work is running, any updates to the Work's state is automatically communicated to the Flow, as a delta (using `DeepDiff <https://github.com/seperman/deepdiff>`_). The state communication isn't bi-directional, communication is only done from Work to Flow.
|
||||
|
||||
Internally, the App is alternatively collecting deltas sent from all the registered Works and/or UI, and running the root Flow run method of the App.
|
||||
|
||||
----
|
||||
|
||||
*************************************************
|
||||
Communication from LightningWork to LightningFlow
|
||||
*************************************************
|
||||
|
||||
LightningFlow (Flow) can access their children's LightningWork (Work) state.
|
||||
|
||||
When a running Work attribute gets updated inside its method (separate process locally or remote machine), the app re-executes Flow's run method once it receives the state update from the Work.
|
||||
|
||||
Here's an example to better understand communication from Work to Flow.
|
||||
|
||||
The ``WorkCounter`` increments a counter until 1 million and the ``Flow`` prints the work counter.
|
||||
|
||||
As the Work is running its own process, its state changes are sent to the Flow which contains the latest value of the counter.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
|
||||
class WorkCounter(L.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(parallel=True)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
for _ in range(int(10e6)):
|
||||
self.counter += 1
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = WorkCounter()
|
||||
|
||||
def run(self):
|
||||
self.w.run()
|
||||
print(self.w.counter)
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
||||
|
||||
|
||||
A delta sent from the Work to the Flow looks like this:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"values_changed": {"root['works']['w']['vars']['counter']": {"new_value": 425}}}
|
||||
|
||||
Here is the associated illustration:
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/deltas.gif
|
||||
:alt: Mechanism showing how delta are sent.
|
||||
:width: 100 %
|
||||
|
||||
Here's another example that is slightly different. Here we define a Flow and Work, where the Work increments a counter indefinitely and the Flow prints its state which contain the Work.
|
||||
|
||||
You can easily check the state of your entire app as follows:
|
||||
|
||||
.. literalinclude:: ../../core_api/lightning_app/app.py
|
||||
|
||||
Run the app with:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app docs/source-app/core_api/lightning_app/app.py
|
||||
|
||||
And here's the output you get when running the App using the **Lightning CLI**:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
|
||||
State: {'works': {'w': {'vars': {'counter': 1}}}}
|
||||
State: {'works': {'w': {'vars': {'counter': 2}}}}
|
||||
State: {'works': {'w': {'vars': {'counter': 3}}}}
|
||||
State: {'works': {'w': {'vars': {'counter': 3}}}}
|
||||
State: {'works': {'w': {'vars': {'counter': 4}}}}
|
||||
...
|
||||
|
||||
----
|
||||
|
||||
*************************************************
|
||||
Communication from LightningFlow to LightningWork
|
||||
*************************************************
|
||||
|
||||
Communication from the LightningFlow (Flow) to the LightningWork (Work) while running **isn't supported yet**. If your application requires this feature, please open an issue on Github.
|
||||
|
||||
Here's an example of what would happen if you try to have the Flow communicate with the Work:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
from time import sleep
|
||||
|
||||
|
||||
class WorkCounter(L.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__(parallel=True)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
sleep(1)
|
||||
print(f"Work {self.counter}")
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.w = WorkCounter()
|
||||
|
||||
def run(self):
|
||||
self.w.run()
|
||||
sleep(1)
|
||||
print(f"Flow {self.w.counter}")
|
||||
self.w.counter += 1
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
||||
|
||||
As you can see, there is a divergence between the values within the Work and the Flow.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
Flow 0
|
||||
Flow 1
|
||||
Flow 2
|
||||
Flow 3
|
||||
Work 0
|
||||
Flow 4
|
||||
Work 0
|
||||
Flow 5
|
||||
Work 0
|
||||
Flow 6
|
||||
Work 0
|
||||
Flow 7
|
||||
Work 0
|
||||
Flow 8
|
||||
Work 0
|
||||
Flow 9
|
||||
Work 0
|
||||
Flow 10
|
|
@ -1,15 +1,187 @@
|
|||
:orphan:
|
||||
|
||||
.. _dynamic_work:
|
||||
############
|
||||
Dynamic Work
|
||||
############
|
||||
|
||||
#####################
|
||||
Dynamic LightningWork
|
||||
#####################
|
||||
**Audience:** Users who want to learn how to create application which adapts to user demands.
|
||||
|
||||
**Audience:** Users who want to create applications that adapt to user demands.
|
||||
|
||||
**Level:** Advanced
|
||||
**Level:** Intermediate
|
||||
|
||||
----
|
||||
|
||||
.. include:: dynamic_work_content.rst
|
||||
***************************************************
|
||||
Why should I care about creating work dynamically ?
|
||||
***************************************************
|
||||
|
||||
Imagine you want to create a research notebook app for your team, where every member can create multiple `JupyterLab <https://jupyter.org/>`_ session on their hardware of choice.
|
||||
|
||||
To allow every notebook to choose hardware, it needs to be set up in it's own :class:`~lightning_app.core.work.LightningWork`, but you can't know the number of notebooks user will need in advance. In this case you'll need to add ``LightningWorks`` dynamically at run time.
|
||||
|
||||
This is what **dynamic works** enables.
|
||||
|
||||
***************************
|
||||
When to use dynamic works ?
|
||||
***************************
|
||||
|
||||
Dynamic works should be used anytime you want change the resources your application is using at runtime.
|
||||
|
||||
*******************
|
||||
How to add a work ?
|
||||
*******************
|
||||
|
||||
You can simply attach your components in the **run** method of a flow using python **hasattr**, **setattr** and **getattr** functions.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
def run(self):
|
||||
|
||||
if not hasattr(self, "work"):
|
||||
setattr(self, "work", Work()) # The `Work` component is created and attached here.
|
||||
getattr(self, "work").run() # Run the `Work` component.
|
||||
|
||||
But it is usually more readable to use Lightning built-in :class:`~lightning_app.structures.Dict` or :class:`~lightning_app.structures.List` as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from lightning_app.structures import Dict
|
||||
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.dict = Dict()
|
||||
|
||||
def run(self):
|
||||
if "work" not in self.dict:
|
||||
self.dict["work"] = Work() # The `Work` component is attached here.
|
||||
self.dict["work"].run()
|
||||
|
||||
|
||||
********************
|
||||
How to stop a work ?
|
||||
********************
|
||||
|
||||
In order to stop a work, simply use the work ``stop`` method as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work()
|
||||
|
||||
def run(self):
|
||||
self.work.stop()
|
||||
|
||||
|
||||
**********************************
|
||||
Application Example with StreamLit
|
||||
**********************************
|
||||
|
||||
..
|
||||
The entire application can be found `here <https://github.com/PyTorchLightning/lightning-template-jupyterlab>`_.
|
||||
|
||||
The Notebook Manager
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the component below, we are dynamically creating ``JupyterLabWork`` every time as user clicks the ``Create Jupyter Notebook`` button.
|
||||
|
||||
To do so, we are iterating over the list of ``jupyter_config_requests`` infinitely.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
|
||||
|
||||
class JupyterLabManager(lapp.LightningFlow):
|
||||
"""This flow manages the users notebooks running within works."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.jupyter_works = lapp.structures.Dict()
|
||||
self.jupyter_config_requests = []
|
||||
|
||||
def run(self):
|
||||
for idx, jupyter_config in enumerate(self.jupyter_config_requests):
|
||||
|
||||
# The Jupyter Config has this form is:
|
||||
# {"use_gpu": False/True, "token": None, "username": ..., "stop": False}
|
||||
|
||||
# Step 1: Check if JupyterWork already exists for this username
|
||||
username = jupyter_config["username"]
|
||||
if username not in self.jupyter_works:
|
||||
jupyter_config["ready"] = False
|
||||
|
||||
# Set the hardware selected by the user: GPU or CPU.
|
||||
cloud_compute = lapp.CloudCompute("gpu" if jupyter_config["use_gpu"] else "cpu-small")
|
||||
|
||||
# Step 2: Create new JupyterWork dynamically !
|
||||
self.jupyter_works[username] = JupyterLabWork(cloud_compute=cloud_compute)
|
||||
|
||||
# Step 3: Run the JupyterWork
|
||||
self.jupyter_works[username].run()
|
||||
|
||||
# Step 4: Store the notebook token in the associated config.
|
||||
# We are using this to know when the notebook is ready
|
||||
# and display the stop button on the UI.
|
||||
if self.jupyter_works[username].token:
|
||||
jupyter_config["token"] = self.jupyter_works[username].token
|
||||
|
||||
# Step 5: Stop the work if the user requested it.
|
||||
if jupyter_config["stop"]:
|
||||
self.jupyter_works[username].stop()
|
||||
self.jupyter_config_requests.pop(idx)
|
||||
|
||||
def configure_layout(self):
|
||||
return StreamlitFrontend(render_fn=render_fn)
|
||||
|
||||
|
||||
The StreamLit UI
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
In the UI below, we receive the **state** of the Jupyter Manager and it can be modified directly from the UI interaction.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
def render_fn(state):
|
||||
import streamlit as st
|
||||
|
||||
# Step 1: Enable users to select their notebooks and create them
|
||||
column_1, column_2, column_3 = st.columns(3)
|
||||
with column_1:
|
||||
create_jupyter = st.button("Create Jupyter Notebook")
|
||||
with column_2:
|
||||
username = st.text_input("Enter your username", "tchaton")
|
||||
assert username
|
||||
with column_3:
|
||||
use_gpu = st.checkbox("Use GPU")
|
||||
|
||||
# Step 2: If a user clicked the button, add an element to the list of configs
|
||||
# Note: state.jupyter_config_requests = ... will send the state update to the component.
|
||||
if create_jupyter:
|
||||
new_config = [{"use_gpu": use_gpu, "token": None, "username": username, "stop": False}]
|
||||
state.jupyter_config_requests = state.jupyter_config_requests + new_config
|
||||
|
||||
# Step 3: List of running notebooks.
|
||||
for idx, config in enumerate(state.jupyter_config_requests):
|
||||
column_1, column_2, column_3 = st.columns(3)
|
||||
with column_1:
|
||||
if not idx:
|
||||
st.write(f"Idx")
|
||||
st.write(f"{idx}")
|
||||
with column_2:
|
||||
if not idx:
|
||||
st.write(f"Use GPU")
|
||||
st.write(config["use_gpu"])
|
||||
with column_3:
|
||||
if not idx:
|
||||
st.write(f"Stop")
|
||||
if config["token"]:
|
||||
should_stop = st.button("Stop this notebook")
|
||||
|
||||
# Step 4: Change stop if the user clicked the button
|
||||
if should_stop:
|
||||
config["stop"] = should_stop
|
||||
state.jupyter_config_requests = state.jupyter_config_requests
|
||||
|
|
|
@ -1,202 +0,0 @@
|
|||
***************************************
|
||||
What Dynamic LightningWork does for you
|
||||
***************************************
|
||||
|
||||
Dynamic LightningWork (Work) changes the resources your application uses while the application is running (aka at runtime).
|
||||
|
||||
For example, imagine you want to create a research notebook app for your team. You want every member to be able to create multiple `JupyterLab <https://jupyter.org/>`_ sessions on their hardware of choice.
|
||||
|
||||
To allow every notebook to choose hardware, it needs to be set up in its own :class:`~lightning_app.core.work.LightningWork`, but you can't know the number of notebooks users will need in advance. In this case you'll need to add ``LightningWorks`` dynamically at run time.
|
||||
|
||||
----
|
||||
|
||||
*****************
|
||||
Use Dynamic Works
|
||||
*****************
|
||||
|
||||
Dynamic Works should be used anytime you want to change the resources your application is using while it is running (aka at runtime).
|
||||
|
||||
You're usually going to use the ``start`` and ``stop`` methods together.
|
||||
|
||||
----
|
||||
|
||||
Add a Dynamic Work
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
There are a couple of ways you can add a dynamic Work:
|
||||
|
||||
- Option 1: Attach your components in the **run** method using the Python functions.
|
||||
- Option 2: Use the Lightning built-in classes :class:`~lightning.structures.Dict` or :class:`~lightning.structures.List`.
|
||||
|
||||
.. note:: Using the Lightning built-in classes is usually easier to read.
|
||||
|
||||
----
|
||||
|
||||
**OPTION 1:** Attach your components in the run method of a flow using the Python functions **hasattr**, **setattr**, and **getattr**:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
|
||||
def run(self):
|
||||
|
||||
if not hasattr(self, "work"):
|
||||
# The `Work` component is created and attached here.
|
||||
setattr(self, "work", Work())
|
||||
# Run the `Work` component.
|
||||
getattr(self, "work").run()
|
||||
|
||||
**OPTION 2:** Use the built-in Lightning classes :class:`~lightning_app.structures.Dict` or :class:`~lightning_app.structures.List`
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from lightning_app.structures import Dict
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.dict = Dict()
|
||||
|
||||
def run(self):
|
||||
if "work" not in self.dict:
|
||||
# The `Work` component is attached here.
|
||||
self.dict["work"] = Work()
|
||||
self.dict["work"].run()
|
||||
|
||||
----
|
||||
|
||||
Stop a Work
|
||||
^^^^^^^^^^^
|
||||
Stop a work when you are concerned about cost.
|
||||
|
||||
To stop a work, use the work ``stop`` method:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work()
|
||||
|
||||
def run(self):
|
||||
self.work.stop()
|
||||
|
||||
----
|
||||
|
||||
*********************
|
||||
Dynamic Work Examples
|
||||
*********************
|
||||
|
||||
..
|
||||
The entire application can be found `here <https://github.com/Lightning-AI/lightning-template-jupyterlab>`_.
|
||||
|
||||
----
|
||||
|
||||
Dynamic Work with Jupyter Notebooks
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In this example, we are dynamically creating ``JupyterLabWork`` every time a user clicks the **Create Jupyter Notebook** button.
|
||||
|
||||
In order to do that, we are iterating over the list of ``jupyter_config_requests`` infinitely.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
|
||||
class JupyterLabManager(L.LightningFlow):
|
||||
|
||||
"""This flow manages the users' notebooks running within works."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.jupyter_works = L.structures.Dict()
|
||||
self.jupyter_config_requests = []
|
||||
|
||||
def run(self):
|
||||
for idx, jupyter_config in enumerate(self.jupyter_config_requests):
|
||||
|
||||
# The Jupyter Config has this form:
|
||||
# {"use_gpu": False/True, "token": None, "username": ..., "stop": False}
|
||||
|
||||
# Step 1: Check if JupyterWork already exists for this username
|
||||
username = jupyter_config["username"]
|
||||
if username not in self.jupyter_works:
|
||||
jupyter_config["ready"] = False
|
||||
|
||||
# Set the hardware selected by the user: GPU or CPU.
|
||||
cloud_compute = L.CloudCompute("gpu" if jupyter_config["use_gpu"] else "cpu-small")
|
||||
|
||||
# Step 2: Create new JupyterWork dynamically !
|
||||
self.jupyter_works[username] = JupyterLabWork(cloud_compute=cloud_compute)
|
||||
|
||||
# Step 3: Run the JupyterWork
|
||||
self.jupyter_works[username].run()
|
||||
|
||||
# Step 4: Store the notebook token in the associated config.
|
||||
# We are using this to know when the notebook is ready
|
||||
# and display the stop button on the UI.
|
||||
if self.jupyter_works[username].token:
|
||||
jupyter_config["token"] = self.jupyter_works[username].token
|
||||
|
||||
# Step 5: Stop the work if the user requested it.
|
||||
if jupyter_config['stop']:
|
||||
self.jupyter_works[username].stop()
|
||||
self.jupyter_config_requests.pop(idx)
|
||||
|
||||
def configure_layout(self):
|
||||
return L.app.frontend.StreamlitFrontend(render_fn=render_fn)
|
||||
|
||||
----
|
||||
|
||||
Dynamic Works with StreamLit UI
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Continuing from the Jupyter Notebook example, in the UI, we receive the **state** of the Jupyter Manager and the state can be modified directly from the UI.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import streamlit as st
|
||||
|
||||
|
||||
def render_fn(state):
|
||||
|
||||
# Step 1: Enable users to select their notebooks and create them
|
||||
column_1, column_2, column_3 = st.columns(3)
|
||||
with column_1:
|
||||
create_jupyter = st.button("Create Jupyter Notebook")
|
||||
with column_2:
|
||||
username = st.text_input('Enter your username', "tchaton")
|
||||
assert username
|
||||
with column_3:
|
||||
use_gpu = st.checkbox('Use GPU')
|
||||
|
||||
# Step 2: If a user clicked the button, add an element to the list of configs
|
||||
# Note: state.jupyter_config_requests = ... will send the state update to the component.
|
||||
if create_jupyter:
|
||||
new_config = [{"use_gpu": use_gpu, "token": None, "username": username, "stop": False}]
|
||||
state.jupyter_config_requests = state.jupyter_config_requests + new_config
|
||||
|
||||
# Step 3: List of running notebooks.
|
||||
for idx, config in enumerate(state.jupyter_config_requests):
|
||||
column_1, column_2, column_3 = st.columns(3)
|
||||
with column_1:
|
||||
if not idx:
|
||||
st.write(f"Idx")
|
||||
st.write(f"{idx}")
|
||||
with column_2:
|
||||
if not idx:
|
||||
st.write(f"Use GPU")
|
||||
st.write(config['use_gpu'])
|
||||
with column_3:
|
||||
if not idx:
|
||||
st.write(f"Stop")
|
||||
if config["token"]:
|
||||
should_stop = st.button("Stop this notebook")
|
||||
|
||||
# Step 4: Change stop if the user clicked the button
|
||||
if should_stop:
|
||||
config["stop"] = should_stop
|
||||
state.jupyter_config_requests = state.jupyter_config_requests
|
|
@ -7,6 +7,7 @@ LightningApp
|
|||
############
|
||||
|
||||
|
||||
The :class:`~lightning_app.core.app.LightningApp` runs a tree of one or more components that interact to create end-to-end applications. There are two kinds of components: :class:`~lightning_app.core.flow.LightningFlow` and :class:`~lightning_app.core.work.LightningWork`. This modular design enables you to reuse components created by other users.
|
||||
|
||||
.. autoclass:: lightning_app.core.app.LightningApp
|
||||
:exclude-members: _run, connect, get_component_by_name, maybe_apply_changes, set_state
|
||||
:noindex:
|
||||
|
|
|
@ -4,5 +4,8 @@
|
|||
LightningFlow
|
||||
#############
|
||||
|
||||
The :class:`~lightning_app.core.flow.LightningFlow` component coordinates long-running tasks :class:`~lightning_app.core.work.LightningWork` and runs its children :class:`~lightning_app.core.flow.LightningFlow` components.
|
||||
|
||||
|
||||
.. autoclass:: lightning_app.core.flow.LightningFlow
|
||||
:exclude-members: _attach_backend, _exit, _is_state_attribute, set_state
|
||||
:noindex:
|
||||
|
|
|
@ -8,8 +8,103 @@ Customize your Cloud Compute
|
|||
|
||||
**Audience:** Users who want to select the hardware to run in the cloud.
|
||||
|
||||
**Level:** Intermediate
|
||||
**Level:** Basic
|
||||
|
||||
----
|
||||
|
||||
.. include:: compute_content.rst
|
||||
***************************************
|
||||
How can I customize my Work resources ?
|
||||
***************************************
|
||||
|
||||
In the cloud, you can simply configure which machine to run on by passing
|
||||
a :class:`~lightning_app.utilities.packaging.cloud_compute.CloudCompute` to your work ``__init__`` method:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
|
||||
# Run on a free, shared CPU machine. This is the default for every LightningWork.
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute())
|
||||
|
||||
# Run on a dedicated, medium-size CPU machine (see specs below)
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute("cpu-medium"))
|
||||
|
||||
# Run on cheap GPU machine with a single GPU (see specs below)
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu"))
|
||||
|
||||
# Run on a fast multi-GPU machine (see specs below)
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu-fast-multi"))
|
||||
|
||||
|
||||
Here is the full list of supported machine names:
|
||||
|
||||
.. list-table:: Hardware by Accelerator Type
|
||||
:widths: 25 25 25 25
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- # of CPUs
|
||||
- GPUs
|
||||
- Memory
|
||||
* - default
|
||||
- 2
|
||||
- 0
|
||||
- 3 GB
|
||||
* - cpu-small
|
||||
- 2
|
||||
- 0
|
||||
- 8 GB
|
||||
* - cpu-medium
|
||||
- 8
|
||||
- 0
|
||||
- 32 GB
|
||||
* - gpu
|
||||
- 4
|
||||
- 1 (T4, 16 GB)
|
||||
- 16 GB
|
||||
* - gpu-fast
|
||||
- 8
|
||||
- 1 (V100, 16 GB)
|
||||
- 61 GB
|
||||
* - gpu-fast-multi
|
||||
- 32
|
||||
- 4 (V100 16 GB)
|
||||
- 244 GB
|
||||
|
||||
The up-to-date prices for these instances can be found `here <https://lightning.ai/pricing>`_.
|
||||
|
||||
|
||||
*******************************************
|
||||
How can I run on spot/preemptible machine ?
|
||||
*******************************************
|
||||
|
||||
Most cloud providers offer ``preemptible`` (synonym of ``spot``) machines, which are usually discounted by up to 90%. Those machines are cheaper, but the cloud provider can retrieve them at any time.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
|
||||
# Run on a single CPU
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu", preemptible=True))
|
||||
|
||||
|
||||
***********************************
|
||||
How can I stop my work when idle ?
|
||||
***********************************
|
||||
|
||||
By providing **idle_timeout=X Seconds**, the work is automatically stopped **X seconds** after doing nothing.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
|
||||
# Run on a single CPU and turn down immediately when idle.
|
||||
MyCustomWork(cloud_compute=lapp.CloudCompute("gpu", idle_timeout=0))
|
||||
|
||||
|
||||
#############
|
||||
CloudCompute
|
||||
#############
|
||||
|
||||
.. autoclass:: lightning_app.utilities.packaging.cloud_compute.CloudCompute
|
||||
:noindex:
|
||||
|
|
|
@ -1,100 +0,0 @@
|
|||
|
||||
***************************
|
||||
Customize my Work resources
|
||||
***************************
|
||||
|
||||
In the cloud, you can simply configure which machine to run on by passing
|
||||
a :class:`~lightning_app.utilities.packaging.cloud_compute.CloudCompute` to your work ``__init__`` method:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
# Run on a free, shared CPU machine. This is the default for every LightningWork.
|
||||
MyCustomWork(cloud_compute=L.CloudCompute())
|
||||
|
||||
# Run on a dedicated, medium-size CPU machine (see specs below)
|
||||
MyCustomWork(cloud_compute=L.CloudCompute("cpu-medium"))
|
||||
|
||||
# Run on cheap GPU machine with a single GPU (see specs below)
|
||||
MyCustomWork(cloud_compute=L.CloudCompute("gpu"))
|
||||
|
||||
# Run on a fast multi-GPU machine (see specs below)
|
||||
MyCustomWork(cloud_compute=L.CloudCompute("gpu-fast-multi"))
|
||||
|
||||
|
||||
Here is the full list of supported machine names:
|
||||
|
||||
.. list-table:: Hardware by Accelerator Type
|
||||
:widths: 25 25 25 25
|
||||
:header-rows: 1
|
||||
|
||||
* - Name
|
||||
- # of CPUs
|
||||
- GPUs
|
||||
- Memory
|
||||
* - default
|
||||
- 2
|
||||
- 0
|
||||
- 3 GB
|
||||
* - cpu-small
|
||||
- 2
|
||||
- 0
|
||||
- 8 GB
|
||||
* - cpu-medium
|
||||
- 8
|
||||
- 0
|
||||
- 32 GB
|
||||
* - gpu
|
||||
- 4
|
||||
- 1 (T4, 16 GB)
|
||||
- 16 GB
|
||||
* - gpu-fast
|
||||
- 8
|
||||
- 1 (V100, 16 GB)
|
||||
- 61 GB
|
||||
* - gpu-fast-multi
|
||||
- 32
|
||||
- 4 (V100 16 GB)
|
||||
- 244 GB
|
||||
|
||||
The up-to-date prices for these instances can be found `here <https://lightning.ai/pages/pricing>`_.
|
||||
|
||||
----
|
||||
|
||||
*******************************
|
||||
Run on spot/preemptible machine
|
||||
*******************************
|
||||
|
||||
Most cloud providers offer ``preemptible`` (synonym of ``spot``) machines, which are usually discounted by up to 90%. Those machines are cheaper, but the cloud provider can retrieve them at any time and they might take longer to be ready.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
# Run on a single CPU
|
||||
MyCustomWork(cloud_compute=L.CloudCompute("gpu", preemptible=True))
|
||||
|
||||
----
|
||||
|
||||
**********************
|
||||
Stop my work when idle
|
||||
**********************
|
||||
|
||||
By providing **idle_timeout=X Seconds**, the work is automatically stopped **X seconds** after doing nothing.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
# Run on a single CPU and turn down immediately when idle.
|
||||
MyCustomWork(cloud_compute=L.CloudCompute("gpu", idle_timeout=0))
|
||||
|
||||
----
|
||||
|
||||
************
|
||||
CloudCompute
|
||||
************
|
||||
|
||||
.. autoclass:: lightning_app.utilities.packaging.cloud_compute.CloudCompute
|
||||
:noindex:
|
|
@ -1,13 +1,83 @@
|
|||
:orphan:
|
||||
|
||||
###############################
|
||||
Handle Lightning App exceptions
|
||||
###############################
|
||||
########################
|
||||
Handling App Exceptions
|
||||
########################
|
||||
|
||||
**Audience:** Users who want to make Lightning Apps more robust to potential issues.
|
||||
**Audience:** Users who want to know how to implement app where errors are handled.
|
||||
|
||||
**Level:** Advanced
|
||||
|
||||
----
|
||||
|
||||
.. include:: handling_app_exception_content.rst
|
||||
*************************************************
|
||||
Why should I care about handling app exceptions ?
|
||||
*************************************************
|
||||
|
||||
Imagine you are creating an application where your team can launch model training by providing their own Github Repo any time they want.
|
||||
|
||||
As the application admin, you don't want the application to go down if their code has a bug and breaks.
|
||||
|
||||
Instead, you would like the work to capture the exception and surface this to the users on failures.
|
||||
|
||||
****************************************
|
||||
How can I configure exception handling ?
|
||||
****************************************
|
||||
|
||||
|
||||
The LightningWork accepts an argument **raise_exception** which is **True** by default. This aligns with Python default behaviors.
|
||||
|
||||
However, for the user case stated above, we want to capture the work exceptions. This is done by providing ``raise_exception=False`` to the work ``__init__`` method.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
MyCustomWork(raise_exception=False) # <== HERE: The exception is captured.
|
||||
|
||||
# Default behavior
|
||||
MyCustomWork(raise_exception=True) # <== HERE: The exception is raised within the flow and terminates the app
|
||||
|
||||
|
||||
And you can customize this behavior by overriding the ``on_exception`` hook to the Lightning Work.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
|
||||
class MyCustomWork(L.LightningWork):
|
||||
def on_exception(self, exception: Exception):
|
||||
# do something when an exception is triggered.
|
||||
pass
|
||||
|
||||
|
||||
*******************
|
||||
Application Example
|
||||
*******************
|
||||
|
||||
This is the pseudo-code for the application described above.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as lapp
|
||||
|
||||
|
||||
class RootFlow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.user_jobs = lapp.structures.Dict()
|
||||
self.requested_jobs = []
|
||||
|
||||
def run(self):
|
||||
for request in self.requested_jobs:
|
||||
job_id = request["id"]
|
||||
if job_id not in self.user_jobs:
|
||||
# Note: The `GithubRepoLauncher` doesn't exist yet.
|
||||
self.user_jobs[job_id] = GithubRepoLauncher(
|
||||
**request,
|
||||
raise_exception=False, # <== HERE: The exception is captured.
|
||||
)
|
||||
self.user_jobs[job_id].run()
|
||||
|
||||
if self.user_jobs[job_id].status.stage == "failed" and "printed" not in request:
|
||||
print(self.user_jobs[job_id].status) # <== HERE: Print the user exception.
|
||||
request["printed"] = True
|
||||
|
|
|
@ -1,74 +0,0 @@
|
|||
|
||||
***************************************************
|
||||
What handling Lightning App exceptions does for you
|
||||
***************************************************
|
||||
|
||||
Imagine you are creating a Lightning App (App) where your team can launch model training by providing their own Github Repo any time they want.
|
||||
|
||||
As the App admin, you don't want the App to go down if their code has a bug and breaks.
|
||||
|
||||
Instead, you would like the LightningWork (Work) to capture the exception and present the issue to users.
|
||||
|
||||
----
|
||||
|
||||
****************************
|
||||
Configure exception handling
|
||||
****************************
|
||||
|
||||
The LightningWork (Work) accepts an argument **raise_exception** which is **True** by default. This aligns with Python default behaviors.
|
||||
|
||||
However, for the user case stated in the previous section, we want to capture the Work exceptions. This is done by providing ``raise_exception=False`` to the work ``__init__`` method.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
MyCustomWork(raise_exception=False) # <== HERE: The exception is captured.
|
||||
|
||||
# Default behavior
|
||||
MyCustomWork(raise_exception=True) # <== HERE: The exception is raised within the flow and terminates the app
|
||||
|
||||
|
||||
And you can customize this behavior by overriding the ``on_exception`` hook to the Work.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
class MyCustomWork(L.LightningWork):
|
||||
|
||||
def on_exception(self, exception: Exception):
|
||||
# do something when an exception is triggered.
|
||||
|
||||
----
|
||||
|
||||
**************************
|
||||
Exception handling example
|
||||
**************************
|
||||
|
||||
This is the pseudo-code for the application described above.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
|
||||
class RootFlow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.user_jobs = L.structures.Dict()
|
||||
self.requested_jobs = []
|
||||
|
||||
def run(self):
|
||||
for request in self.requested_jobs:
|
||||
job_id = request["id"]
|
||||
if job_id not in self.user_jobs:
|
||||
# Note: The `GithubRepoLauncher` doesn't exist yet.
|
||||
self.user_jobs[job_id] = GithubRepoLauncher(
|
||||
**request,
|
||||
raise_exception=False, # <== HERE: The exception is captured.
|
||||
)
|
||||
self.user_jobs[job_id].run()
|
||||
|
||||
if self.user_jobs[job_id].status.stage == "failed" and "printed" not in request:
|
||||
print(self.user_jobs[job_id].status) # <== HERE: Print the user exception.
|
||||
request["printed"] = True
|
|
@ -6,6 +6,7 @@
|
|||
LightningWork
|
||||
#############
|
||||
|
||||
The :class:`~lightning_app.core.work.LightningWork` component is a building block optimized for long-running jobs or integrating third-party services. LightningWork can be used for training large models, downloading a dataset, or any long-lasting operation.
|
||||
|
||||
.. autoclass:: lightning_app.core.work.LightningWork
|
||||
:exclude-members: _aggregate_status_timeout, _is_state_attribute, _is_state_attribute, set_state
|
||||
:noindex:
|
||||
|
|
|
@ -1,15 +1,87 @@
|
|||
:orphan:
|
||||
|
||||
######################################
|
||||
Sharing Objects between LightningWorks
|
||||
######################################
|
||||
#############################
|
||||
Sharing Objects between Works
|
||||
#############################
|
||||
|
||||
**Audience:** Users who want to know how to transfer Python objects between their LightningWorks.
|
||||
**Audience:** Users who want to know how to transfer python objects between their works.
|
||||
|
||||
**Level:** Advanced
|
||||
|
||||
**Prerequisite**: Reach Level 16+, know about `pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and read the `Access app state guide <../../access_app_state.html>`_.
|
||||
**Prerequisite**: Know about the pandas library and read the :ref:`access_app_state` guide.
|
||||
|
||||
----
|
||||
|
||||
.. include:: payload_content.rst
|
||||
************************************
|
||||
When do I need to transfer objects ?
|
||||
************************************
|
||||
|
||||
Imagine your application is processing some data using a `pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and you want to pass that data to another work. This is what the **Payload API** is meant for.
|
||||
|
||||
|
||||
*************************************
|
||||
How can I use the Lightning Payload ?
|
||||
*************************************
|
||||
|
||||
The Payload enables non JSON-serializable attribute objects to be part of your work state and be communicated to other works.
|
||||
|
||||
Here is an example how to use it:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SourceWork(lapp.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.df = None
|
||||
|
||||
def run(self):
|
||||
# do some processing
|
||||
|
||||
df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
|
||||
|
||||
# The object you care about needs to be wrapped into a Payload object.
|
||||
self.df = lapp.storage.Payload(df)
|
||||
|
||||
# You can access the original object from the payload using its value property.
|
||||
print("src", self.df.value)
|
||||
# src col1 col2
|
||||
# 0 1 3
|
||||
# 1 2 4
|
||||
|
||||
Once the Payload object is attached to your work state, it can be passed to another work via the flow as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning_app as la
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class DestinationWork(lapp.LightningWork):
|
||||
def run(self, df: lapp.storage.Payload):
|
||||
# You can access the original object from the payload using its value property.
|
||||
print("dst", df.value)
|
||||
# dst col1 col2
|
||||
# 0 1 3
|
||||
# 1 2 4
|
||||
|
||||
|
||||
class Flow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.src = SourceWork()
|
||||
self.dst = DestinationWork()
|
||||
|
||||
def run(self):
|
||||
self.src.run()
|
||||
# The pandas DataFrame created by the ``SourceWork``
|
||||
# is passed to the ``DestinationWork``.
|
||||
# Internally, Lightning pickles and un-pickle the python object,
|
||||
# so you receive a copy of the original object.
|
||||
self.dst.run(df=self.src.df)
|
||||
|
||||
|
||||
app = lapp.LightningApp(Flow())
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
|
||||
**************************************
|
||||
What transferring objects does for you
|
||||
**************************************
|
||||
|
||||
Imagine your application is processing some data using a `pandas DataFrame <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_ and you want to pass that data to another LightningWork (Work). This is what the **Payload API** is meant for.
|
||||
|
||||
----
|
||||
|
||||
*************************
|
||||
Use the Lightning Payload
|
||||
*************************
|
||||
|
||||
The Payload enables non JSON-serializable attribute objects to be part of your Work's state and to be communicated to other Works.
|
||||
|
||||
Here is an example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class SourceWork(L.LightningWork):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.df = None
|
||||
|
||||
def run(self):
|
||||
# do some processing
|
||||
|
||||
df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
|
||||
|
||||
# The object you care about needs to be wrapped into a Payload object.
|
||||
self.df = L.storage.Payload(df)
|
||||
|
||||
# You can access the original object from the payload using its value property.
|
||||
print("src", self.df.value)
|
||||
# src col1 col2
|
||||
# 0 1 3
|
||||
# 1 2 4
|
||||
|
||||
Once the Payload object is attached to your Work's state, it can be passed to another work using the LightningFlow (Flow) as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class DestinationWork(L.LightningWork):
|
||||
def run(self, df: L.storage.Payload):
|
||||
# You can access the original object from the payload using its value property.
|
||||
print("dst", df.value)
|
||||
# dst col1 col2
|
||||
# 0 1 3
|
||||
# 1 2 4
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.src = SourceWork()
|
||||
self.dst = DestinationWork()
|
||||
|
||||
def run(self):
|
||||
self.src.run()
|
||||
# The pandas DataFrame created by the ``SourceWork``
|
||||
# is passed to the ``DestinationWork``.
|
||||
# Internally, Lightning pickles and un-pickle the python object,
|
||||
# so you receive a copy of the original object.
|
||||
self.dst.run(df=self.src.df)
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
|
@ -1,8 +1,9 @@
|
|||
:orphan:
|
||||
|
||||
####################
|
||||
LightningWork Status
|
||||
####################
|
||||
|
||||
#####################
|
||||
Lightning Work Status
|
||||
#####################
|
||||
|
||||
**Audience:** Users who want to understand ``LightningWork`` under the hood.
|
||||
|
||||
|
@ -10,4 +11,199 @@ LightningWork Status
|
|||
|
||||
----
|
||||
|
||||
.. include:: status_content.rst
|
||||
*******************
|
||||
What are statuses ?
|
||||
*******************
|
||||
|
||||
Statuses indicate transition points in the life of a Lightning Work and contain metadata.
|
||||
|
||||
The different stages are:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class WorkStageStatus:
|
||||
NOT_STARTED = "not_started"
|
||||
STOPPED = "stopped"
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
SUCCEEDED = "succeeded"
|
||||
FAILED = "failed"
|
||||
|
||||
And a single status is as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@dataclass
|
||||
class WorkStatus:
|
||||
stage: WorkStageStatus
|
||||
timestamp: float
|
||||
reason: Optional[str] = None
|
||||
message: Optional[str] = None
|
||||
count: int = 1
|
||||
|
||||
|
||||
On creation, the work's status flags all evaluate to ``False`` (in particular ``has_started``) and when calling ``work.run`` in your flow,
|
||||
the work transitions from ``is_pending`` to ``is_running``, and then to ``has_succeeded`` if everything went well, or ``has_failed`` otherwise.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from time import sleep
|
||||
import lightning_app as la
|
||||
|
||||
|
||||
class Work(lapp.LightningWork):
|
||||
def run(self, value: int):
|
||||
sleep(1)
|
||||
if value == 0:
|
||||
return
|
||||
raise Exception(f"The provided value was {value}")
|
||||
|
||||
|
||||
class Flow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work(raise_exception=False)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
if not self.work.has_started:
|
||||
print("NOT STARTED")
|
||||
|
||||
elif self.work.is_pending:
|
||||
print("PENDING")
|
||||
|
||||
elif self.work.is_running:
|
||||
print("RUNNING")
|
||||
|
||||
elif self.work.has_succeeded:
|
||||
print("SUCCESS")
|
||||
|
||||
elif self.work.has_failed:
|
||||
print("FAILED")
|
||||
|
||||
elif self.work.has_stopped:
|
||||
print("STOPPED")
|
||||
self._exit()
|
||||
|
||||
print(self.work.status)
|
||||
self.work.run(self.counter)
|
||||
self.counter += 1
|
||||
|
||||
|
||||
app = lapp.LightningApp(Flow())
|
||||
|
||||
Run this app as follows:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app test.py > app_log.txt
|
||||
|
||||
And here is the expected output inside ``app_log.txt`` and as expected,
|
||||
we are observing the following transition ``has_started``, ``is_pending``, ``is_running``, ``has_succeeded``, ``is_running`` and ``has_failed``
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
NOT STARTED
|
||||
WorkStatus(stage='not_started', timestamp=1653498225.18468, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
...
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498228.825194, reason=None, message=None, count=1)
|
||||
...
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
...
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
...
|
||||
|
||||
In order to access all statuses, simply do:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from time import sleep
|
||||
import lightning_app as la
|
||||
|
||||
|
||||
class Work(lapp.LightningWork):
|
||||
def run(self, value: int):
|
||||
sleep(1)
|
||||
if value == 0:
|
||||
return
|
||||
raise Exception(f"The provided value was {value}")
|
||||
|
||||
|
||||
class Flow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work(raise_exception=False)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
print(self.statuses)
|
||||
self.work.run(self.counter)
|
||||
self.counter += 1
|
||||
|
||||
|
||||
app = lapp.LightningApp(Flow())
|
||||
|
||||
|
||||
Run this app as follows:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app test.py > app_log.txt
|
||||
|
||||
And here is the expected output inside ``app_log.txt``:
|
||||
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# First execution with value = 0
|
||||
|
||||
[]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
|
||||
# Second execution with value = 1
|
||||
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
|
||||
|
|
|
@ -1,197 +0,0 @@
|
|||
|
||||
*************************************
|
||||
Everything about LightningWork Status
|
||||
*************************************
|
||||
|
||||
Statuses indicate transition points in the life of a LightningWork (Work) and contain metadata.
|
||||
|
||||
The different stages are:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
class WorkStageStatus:
|
||||
NOT_STARTED = "not_started"
|
||||
STOPPED = "stopped"
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
SUCCEEDED = "succeeded"
|
||||
FAILED = "failed"
|
||||
|
||||
And a single status is as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@dataclass
|
||||
class WorkStatus:
|
||||
stage: WorkStageStatus
|
||||
timestamp: float
|
||||
reason: Optional[str] = None
|
||||
message: Optional[str] = None
|
||||
count: int = 1
|
||||
|
||||
|
||||
On creation, the Work's status flags all evaluate to ``False`` (in particular ``has_started``) and when calling ``work.run`` in your Lightning Flow (Flow),
|
||||
the Work transitions from ``is_pending`` to ``is_running`` and then to ``has_succeeded`` if everything went well or ``has_failed`` otherwise.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from time import sleep
|
||||
import lightning as L
|
||||
|
||||
|
||||
class Work(L.LightningWork):
|
||||
def run(self, value: int):
|
||||
sleep(1)
|
||||
if value == 0:
|
||||
return
|
||||
raise Exception(f"The provided value was {value}")
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work(raise_exception=False)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
if not self.work.has_started:
|
||||
print("NOT STARTED")
|
||||
|
||||
elif self.work.is_pending:
|
||||
print("PENDING")
|
||||
|
||||
elif self.work.is_running:
|
||||
print("RUNNING")
|
||||
|
||||
elif self.work.has_succeeded:
|
||||
print("SUCCESS")
|
||||
|
||||
elif self.work.has_failed:
|
||||
print("FAILED")
|
||||
|
||||
elif self.work.has_stopped:
|
||||
print("STOPPED")
|
||||
self._exit()
|
||||
|
||||
print(self.work.status)
|
||||
self.work.run(self.counter)
|
||||
self.counter += 1
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
||||
|
||||
Run this app as follows:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app test.py > app_log.txt
|
||||
|
||||
And here is the expected output inside ``app_log.txt`` and as expected,
|
||||
we are observing the following transition ``has_started``, ``is_pending``, ``is_running``, ``has_succeeded``, ``is_running`` and ``has_failed``
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
NOT STARTED
|
||||
WorkStatus(stage='not_started', timestamp=1653498225.18468, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
...
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
PENDING
|
||||
WorkStatus(stage='pending', timestamp=1653498225.217413, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498228.825194, reason=None, message=None, count=1)
|
||||
...
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
SUCCESS
|
||||
WorkStatus(stage='succeeded', timestamp=1653498229.831793, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
...
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
RUNNING
|
||||
WorkStatus(stage='running', timestamp=1653498229.846451, reason=None, message=None, count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
FAILED
|
||||
WorkStatus(stage='failed', timestamp=1653498230.852565, reason='user_exception', message='The provided value was 1', count=1)
|
||||
...
|
||||
|
||||
In order to access all statuses:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from time import sleep
|
||||
import lightning as L
|
||||
|
||||
|
||||
class Work(L.LightningWork):
|
||||
def run(self, value: int):
|
||||
sleep(1)
|
||||
if value == 0:
|
||||
return
|
||||
raise Exception(f"The provided value was {value}")
|
||||
|
||||
|
||||
class Flow(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.work = Work(raise_exception=False)
|
||||
self.counter = 0
|
||||
|
||||
def run(self):
|
||||
print(self.statuses)
|
||||
self.work.run(self.counter)
|
||||
self.counter += 1
|
||||
|
||||
|
||||
app = L.LightningApp(Flow())
|
||||
|
||||
|
||||
Run this app as follows:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app test.py > app_log.txt
|
||||
|
||||
And here is the expected output inside ``app_log.txt``:
|
||||
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# First execution with value = 0
|
||||
|
||||
[]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498622.252016, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498626.185683, reason=None, message=None, count=1), WorkStatus(stage='succeeded', timestamp=1653498627.191053, reason=None, message=None, count=1)]
|
||||
|
||||
# Second execution with value = 1
|
||||
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
...
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
|
||||
[WorkStatus(stage='pending', timestamp=1653498627.204636, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='running', timestamp=1653498627.205509, reason=None, message=None, count=1), WorkStatus(stage='failed', timestamp=1653498628.210164, reason='user_exception', message='The provided value was 1', count=1)]
|
|
@ -2,8 +2,6 @@
|
|||
Build a Directed Acyclic Graph (DAG)
|
||||
####################################
|
||||
|
||||
.. _dag_example:
|
||||
|
||||
**Audience:** Users coming from MLOps to Lightning Apps, looking for more flexibility.
|
||||
|
||||
A typical ML training workflow can be implemented with a simple DAG.
|
||||
|
@ -12,10 +10,10 @@ Below is a pseudo-code using the lightning framework that uses a LightningFlow t
|
|||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
import lightning_app as la
|
||||
|
||||
class DAGFlow(L.LightningFlow):
|
||||
|
||||
class DAGFlow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.processor = DataProcessorWork(...)
|
||||
|
@ -31,19 +29,21 @@ Below is a pseudo-code to run several works in parallel using a built-in :class:
|
|||
|
||||
.. code-block:: python
|
||||
|
||||
import lightning as L
|
||||
import lightning_app as la
|
||||
|
||||
class DAGFlow(L.LightningFlow):
|
||||
|
||||
class DAGFlow(lapp.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
...
|
||||
self.train_works = L.structures.Dict(**{
|
||||
"1": TrainingWork(..., parallel=True),
|
||||
"2": TrainingWork(..., parallel=True),
|
||||
"3": TrainingWork(..., parallel=True),
|
||||
...
|
||||
})
|
||||
self.train_works = lapp.structures.Dict(
|
||||
**{
|
||||
"1": TrainingWork(..., parallel=True),
|
||||
"2": TrainingWork(..., parallel=True),
|
||||
"3": TrainingWork(..., parallel=True),
|
||||
# ...
|
||||
}
|
||||
)
|
||||
...
|
||||
|
||||
def run(self):
|
||||
|
@ -59,12 +59,13 @@ Below is a pseudo-code to run several works in parallel using a built-in :class:
|
|||
|
||||
self.serve_work.run(...)
|
||||
|
||||
----
|
||||
|
||||
**********
|
||||
Next Steps
|
||||
Next steps
|
||||
**********
|
||||
|
||||
Depending on your use case, you might want to check one of these out next.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
|
|
|
@ -15,9 +15,7 @@ In this example, you will learn how to create a simple DAG which:
|
|||
|
||||
and learn how to schedule this entire process.
|
||||
|
||||
Find the complete example `here <https://github.com/Lightning-AI/lightning/blob/master/examples/app_dag/app.py>`_.
|
||||
|
||||
----
|
||||
Find the complete example `here <https://github.com/PyTorchLightning/lightning/blob/master/examples/dag/app.py>`_.
|
||||
|
||||
**************************
|
||||
Step 1: Implement your DAG
|
||||
|
@ -35,20 +33,19 @@ First, let's define the component we need:
|
|||
* Processing is responsible to execute a ``processing.py`` script.
|
||||
* A collection of model work to train all models in parallel.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_dag/app.py
|
||||
.. literalinclude:: ../../../../examples/dag/app.py
|
||||
:lines: 55-79
|
||||
|
||||
And its run method executes the steps described above.
|
||||
Additionally, ``work.stop`` is used to reduce cost when running in the cloud.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_dag/app.py
|
||||
.. literalinclude:: ../../../../examples/dag/app.py
|
||||
:lines: 81-108
|
||||
|
||||
----
|
||||
|
||||
*****************************
|
||||
Step 2: Define the scheduling
|
||||
*****************************
|
||||
|
||||
.. literalinclude:: ../../../examples/app_dag/app.py
|
||||
.. literalinclude:: ../../../../examples/dag/app.py
|
||||
:lines: 109-137
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
:orphan:
|
||||
|
||||
##########################
|
||||
Build a Data Exploring App
|
||||
##########################
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
:orphan:
|
||||
|
||||
###############
|
||||
Build a ETL App
|
||||
###############
|
||||
|
|
|
@ -1,232 +0,0 @@
|
|||
import json
|
||||
import os
|
||||
import tarfile
|
||||
import uuid
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import lightning as L
|
||||
from lightning.app.storage import Drive
|
||||
|
||||
|
||||
class FileServer(L.LightningWork):
|
||||
def __init__(self, drive: Drive, base_dir: str = "file_server", chunk_size=10240, **kwargs):
|
||||
"""This component uploads, downloads files to your application.
|
||||
|
||||
Arguments:
|
||||
drive: The drive can share data inside your application.
|
||||
base_dir: The local directory where the data will be stored.
|
||||
chunk_size: The quantity of bytes to download/upload at once.
|
||||
"""
|
||||
super().__init__(
|
||||
cloud_build_config=L.BuildConfig(["flask, flask-cors"]),
|
||||
parallel=True,
|
||||
**kwargs,
|
||||
)
|
||||
# 1: Attach the arguments to the state.
|
||||
self.drive = drive
|
||||
self.base_dir = base_dir
|
||||
self.chunk_size = chunk_size
|
||||
|
||||
# 2: Create a folder to store the data.
|
||||
os.makedirs(self.base_dir, exist_ok=True)
|
||||
|
||||
# 3: Keep a reference to the uploaded filenames.
|
||||
self.uploaded_files = dict()
|
||||
|
||||
def get_filepath(self, path: str) -> str:
|
||||
"""Returns file path stored on the file server."""
|
||||
return os.path.join(self.base_dir, path)
|
||||
|
||||
def get_random_filename(self) -> str:
|
||||
"""Returns a random hash for the file name."""
|
||||
return uuid.uuid4().hex
|
||||
|
||||
def upload_file(self, file):
|
||||
"""Upload a file while tracking its progress."""
|
||||
# 1: Track metadata about the file
|
||||
filename = file.filename
|
||||
uploaded_file = self.get_random_filename()
|
||||
meta_file = uploaded_file + ".meta"
|
||||
self.uploaded_files[filename] = {"progress": (0, None), "done": False}
|
||||
|
||||
# 2: Create a stream and write bytes of
|
||||
# the file to the disk under `uploaded_file` path.
|
||||
with open(self.get_filepath(uploaded_file), "wb") as out_file:
|
||||
content = file.read(self.chunk_size)
|
||||
while content:
|
||||
# 2.1 Write the file bytes
|
||||
size = out_file.write(content)
|
||||
|
||||
# 2.2 Update the progress metadata
|
||||
self.uploaded_files[filename]["progress"] = (
|
||||
self.uploaded_files[filename]["progress"][0] + size,
|
||||
None,
|
||||
)
|
||||
# 4: Read next chunk of data
|
||||
content = file.read(self.chunk_size)
|
||||
|
||||
# 3: Update metadata that the file has been uploaded.
|
||||
full_size = self.uploaded_files[filename]["progress"][0]
|
||||
self.drive.put(self.get_filepath(uploaded_file))
|
||||
self.uploaded_files[filename] = {
|
||||
"progress": (full_size, full_size),
|
||||
"done": True,
|
||||
"uploaded_file": uploaded_file,
|
||||
}
|
||||
|
||||
# 4: Write down the metadata about the file to the disk
|
||||
meta = {
|
||||
"original_path": filename,
|
||||
"display_name": os.path.splitext(filename)[0],
|
||||
"size": full_size,
|
||||
"drive_path": uploaded_file,
|
||||
}
|
||||
with open(self.get_filepath(meta_file), "wt") as f:
|
||||
json.dump(meta, f)
|
||||
|
||||
# 5: Put the file to the drive.
|
||||
# It means other components can access get or list them.
|
||||
self.drive.put(self.get_filepath(meta_file))
|
||||
return meta
|
||||
|
||||
def list_files(self, file_path: str):
|
||||
# 1: Get the local file path of the file server.
|
||||
file_path = self.get_filepath(file_path)
|
||||
|
||||
# 2: If the file exists in the drive, transfer it locally.
|
||||
if not os.path.exists(file_path):
|
||||
self.drive.get(file_path)
|
||||
|
||||
if os.path.isdir(file_path):
|
||||
result = set()
|
||||
for _, _, f in os.walk(file_path):
|
||||
for file in f:
|
||||
if not file.endswith(".meta"):
|
||||
for filename, meta in self.uploaded_files.items():
|
||||
if meta["uploaded_file"] == file:
|
||||
result.add(filename)
|
||||
return {"asset_names": [v for v in result]}
|
||||
|
||||
# 3: If the filepath is a tar or zip file, list their contents
|
||||
if zipfile.is_zipfile(file_path):
|
||||
with zipfile.ZipFile(file_path, "r") as zf:
|
||||
result = zf.namelist()
|
||||
elif tarfile.is_tarfile(file_path):
|
||||
with tarfile.TarFile(file_path, "r") as tf:
|
||||
result = tf.getnames()
|
||||
else:
|
||||
raise ValueError("Cannot open archive file!")
|
||||
|
||||
# 4: Returns the matching files.
|
||||
return {"asset_names": result}
|
||||
|
||||
def run(self):
|
||||
# 1: Imports flask requirements.
|
||||
from flask import Flask, request
|
||||
from flask_cors import CORS
|
||||
|
||||
# 2: Create a flask app
|
||||
flask_app = Flask(__name__)
|
||||
CORS(flask_app)
|
||||
|
||||
# 3: Define the upload file endpoint
|
||||
@flask_app.post("/upload_file/")
|
||||
def upload_file():
|
||||
"""Upload a file directly as form data."""
|
||||
f = request.files["file"]
|
||||
return self.upload_file(f)
|
||||
|
||||
@flask_app.get("/")
|
||||
def list_files():
|
||||
return self.list_files(str(Path(self.base_dir).resolve()))
|
||||
|
||||
# 5: Start the flask app while providing the `host` and `port`.
|
||||
flask_app.run(host=self.host, port=self.port, load_dotenv=False)
|
||||
|
||||
def alive(self):
|
||||
"""Hack: Returns whether the server is alive."""
|
||||
return self.url != ""
|
||||
|
||||
|
||||
import requests
|
||||
|
||||
from lightning import LightningWork
|
||||
|
||||
|
||||
class TestFileServer(LightningWork):
|
||||
def __init__(self, drive: Drive):
|
||||
super().__init__(cache_calls=True)
|
||||
self.drive = drive
|
||||
|
||||
def run(self, file_server_url: str, first=True):
|
||||
if first:
|
||||
with open("test.txt", "w") as f:
|
||||
f.write("Some text.")
|
||||
|
||||
response = requests.post(file_server_url + "/upload_file/", files={"file": open("test.txt", "rb")})
|
||||
assert response.status_code == 200
|
||||
else:
|
||||
response = requests.get(file_server_url)
|
||||
assert response.status_code == 200
|
||||
assert response.json() == {"asset_names": ["test.txt"]}
|
||||
|
||||
|
||||
from lightning import LightningApp, LightningFlow
|
||||
|
||||
|
||||
class Flow(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
# 1: Create a drive to share data between works
|
||||
self.drive = Drive("lit://file_server")
|
||||
# 2: Create the filer server
|
||||
self.file_server = FileServer(self.drive)
|
||||
# 3: Create the file ser
|
||||
self.test_file_server = TestFileServer(self.drive)
|
||||
|
||||
def run(self):
|
||||
# 1: Start the file server.
|
||||
self.file_server.run()
|
||||
|
||||
# 2: Trigger the test file server work when ready.
|
||||
if self.file_server.alive():
|
||||
# 3 Execute the test file server work.
|
||||
self.test_file_server.run(self.file_server.url)
|
||||
self.test_file_server.run(self.file_server.url, first=False)
|
||||
|
||||
# 4 When both execution are successful, exit the app.
|
||||
if self.test_file_server.num_successes == 2:
|
||||
self._exit()
|
||||
|
||||
def configure_layout(self):
|
||||
# Expose the file_server component
|
||||
# in the UI using its `/` endpoint.
|
||||
return {"name": "File Server", "content": self.file_server}
|
||||
|
||||
|
||||
from lightning.app.runners import MultiProcessRuntime
|
||||
|
||||
|
||||
def test_file_server():
|
||||
app = LightningApp(Flow())
|
||||
MultiProcessRuntime(app).dispatch()
|
||||
|
||||
|
||||
from lightning.app.testing.testing import run_app_in_cloud
|
||||
|
||||
|
||||
def test_file_server_in_cloud():
|
||||
# You need to provide the directory containing the app file.
|
||||
app_dir = "docs/source-app/examples/file_server"
|
||||
with run_app_in_cloud(app_dir) as (admin_page, view_page, get_logs_fn):
|
||||
"""# 1. `admin_page` and `view_page` are playwright Page Objects.
|
||||
|
||||
# Check out https://playwright.dev/python/ doc to learn more.
|
||||
# You can click the UI and trigger actions.
|
||||
|
||||
# 2. By calling logs = get_logs_fn(),
|
||||
# you get all the logs currently on the admin page.
|
||||
"""
|
|
@ -1,12 +0,0 @@
|
|||
|
||||
.. _fileserver_example:
|
||||
|
||||
###################
|
||||
Build a File Server
|
||||
###################
|
||||
|
||||
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>` and read the `Drive article <https://lightning.ai/lightning-docs/glossary/storage/drive_content.html>`_.
|
||||
|
||||
----
|
||||
|
||||
.. include:: file_server_content.rst
|
|
@ -1,82 +0,0 @@
|
|||
*********
|
||||
Objective
|
||||
*********
|
||||
|
||||
Create a simple application where users can upload files and list the uploaded files.
|
||||
|
||||
----
|
||||
|
||||
*****************
|
||||
Final Application
|
||||
*****************
|
||||
|
||||
Here is a recording of the final application built in this example tested with pytest.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<iframe width="100%" height="290" src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/file_server.mp4" frameborder="0" allowfullscreen></iframe>
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
System Design
|
||||
*************
|
||||
|
||||
In order to create such an application, we need to build two components and an application:
|
||||
|
||||
* A **File Server Component** that gives you the ability to download or list files shared with your application. This is particularly useful when you want to trigger an ML job but your users need to provide their own data or if the user wants to download the trained checkpoints.
|
||||
|
||||
* A **Test File Server** Component to interact with the file server.
|
||||
|
||||
* An application putting everything together and its associated pytest tests.
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
Let's dive into how to create such an application and component:
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Implement the File Server general structure
|
||||
:description: Put together the shape of the component
|
||||
:col_css: col-md-4
|
||||
:button_link: file_server_step_1.html
|
||||
:height: 180
|
||||
:tag: Basic
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Implement the File Server upload and list files methods
|
||||
:description: Add the core functionalities to the component
|
||||
:col_css: col-md-4
|
||||
:button_link: file_server_step_2.html
|
||||
:height: 180
|
||||
:tag: Basic
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Implement a File Server Testing Component
|
||||
:description: Create a component to test the file server
|
||||
:col_css: col-md-4
|
||||
:button_link: file_server_step_3.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Implement tests for the File Server component with pytest
|
||||
:description: Create an app to validate the upload and list files endpoints
|
||||
:col_css: col-md-4
|
||||
:button_link: file_server_step_4.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,11 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
*********************************************
|
||||
1. Implement the FileServer general structure
|
||||
*********************************************
|
||||
|
||||
Let's dive in on how to create such a component with the code below.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 1-44, 132-158
|
||||
:emphasize-lines: 16, 51-
|
|
@ -1,37 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
**********************************************************
|
||||
2. Implement the File Server upload and list_files methods
|
||||
**********************************************************
|
||||
|
||||
Let's dive in on how to implement such methods.
|
||||
|
||||
***************************
|
||||
Implement the upload method
|
||||
***************************
|
||||
|
||||
In this method, we are creating a stream between the uploaded file and the uploaded file stored on the file server disk.
|
||||
|
||||
Once the file is uploaded, we are putting the file into the :class:`~lightning_app.storage.drive.Drive`, so it becomes persistent and accessible to all components.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 13, 52-100
|
||||
:emphasize-lines: 49
|
||||
|
||||
*******************************
|
||||
Implement the list_files method
|
||||
*******************************
|
||||
|
||||
First, in this method, we get the file in the file server filesystem, if available in the Drive. Once done, we list the files under the provided paths and return the results.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 13, 101-131
|
||||
:emphasize-lines: 9
|
||||
|
||||
|
||||
*******************
|
||||
Implement utilities
|
||||
*******************
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 13, 46-51
|
|
@ -1,16 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
********************************************
|
||||
3. Implement a File Server Testing Component
|
||||
********************************************
|
||||
|
||||
Let's dive in on how to implement a testing component for a server.
|
||||
|
||||
This component needs to test two things:
|
||||
|
||||
* The **/upload_file/** endpoint by creating a file and sending its content to it.
|
||||
|
||||
* The **/** endpoint listing files, by validating that the previously uploaded file is present in the response.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 161-183
|
|
@ -1,86 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
************************************************************
|
||||
4. Implement tests for the File Server component with pytest
|
||||
************************************************************
|
||||
|
||||
Let's create a simple Lightning App (App) with our **File Server** and the **File Server Test** components.
|
||||
|
||||
Once the File Server is up and running, we'll execute the **test_file_server** LightningWork and when both calls are successful, we exit the App using ``self._exit``.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 186-216
|
||||
|
||||
|
||||
Simply create a ``test.py`` file with the following code and run ``pytest test.py``
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 218-222
|
||||
|
||||
To test the App in the cloud, create a ``cloud_test.py`` file with the following code and run ``pytest cloud_test.py``. Under the hood, we are using the end-to-end testing `playwright <https://playwright.dev/python/>`_ library so you can interact with the UI.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 224-
|
||||
|
||||
----
|
||||
|
||||
********************
|
||||
Test the application
|
||||
********************
|
||||
|
||||
Clone the lightning repo and run the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pytest docs/source-app/examples/file_server/app.py --capture=no -v
|
||||
|
||||
----
|
||||
|
||||
******************
|
||||
Find more examples
|
||||
******************
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a DAG
|
||||
:description: Create a dag pipeline
|
||||
:col_css: col-md-4
|
||||
:button_link: ../dag/dag.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Github Repo Script Runner
|
||||
:description: Run any script on github in the cloud
|
||||
:col_css: col-md-4
|
||||
:button_link: ../github_repo_runner/github_repo_runner.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a HPO Sweeper
|
||||
:description: Train multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../hpo/hpo.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Model Server
|
||||
:description: Serve multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../model_server/model_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1 +0,0 @@
|
|||
name: github_repo_runner
|
|
@ -1,299 +0,0 @@
|
|||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from functools import partial
|
||||
from subprocess import Popen
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from lightning import BuildConfig, CloudCompute, LightningApp, LightningFlow
|
||||
from lightning.app import structures
|
||||
from lightning.app.components.python import TracerPythonScript
|
||||
from lightning.app.frontend import StreamlitFrontend
|
||||
from lightning.app.storage.path import Path
|
||||
from lightning.app.utilities.state import AppState
|
||||
|
||||
|
||||
class GithubRepoRunner(TracerPythonScript):
    def __init__(
        self,
        id: str,
        github_repo: str,
        script_path: str,
        script_args: List[str],
        requirements: List[str],
        cloud_compute: Optional[CloudCompute] = None,
        **kwargs: Any,
    ):
        """The GithubRepoRunner Component clones a repo, runs a specific script with provided arguments and collects
        logs.

        Arguments:
            id: Identifier of the component.
            github_repo: The Github Repo URL to clone.
            script_path: The path to the script to execute.
            script_args: The arguments to be provided to the script.
            requirements: The python requirements to run the script.
            cloud_compute: The object to select the cloud instance.
        """
        super().__init__(
            script_path=script_path,
            script_args=script_args,
            cloud_compute=cloud_compute,
            cloud_build_config=BuildConfig(requirements=requirements),
            **kwargs,
        )
        self.id = id
        self.github_repo = github_repo
        # Captured stdout of the executed script; replaced by a string after ``run``.
        self.logs = []

    def run(self, *args, **kwargs):
        """Clone the repo, execute the script, and store its stdout in ``self.logs``."""
        # 1. Hack: Patch stdout so we can capture the logs.
        original_stdout = sys.stdout
        string_io = io.StringIO()
        sys.stdout = string_io

        try:
            # 2: Use git command line to clone the repo.
            repo_name = self.github_repo.split("/")[-1].replace(".git", "")
            cwd = os.path.dirname(__file__)
            subprocess.Popen(f"git clone {self.github_repo}", cwd=cwd, shell=True).wait()

            # 3: Execute the parent run method of the TracerPythonScript class.
            os.chdir(os.path.join(cwd, repo_name))
            super().run(*args, **kwargs)

            # 4: Get all the collected logs and add them to the state.
            # This isn't optimal as heavy, but works for this demo purpose.
            self.logs = string_io.getvalue()
        finally:
            # BUGFIX: stdout was previously never restored, leaving the process with a
            # closed StringIO as ``sys.stdout`` (and leaking the redirect on failure).
            sys.stdout = original_stdout
            string_io.close()

    def configure_layout(self):
        """Expose this work as a single UI tab named after its id."""
        return {"name": self.id, "content": self}
|
||||
|
||||
|
||||
class PyTorchLightningGithubRepoRunner(GithubRepoRunner):
    """A ``GithubRepoRunner`` tailored to PyTorch Lightning scripts.

    Adds a Tensorboard server to the traced ``Trainer`` and, once the script
    finishes, collects the best checkpoint path and score into the state.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filled in by ``on_after_run`` once the traced script completes.
        self.best_model_path = None
        self.best_model_score = None

    def configure_tracer(self):
        """Return a tracer that patches ``Trainer.__init__`` to launch Tensorboard on train start."""
        from pytorch_lightning import Trainer
        from pytorch_lightning.callbacks import Callback

        tracer = super().configure_tracer()

        class TensorboardServerLauncher(Callback):
            def __init__(self, work):
                # The provided `work` is the
                # current ``PyTorchLightningScript`` work.
                self.w = work

            def on_train_start(self, trainer, *_):
                # Add `host` and `port` for tensorboard to work in the cloud.
                cmd = f"tensorboard --logdir='{trainer.logger.log_dir}'"
                server_args = f"--host {self.w.host} --port {self.w.port}"
                Popen(cmd + " " + server_args, shell=True)

        def trainer_pre_fn(self, *args, work=None, **kwargs):
            # Intercept Trainer __init__ call
            # and inject a ``TensorboardServerLauncher`` component.
            # NOTE(review): assumes the traced script always passes a ``callbacks``
            # list to the Trainer — confirm against the target script.
            kwargs["callbacks"].append(TensorboardServerLauncher(work))
            return {}, args, kwargs

        # 5. Patch the `__init__` method of the Trainer
        # to inject our callback with a reference to the work.
        tracer.add_traced(Trainer, "__init__", pre_fn=partial(trainer_pre_fn, work=self))
        return tracer

    def on_after_run(self, end_script_globals):
        """Collect the best checkpoint from the finished script and expose it through the state."""
        import torch

        # 1. Once the script has finished to execute,
        # we can collect its globals and access any objects.
        trainer = end_script_globals["cli"].trainer
        checkpoint_callback = trainer.checkpoint_callback
        lightning_module = trainer.lightning_module

        # 2. From the checkpoint_callback,
        # we are accessing the best model weights
        checkpoint = torch.load(checkpoint_callback.best_model_path)

        # 3. Load the best weights and torchscript the model.
        lightning_module.load_state_dict(checkpoint["state_dict"])
        lightning_module.to_torchscript(f"{self.name}.pt")

        # 4. Use lightning.app.storage.Path to create a reference to the
        # torch scripted model. In the cloud with multiple machines,
        # by simply passing this reference to another work,
        # it triggers automatically a file transfer.
        self.best_model_path = Path(f"{self.name}.pt")

        # 5. Keep track of the metrics.
        self.best_model_score = float(checkpoint_callback.best_model_score)
|
||||
|
||||
|
||||
class KerasGithubRepoRunner(GithubRepoRunner):
    """Placeholder runner for Keras scripts. Left to the users to implement."""
|
||||
|
||||
|
||||
class TensorflowGithubRepoRunner(GithubRepoRunner):
    """Placeholder runner for Tensorflow scripts. Left to the users to implement."""
|
||||
|
||||
|
||||
# Maps the framework name selected in the UI to the runner class to instantiate.
GITHUB_REPO_RUNNERS = {
    "PyTorch Lightning": PyTorchLightningGithubRepoRunner,
    "Keras": KerasGithubRepoRunner,
    "Tensorflow": TensorflowGithubRepoRunner,
}
|
||||
|
||||
|
||||
class Flow(LightningFlow):
    """Flow that turns UI-submitted requests into dynamically created runner works."""

    def __init__(self):
        super().__init__()
        # 1: Keep track of the requests within the state
        self.requests = []
        # 2: Create a dictionary of components.
        self.ws = structures.Dict()

    def run(self):
        # Iterate continuously over all requests
        for request_id, request in enumerate(self.requests):
            self._handle_request(request_id, deepcopy(request))

    def _handle_request(self, request_id: int, request: Dict):
        """Create (if needed) and run the work for one request, then record its results.

        Args:
            request_id: Index of the request within ``self.requests``.
            request: Deep copy of the request dict (mutated below via ``pop``).
        """
        # 1: Create a name and find selected framework
        name = f"w_{request_id}"
        ml_framework = request["train"].pop("ml_framework")

        # 2: If the component hasn't been created yet, create it.
        if name not in self.ws:
            work_cls = GITHUB_REPO_RUNNERS[ml_framework]
            work = work_cls(id=request["id"], **request["train"])
            self.ws[name] = work

        # 3: Run the component
        self.ws[name].run()

        # 4: Once the component has finished,
        # add metadata to the original request for the UI.
        # NOTE(review): only ``PyTorchLightningGithubRepoRunner`` defines
        # ``best_model_path`` — confirm other runners cannot reach this point.
        if self.ws[name].best_model_path:
            request = self.requests[request_id]
            request["best_model_score"] = self.ws[name].best_model_score
            request["best_model_path"] = self.ws[name].best_model_path

    def configure_layout(self):
        # Create a StreamLit UI for the user to run his Github Repo.
        return StreamlitFrontend(render_fn=render_fn)
|
||||
|
||||
|
||||
def page_1__create_new_run(state):
    """Render the form page that lets a user submit a new run request.

    On submit, appends a request dict to ``state.requests``; the flow picks it
    up and dynamically creates the associated work.
    """
    import streamlit as st

    st.markdown("# Create a new Run 🎈")

    # 1: Collect arguments from the users
    id = st.text_input("Name your run", value="my_first_run")
    github_repo = st.text_input(
        "Enter a Github Repo URL", value="https://github.com/Lightning-AI/lightning-quick-start.git"
    )

    default_script_args = "--trainer.max_epochs=5 --trainer.limit_train_batches=4 --trainer.limit_val_batches=4 --trainer.callbacks=ModelCheckpoint --trainer.callbacks.monitor=val_acc"
    default_requirements = "torchvision, pytorch_lightning, jsonargparse[signatures]"

    script_path = st.text_input("Enter your script to run", value="train_script.py")
    script_args = st.text_input("Enter your base script arguments", value=default_script_args)
    requirements = st.text_input("Enter your requirements", value=default_requirements)
    ml_framework = st.radio("Select your ML Training Frameworks", options=["PyTorch Lightning", "Keras", "Tensorflow"])

    # BUGFIX: ``not in ("PyTorch Lightning")`` tested substring membership in a plain
    # string — the parentheses did not create a tuple. Use a one-element tuple so
    # this is a real membership test on the supported frameworks.
    if ml_framework not in ("PyTorch Lightning",):
        st.write(f"{ml_framework} isn't supported yet.")
        return

    clicked = st.button("Submit")

    # 2: If clicked, create a new request.
    if clicked:
        new_request = {
            "id": id,
            "train": {
                "github_repo": github_repo,
                "script_path": script_path,
                "script_args": script_args.split(" "),
                "requirements": requirements.split(" "),
                "ml_framework": ml_framework,
            },
        }
        # 3: IMPORTANT: Add a new request to the state in-place.
        # The flow receives the UI request and dynamically creates
        # and runs the associated work from the request information.
        state.requests = state.requests + [new_request]
|
||||
|
||||
|
||||
def page_2__view_run_lists(state):
    """Render one expandable section per submitted run showing its config, logs, work state, and score."""
    import streamlit as st

    st.markdown("# Run Lists 🎈")
    # 1: Iterate through all the requests in the state.
    for i, r in enumerate(state.requests):
        i = str(i)
        # 2: Display information such as request, logs, work state, model score.
        work = state._state["structures"]["ws"]["works"][f"w_{i}"]
        with st.expander(f"Expand to view Run {i}", expanded=False):
            # BUGFIX: all checkboxes previously shared ``key=i``; Streamlit widget keys
            # must be unique per page, so each checkbox now gets its own key.
            if st.checkbox("Expand to view your configuration", key=f"config_{i}"):
                st.json(r)
            if st.checkbox("Expand to view logs", key=f"logs_{i}"):
                st.code(body=work["vars"]["logs"])
            if st.checkbox("Expand to view your work state", key=f"state_{i}"):
                work["vars"].pop("logs")
                st.json(work)
            best_model_score = r.get("best_model_score", None)
            if best_model_score:
                if st.checkbox("Expand to view your run performance", key=f"perf_{i}"):
                    st.json({"best_model_score": best_model_score, "best_model_path": r.get("best_model_path")})
|
||||
|
||||
|
||||
def page_3__view_app_state(state):
    """Render a debug page dumping the entire raw App state."""
    import streamlit as st

    header = "# App State 🎈"
    st.markdown(header)
    # Show the full internal state dict so users can inspect everything the App tracks.
    st.write(state._state)
|
||||
|
||||
|
||||
def render_fn(state: AppState):
    """Entry point of the StreamlitFrontend: route to the page selected in the sidebar."""
    import streamlit as st

    # Build the page registry, binding the shared App state into each page callback.
    pages = {}
    for title, page_fn in (
        ("Create a new Run", page_1__create_new_run),
        ("View your Runs", page_2__view_run_lists),
        ("View the App state", page_3__view_app_state),
    ):
        pages[title] = partial(page_fn, state=state)

    choice = st.sidebar.selectbox("Select a page", pages.keys())
    pages[choice]()
|
||||
|
||||
|
||||
class RootFlow(LightningFlow):
    """Top-level flow wiring the request-handling ``Flow`` into the App layout."""

    def __init__(self):
        super().__init__()
        # Create the child flow that manages user requests.
        self.flow = Flow()

    def run(self):
        # Delegate execution to the child flow.
        self.flow.run()

    def configure_layout(self):
        """Return the main StreamLit tab plus one tab per dynamically created work."""
        # 1: The main StreamLit UI tab.
        tabs = [{"name": "Run your Github Repo", "content": self.flow}]
        # 2: Append a tab for every work created at runtime.
        tabs.extend(work.configure_layout() for work in self.flow.ws.values())
        # 3: Return the full list of tabs.
        return tabs
|
||||
|
||||
|
||||
# Entry point: ``lightning run app`` discovers this module-level ``app`` object.
app = LightningApp(RootFlow())
|
|
@ -1,13 +0,0 @@
|
|||
.. _github_repo_script_runner_example:
|
||||
|
||||
#################################
|
||||
Build a Github Repo Script Runner
|
||||
#################################
|
||||
|
||||
**Audience:** Users that want to create interactive applications which runs Github Repo in the cloud at any scale for multiple users.
|
||||
|
||||
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>` and read the docstring of of :class:`~lightning_app.components.python.tracer.TracerPythonScript` component.
|
||||
|
||||
----
|
||||
|
||||
.. include:: github_repo_runner_content.rst
|
|
@ -1,98 +0,0 @@
|
|||
|
||||
*********
|
||||
Objective
|
||||
*********
|
||||
|
||||
Create a simple application where users can enter information in a UI to run a given PyTorch Lightning Script from a given Github Repo with optionally some extra python requirements and arguments.
|
||||
|
||||
Furthermore, the users should be able to monitor their training progress in real-time, view the logs, and get the best-monitored metric and associated checkpoint for their models.
|
||||
|
||||
----
|
||||
|
||||
*****************
|
||||
Final Application
|
||||
*****************
|
||||
|
||||
Here is a recording of the final application built in this example. The example is around 200 lines in total and should give you a great foundation to build your own Lightning App.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/github_app.png" width="100%">
|
||||
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/github_app.mp4" type="video/mp4" width="100%">
|
||||
</video>
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
System Design
|
||||
*************
|
||||
|
||||
In order to create such application, we need to build several components:
|
||||
|
||||
* A GithubRepoRunner Component that clones a repo, runs a specific script with provided arguments and collects logs.
|
||||
|
||||
* A PyTorch Lightning GithubRepoRunner Component that augments the GithubRepoRunner component to track PyTorch Lightning Trainer.
|
||||
|
||||
* A UI for users to provide the information needed to dynamically trigger a new execution.
|
||||
|
||||
* A Flow to dynamically create GithubRepoRunner once a user submits information from the UI.
|
||||
|
||||
Let's dive in on how to create such a component.
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Implement the GithubRepoRunner Component
|
||||
:description: Clone and execute script from a GitHub Repo.
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_1.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
|
||||
:description: Automate PyTorch Lightning execution
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_2.html
|
||||
:height: 180
|
||||
:tag: Advanced
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Implement the Flow to manage user requests
|
||||
:description: Dynamically create GithubRepoRunner
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_3.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Implement the UI with StreamLit
|
||||
:description: Several pages application
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_4.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 5. Putting everything together
|
||||
:description:
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_5.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,62 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
*******************************************
|
||||
1. Implement the GithubRepoRunner Component
|
||||
*******************************************
|
||||
|
||||
The GithubRepoRunner Component clones a repo, runs a specific script with provided arguments and collects logs.
|
||||
|
||||
Let's dive in on how to create such a component with the code below.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: -72
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
|
||||
:description: Automate PyTorch Lightning execution
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_2.html
|
||||
:height: 180
|
||||
:tag: Advanced
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Implement the Flow to manage user requests
|
||||
:description: Dynamically create GithubRepoRunner
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_3.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Implement the UI with StreamLit
|
||||
:description: Several pages application
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_4.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 5. Putting everything together
|
||||
:description:
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_5.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,68 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
*************************************************************
|
||||
2. Implement the PyTorch Lightning GithubRepoRunner Component
|
||||
*************************************************************
|
||||
|
||||
The PyTorch Lightning GithubRepoRunner Component subclasses the GithubRepoRunner but tailors the execution experience to PyTorch Lightning.
|
||||
|
||||
As a matter of fact, this component adds two primary tailored features for PyTorch Lightning users:
|
||||
|
||||
* It injects dynamically a custom callback ``TensorboardServerLauncher`` in the PyTorch Lightning Trainer to start a tensorboard server so it can be exposed in Lightning App UI.
|
||||
|
||||
* Once the script has run, the ``on_after_run`` hook of the :class:`~lightning_app.components.python.tracer.TracerPythonScript` is invoked with the script globals, meaning we can collect anything we need. In particular, we are reloading the best model, torch scripting it, and storing its path in the state alongside the best metric score.
|
||||
|
||||
Let's dive in on how to create such a component with the code below.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 75-136
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Implement the GithubRepoRunner Component
|
||||
:description: Clone and execute script from a GitHub Repo.
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_1.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Implement the Flow to manage user requests
|
||||
:description: Dynamically create GithubRepoRunner
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_3.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Implement the UI with StreamLit
|
||||
:description: Several pages application
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_4.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 5. Putting everything together
|
||||
:description:
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_5.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,62 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
*********************************************
|
||||
3. Implement the Flow to manage user requests
|
||||
*********************************************
|
||||
|
||||
In step 1 and 2, we have implemented ``GithubRepoRunner`` and ``PyTorchLightningGithubRepoRunner`` components.
|
||||
|
||||
Now, we are going to create a component to dynamically handle user requests.
|
||||
Let's dive in on how to create such a component with the code below.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 138-187
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Implement the GithubRepoRunner Component
|
||||
:description: Clone and execute script from a GitHub Repo.
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_1.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
|
||||
:description: Automate PyTorch Lightning execution
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_2.html
|
||||
:height: 180
|
||||
:tag: Advanced
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Implement the UI with StreamLit
|
||||
:description: Several pages application
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_4.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
|
||||
.. displayitem::
|
||||
:header: 5. Putting everything together
|
||||
:description:
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_5.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,93 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
**********************************
|
||||
4. Implement the UI with StreamLit
|
||||
**********************************
|
||||
|
||||
In step 3, we have implemented a flow that dynamically creates a Work when a new request is added to the requests list.
|
||||
|
||||
From the UI, we create 3 pages with `StreamLit <https://streamlit.io/>`_:
|
||||
|
||||
* **Page 1**: Create a form to add a new request to the flow state **requests**.
|
||||
|
||||
* **Page 2**: Iterate through all the requests and display associated information.
|
||||
|
||||
* **Page 3**: Display the entire App State.
|
||||
|
||||
****************
|
||||
Render All Pages
|
||||
****************
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 263-274
|
||||
|
||||
|
||||
******
|
||||
Page 1
|
||||
******
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 189-231
|
||||
:emphasize-lines: 43
|
||||
|
||||
******
|
||||
Page 2
|
||||
******
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 233-255
|
||||
|
||||
******
|
||||
Page 3
|
||||
******
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 257-261
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Implement the GithubRepoRunner Component
|
||||
:description: Clone and execute script from a GitHub Repo.
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_1.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Implement the PyTorch Lightning GithubRepoRunner Component
|
||||
:description: Automate PyTorch Lightning execution
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_2.html
|
||||
:height: 180
|
||||
:tag: Advanced
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Implement the Flow to manage user requests
|
||||
:description: Dynamically create GithubRepoRunner
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_3.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 5. Putting everything together
|
||||
:description:
|
||||
:col_css: col-md-4
|
||||
:button_link: github_repo_runner_step_5.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,77 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
******************************
|
||||
5. Putting everything together
|
||||
******************************
|
||||
|
||||
Let's dive in on how to create such a component with the code below.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
:lines: 277-
|
||||
|
||||
|
||||
*******************
|
||||
Run the application
|
||||
*******************
|
||||
|
||||
Clone the lightning repo and run the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app docs/source-app/examples/github_repo_runner/app.py
|
||||
|
||||
Add **--cloud** to run this application in the cloud.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app docs/source-app/examples/github_repo_runner/app.py --cloud
|
||||
|
||||
----
|
||||
|
||||
******************
|
||||
Find more examples
|
||||
******************
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a DAG
|
||||
:description: Create a dag pipeline
|
||||
:col_css: col-md-4
|
||||
:button_link: ../dag/dag.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a File Server
|
||||
:description: Train multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../file_server/file_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a HPO Sweeper
|
||||
:description: Train multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../hpo/hpo.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Model Server
|
||||
:description: Serve multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../model_server/model_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,50 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
#################
|
||||
Hands-on Examples
|
||||
#################
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a DAG
|
||||
:description: Learn how to orchestrate workflows
|
||||
:col_css: col-md-6
|
||||
:button_link: dag/dag.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a File Server
|
||||
:description: Learn how to upload and download files
|
||||
:col_css: col-md-6
|
||||
:button_link: file_server/file_server.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Github Repo Script Runner
|
||||
:description: Learn how to configure dynamic execution from the UI
|
||||
:col_css: col-md-6
|
||||
:button_link: github_repo_runner/github_repo_runner.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a HPO Sweeper
|
||||
:description: Learn how to scale your training
|
||||
:col_css: col-md-6
|
||||
:button_link: hpo/hpo.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Model Server
|
||||
:description: Learn how to serve your models
|
||||
:col_css: col-md-6
|
||||
:button_link: model_server_app_content.html
|
||||
:height: 180
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,41 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
#######################################
|
||||
Implement an HPO component from scratch
|
||||
#######################################
|
||||
|
||||
**Audience:** Users who want to understand how to implement sweep training from scratch.
|
||||
|
||||
**Prereqs:** Finish Intermediate Level.
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Examples
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Step 1: Implement an HPO component with the Lightning Works.
|
||||
:description: Learn how it works under the hood
|
||||
:col_css: col-md-4
|
||||
:button_link: hpo_wo.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Step 2: Add the flow to your HPO component
|
||||
:description: Learn how it works under the hood
|
||||
:col_css: col-md-4
|
||||
:button_link: hpo_wi.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,80 +0,0 @@
|
|||
.. hpo:
|
||||
.. _hpo_example:
|
||||
|
||||
|
||||
#######################################################
|
||||
Build a Lightning Hyperparameter Optimization (HPO) App
|
||||
#######################################################
|
||||
|
||||
*******************
|
||||
A bit of background
|
||||
*******************
|
||||
|
||||
Traditionally, developing machine learning (ML) products requires choosing among a large space of
|
||||
hyperparameters while creating and training the ML models. Hyperparameter optimization
|
||||
(HPO) aims to find a well-performing hyperparameter configuration for a given ML model
|
||||
on a dataset at hand, including the ML model,
|
||||
its hyperparameters, and other data processing steps.
|
||||
|
||||
HPOs free the human expert from a tedious and error-prone, manual hyperparameter tuning process.
|
||||
|
||||
As an example, in the famous `scikit-learn <https://scikit-learn.org/stable/>`_ library,
|
||||
hyperparameters are passed as arguments to the constructor of
|
||||
the estimator classes such as ``C`` kernel for
|
||||
`Support Vector Classifier <https://scikit-learn.org/stable/modules/classes.html?highlight=svm#module-sklearn.svm>`_, etc.
|
||||
|
||||
It is possible and recommended to search the hyperparameter space for the best validation score.
|
||||
|
||||
An HPO search consists of:
|
||||
|
||||
* an objective method
|
||||
* a defined parameter space
|
||||
* a method for searching or sampling candidates
|
||||
|
||||
A naive method for sampling candidates is grid search, which exhaustively considers all
|
||||
hyperparameter combinations from a user-specified grid.
|
||||
|
||||
Fortunately, HPO is an active area of research, and many methods have been developed to
|
||||
optimize the time required to get strong candidates.
|
||||
|
||||
In the following tutorial, you will learn how to use Lightning together with `Optuna <https://optuna.org/>`_.
|
||||
|
||||
`Optuna <https://optuna.org/>`_ is an open source HPO framework to automate hyperparameter search.
|
||||
Out-of-the-box, it provides efficient algorithms to search large spaces and prune unpromising trials for faster results.
|
||||
|
||||
First, you will learn about the best practices on how to implement HPO without the Lightning Framework.
|
||||
Secondly, we will dive into a working HPO application with Lightning, and finally create a neat
|
||||
`HiPlot UI <https://facebookresearch.github.io/hiplot/_static/demo/demo_basic_usage.html?hip.filters=%5B%5D&hip.color_by=%22dropout%22&hip.PARALLEL_PLOT.order=%5B%22uid%22%2C%22dropout%22%2C%22lr%22%2C%22loss%22%2C%22optimizer%22%5D>`_
|
||||
for our application.
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Examples
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Re-use an existing HPO component
|
||||
:description: Learn how to use Lightning HPO with your app.
|
||||
:col_css: col-md-4
|
||||
:button_link: lightning_hpo.html
|
||||
:height: 180
|
||||
:tag: Basic
|
||||
|
||||
.. displayitem::
|
||||
:header: Implement an HPO component from scratch
|
||||
:description: Learn how it works under the hood
|
||||
:col_css: col-md-4
|
||||
:button_link: build_from_scratch.html
|
||||
:height: 180
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,57 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
##########################################
|
||||
Step 2: Add the flow to your HPO component
|
||||
##########################################
|
||||
|
||||
**Audience:** Users who want to understand how to implement HPO training from scratch with Lightning.
|
||||
|
||||
**Prereqs:** Level 17+
|
||||
|
||||
----
|
||||
|
||||
Thanks to the simplified version, you should have a good grasp on how to implement HPO with Optuna.
|
||||
|
||||
As the :class:`~lightning_app.core.app.LightningApp` handles the Infinite Loop,
|
||||
it has been removed from within the run method of the HPORootFlow.
|
||||
|
||||
However, the ``run`` method code is the same as the one defined above.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_hpo/app_wo_ui.py
|
||||
:language: python
|
||||
|
||||
The ``ObjectiveWork`` is sub-classing
|
||||
the built-in :class:`~lightning_app.components.python.TracerPythonScript`
|
||||
which enables launching scripts and more.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_hpo/objective.py
|
||||
:language: python
|
||||
|
||||
Finally, let's add the ``HiPlotFlow`` component to visualize our hyperparameter optimization.
|
||||
|
||||
The metric and sampled parameters are added to the ``self.hi_plot.data`` list, enabling
|
||||
updates to the dashboard in near-realtime.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_hpo/app_wi_ui.py
|
||||
:diff: ../../../examples/app_hpo/app_wo_ui.py
|
||||
|
||||
Here is the associated code with the ``HiPlotFlow`` component.
|
||||
|
||||
In the ``render_fn`` method, the state of the ``HiPlotFlow`` is passed.
|
||||
The ``state.data`` is accessed as it contains the metric and sampled parameters.
|
||||
|
||||
.. literalinclude:: ../../../examples/app_hpo/hyperplot.py
|
||||
|
||||
Run the HPO application with the following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ lightning run app examples/app_hpo/app_wi_ui.py
|
||||
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
|
||||
{0: ..., 1: ..., ..., 5: ...}
|
||||
|
||||
Here is what the UI looks like when launched:
|
||||
|
||||
.. image:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/hpo_ui_2.gif
|
||||
:width: 100 %
|
||||
:alt: Alternative text
|
|
@ -1,57 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
###########################################################
|
||||
Step 1: Implement an HPO component with the Lightning Works
|
||||
###########################################################
|
||||
|
||||
**Audience:** Users who want to understand how to implement HPO training from scratch.
|
||||
|
||||
**Prereqs:** Level 17+
|
||||
|
||||
----
|
||||
|
||||
In the example below, we are emulating the Lightning Infinite Loop.
|
||||
|
||||
We are assuming we have already defined an ``ObjectiveWork`` component which is responsible to run the objective method and track the metric through its state.
|
||||
|
||||
.. literalinclude:: ./hpo.py
|
||||
:language: python
|
||||
|
||||
We are running ``TOTAL_TRIALS`` trials by series of ``SIMULTANEOUS_TRIALS`` trials.
|
||||
When starting, ``TOTAL_TRIALS`` ``ObjectiveWork`` are created.
|
||||
|
||||
The entire code runs within an infinite loop as it would within Lightning.
|
||||
|
||||
When iterating through the Works, if the current ``objective_work`` hasn't started,
|
||||
some new parameters are sampled from the Optuna Study with our custom distributions
|
||||
and then passed to run method of the ``objective_work``.
|
||||
|
||||
The condition ``not objective_work.has_started`` will be ``False`` once ``objective_work.run()`` starts.
|
||||
|
||||
Also, the second condition ``objective_work.has_told_study`` will be ``True`` when the metric
|
||||
is defined within the state of the Work and has been shared with the study.
|
||||
|
||||
Finally, once the current ``SIMULTANEOUS_TRIALS`` have both registered their
|
||||
metric to the Optuna Study, simply increment ``NUM_TRIALS`` by ``SIMULTANEOUS_TRIALS`` to launch the next trials.
|
||||
|
||||
Below, you can find the simplified version of the ``ObjectiveWork`` where the metric is randomly sampled using NumPy.
|
||||
|
||||
In a realistic use case, the Work executes some user-defined code.
|
||||
|
||||
.. literalinclude:: ./objective.py
|
||||
:language: python
|
||||
|
||||
Here are the logs produced when running the application above:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ python docs/source-app/tutorials/hpo/hpo.py
|
||||
INFO: Your app has started. View it in your browser: http://127.0.0.1:7501/view
|
||||
# After you have clicked `run` on the UI.
|
||||
[I 2022-03-01 12:32:50,050] A new study created in memory with name: ...
|
||||
{0: 13.994859806481264, 1: 59.866743330127825, ..., 5: 94.65919769609225}
|
||||
|
||||
The following animation shows how this application works in the cloud:
|
||||
|
||||
.. image:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/hpo.gif
|
||||
:alt: Animation showing how to HPO works UI in a distributed manner.
|
|
@ -1,99 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
################################
|
||||
Re-use an existing HPO component
|
||||
################################
|
||||
|
||||
**Audience:** Users who want to easily get started with HPO training.
|
||||
|
||||
**Prereqs:** Level 8+
|
||||
|
||||
----
|
||||
|
||||
*********************
|
||||
Install Lightning HPO
|
||||
*********************
|
||||
|
||||
Lightning HPO provides a Pythonic implementation for Scalable Hyperparameter Tuning
|
||||
and relies on Optuna for providing state-of-the-art sampling hyper-parameters algorithms and efficient trial pruning strategies.
|
||||
|
||||
Find the `Lightning Sweeper App <https://lightning.ai/app/8FOWcOVsdf-Lightning%20Sweeper>`_ on `lightning.ai <https://lightning.ai/>`_ and its associated `Github repo <https://github.com/Lightning-AI/LAI-lightning-hpo-App>`_.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning install app lightning/hpo
|
||||
|
||||
*********************
|
||||
Lightning HPO Example
|
||||
*********************
|
||||
|
||||
In this tutorial, we are going to convert `Optuna Efficient Optimization Algorithms <https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/003_efficient_optimization_algorithms.html#sphx-glr-tutorial-10-key-features-003-efficient-optimization-algorithms-py>`_ into a Lightning App.
|
||||
|
||||
The Optuna example optimizes the value (example: learning-rate) of a ``SGDClassifier`` from ``sklearn`` trained over the `Iris Dataset <https://archive.ics.uci.edu/ml/datasets/iris>`_.
|
||||
|
||||
.. literalinclude:: ./optuna_reference.py
|
||||
:language: python
|
||||
|
||||
|
||||
As you can see, several trials were pruned (stopped) before they finished all of the iterations.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
A new study created in memory with name: no-name-4423c12c-22e1-4eaf-ba60-caf0020403c6
|
||||
Trial 0 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.00020629773477269024}. Best is trial 0 with value: 0.07894736842105265.
|
||||
Trial 1 finished with value: 0.368421052631579 and parameters: {'alpha': 0.0005250149151047217}. Best is trial 0 with value: 0.07894736842105265.
|
||||
Trial 2 finished with value: 0.052631578947368474 and parameters: {'alpha': 5.9086862655635784e-05}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 3 finished with value: 0.3421052631578947 and parameters: {'alpha': 0.07177263583415294}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 4 finished with value: 0.23684210526315785 and parameters: {'alpha': 1.7451874636151302e-05}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 5 pruned.
|
||||
Trial 6 finished with value: 0.10526315789473684 and parameters: {'alpha': 1.4943994864178649e-05}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 7 pruned.
|
||||
Trial 8 pruned.
|
||||
Trial 9 pruned.
|
||||
Trial 10 pruned.
|
||||
Trial 11 pruned.
|
||||
Trial 12 pruned.
|
||||
Trial 13 pruned.
|
||||
Trial 14 pruned.
|
||||
Trial 15 pruned.
|
||||
Trial 16 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.006166329613687364}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 17 pruned.
|
||||
Trial 18 pruned.
|
||||
Trial 19 pruned.
|
||||
|
||||
The example above has been re-organized in order to run as Lightning App.
|
||||
|
||||
.. literalinclude:: ./lightning_hpo_target.py
|
||||
:language: python
|
||||
|
||||
Now, your code can run at scale in the cloud, if needed, and it has a simple neat UI.
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/lightning_hpo_optimizer.png
|
||||
:alt: Lightning App UI
|
||||
:width: 100 %
|
||||
|
||||
As you can see, several trials were pruned (stopped) before they finished all of the iterations. Same as when using pure optuna.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
A new study created in memory with name: no-name-a93d848e-a225-4df3-a9c3-5f86680e295d
|
||||
Trial 0 finished with value: 0.23684210526315785 and parameters: {'alpha': 0.006779437004523296}. Best is trial 0 with value: 0.23684210526315785.
|
||||
Trial 1 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.008936151407006062}. Best is trial 1 with value: 0.07894736842105265.
|
||||
Trial 2 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.0035836511240528008}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 3 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.0005393218926409795}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 4 finished with value: 0.1578947368421053 and parameters: {'alpha': 6.572557493358585e-05}. Best is trial 2 with value: 0.052631578947368474.
|
||||
Trial 5 finished with value: 0.02631578947368418 and parameters: {'alpha': 0.0013953760106345603}. Best is trial 5 with value: 0.02631578947368418.
|
||||
Trail 6 pruned.
|
||||
Trail 7 pruned.
|
||||
Trail 8 pruned.
|
||||
Trail 9 pruned.
|
||||
Trial 10 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.00555435554783454}. Best is trial 5 with value: 0.02631578947368418.
|
||||
Trail 11 pruned.
|
||||
Trial 12 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.025624276147153992}. Best is trial 5 with value: 0.02631578947368418.
|
||||
Trial 13 finished with value: 0.07894736842105265 and parameters: {'alpha': 0.014613957457075546}. Best is trial 5 with value: 0.02631578947368418.
|
||||
Trail 14 pruned.
|
||||
Trail 15 pruned.
|
||||
Trail 16 pruned.
|
||||
Trial 17 finished with value: 0.052631578947368474 and parameters: {'alpha': 0.01028208215647372}. Best is trial 5 with value: 0.02631578947368418.
|
||||
Trail 18 pruned.
|
||||
Trail 19 pruned.
|
|
@ -1,53 +0,0 @@
|
|||
import optuna
|
||||
from lightning_hpo import BaseObjective, Optimizer
|
||||
from optuna.distributions import LogUniformDistribution
|
||||
from sklearn import datasets
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from lightning import LightningApp, LightningFlow
|
||||
|
||||
|
||||
class Objective(BaseObjective):
|
||||
def run(self, params):
|
||||
# WARNING: Don't forget to assign `params` to self,
|
||||
# so they get tracked in the state.
|
||||
self.params = params
|
||||
|
||||
iris = datasets.load_iris()
|
||||
classes = list(set(iris.target))
|
||||
train_x, valid_x, train_y, valid_y = train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
|
||||
|
||||
clf = SGDClassifier(alpha=params["alpha"])
|
||||
|
||||
for step in range(100):
|
||||
clf.partial_fit(train_x, train_y, classes=classes)
|
||||
intermediate_value = 1.0 - clf.score(valid_x, valid_y)
|
||||
|
||||
# WARNING: Assign to reports,
|
||||
# so the state is instantly sent to the flow.
|
||||
self.reports = self.reports + [[intermediate_value, step]]
|
||||
|
||||
self.best_model_score = 1.0 - clf.score(valid_x, valid_y)
|
||||
|
||||
def distributions(self):
|
||||
return {"alpha": LogUniformDistribution(1e-5, 1e-1)}
|
||||
|
||||
|
||||
class RootFlow(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.optimizer = Optimizer(
|
||||
objective_cls=Objective,
|
||||
n_trials=20,
|
||||
study=optuna.create_study(pruner=optuna.pruners.MedianPruner()),
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.optimizer.run()
|
||||
|
||||
def configure_layout(self):
|
||||
return {"name": "HyperPlot", "content": self.optimizer.hi_plot}
|
||||
|
||||
|
||||
app = LightningApp(RootFlow())
|
|
@ -1,36 +0,0 @@
|
|||
import logging
|
||||
import sys
|
||||
|
||||
import optuna
|
||||
from sklearn import datasets
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def objective(trial):
|
||||
iris = datasets.load_iris()
|
||||
classes = list(set(iris.target))
|
||||
train_x, valid_x, train_y, valid_y = train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
|
||||
|
||||
alpha = trial.suggest_float("alpha", 1e-5, 1e-1, log=True)
|
||||
clf = SGDClassifier(alpha=alpha)
|
||||
|
||||
for step in range(100):
|
||||
clf.partial_fit(train_x, train_y, classes=classes)
|
||||
|
||||
# Report intermediate objective value.
|
||||
intermediate_value = 1.0 - clf.score(valid_x, valid_y)
|
||||
trial.report(intermediate_value, step)
|
||||
|
||||
# Handle pruning based on the intermediate value.
|
||||
if trial.should_prune():
|
||||
raise optuna.TrialPruned()
|
||||
|
||||
return 1.0 - clf.score(valid_x, valid_y)
|
||||
|
||||
|
||||
# Add stream handler of stdout to show the messages
|
||||
logger = optuna.logging.get_logger("optuna")
|
||||
logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
|
||||
study.optimize(objective, n_trials=20)
|
|
@ -0,0 +1,3 @@
|
|||
############################
|
||||
Build a Model Deployment App
|
||||
############################
|
|
@ -1,34 +0,0 @@
|
|||
from locust_component import Locust
|
||||
from model_server import MLServer
|
||||
from train import TrainModel
|
||||
|
||||
from lightning import LightningApp, LightningFlow
|
||||
|
||||
|
||||
class TrainAndServe(LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.train_model = TrainModel()
|
||||
self.model_server = MLServer(
|
||||
name="mnist-svm",
|
||||
implementation="mlserver_sklearn.SKLearnModel",
|
||||
workers=8,
|
||||
)
|
||||
self.performance_tester = Locust(num_users=100)
|
||||
|
||||
def run(self):
|
||||
self.train_model.run()
|
||||
self.model_server.run(self.train_model.best_model_path)
|
||||
if self.model_server.alive():
|
||||
# The performance tester needs the model server to be up
|
||||
# and running to be started, so the URL is added in the UI.
|
||||
self.performance_tester.run(self.model_server.url)
|
||||
|
||||
def configure_layout(self):
|
||||
return [
|
||||
{"name": "Server", "content": self.model_server.url + "/docs"},
|
||||
{"name": "Server Testing", "content": self.performance_tester},
|
||||
]
|
||||
|
||||
|
||||
app = LightningApp(TrainAndServe())
|
|
@ -1,57 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
***********************************
|
||||
3. Build the Load Testing Component
|
||||
***********************************
|
||||
|
||||
Now, we are going to create a component to test the performance of your model server.
|
||||
|
||||
We are going to use a python performance testing tool called `Locust <https://github.com/locustio/locust>`_.
|
||||
|
||||
.. literalinclude:: ./locust_component.py
|
||||
|
||||
|
||||
Finally, once the component is done, we need to crate a ``locustfile.py`` file which defines the format of the request to send to your model server.
|
||||
|
||||
The endpoint to hit has the following format: ``/v2/models/{MODEL_NAME}/versions/{VERSION}/infer``.
|
||||
|
||||
.. literalinclude:: ./locustfile.py
|
||||
|
||||
|
||||
----
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Build a Train Component
|
||||
:description: Train a model and store its checkpoints with SKlearn
|
||||
:col_css: col-md-4
|
||||
:button_link: train.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Build a Model Server Component
|
||||
:description: Use MLServer to server your models
|
||||
:col_css: col-md-4
|
||||
:button_link: model_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Putting everything together.
|
||||
:description: Ensemble the components together and run the app
|
||||
:col_css: col-md-4
|
||||
:button_link: putting_everything_together.html
|
||||
:height: 150
|
||||
:tag: basic
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,43 +0,0 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
from lightning import LightningWork
|
||||
from lightning.app.utilities.packaging.build_config import BuildConfig
|
||||
|
||||
|
||||
class Locust(LightningWork):
|
||||
def __init__(self, num_users: int = 100):
|
||||
"""This component checks the performance of a server. The server url is passed to its run method.
|
||||
|
||||
Arguments:
|
||||
num_users: Number of users emulated by Locust
|
||||
"""
|
||||
# Note: Using the default port 8089 of Locust.
|
||||
super().__init__(
|
||||
port=8089,
|
||||
parallel=True,
|
||||
cloud_build_config=BuildConfig(requirements=["locust"]),
|
||||
)
|
||||
self.num_users = num_users
|
||||
|
||||
def run(self, load_tested_url: str):
|
||||
# 1: Create the locust command line.
|
||||
cmd = " ".join(
|
||||
[
|
||||
"locust",
|
||||
"--master-host",
|
||||
str(self.host),
|
||||
"--master-port",
|
||||
str(self.port),
|
||||
"--host",
|
||||
str(load_tested_url),
|
||||
"-u",
|
||||
str(self.num_users),
|
||||
]
|
||||
)
|
||||
# 2: Create another process with locust
|
||||
process = subprocess.Popen(cmd, cwd=os.path.dirname(__file__), shell=True)
|
||||
|
||||
# 3: Wait for the process to finish. As locust is a server,
|
||||
# this waits infinitely or if killed.
|
||||
process.wait()
|
|
@ -1,41 +0,0 @@
|
|||
from locust import FastHttpUser, task
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
class HelloWorldUser(FastHttpUser):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._prepare_inference_request()
|
||||
|
||||
@task
|
||||
def predict(self):
|
||||
self.client.post(
|
||||
"/v2/models/mnist-svm/versions/v0.0.1/infer",
|
||||
json=self.inference_request,
|
||||
)
|
||||
|
||||
def _prepare_inference_request(self):
|
||||
# The digits dataset
|
||||
digits = datasets.load_digits()
|
||||
|
||||
# To apply a classifier on this data,
|
||||
# we need to flatten the image, to
|
||||
# turn the data in a (samples, feature) matrix:
|
||||
n_samples = len(digits.images)
|
||||
data = digits.images.reshape((n_samples, -1))
|
||||
|
||||
# Split data into train and test subsets
|
||||
_, X_test, _, _ = train_test_split(data, digits.target, test_size=0.5, shuffle=False)
|
||||
|
||||
x_0 = X_test[0:1]
|
||||
self.inference_request = {
|
||||
"inputs": [
|
||||
{
|
||||
"name": "predict",
|
||||
"shape": x_0.shape,
|
||||
"datatype": "FP32",
|
||||
"data": x_0.tolist(),
|
||||
}
|
||||
]
|
||||
}
|
|
@ -1,90 +0,0 @@
|
|||
import json
|
||||
import subprocess
|
||||
|
||||
from lightning import LightningWork
|
||||
from lightning.app.storage import Path
|
||||
from lightning.app.utilities.packaging.build_config import BuildConfig
|
||||
|
||||
# ML_SERVER_URL = https://github.com/SeldonIO/MLServer
|
||||
|
||||
|
||||
class MLServer(LightningWork):
|
||||
|
||||
"""This components uses SeldonIO MLServer library.
|
||||
|
||||
The model endpoint: /v2/models/{MODEL_NAME}/versions/{VERSION}/infer.
|
||||
|
||||
Arguments:
|
||||
name: The name of the model for the endpoint.
|
||||
implementation: The model loader class.
|
||||
Example: "mlserver_sklearn.SKLearnModel".
|
||||
Learn more here: $ML_SERVER_URL/tree/master/runtimes
|
||||
workers: Number of server worker.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
implementation: str,
|
||||
workers: int = 1,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(
|
||||
parallel=True,
|
||||
cloud_build_config=BuildConfig(
|
||||
requirements=["mlserver", "mlserver-sklearn"],
|
||||
),
|
||||
**kwargs,
|
||||
)
|
||||
# 1: Collect the config's.
|
||||
self.settings = {
|
||||
"debug": True,
|
||||
"parallel_workers": workers,
|
||||
}
|
||||
self.model_settings = {
|
||||
"name": name,
|
||||
"implementation": implementation,
|
||||
}
|
||||
# 2: Keep track of latest version
|
||||
self.version = 1
|
||||
|
||||
def run(self, model_path: Path):
|
||||
"""The model is downloaded when the run method is invoked.
|
||||
|
||||
Arguments:
|
||||
model_path: The path to the trained model.
|
||||
"""
|
||||
# 1: Use the host and port at runtime so it works in the cloud.
|
||||
# $ML_SERVER_URL/blob/master/mlserver/settings.py#L50
|
||||
if self.version == 1:
|
||||
# TODO: Reload the next version model of the model.
|
||||
|
||||
self.settings.update({"host": self.host, "http_port": self.port})
|
||||
|
||||
with open("settings.json", "w") as f:
|
||||
json.dump(self.settings, f)
|
||||
|
||||
# 2. Store the model-settings
|
||||
# $ML_SERVER_URL/blob/master/mlserver/settings.py#L120
|
||||
self.model_settings["parameters"] = {
|
||||
"version": f"v0.0.{self.version}",
|
||||
"uri": str(model_path.absolute()),
|
||||
}
|
||||
with open("model-settings.json", "w") as f:
|
||||
json.dump(self.model_settings, f)
|
||||
|
||||
# 3. Launch the Model Server
|
||||
subprocess.Popen("mlserver start .", shell=True)
|
||||
|
||||
# 4. Increment the version for the next time run is called.
|
||||
self.version += 1
|
||||
|
||||
else:
|
||||
# TODO: Load the next model and unload the previous one.
|
||||
pass
|
||||
|
||||
def alive(self):
|
||||
# Current hack, when the url is available,
|
||||
# the server is up and running.
|
||||
# This would be cleaned out and automated.
|
||||
return self.url != ""
|
|
@ -1,48 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
***********************************
|
||||
2. Build the Model Server Component
|
||||
***********************************
|
||||
|
||||
In the code below, we use `MLServer <https://github.com/SeldonIO/MLServer>`_ which aims to provide an easy way to start serving your machine learning models through a REST and gRPC interface,
|
||||
fully compliant with KFServing's V2 Dataplane spec.
|
||||
|
||||
.. literalinclude:: ./model_server.py
|
||||
|
||||
----
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Build a Train Component
|
||||
:description: Train a model and store its checkpoints with SKlearn
|
||||
:col_css: col-md-4
|
||||
:button_link: train.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Build a Load Testing Component
|
||||
:description: Use Locust to test your model servers
|
||||
:col_css: col-md-4
|
||||
:button_link: load_testing.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Putting everything together.
|
||||
:description: Ensemble the components together and run the app
|
||||
:col_css: col-md-4
|
||||
:button_link: putting_everything_together.html
|
||||
:height: 150
|
||||
:tag: basic
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,15 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
.. _model_server_example:
|
||||
|
||||
####################
|
||||
Build a Model Server
|
||||
####################
|
||||
|
||||
**Audience:** Users who want to serve their trained models.
|
||||
|
||||
**Prerequisite**: Reach :ref:`level 16+ <intermediate_level>`.
|
||||
|
||||
----
|
||||
|
||||
.. include:: model_server_app_content.rst
|
|
@ -1,84 +0,0 @@
|
|||
|
||||
*********
|
||||
Objective
|
||||
*********
|
||||
|
||||
Create a simple application that trains and serves a `Sklearn <https://scikit-learn.org/stable/>`_ machine learning model with `MLServer from SeldonIO <https://github.com/SeldonIO/MLServer>`_
|
||||
|
||||
----
|
||||
|
||||
*****************
|
||||
Final Application
|
||||
*****************
|
||||
|
||||
Here is a gif of the final application built in this example.
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/ml_server_2.gif
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
System Design
|
||||
*************
|
||||
|
||||
In order to create such application, we need to build several components:
|
||||
|
||||
* A Model Train Component that trains a model and provides its trained weights
|
||||
|
||||
* A Model Server Component that serves as an API endpoint for the model generated by the **Model Train Component**.
|
||||
|
||||
* A Load Testing Component that tests the model server works as expected. This could be used to CI/CD the performance of newly generated models (left to the users).
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/model_server_app_2.png
|
||||
|
||||
Let's dive into the tutorial.
|
||||
|
||||
----
|
||||
|
||||
********
|
||||
Tutorial
|
||||
********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: 1. Build a Train Component
|
||||
:description: Train a model and store its checkpoints with SKlearn
|
||||
:col_css: col-md-4
|
||||
:button_link: train.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Build a Model Server Component
|
||||
:description: Use MLServer to server your models
|
||||
:col_css: col-md-4
|
||||
:button_link: model_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Build a Load Testing Component
|
||||
:description: Use Locust to test your model servers
|
||||
:col_css: col-md-4
|
||||
:button_link: load_testing.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Putting everything together.
|
||||
:description: Ensemble the components together and run the app
|
||||
:col_css: col-md-4
|
||||
:button_link: putting_everything_together.html
|
||||
:height: 150
|
||||
:tag: basic
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,80 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
******************************
|
||||
4. Putting everything together
|
||||
******************************
|
||||
|
||||
In the code below, we put together the **TrainWork**, the **MLServer** and the **Locust** components in an ``app.py`` file.
|
||||
|
||||
.. literalinclude:: ./app.py
|
||||
|
||||
|
||||
***********
|
||||
Run the App
|
||||
***********
|
||||
|
||||
To run the app, simply open a terminal and execute this command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app docs/source-app/examples/model_deploy_app/app.py
|
||||
|
||||
Here is a gif of the UI.
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/ml_server_2.gif
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
|
||||
Congrats, you have finished the **Build a Model Server** example !
|
||||
|
||||
----
|
||||
|
||||
******************
|
||||
Find more examples
|
||||
******************
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a DAG
|
||||
:description: Create a dag pipeline
|
||||
:col_css: col-md-4
|
||||
:button_link: ../dag/dag.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a File Server
|
||||
:description: Train multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../file_server/file_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a Github Repo Script Runner
|
||||
:description: Run code from the internet in the cloud
|
||||
:col_css: col-md-4
|
||||
:button_link: ../github_repo_runner/github_repo_runner.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: Build a HPO Sweeper
|
||||
:description: Train multiple models with different parameters
|
||||
:col_css: col-md-4
|
||||
:button_link: ../hpo/hpo.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,42 +0,0 @@
|
|||
import joblib
|
||||
from sklearn import datasets, svm
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from lightning import LightningWork
|
||||
from lightning.app.storage import Path
|
||||
|
||||
|
||||
class TrainModel(LightningWork):
|
||||
|
||||
"""This component trains a Sklearn SVC model on digits dataset."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
# 1: Add element to the state.
|
||||
self.best_model_path = None
|
||||
|
||||
def run(self):
|
||||
# 2: Load the Digits
|
||||
digits = datasets.load_digits()
|
||||
|
||||
# 3: To apply a classifier on this data,
|
||||
# we need to flatten the image, to
|
||||
# turn the data in a (samples, feature) matrix:
|
||||
n_samples = len(digits.images)
|
||||
data = digits.images.reshape((n_samples, -1))
|
||||
|
||||
# 4: Create a classifier: a support vector classifier
|
||||
classifier = svm.SVC(gamma=0.001)
|
||||
|
||||
# 5: Split data into train and test subsets
|
||||
X_train, _, y_train, _ = train_test_split(data, digits.target, test_size=0.5, shuffle=False)
|
||||
|
||||
# 6: We learn the digits on the first half of the digits
|
||||
classifier.fit(X_train, y_train)
|
||||
|
||||
# 7: Save the Sklearn model with `joblib`.
|
||||
model_file_name = "mnist-svm.joblib"
|
||||
joblib.dump(classifier, model_file_name)
|
||||
|
||||
# 8: Keep a reference to the generated model.
|
||||
self.best_model_path = Path("mnist-svm.joblib")
|
|
@ -1,49 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
****************************
|
||||
1. Build the Train Component
|
||||
****************************
|
||||
|
||||
In the code below, we create a work which trains a simple `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ model on the digits dataset (classification).
|
||||
|
||||
Once the model is trained, it is saved and a reference :class:`~lightning_app.storage.path.Path` is stored in the ``best_model_path`` state attribute.
|
||||
|
||||
.. literalinclude:: ./train.py
|
||||
|
||||
----
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. Add callout items below this line
|
||||
|
||||
.. displayitem::
|
||||
:header: 2. Build a Model Server Component
|
||||
:description: Use MLServer to serve your models
|
||||
:col_css: col-md-4
|
||||
:button_link: model_server.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 3. Build a Load Testing Component
|
||||
:description: Use Locust to test your model servers
|
||||
:col_css: col-md-4
|
||||
:button_link: load_testing.html
|
||||
:height: 150
|
||||
:tag: Intermediate
|
||||
|
||||
.. displayitem::
|
||||
:header: 4. Putting everything together.
|
||||
:description: Ensemble the components together and run the app
|
||||
:col_css: col-md-4
|
||||
:button_link: putting_everything_together.html
|
||||
:height: 150
|
||||
:tag: Basic
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,5 +1,3 @@
|
|||
:orphan:
|
||||
|
||||
#########################
|
||||
Build a Research Demo App
|
||||
#########################
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
#######################
|
||||
Add an Interactive Demo
|
||||
#######################
|
||||
|
||||
.. _add_an_interactive_Demo:
|
||||
|
||||
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
|
||||
|
||||
**Goal:** We'll walk you through the 4 key steps to run a Lightning App that trains and demos a model.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
.. include:: go_beyond_training_content.rst
|
|
@ -1,76 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
.. _build_model:
|
||||
|
||||
#######################
|
||||
Build and Train a Model
|
||||
#######################
|
||||
|
||||
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
|
||||
|
||||
**Goal:** We'll walk you through the creation of a model using PyTorch Lightning.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
*********************************
|
||||
A simple PyTorch Lightning script
|
||||
*********************************
|
||||
|
||||
Let's assume you already have a folder with those two files.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pl_project/
|
||||
train.py # your own script to train your models
|
||||
requirements.txt # your python requirements.
|
||||
|
||||
If you don't, simply create a ``pl_project`` folder with those two files and add the following `PyTorch Lightning <https://pytorch-lightning.readthedocs.io/en/latest/>`_ code in the ``train.py`` file. This code trains a simple ``AutoEncoder`` on `MNIST Dataset <https://en.wikipedia.org/wiki/MNIST_database>`_.
|
||||
|
||||
.. literalinclude:: ../code_samples/convert_pl_to_app/train.py
|
||||
|
||||
Add the following to the ``requirements.txt`` file.
|
||||
|
||||
.. literalinclude:: ../code_samples/convert_pl_to_app/requirements.py
|
||||
|
||||
Simply run the following commands in your terminal to install the requirements and train the model.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -r requirements.txt
|
||||
python train.py
|
||||
|
||||
Get through `PyTorch Lightning Introduction <https://pytorch-lightning.readthedocs.io/en/stable/starter/introduction.html#step-1-define-lightningmodule>`_ to learn more.
|
||||
|
||||
----
|
||||
|
||||
**********
|
||||
Next Steps
|
||||
**********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Evolve a Model into an ML System
|
||||
:description: Develop an App to train a model in the cloud
|
||||
:col_css: col-md-6
|
||||
:button_link: training_with_apps.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Start from a Template ML System
|
||||
:description: Learn about Apps, from a template.
|
||||
:col_css: col-md-6
|
||||
:button_link: go_beyond_training.html
|
||||
:height: 180
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
|
@ -1,18 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
################################
|
||||
Start from an ML system template
|
||||
################################
|
||||
|
||||
.. _go_beyond_training:
|
||||
|
||||
**Required background:** Basic Python familiarity and complete the :ref:`install` guide.
|
||||
|
||||
**Goal:** We'll walk you through the 4 key steps to run a Lightning App that trains and demos a model.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
.. include:: go_beyond_training_content.rst
|
|
@ -1,123 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
#####################################
|
||||
Start from Ready-to-Run Template Apps
|
||||
#####################################
|
||||
|
||||
.. _jumpstart_from_app_gallery:
|
||||
|
||||
Anyone can build Apps for their own use cases and promote them on the `App Gallery <https://lightning.ai/apps>`_.
|
||||
|
||||
In return, you can benefit from the work of others and get started faster by re-using a ready-to-run App close to your own use case.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
User Workflow
|
||||
*************
|
||||
|
||||
#. Visit the `App Gallery <https://lightning.ai/apps>`_ and look for an App close to your own use case.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
|
||||
#. If **Launch** is available, it means the App is live and ready to be used! Take it for a spin.
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/launch_button.png
|
||||
:alt: Launch Button on lightning.ai
|
||||
:width: 100 %
|
||||
|
||||
#. By clicking **Clone & Run**, a copy of the App is added to your account and an instance starts running.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/clone_and_run.png" width="100%">
|
||||
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/clone_and_run.mp4" type="video/mp4" width="100%">
|
||||
</video>
|
||||
|
||||
#. If you found an App that matches what you need, move to **step 5**! Otherwise, go back to **step 1**.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
|
||||
#. Copy the installation command (optionally from the clipboard on the right).
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_command.png
|
||||
:alt: Install command on lightning.ai
|
||||
:width: 100 %
|
||||
|
||||
#. Copy the command to your local terminal.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning install app lightning/hackernews-app
|
||||
|
||||
#. Go through the installation steps.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_an_app.png" width="100%">
|
||||
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/install_an_app.mp4" type="video/mp4" width="100%">
|
||||
</video>
|
||||
|
||||
#. Run the App locally.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd LAI-Hackernews-App
|
||||
lightning run app app.py
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews.png" width="100%">
|
||||
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews.mp4" type="video/mp4" width="100%">
|
||||
</video>
|
||||
|
||||
#. Open the code with your favorite IDE, modify it, and run it back in the cloud.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
<video id="background-video" autoplay loop muted controls poster="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews_modified.png" width="100%">
|
||||
<source src="https://pl-flash-data.s3.amazonaws.com/assets_lightning/hackernews_modified.mp4" type="video/mp4" width="100%">
|
||||
</video>
|
||||
<br />
|
||||
|
||||
----
|
||||
|
||||
**********
|
||||
Next Steps
|
||||
**********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Add Component made by others to your App
|
||||
:description: Add more functionality to your projects
|
||||
:col_css: col-md-6
|
||||
:button_link: jumpstart_from_component_gallery.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Level-up your skills with Lightning Apps
|
||||
:description: From Basic to Advanced Skills
|
||||
:col_css: col-md-6
|
||||
:button_link: ../levels/basic/index.html
|
||||
:height: 180
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<br />
|
|
@ -1,155 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
########################################
|
||||
Add Component made by others to your App
|
||||
########################################
|
||||
|
||||
.. _jumpstart_from_component_gallery:
|
||||
|
||||
Anyone can build components for their own use case and promote them on the `Component Gallery <https://lightning.ai/components>`_.
|
||||
|
||||
In return, you can benefit from the work of others and add new functionalities to your Apps with minimal effort.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
User Workflow
|
||||
*************
|
||||
|
||||
#. Visit the `Component Gallery <https://lightning.ai/components>`_ and look for a Component close to something you want to do.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<br />
|
||||
|
||||
#. Check out the code for inspiration or simply install the component from PyPi and use it.
|
||||
|
||||
----
|
||||
|
||||
*************
|
||||
Success Story
|
||||
*************
|
||||
|
||||
The default `Train and Demo Application <https://github.com/Lightning-AI/lightning-quick-start>`_ trains a PyTorch Lightning
|
||||
model and then starts a demo with `Gradio <https://gradio.app/>`_.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import os.path as ops
|
||||
import lightning as L
|
||||
from quick_start.components import PyTorchLightningScript, ImageServeGradio
|
||||
|
||||
|
||||
class TrainDeploy(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.train_work = PyTorchLightningScript(
|
||||
script_path=ops.join(ops.dirname(__file__), "./train_script.py"),
|
||||
script_args=["--trainer.max_epochs=5"],
|
||||
)
|
||||
|
||||
self.serve_work = ImageServeGradio(L.CloudCompute("cpu"))
|
||||
|
||||
def run(self):
|
||||
# 1. Run the python script that trains the model
|
||||
self.train_work.run()
|
||||
|
||||
# 2. when a checkpoint is available, deploy
|
||||
if self.train_work.best_model_path:
|
||||
self.serve_work.run(self.train_work.best_model_path)
|
||||
|
||||
def configure_layout(self):
|
||||
tab_1 = {"name": "Model training", "content": self.train_work}
|
||||
tab_2 = {"name": "Interactive demo", "content": self.serve_work}
|
||||
return [tab_1, tab_2]
|
||||
|
||||
|
||||
app = L.LightningApp(TrainDeploy())
|
||||
|
||||
However, someone who wants to use this App (maybe you) found `Lightning HPO <https://lightning.ai/component/BA2slXI093-Lightning%20HPO>`_
|
||||
from browsing the `Component Gallery <https://lightning.ai/components>`_ and decided to give it a spin after checking the associated
|
||||
`Github Repository <https://github.com/Lightning-AI/LAI-lightning-hpo-App>`_.
|
||||
|
||||
Once ``lightning_hpo`` is installed, they improved the default App by easily adding HPO support to their project.
|
||||
|
||||
Here is the resulting App. It is almost the same code, but it's way more powerful now!
|
||||
|
||||
This is the power of the `lightning.ai <https://lightning.ai/>`_ ecosystem 🔥⚡🔥
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import os.path as ops
|
||||
import lightning as L
|
||||
from quick_start.components import PyTorchLightningScript, ImageServeGradio
|
||||
import optuna
|
||||
from optuna.distributions import LogUniformDistribution
|
||||
from lightning_hpo import Optimizer, BaseObjective
|
||||
|
||||
|
||||
class HPOPyTorchLightningScript(PyTorchLightningScript, BaseObjective):
|
||||
@staticmethod
|
||||
def distributions():
|
||||
return {"model.lr": LogUniformDistribution(0.0001, 0.1)}
|
||||
|
||||
|
||||
class TrainDeploy(L.LightningFlow):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.train_work = Optimizer(
|
||||
script_path=ops.join(ops.dirname(__file__), "./train_script.py"),
|
||||
script_args=["--trainer.max_epochs=5"],
|
||||
objective_cls=HPOPyTorchLightningScript,
|
||||
n_trials=4,
|
||||
)
|
||||
|
||||
self.serve_work = ImageServeGradio(L.CloudCompute("cpu"))
|
||||
|
||||
def run(self):
|
||||
# 1. Run the python script that trains the model
|
||||
self.train_work.run()
|
||||
|
||||
# 2. when a checkpoint is available, deploy
|
||||
if self.train_work.best_model_path:
|
||||
self.serve_work.run(self.train_work.best_model_path)
|
||||
|
||||
def configure_layout(self):
|
||||
tab_1 = {"name": "Model training", "content": self.train_work.hi_plot}
|
||||
tab_2 = {"name": "Interactive demo", "content": self.serve_work}
|
||||
return [tab_1, tab_2]
|
||||
|
||||
|
||||
app = L.LightningApp(TrainDeploy())
|
||||
|
||||
----
|
||||
|
||||
**********
|
||||
Next Steps
|
||||
**********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Start from Ready-to-Run Template Apps
|
||||
:description: Jump-start your projects development
|
||||
:col_css: col-md-6
|
||||
:button_link: jumpstart_from_app_gallery.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Level-up your skills with Lightning Apps
|
||||
:description: From Basic to Advanced Skills
|
||||
:col_css: col-md-6
|
||||
:button_link: ../levels/basic/index.html
|
||||
:height: 180
|
||||
|
||||
.. raw:: html
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<br />
|
|
@ -1,14 +0,0 @@
|
|||
############################
|
||||
Lightning Apps in 15 minutes
|
||||
############################
|
||||
|
||||
**Required background:** Basic Python familiarity.
|
||||
|
||||
**Goal:** Guide you to develop your first Lightning App or use an existing App from the `Apps Gallery <https://lightning.ai/apps>`_.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
.. include:: go_beyond_training_content.rst
|
|
@ -1,136 +0,0 @@
|
|||
:orphan:
|
||||
|
||||
################################
|
||||
Evolve a model into an ML system
|
||||
################################
|
||||
|
||||
.. _convert_pl_to_app:
|
||||
|
||||
**Required background:** Basic Python familiarity and complete the :ref:`build_model` guide.
|
||||
|
||||
**Goal:** We'll walk you through the two key steps to build your first Lightning App from your existing Pytorch Lightning scripts.
|
||||
|
||||
.. join_slack::
|
||||
:align: left
|
||||
|
||||
----
|
||||
|
||||
*******************
|
||||
Training and beyond
|
||||
*******************
|
||||
|
||||
With `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_, we abstracted distributed training and hardware, by organizing PyTorch code.
|
||||
With `Lightning Apps <https://github.com/Lightning-AI/lightning/tree/master/src/lightning_app>`_, we unified the local and cloud experience while abstracting infrastructure.
|
||||
|
||||
By using `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_ and `Lightning Apps <https://github.com/Lightning-AI/lightning/tree/master/src/lightning_app>`_
|
||||
together, a completely new world of possibilities emerges.
|
||||
|
||||
.. figure:: https://pl-flash-data.s3.amazonaws.com/assets_lightning/pl_to_app_4.png
|
||||
:alt: From PyTorch Lightning to Lightning App
|
||||
:width: 100 %
|
||||
|
||||
----
|
||||
|
||||
******************************************
|
||||
1. Write an App to run the train.py script
|
||||
******************************************
|
||||
|
||||
This article continues where the :ref:`build_model` guide finished.
|
||||
|
||||
Create an additional file ``app.py`` in the ``pl_project`` folder as follows:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pl_project/
|
||||
app.py
|
||||
train.py
|
||||
requirements.txt
|
||||
|
||||
Inside the ``app.py`` file, add the following code.
|
||||
|
||||
.. literalinclude:: ../code_samples/convert_pl_to_app/app.py
|
||||
|
||||
This App runs the PyTorch Lightning script contained in the ``train.py`` file using the powerful :class:`~lightning_app.components.python.tracer.TracerPythonScript` component. This is really worth checking out!
|
||||
|
||||
----
|
||||
|
||||
************************************************
|
||||
2. Run the train.py file locally or in the cloud
|
||||
************************************************
|
||||
|
||||
First, go to the ``pl_project`` folder from the local terminal and install the requirements.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd pl_project
|
||||
pip install -r requirements.txt
|
||||
|
||||
To run your app, copy the following command to your local terminal:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app app.py
|
||||
|
||||
Simply add ``--cloud`` to run this application in the cloud with a GPU machine 🤯
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lightning run app app.py --cloud
|
||||
|
||||
|
||||
Congratulations! Now, you know how to run a `PyTorch Lightning <https://github.com/Lightning-AI/lightning/tree/master/src/pytorch_lightning>`_ script with Lightning Apps.
|
||||
|
||||
Lightning Apps can make your ML system way more powerful, keep reading to learn how.
|
||||
|
||||
----
|
||||
|
||||
**********
|
||||
Next Steps
|
||||
**********
|
||||
|
||||
.. raw:: html
|
||||
|
||||
<div class="display-card-container">
|
||||
<div class="row">
|
||||
|
||||
.. displayitem::
|
||||
:header: Level-up with Lightning Apps
|
||||
:description: From Basics to Advanced Skills
|
||||
:col_css: col-md-4
|
||||
:button_link: ../levels/basic/index.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Add an Interactive Demo
|
||||
:description: Add a Gradio Demo once the training is finished
|
||||
:col_css: col-md-4
|
||||
:button_link: add_an_interactive_demo.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Add Hyper Parameter Optimization
|
||||
:description: Add a HPO to optimize your models
|
||||
:col_css: col-md-4
|
||||
:button_link: ../examples/hpo/hpo.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Add Model Serving
|
||||
:description: Serve and load testing with MLServer and Locust
|
||||
:col_css: col-md-4
|
||||
:button_link: ../examples/model_server_app/model_server_app.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Add DAG Orchestration
|
||||
:description: Organize your processing, training and metrics collection
|
||||
:col_css: col-md-4
|
||||
:button_link: ../examples/dag/dag.html
|
||||
:height: 180
|
||||
|
||||
.. displayitem::
|
||||
:header: Add Team Collaboration
|
||||
:description: Create an app to run any PyTorch Lightning Script from Github
|
||||
:col_css: col-md-4
|
||||
:button_link: ../examples/github_repo_runner/github_repo_runner.html
|
||||
:height: 180
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue