[App] Fix idle timeout e2e (#16786)

Ethan Harris 2023-02-17 01:52:46 +00:00 committed by GitHub
parent 57c1138525
commit 6e359dcc86
2 changed files with 19 additions and 17 deletions


@@ -639,7 +639,10 @@ class LightningWork:
         return WorkStatus(**status, count=len(timeout_statuses))
 
     def on_exit(self):
-        """Override this hook to add your logic when the work is exiting."""
+        """Override this hook to add your logic when the work is exiting.
+
+        Note: This hook is not guaranteed to be called when running in the cloud.
+        """
         pass
 
     def stop(self):
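For context, `on_exit` is a hook users override on their own `LightningWork` subclasses. A minimal sketch of one such override (the `CleanupWork` class and its body are hypothetical, not part of this commit):

    from lightning.app import LightningWork


    class CleanupWork(LightningWork):
        def run(self):
            # ... the actual workload would go here ...
            pass

        def on_exit(self):
            # Best-effort cleanup only: per the docstring added above, this
            # hook is not guaranteed to be called when running in the cloud,
            # so it should not be the only place critical state is persisted.
            print("work is shutting down")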


@@ -2,7 +2,7 @@ import pathlib
 
 from lightning.app import CloudCompute, LightningApp, LightningFlow, LightningWork
 from lightning.app.storage.path import _artifacts_path, _filesystem
-from lightning.app.utilities.enum import WorkStageStatus, WorkStopReasons
+from lightning.app.utilities.enum import WorkStageStatus
 
 
 class SourceFileWriterWork(LightningWork):
@@ -35,22 +35,21 @@ class RootFlow(LightningFlow):
         if self.work.counter == 0:
             self.work.run()
-        elif (
-            self.work.status.stage == WorkStageStatus.STOPPED
-            and self.work.status.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
-            and self.make_check
-        ):
-            succeeded_status = self.work.statuses[-3]
-            stopped_status_pending = self.work.statuses[-2]
-            stopped_status_sigterm = self.work.statuses[-1]
-            assert succeeded_status.stage == WorkStageStatus.SUCCEEDED
-            assert stopped_status_pending.stage == WorkStageStatus.STOPPED
-            assert stopped_status_pending.reason == WorkStopReasons.PENDING
-            assert stopped_status_sigterm.stage == WorkStageStatus.STOPPED
-            assert stopped_status_sigterm.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
+        elif self.work.status.stage == WorkStageStatus.STOPPED and self.make_check:
+            succeeded_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.SUCCEEDED]
+            # Ensure the work succeeded at some point
+            assert len(succeeded_statuses) > 0
+            succeeded_status = succeeded_statuses[-1]
+
+            stopped_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.STOPPED]
+
+            # We want to check that the work started shutting down within the required timeframe, so we take the first
+            # status that has `stage == STOPPED`.
+            stopped_status = stopped_statuses[0]
+
             # Note: Account for the controlplane, k8s, SIGTERM handler delays.
-            assert (stopped_status_pending.timestamp - succeeded_status.timestamp) < 20
-            assert (stopped_status_sigterm.timestamp - stopped_status_pending.timestamp) < 120
+            assert (stopped_status.timestamp - succeeded_status.timestamp) < 20
+
             fs = _filesystem()
             destination_path = _artifacts_path(self.work) / pathlib.Path(*self.work.path.resolve().parts[1:])
             assert fs.exists(destination_path)
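Why the test was rewritten: the old assertions hard-coded the last three entries of `work.statuses` and their exact stop reasons, which breaks whenever the platform interleaves extra status transitions. The new version filters by stage instead. A toy sketch of the pattern (the `Status` tuple and stage strings below are simplified stand-ins, not the real `WorkStatus` API):

    from collections import namedtuple

    # Simplified stand-in for WorkStatus; the real object has more fields.
    Status = namedtuple("Status", ["stage", "timestamp"])

    statuses = [
        Status("running", 0.0),
        Status("succeeded", 5.0),
        Status("pending", 6.0),  # an extra transition would shift statuses[-3]
        Status("stopped", 7.0),
        Status("stopped", 8.0),
    ]

    # Filtering by stage is order-tolerant: take the last SUCCEEDED and the
    # first STOPPED, regardless of how many other statuses land in between.
    succeeded = [s for s in statuses if s.stage == "succeeded"][-1]
    stopped = [s for s in statuses if s.stage == "stopped"][0]
    assert stopped.timestamp - succeeded.timestamp < 20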