From 6e359dcc86c6e12ebcbaf1cdddab989d90bd52c7 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Fri, 17 Feb 2023 01:52:46 +0000 Subject: [PATCH] [App] Fix idle timeout e2e (#16786) --- src/lightning/app/core/work.py | 5 ++- .../integrations_app/apps/idle_timeout/app.py | 31 +++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/lightning/app/core/work.py b/src/lightning/app/core/work.py index abe57ab45f..1eb7cacbc1 100644 --- a/src/lightning/app/core/work.py +++ b/src/lightning/app/core/work.py @@ -639,7 +639,10 @@ class LightningWork: return WorkStatus(**status, count=len(timeout_statuses)) def on_exit(self): - """Override this hook to add your logic when the work is exiting.""" + """Override this hook to add your logic when the work is exiting. + + Note: This hook is not guaranteed to be called when running in the cloud. + """ pass def stop(self): diff --git a/tests/integrations_app/apps/idle_timeout/app.py b/tests/integrations_app/apps/idle_timeout/app.py index 31e0d7c124..d33df0a616 100644 --- a/tests/integrations_app/apps/idle_timeout/app.py +++ b/tests/integrations_app/apps/idle_timeout/app.py @@ -2,7 +2,7 @@ import pathlib from lightning.app import CloudCompute, LightningApp, LightningFlow, LightningWork from lightning.app.storage.path import _artifacts_path, _filesystem -from lightning.app.utilities.enum import WorkStageStatus, WorkStopReasons +from lightning.app.utilities.enum import WorkStageStatus class SourceFileWriterWork(LightningWork): @@ -35,22 +35,21 @@ class RootFlow(LightningFlow): if self.work.counter == 0: self.work.run() - elif ( - self.work.status.stage == WorkStageStatus.STOPPED - and self.work.status.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER - and self.make_check - ): - succeeded_status = self.work.statuses[-3] - stopped_status_pending = self.work.statuses[-2] - stopped_status_sigterm = self.work.statuses[-1] - assert succeeded_status.stage == WorkStageStatus.SUCCEEDED - assert stopped_status_pending.stage == WorkStageStatus.STOPPED - assert stopped_status_pending.reason == WorkStopReasons.PENDING - assert stopped_status_sigterm.stage == WorkStageStatus.STOPPED - assert stopped_status_sigterm.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER + elif self.work.status.stage == WorkStageStatus.STOPPED and self.make_check: + succeeded_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.SUCCEEDED] + # Ensure the work succeeded at some point + assert len(succeeded_statuses) > 0 + succeeded_status = succeeded_statuses[-1] + + stopped_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.STOPPED] + + # We want to check that the work started shutting down withing the required timeframe, so we take the first + # status that has `stage == STOPPED`. + stopped_status = stopped_statuses[0] + # Note: Account for the controlplane, k8s, SIGTERM handler delays. - assert (stopped_status_pending.timestamp - succeeded_status.timestamp) < 20 - assert (stopped_status_sigterm.timestamp - stopped_status_pending.timestamp) < 120 + assert (stopped_status.timestamp - succeeded_status.timestamp) < 20 + fs = _filesystem() destination_path = _artifacts_path(self.work) / pathlib.Path(*self.work.path.resolve().parts[1:]) assert fs.exists(destination_path)