[App] Fix idle timeout e2e (#16786)
This commit is contained in:
parent
57c1138525
commit
6e359dcc86
|
@ -639,7 +639,10 @@ class LightningWork:
|
||||||
return WorkStatus(**status, count=len(timeout_statuses))
|
return WorkStatus(**status, count=len(timeout_statuses))
|
||||||
|
|
||||||
def on_exit(self):
|
def on_exit(self):
|
||||||
"""Override this hook to add your logic when the work is exiting."""
|
"""Override this hook to add your logic when the work is exiting.
|
||||||
|
|
||||||
|
Note: This hook is not guaranteed to be called when running in the cloud.
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
|
|
@ -2,7 +2,7 @@ import pathlib
|
||||||
|
|
||||||
from lightning.app import CloudCompute, LightningApp, LightningFlow, LightningWork
|
from lightning.app import CloudCompute, LightningApp, LightningFlow, LightningWork
|
||||||
from lightning.app.storage.path import _artifacts_path, _filesystem
|
from lightning.app.storage.path import _artifacts_path, _filesystem
|
||||||
from lightning.app.utilities.enum import WorkStageStatus, WorkStopReasons
|
from lightning.app.utilities.enum import WorkStageStatus
|
||||||
|
|
||||||
|
|
||||||
class SourceFileWriterWork(LightningWork):
|
class SourceFileWriterWork(LightningWork):
|
||||||
|
@ -35,22 +35,21 @@ class RootFlow(LightningFlow):
|
||||||
if self.work.counter == 0:
|
if self.work.counter == 0:
|
||||||
self.work.run()
|
self.work.run()
|
||||||
|
|
||||||
elif (
|
elif self.work.status.stage == WorkStageStatus.STOPPED and self.make_check:
|
||||||
self.work.status.stage == WorkStageStatus.STOPPED
|
succeeded_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.SUCCEEDED]
|
||||||
and self.work.status.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
|
# Ensure the work succeeded at some point
|
||||||
and self.make_check
|
assert len(succeeded_statuses) > 0
|
||||||
):
|
succeeded_status = succeeded_statuses[-1]
|
||||||
succeeded_status = self.work.statuses[-3]
|
|
||||||
stopped_status_pending = self.work.statuses[-2]
|
stopped_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.STOPPED]
|
||||||
stopped_status_sigterm = self.work.statuses[-1]
|
|
||||||
assert succeeded_status.stage == WorkStageStatus.SUCCEEDED
|
# We want to check that the work started shutting down within the required timeframe, so we take the first
|
||||||
assert stopped_status_pending.stage == WorkStageStatus.STOPPED
|
# status that has `stage == STOPPED`.
|
||||||
assert stopped_status_pending.reason == WorkStopReasons.PENDING
|
stopped_status = stopped_statuses[0]
|
||||||
assert stopped_status_sigterm.stage == WorkStageStatus.STOPPED
|
|
||||||
assert stopped_status_sigterm.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
|
|
||||||
# Note: Account for the controlplane, k8s, SIGTERM handler delays.
|
# Note: Account for the controlplane, k8s, SIGTERM handler delays.
|
||||||
assert (stopped_status_pending.timestamp - succeeded_status.timestamp) < 20
|
assert (stopped_status.timestamp - succeeded_status.timestamp) < 20
|
||||||
assert (stopped_status_sigterm.timestamp - stopped_status_pending.timestamp) < 120
|
|
||||||
fs = _filesystem()
|
fs = _filesystem()
|
||||||
destination_path = _artifacts_path(self.work) / pathlib.Path(*self.work.path.resolve().parts[1:])
|
destination_path = _artifacts_path(self.work) / pathlib.Path(*self.work.path.resolve().parts[1:])
|
||||||
assert fs.exists(destination_path)
|
assert fs.exists(destination_path)
|
||||||
|
|
Loading…
Reference in New Issue