[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
This commit is contained in:
parent
56ea78b45f
commit
24983a0a5a
|
@ -340,63 +340,63 @@ class _LoadBalancer(LightningWork):
|
||||||
|
|
||||||
class AutoScaler(LightningFlow):
|
class AutoScaler(LightningFlow):
|
||||||
"""The ``AutoScaler`` can be used to automatically change the number of replicas of the given server in
|
"""The ``AutoScaler`` can be used to automatically change the number of replicas of the given server in
|
||||||
response to changes in the number of incoming requests. Incoming requests will be batched and balanced across
|
response to changes in the number of incoming requests. Incoming requests will be batched and balanced across
|
||||||
the replicas.
|
the replicas.
|
||||||
Note that the ``AutoScaler`` experience on the cloud is in beta.
|
Note that the ``AutoScaler`` experience on the cloud is in beta.
|
||||||
Args:
|
Args:
|
||||||
min_replicas: The number of works to start when the app initializes.
|
min_replicas: The number of works to start when the app initializes.
|
||||||
max_replicas: The max number of works to spawn to handle the incoming requests.
|
max_replicas: The max number of works to spawn to handle the incoming requests.
|
||||||
autoscale_interval: The number of seconds to wait before checking whether to upscale or downscale the works.
|
autoscale_interval: The number of seconds to wait before checking whether to upscale or downscale the works.
|
||||||
endpoint: The REST API path. Defaults to ``api/predict``.
|
endpoint: The REST API path. Defaults to ``api/predict``.
|
||||||
max_batch_size: (auto-batching) The number of requests to process at once.
|
max_batch_size: (auto-batching) The number of requests to process at once.
|
||||||
timeout_batching: (auto-batching) The number of seconds to wait before sending the requests to process.
|
timeout_batching: (auto-batching) The number of seconds to wait before sending the requests to process.
|
||||||
input_type: Input type.
|
input_type: Input type.
|
||||||
output_type: Output type.
|
output_type: Output type.
|
||||||
|
|
||||||
.. testcode::
|
.. testcode::
|
||||||
|
|
||||||
import lightning as L
|
import lightning as L
|
||||||
|
|
||||||
# Example 1: Auto-scaling serve component out-of-the-box
|
# Example 1: Auto-scaling serve component out-of-the-box
|
||||||
app = L.LightningApp(
|
app = L.LightningApp(
|
||||||
L.app.components.AutoScaler(
|
L.app.components.AutoScaler(
|
||||||
MyPythonServer,
|
MyPythonServer,
|
||||||
min_replicas=1,
|
min_replicas=1,
|
||||||
max_replicas=8,
|
max_replicas=8,
|
||||||
autoscale_interval=10,
|
autoscale_interval=10,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Example 2: Customizing the scaling logic
|
# Example 2: Customizing the scaling logic
|
||||||
class MyAutoScaler(L.app.components.AutoScaler):
|
class MyAutoScaler(L.app.components.AutoScaler):
|
||||||
def scale(self, replicas: int, metrics: dict) -> int:
|
def scale(self, replicas: int, metrics: dict) -> int:
|
||||||
pending_requests_per_running_or_pending_work = metrics["pending_requests"] / (
|
pending_requests_per_running_or_pending_work = metrics["pending_requests"] / (
|
||||||
replicas + metrics["pending_works"]
|
replicas + metrics["pending_works"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# upscale
|
||||||
|
max_requests_per_work = self.max_batch_size
|
||||||
|
if pending_requests_per_running_or_pending_work >= max_requests_per_work:
|
||||||
|
return replicas + 1
|
||||||
|
|
||||||
|
# downscale
|
||||||
|
min_requests_per_work = max_requests_per_work * 0.25
|
||||||
|
if pending_requests_per_running_or_pending_work < min_requests_per_work:
|
||||||
|
return replicas - 1
|
||||||
|
|
||||||
|
return replicas
|
||||||
|
|
||||||
|
|
||||||
|
app = L.LightningApp(
|
||||||
|
MyAutoScaler(
|
||||||
|
MyPythonServer,
|
||||||
|
min_replicas=1,
|
||||||
|
max_replicas=8,
|
||||||
|
autoscale_interval=10,
|
||||||
|
max_batch_size=8, # for auto batching
|
||||||
|
timeout_batching=2, # for auto batching
|
||||||
)
|
)
|
||||||
|
|
||||||
# upscale
|
|
||||||
max_requests_per_work = self.max_batch_size
|
|
||||||
if pending_requests_per_running_or_pending_work >= max_requests_per_work:
|
|
||||||
return replicas + 1
|
|
||||||
|
|
||||||
# downscale
|
|
||||||
min_requests_per_work = max_requests_per_work * 0.25
|
|
||||||
if pending_requests_per_running_or_pending_work < min_requests_per_work:
|
|
||||||
return replicas - 1
|
|
||||||
|
|
||||||
return replicas
|
|
||||||
|
|
||||||
|
|
||||||
app = L.LightningApp(
|
|
||||||
MyAutoScaler(
|
|
||||||
MyPythonServer,
|
|
||||||
min_replicas=1,
|
|
||||||
max_replicas=8,
|
|
||||||
autoscale_interval=10,
|
|
||||||
max_batch_size=8, # for auto batching
|
|
||||||
timeout_batching=2, # for auto batching
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
Loading…
Reference in New Issue