mirror of https://github.com/rq/rq.git
Enhanced Redis Connection Reliability (#1753)
* Enhanced Redis Connection Reliability The Redis connection may fail for several reasons. As the connection can be (1) explicitly passed to the worker or (2) implicity set, this will improve the Connection configuration by setting a timeout to the socket, and adding an ExponentialBackoff Retry logic. * Simpler Connection logic * Add simple retry logic to Redis Connection Error * Make retry exponential, add keepalive & socket_connect_timeout * Handles configuration on Redis' connection pool * Simplifies timeout exception logic * Fix burst bug, add test * Add docs related to `socket_timeout`, improve compatibility with older RedisPy versions * Fixes * New timeout private method * Fix timeout
This commit is contained in:
parent
436250d36b
commit
c2e6d95338
|
@ -142,3 +142,23 @@ SENTINEL: {'INSTANCES':[('remote.host1.org', 26379), ('remote.host2.org', 26379)
|
||||||
'DB': 2,
|
'DB': 2,
|
||||||
'MASTER_NAME': 'master'}
|
'MASTER_NAME': 'master'}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Timeout
|
||||||
|
|
||||||
|
To avoid potential issues with hanging Redis commands, specifically the blocking `BLPOP` command,
|
||||||
|
RQ automatically sets a `socket_timeout` value that is 10 seconds higher than the `default_worker_ttl`.
|
||||||
|
|
||||||
|
If you prefer to manually set the `socket_timeout` value,
|
||||||
|
make sure that the value being set is higher than the `default_worker_ttl` (which is 420 by default).
|
||||||
|
|
||||||
|
```python
|
||||||
|
from redis import Redis
|
||||||
|
from rq import Queue
|
||||||
|
|
||||||
|
conn = Redis('localhost', 6379, socket_timeout=500)
|
||||||
|
q = Queue(connection=conn)
|
||||||
|
```
|
||||||
|
|
||||||
|
Setting a `socket_timeout` with a lower value than the `default_worker_ttl` will cause a `TimeoutError`
|
||||||
|
since it will interrupt the worker while it gets new jobs from the queue.
|
||||||
|
|
36
rq/worker.py
36
rq/worker.py
|
@ -197,10 +197,9 @@ class Worker:
|
||||||
job_monitoring_interval=DEFAULT_JOB_MONITORING_INTERVAL,
|
job_monitoring_interval=DEFAULT_JOB_MONITORING_INTERVAL,
|
||||||
disable_default_exception_handler: bool = False,
|
disable_default_exception_handler: bool = False,
|
||||||
prepare_for_work: bool = True, serializer=None): # noqa
|
prepare_for_work: bool = True, serializer=None): # noqa
|
||||||
if connection is None:
|
|
||||||
connection = get_current_connection()
|
|
||||||
self.connection = connection
|
|
||||||
|
|
||||||
|
connection = self._set_connection(connection, default_worker_ttl)
|
||||||
|
self.connection = connection
|
||||||
self.redis_server_version = None
|
self.redis_server_version = None
|
||||||
|
|
||||||
self.job_class = backend_class(self, 'job_class', override=job_class)
|
self.job_class = backend_class(self, 'job_class', override=job_class)
|
||||||
|
@ -281,6 +280,24 @@ class Worker:
|
||||||
elif exception_handlers is not None:
|
elif exception_handlers is not None:
|
||||||
self.push_exc_handler(exception_handlers)
|
self.push_exc_handler(exception_handlers)
|
||||||
|
|
||||||
|
def _set_connection(self, connection: Optional['Redis'], default_worker_ttl: int) -> 'Redis':
|
||||||
|
"""Configures the Redis connection to have a socket timeout.
|
||||||
|
This should timouet the connection in case any specific command hangs at any given time (eg. BLPOP).
|
||||||
|
If the connection provided already has a `socket_timeout` defined, skips.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
connection (Optional[Redis]): The Redis Connection.
|
||||||
|
default_worker_ttl (int): The Default Worker TTL
|
||||||
|
"""
|
||||||
|
if connection is None:
|
||||||
|
connection = get_current_connection()
|
||||||
|
current_socket_timeout = connection.connection_pool.connection_kwargs.get("socket_timeout")
|
||||||
|
if current_socket_timeout is None:
|
||||||
|
timeout = self._get_timeout(default_worker_ttl) + 10
|
||||||
|
timeout_config = {"socket_timeout": timeout}
|
||||||
|
connection.connection_pool.connection_kwargs.update(timeout_config)
|
||||||
|
return connection
|
||||||
|
|
||||||
def get_redis_server_version(self):
|
def get_redis_server_version(self):
|
||||||
"""Return Redis server version of connection"""
|
"""Return Redis server version of connection"""
|
||||||
if not self.redis_server_version:
|
if not self.redis_server_version:
|
||||||
|
@ -328,6 +345,12 @@ class Worker:
|
||||||
"""Returns whether or not this is the worker or the work horse."""
|
"""Returns whether or not this is the worker or the work horse."""
|
||||||
return self._is_horse
|
return self._is_horse
|
||||||
|
|
||||||
|
def _get_timeout(self, worker_ttl: Optional[int] = None) -> int:
|
||||||
|
timeout = DEFAULT_WORKER_TTL
|
||||||
|
if worker_ttl:
|
||||||
|
timeout = worker_ttl
|
||||||
|
return max(1, timeout - 15)
|
||||||
|
|
||||||
def procline(self, message):
|
def procline(self, message):
|
||||||
"""Changes the current procname for the process.
|
"""Changes the current procname for the process.
|
||||||
|
|
||||||
|
@ -626,7 +649,6 @@ class Worker:
|
||||||
self.scheduler.start()
|
self.scheduler.start()
|
||||||
|
|
||||||
self._install_signal_handlers()
|
self._install_signal_handlers()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
@ -639,7 +661,7 @@ class Worker:
|
||||||
self.log.info('Worker %s: stopping on request', self.key)
|
self.log.info('Worker %s: stopping on request', self.key)
|
||||||
break
|
break
|
||||||
|
|
||||||
timeout = None if burst else max(1, self.default_worker_ttl - 15)
|
timeout = None if burst else self._get_timeout()
|
||||||
result = self.dequeue_job_and_maintain_ttl(timeout)
|
result = self.dequeue_job_and_maintain_ttl(timeout)
|
||||||
if result is None:
|
if result is None:
|
||||||
if burst:
|
if burst:
|
||||||
|
@ -660,6 +682,10 @@ class Worker:
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
except redis.exceptions.TimeoutError:
|
||||||
|
self.log.error(f"Worker {self.key}: Redis connection timeout, quitting...")
|
||||||
|
break
|
||||||
|
|
||||||
except StopRequested:
|
except StopRequested:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
|
@ -1049,6 +1049,13 @@ class TestWorker(RQTestCase):
|
||||||
w.dequeue_job_and_maintain_ttl(10)
|
w.dequeue_job_and_maintain_ttl(10)
|
||||||
self.assertNotIn("Frank", mock_logger_info.call_args[0][2])
|
self.assertNotIn("Frank", mock_logger_info.call_args[0][2])
|
||||||
|
|
||||||
|
def test_worker_configures_socket_timeout(self):
|
||||||
|
"""Ensures that the worker correctly updates Redis client connection to have a socket_timeout"""
|
||||||
|
q = Queue()
|
||||||
|
_ = Worker([q])
|
||||||
|
connection_kwargs = q.connection.connection_pool.connection_kwargs
|
||||||
|
self.assertEqual(connection_kwargs["socket_timeout"], 415)
|
||||||
|
|
||||||
def test_worker_version(self):
|
def test_worker_version(self):
|
||||||
q = Queue()
|
q = Queue()
|
||||||
w = Worker([q])
|
w = Worker([q])
|
||||||
|
|
Loading…
Reference in New Issue