From eadc7db29f6ef6bbd016fbbc5882d1266b75e3dd Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Thu, 18 Apr 2013 22:11:43 +0700 Subject: [PATCH 1/7] First stab at writing implementing job dependency. --- rq/job.py | 57 +++++++++++++++++++++++++++++++++++++++++--- rq/queue.py | 29 ++++++++++++++++++---- rq/worker.py | 1 + tests/test_job.py | 35 ++++++++++++++++++++++++++- tests/test_queue.py | 25 +++++++++++++++++++ tests/test_worker.py | 9 +++++++ 6 files changed, 148 insertions(+), 8 deletions(-) diff --git a/rq/job.py b/rq/job.py index a4772a41..2a6efcfb 100644 --- a/rq/job.py +++ b/rq/job.py @@ -64,7 +64,7 @@ class Job(object): # Job construction @classmethod def create(cls, func, args=None, kwargs=None, connection=None, - result_ttl=None, status=None): + result_ttl=None, status=None, parent=None): """Creates a new Job instance for the given function, arguments, and keyword arguments. """ @@ -87,6 +87,9 @@ class Job(object): job.description = job.get_call_string() job.result_ttl = result_ttl job._status = status + # parent could be job instance or id + if parent is not None: + job._parent_id = parent.id if isinstance(parent, Job) else parent return job @property @@ -119,6 +122,20 @@ class Job(object): def is_started(self): return self.status == Status.STARTED + @property + def parent(self): + """Returns a job's parent. To avoid repeated Redis fetches, we cache + job.parent as job._parent. + """ + if self._parent_id is None: + return None + if hasattr(self, '_parent'): + return self._parent + job = Job.fetch(self._parent_id, connection=self.connection) + job.refresh() + self._parent = job + return job + @property def func(self): func_name = self.func_name @@ -185,6 +202,7 @@ class Job(object): self.timeout = None self.result_ttl = None self._status = None + self._parent_id = None self.meta = {} @@ -208,11 +226,21 @@ class Job(object): """The Redis key that is used to store job hash under.""" return 'rq:job:%s' % (job_id,) + @classmethod + def waitlist_key_for(cls, job_id): + """The Redis key that is used to store job hash under.""" + return 'rq:job:%s:waitlist' % (job_id,) + @property def key(self): """The Redis key that is used to store job hash under.""" return self.key_for(self.id) + @property + def waitlist_key(self): + """The Redis key that is used to store job hash under.""" + return self.waitlist_key_for(self.id) + @property # noqa def job_tuple(self): """Returns the job tuple that encodes the actual function call that @@ -285,6 +313,7 @@ class Job(object): self.timeout = int(obj.get('timeout')) if obj.get('timeout') else None self.result_ttl = int(obj.get('result_ttl')) if obj.get('result_ttl') else None # noqa self._status = obj.get('status') if obj.get('status') else None + self._parent_id = obj.get('parent_id', None) self.meta = unpickle(obj.get('meta')) if obj.get('meta') else {} def save(self, pipeline=None): @@ -315,6 +344,8 @@ class Job(object): obj['result_ttl'] = self.result_ttl if self._status is not None: obj['status'] = self._status + if self._parent_id is not None: + obj['parent_id'] = self._parent_id if self.meta: obj['meta'] = dumps(self.meta) @@ -381,7 +412,26 @@ class Job(object): elif ttl > 0: connection = pipeline if pipeline is not None else self.connection connection.expire(self.key, ttl) + + def register_dependency(self): + """Jobs may have a waitlist. Jobs in this waitlist are enqueued + only if the parent job is successfully performed. We maintain this + waitlist in Redis, with key that looks something like: + + rq:job:job_id:waitlist = ['job_id_1', 'job_id_2'] + This method puts the job on it's parent's waitlist. + """ + # TODO: This can probably be pipelined + self.connection.rpush(Job.waitlist_key_for(self._parent_id), self.id) + + def get_waitlist(self): + """Returns all job ids in the waitlist. + """ + # TODO: This can probably be pipelined + + return self.connection.lrange( + self.waitlist_key, 0, self.connection.llen(self.waitlist_key) - 1) def __str__(self): return '' % (self.id, self.description) @@ -413,10 +463,11 @@ class Job(object): def __setattr__(self, name, value): # Ignore the "private" fields - private_attrs = set(['origin', '_func_name', 'ended_at', + private_attrs = set(('origin', '_func_name', 'ended_at', 'description', '_args', 'created_at', 'enqueued_at', 'connection', '_result', 'result', 'timeout', '_kwargs', 'exc_info', '_id', - 'data', '_instance', 'result_ttl', '_status', 'status', 'meta']) + 'data', '_instance', 'result_ttl', '_status', 'status', + '_parent_id', '_parent', 'parent', 'meta')) if name in private_attrs: object.__setattr__(self, name, value) diff --git a/rq/queue.py b/rq/queue.py index 3bf5d009..f70033a9 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -113,7 +113,8 @@ class Queue(object): """Pushes a job ID on the corresponding Redis queue.""" self.connection.rpush(self.key, job_id) - def enqueue_call(self, func, args=None, kwargs=None, timeout=None, result_ttl=None): #noqa + def enqueue_call(self, func, args=None, kwargs=None, timeout=None, + result_ttl=None, after=None): """Creates a job to represent the delayed function call and enqueues it. @@ -123,7 +124,14 @@ class Queue(object): """ timeout = timeout or self._default_timeout job = Job.create(func, args, kwargs, connection=self.connection, - result_ttl=result_ttl, status=Status.QUEUED) + result_ttl=result_ttl, status=Status.QUEUED, + parent=after) + # If job depends on another job to finish, register itself on it's + # parent's waitlist instead of enqueueing it + if after is not None: + job.register_dependency() + job.save() + return job return self.enqueue_job(job, timeout=timeout) def enqueue(self, f, *args, **kwargs): @@ -149,15 +157,18 @@ class Queue(object): # q.enqueue(foo, args=(1, 2), kwargs={'a': 1}, timeout=30) timeout = None result_ttl = None - if 'args' in kwargs or 'kwargs' in kwargs: + after = None + if 'args' in kwargs or 'kwargs' in kwargs or 'after' in kwargs: assert args == (), 'Extra positional arguments cannot be used when using explicit args and kwargs.' # noqa timeout = kwargs.pop('timeout', None) args = kwargs.pop('args', None) result_ttl = kwargs.pop('result_ttl', None) + after = kwargs.pop('after', None) kwargs = kwargs.pop('kwargs', None) return self.enqueue_call(func=f, args=args, kwargs=kwargs, - timeout=timeout, result_ttl=result_ttl) + timeout=timeout, result_ttl=result_ttl, + after=after) def enqueue_job(self, job, timeout=None, set_meta_data=True): """Enqueues a job for delayed execution. @@ -188,6 +199,16 @@ class Queue(object): job.save() return job + def enqueue_waitlist(self, job): + """Enqueues all jobs in the waitlist and clears it""" + # TODO: can probably be pipelined + job_ids = job.get_waitlist() + for job_id in job.get_waitlist(): + waitlisted_job = Job.fetch(job_id, connection=self.connection) + self.enqueue_job(waitlisted_job) + if job_ids: + self.connection.delete(job.waitlist_key) + def pop_job_id(self): """Pops a given job ID from this Redis queue.""" return self.connection.lpop(self.key) diff --git a/rq/worker.py b/rq/worker.py index 8f1da270..8d43f9f8 100644 --- a/rq/worker.py +++ b/rq/worker.py @@ -328,6 +328,7 @@ class Worker(object): self.connection.expire(self.key, (job.timeout or 180) + 60) self.fork_and_perform_job(job) self.connection.expire(self.key, self.default_worker_ttl) + queue.enqueue_waitlist(job) did_perform_work = True finally: diff --git a/tests/test_job.py b/tests/test_job.py index 8b1d137d..87f649e1 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -131,6 +131,22 @@ class TestJob(RQTestCase): self.testconn.hkeys(job.key), ['created_at', 'data', 'description']) + def test_persistence_of_parent_job(self): + """Storing jobs with parent job, either instance or key.""" + parent_job = Job.create(func=some_calculation) + parent_job.save() + job = Job.create(func=some_calculation, parent=parent_job) + job.save() + stored_job = Job.fetch(job.id) + self.assertEqual(stored_job._parent_id, parent_job.id) + self.assertEqual(stored_job.parent, parent_job) + + job = Job.create(func=some_calculation, parent=parent_job.id) + job.save() + stored_job = Job.fetch(job.id) + self.assertEqual(stored_job._parent_id, parent_job.id) + self.assertEqual(stored_job.parent, parent_job) + def test_store_then_fetch(self): """Store, then fetch.""" job = Job.create(func=some_calculation, args=(3, 4), kwargs=dict(z=2)) @@ -242,10 +258,27 @@ class TestJob(RQTestCase): job.cleanup(ttl=-1) self.assertEqual(self.testconn.ttl(job.key), -1) - # Jobs with positive TTLs are eventually deleted + # Jobs with positive TTLs are eventually deleted job.cleanup(ttl=100) self.assertEqual(self.testconn.ttl(job.key), 100) # Jobs with 0 TTL are immediately deleted job.cleanup(ttl=0) self.assertRaises(NoSuchJobError, Job.fetch, job.id, self.testconn) + + def test_register_dependency(self): + """Test that jobs updates the correct job waitlist""" + job = Job.create(func=say_hello) + job._parent_id = 'id' + job.save() + job.register_dependency() + self.assertEqual(self.testconn.lpop('rq:job:id:waitlist'), job.id) + + def test_get_waitlist(self): + """Test that all waitlisted job ids are fetched""" + job = Job.create(func=say_hello) + self.assertEqual(job.get_waitlist(), []) + self.testconn.lpush(job.waitlist_key, 'id_1') + self.assertEqual(job.get_waitlist(), ['id_1']) + self.testconn.lpush(job.waitlist_key, 'id_2') + self.assertEqual(job.get_waitlist(), ['id_2', 'id_1']) diff --git a/tests/test_queue.py b/tests/test_queue.py index 061c984b..b5960046 100644 --- a/tests/test_queue.py +++ b/tests/test_queue.py @@ -237,6 +237,31 @@ class TestQueue(RQTestCase): job = q.enqueue(say_hello) self.assertEqual(job.status, Status.QUEUED) + def test_enqueue_waitlist(self): + """Enqueueing a waitlist pushes all jobs in waitlist to queue""" + q = Queue() + parent_job = Job.create(func=say_hello) + parent_job.save() + job_1 = Job.create(func=say_hello, parent=parent_job) + job_1.save() + job_1.register_dependency() + job_2 = Job.create(func=say_hello, parent=parent_job) + job_2.save() + job_2.register_dependency() + + # After waitlist is enqueued, job_1 and job_2 should be in queue + self.assertEqual(q.job_ids, []) + q.enqueue_waitlist(parent_job) + self.assertEqual(q.job_ids, [job_1.id, job_2.id]) + self.assertFalse(self.testconn.exists(parent_job.waitlist_key)) + + def test_enqueue_job_with_dependency(self): + """Job with dependency is not queued right away""" + parent_job = Job.create(func=say_hello) + q = Queue() + q.enqueue_call(say_hello, after=parent_job) + self.assertEqual(q.job_ids, []) + class TestFailedQueue(RQTestCase): def test_requeue_job(self): diff --git a/tests/test_worker.py b/tests/test_worker.py index d9e2fe52..143fdbca 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -234,3 +234,12 @@ class TestWorker(RQTestCase): self.assertEqual(job.is_queued, False) self.assertEqual(job.is_finished, False) self.assertEqual(job.is_failed, True) + + def test_job_dependency(self): + q = Queue() + w = Worker([q]) + parent_job = q.enqueue(say_hello) + job = q.enqueue_call(say_hello, after=parent_job) + w.work(burst=True) + job = Job.fetch(job.id) + self.assertEqual(job.status, 'finished') From 6550f866463cf3d25da5a4ee6b5ec5dcf31f01d3 Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Thu, 18 Apr 2013 22:18:56 +0700 Subject: [PATCH 2/7] Don't enqueue waitlisted jobs on failed execution. --- rq/queue.py | 3 ++- rq/worker.py | 3 ++- tests/test_worker.py | 9 ++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/rq/queue.py b/rq/queue.py index f70033a9..31d33c9f 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -113,7 +113,7 @@ class Queue(object): """Pushes a job ID on the corresponding Redis queue.""" self.connection.rpush(self.key, job_id) - def enqueue_call(self, func, args=None, kwargs=None, timeout=None, + def enqueue_call(self, func, args=None, kwargs=None, timeout=None, result_ttl=None, after=None): """Creates a job to represent the delayed function call and enqueues it. @@ -123,6 +123,7 @@ class Queue(object): contain options for RQ itself. """ timeout = timeout or self._default_timeout + # TODO: job with dependency shouldn't have "queued" as status job = Job.create(func, args, kwargs, connection=self.connection, result_ttl=result_ttl, status=Status.QUEUED, parent=after) diff --git a/rq/worker.py b/rq/worker.py index 8d43f9f8..6d35656c 100644 --- a/rq/worker.py +++ b/rq/worker.py @@ -328,7 +328,8 @@ class Worker(object): self.connection.expire(self.key, (job.timeout or 180) + 60) self.fork_and_perform_job(job) self.connection.expire(self.key, self.default_worker_ttl) - queue.enqueue_waitlist(job) + if job.status == 'finished': + queue.enqueue_waitlist(job) did_perform_work = True finally: diff --git a/tests/test_worker.py b/tests/test_worker.py index 143fdbca..86158010 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -236,10 +236,17 @@ class TestWorker(RQTestCase): self.assertEqual(job.is_failed, True) def test_job_dependency(self): + """Waitlisted jobs are enqueued only if their parents don't fail""" q = Queue() w = Worker([q]) parent_job = q.enqueue(say_hello) job = q.enqueue_call(say_hello, after=parent_job) w.work(burst=True) job = Job.fetch(job.id) - self.assertEqual(job.status, 'finished') + self.assertEqual(job.status, 'finished') + + parent_job = q.enqueue(div_by_zero) + job = q.enqueue_call(say_hello, after=parent_job) + w.work(burst=True) + job = Job.fetch(job.id) + self.assertNotEqual(job.status, 'finished') From 18ff57ef352f39a8c9077cd33c46b0ec0634e242 Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Sun, 28 Apr 2013 11:11:36 +0700 Subject: [PATCH 3/7] Avoid race conditions when enqueueing job with dependency. --- rq/exceptions.py | 4 ++++ rq/queue.py | 25 +++++++++++++++++++------ tests/test_queue.py | 9 ++++++++- tests/test_worker.py | 2 +- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/rq/exceptions.py b/rq/exceptions.py index 982a5801..7f8df37d 100644 --- a/rq/exceptions.py +++ b/rq/exceptions.py @@ -17,3 +17,7 @@ class UnpickleError(Exception): class DequeueTimeout(Exception): pass + + +class EnqueueError(Exception): + pass \ No newline at end of file diff --git a/rq/queue.py b/rq/queue.py index 31d33c9f..da0154a9 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -1,10 +1,12 @@ import times from .connections import resolve_connection from .job import Job, Status -from .exceptions import (NoSuchJobError, UnpickleError, - InvalidJobOperationError, DequeueTimeout) +from .exceptions import (DequeueTimeout, EnqueueError, InvalidJobOperationError, + NoSuchJobError, UnpickleError) from .compat import total_ordering +from redis import WatchError + def get_failed_queue(connection=None): """Returns a handle to the special failed queue.""" @@ -127,12 +129,23 @@ class Queue(object): job = Job.create(func, args, kwargs, connection=self.connection, result_ttl=result_ttl, status=Status.QUEUED, parent=after) - # If job depends on another job to finish, register itself on it's + + # If job depends on an unfinished job, register itself on it's # parent's waitlist instead of enqueueing it if after is not None: - job.register_dependency() - job.save() - return job + with self.connection.pipeline() as pipe: + try: + pipe.watch(after.key) + if after.status != Status.FINISHED: + job.register_dependency() + job.save() + return job + except WatchError: + raise EnqueueError( + 'Parent job (%s) modified during enqueue process. ' + + 'Bailing out to avoid race conditions' % after.id + ) + return self.enqueue_job(job, timeout=timeout) def enqueue(self, f, *args, **kwargs): diff --git a/tests/test_queue.py b/tests/test_queue.py index b5960046..9304e42d 100644 --- a/tests/test_queue.py +++ b/tests/test_queue.py @@ -256,12 +256,19 @@ class TestQueue(RQTestCase): self.assertFalse(self.testconn.exists(parent_job.waitlist_key)) def test_enqueue_job_with_dependency(self): - """Job with dependency is not queued right away""" + """Test enqueueing job with dependency""" + # Job with unfinished dependency is not immediately enqueued parent_job = Job.create(func=say_hello) q = Queue() q.enqueue_call(say_hello, after=parent_job) self.assertEqual(q.job_ids, []) + # Jobs dependent on finished jobs are immediately enqueued + parent_job.status = 'finished' + parent_job.save() + job = q.enqueue_call(say_hello, after=parent_job) + self.assertEqual(q.job_ids, [job.id]) + class TestFailedQueue(RQTestCase): def test_requeue_job(self): diff --git a/tests/test_worker.py b/tests/test_worker.py index 86158010..7e4bd2e9 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -236,7 +236,7 @@ class TestWorker(RQTestCase): self.assertEqual(job.is_failed, True) def test_job_dependency(self): - """Waitlisted jobs are enqueued only if their parents don't fail""" + """Enqueue waitlisted jobs only if their parents don't fail""" q = Queue() w = Worker([q]) parent_job = q.enqueue(say_hello) From 0dfb0413831c2df1df451fa50ffafdc85e0cbd78 Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Sun, 28 Apr 2013 11:20:39 +0700 Subject: [PATCH 4/7] Simplify enqueue_waitlist by using lpop. --- rq/job.py | 8 -------- rq/queue.py | 8 ++++---- tests/test_job.py | 9 --------- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/rq/job.py b/rq/job.py index 2a6efcfb..fbaa4248 100644 --- a/rq/job.py +++ b/rq/job.py @@ -425,14 +425,6 @@ class Job(object): # TODO: This can probably be pipelined self.connection.rpush(Job.waitlist_key_for(self._parent_id), self.id) - def get_waitlist(self): - """Returns all job ids in the waitlist. - """ - # TODO: This can probably be pipelined - - return self.connection.lrange( - self.waitlist_key, 0, self.connection.llen(self.waitlist_key) - 1) - def __str__(self): return '' % (self.id, self.description) diff --git a/rq/queue.py b/rq/queue.py index da0154a9..e06904df 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -216,12 +216,12 @@ class Queue(object): def enqueue_waitlist(self, job): """Enqueues all jobs in the waitlist and clears it""" # TODO: can probably be pipelined - job_ids = job.get_waitlist() - for job_id in job.get_waitlist(): + while True: + job_id = self.connection.lpop(job.waitlist_key) + if job_id is None: + break waitlisted_job = Job.fetch(job_id, connection=self.connection) self.enqueue_job(waitlisted_job) - if job_ids: - self.connection.delete(job.waitlist_key) def pop_job_id(self): """Pops a given job ID from this Redis queue.""" diff --git a/tests/test_job.py b/tests/test_job.py index 87f649e1..cafc8ad0 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -273,12 +273,3 @@ class TestJob(RQTestCase): job.save() job.register_dependency() self.assertEqual(self.testconn.lpop('rq:job:id:waitlist'), job.id) - - def test_get_waitlist(self): - """Test that all waitlisted job ids are fetched""" - job = Job.create(func=say_hello) - self.assertEqual(job.get_waitlist(), []) - self.testconn.lpush(job.waitlist_key, 'id_1') - self.assertEqual(job.get_waitlist(), ['id_1']) - self.testconn.lpush(job.waitlist_key, 'id_2') - self.assertEqual(job.get_waitlist(), ['id_2', 'id_1']) From 2e826e2b1f822a1c084c8239014160809162b1d2 Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Sun, 28 Apr 2013 11:27:50 +0700 Subject: [PATCH 5/7] Internally renamed the term "parent" to "dependency". --- rq/job.py | 40 ++++++++++++++++++++-------------------- rq/queue.py | 2 +- tests/test_job.py | 14 +++++++------- tests/test_queue.py | 4 ++-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/rq/job.py b/rq/job.py index fbaa4248..9f5d2729 100644 --- a/rq/job.py +++ b/rq/job.py @@ -64,7 +64,7 @@ class Job(object): # Job construction @classmethod def create(cls, func, args=None, kwargs=None, connection=None, - result_ttl=None, status=None, parent=None): + result_ttl=None, status=None, dependency=None): """Creates a new Job instance for the given function, arguments, and keyword arguments. """ @@ -87,9 +87,9 @@ class Job(object): job.description = job.get_call_string() job.result_ttl = result_ttl job._status = status - # parent could be job instance or id - if parent is not None: - job._parent_id = parent.id if isinstance(parent, Job) else parent + # dependency could be job instance or id + if dependency is not None: + job._dependency_id = dependency.id if isinstance(dependency, Job) else dependency return job @property @@ -123,17 +123,17 @@ class Job(object): return self.status == Status.STARTED @property - def parent(self): - """Returns a job's parent. To avoid repeated Redis fetches, we cache - job.parent as job._parent. + def dependency(self): + """Returns a job's dependency. To avoid repeated Redis fetches, we cache + job.dependency as job._dependency. """ - if self._parent_id is None: + if self._dependency_id is None: return None - if hasattr(self, '_parent'): - return self._parent - job = Job.fetch(self._parent_id, connection=self.connection) + if hasattr(self, '_dependency'): + return self._dependency + job = Job.fetch(self._dependency_id, connection=self.connection) job.refresh() - self._parent = job + self._dependency = job return job @property @@ -202,7 +202,7 @@ class Job(object): self.timeout = None self.result_ttl = None self._status = None - self._parent_id = None + self._dependency_id = None self.meta = {} @@ -313,7 +313,7 @@ class Job(object): self.timeout = int(obj.get('timeout')) if obj.get('timeout') else None self.result_ttl = int(obj.get('result_ttl')) if obj.get('result_ttl') else None # noqa self._status = obj.get('status') if obj.get('status') else None - self._parent_id = obj.get('parent_id', None) + self._dependency_id = obj.get('dependency_id', None) self.meta = unpickle(obj.get('meta')) if obj.get('meta') else {} def save(self, pipeline=None): @@ -344,8 +344,8 @@ class Job(object): obj['result_ttl'] = self.result_ttl if self._status is not None: obj['status'] = self._status - if self._parent_id is not None: - obj['parent_id'] = self._parent_id + if self._dependency_id is not None: + obj['dependency_id'] = self._dependency_id if self.meta: obj['meta'] = dumps(self.meta) @@ -415,15 +415,15 @@ class Job(object): def register_dependency(self): """Jobs may have a waitlist. Jobs in this waitlist are enqueued - only if the parent job is successfully performed. We maintain this + only if the dependency job is successfully performed. We maintain this waitlist in Redis, with key that looks something like: rq:job:job_id:waitlist = ['job_id_1', 'job_id_2'] - This method puts the job on it's parent's waitlist. + This method puts the job on it's dependency's waitlist. """ # TODO: This can probably be pipelined - self.connection.rpush(Job.waitlist_key_for(self._parent_id), self.id) + self.connection.rpush(Job.waitlist_key_for(self._dependency_id), self.id) def __str__(self): return '' % (self.id, self.description) @@ -459,7 +459,7 @@ class Job(object): 'description', '_args', 'created_at', 'enqueued_at', 'connection', '_result', 'result', 'timeout', '_kwargs', 'exc_info', '_id', 'data', '_instance', 'result_ttl', '_status', 'status', - '_parent_id', '_parent', 'parent', 'meta')) + '_dependency_id', '_dependency', 'dependency', 'meta')) if name in private_attrs: object.__setattr__(self, name, value) diff --git a/rq/queue.py b/rq/queue.py index e06904df..486af1a8 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -128,7 +128,7 @@ class Queue(object): # TODO: job with dependency shouldn't have "queued" as status job = Job.create(func, args, kwargs, connection=self.connection, result_ttl=result_ttl, status=Status.QUEUED, - parent=after) + dependency=after) # If job depends on an unfinished job, register itself on it's # parent's waitlist instead of enqueueing it diff --git a/tests/test_job.py b/tests/test_job.py index cafc8ad0..83a6eae6 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -135,17 +135,17 @@ class TestJob(RQTestCase): """Storing jobs with parent job, either instance or key.""" parent_job = Job.create(func=some_calculation) parent_job.save() - job = Job.create(func=some_calculation, parent=parent_job) + job = Job.create(func=some_calculation, dependency=parent_job) job.save() stored_job = Job.fetch(job.id) - self.assertEqual(stored_job._parent_id, parent_job.id) - self.assertEqual(stored_job.parent, parent_job) + self.assertEqual(stored_job._dependency_id, parent_job.id) + self.assertEqual(stored_job.dependency, parent_job) - job = Job.create(func=some_calculation, parent=parent_job.id) + job = Job.create(func=some_calculation, dependency=parent_job.id) job.save() stored_job = Job.fetch(job.id) - self.assertEqual(stored_job._parent_id, parent_job.id) - self.assertEqual(stored_job.parent, parent_job) + self.assertEqual(stored_job._dependency_id, parent_job.id) + self.assertEqual(stored_job.dependency, parent_job) def test_store_then_fetch(self): """Store, then fetch.""" @@ -269,7 +269,7 @@ class TestJob(RQTestCase): def test_register_dependency(self): """Test that jobs updates the correct job waitlist""" job = Job.create(func=say_hello) - job._parent_id = 'id' + job._dependency_id = 'id' job.save() job.register_dependency() self.assertEqual(self.testconn.lpop('rq:job:id:waitlist'), job.id) diff --git a/tests/test_queue.py b/tests/test_queue.py index 9304e42d..83a129e3 100644 --- a/tests/test_queue.py +++ b/tests/test_queue.py @@ -242,10 +242,10 @@ class TestQueue(RQTestCase): q = Queue() parent_job = Job.create(func=say_hello) parent_job.save() - job_1 = Job.create(func=say_hello, parent=parent_job) + job_1 = Job.create(func=say_hello, dependency=parent_job) job_1.save() job_1.register_dependency() - job_2 = Job.create(func=say_hello, parent=parent_job) + job_2 = Job.create(func=say_hello, dependency=parent_job) job_2.save() job_2.register_dependency() From 6ee45597ca589708ed5d4f8f68b84e4eb1925dab Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Tue, 7 May 2013 18:26:08 +0700 Subject: [PATCH 6/7] Don't fail if job dependency is modified during enqueue process. --- rq/exceptions.py | 5 +---- rq/queue.py | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/rq/exceptions.py b/rq/exceptions.py index 7f8df37d..25e4f0e1 100644 --- a/rq/exceptions.py +++ b/rq/exceptions.py @@ -15,9 +15,6 @@ class UnpickleError(Exception): super(UnpickleError, self).__init__(message, inner_exception) self.raw_data = raw_data + class DequeueTimeout(Exception): pass - - -class EnqueueError(Exception): - pass \ No newline at end of file diff --git a/rq/queue.py b/rq/queue.py index 0e8bdb7c..8a601022 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -1,7 +1,7 @@ import times from .connections import resolve_connection from .job import Job, Status -from .exceptions import (DequeueTimeout, EnqueueError, InvalidJobOperationError, +from .exceptions import (DequeueTimeout, InvalidJobOperationError, NoSuchJobError, UnpickleError) from .compat import total_ordering @@ -145,20 +145,21 @@ class Queue(object): dependency=after) # If job depends on an unfinished job, register itself on it's - # parent's waitlist instead of enqueueing it + # parent's waitlist instead of enqueueing it. + # If WatchError is raised in the process, that means something else is + # modifying the dependency. In this case we simply retry if after is not None: with self.connection.pipeline() as pipe: - try: - pipe.watch(after.key) - if after.status != Status.FINISHED: - job.register_dependency() - job.save() - return job - except WatchError: - raise EnqueueError( - 'Parent job (%s) modified during enqueue process. ' + - 'Bailing out to avoid race conditions' % after.id - ) + while True: + try: + pipe.watch(after.key) + if after.status != Status.FINISHED: + job.register_dependency() + job.save() + return job + break + except WatchError: + continue return self.enqueue_job(job, timeout=timeout) From fd44ad39d49e1ea978384d1480653781fa75175c Mon Sep 17 00:00:00 2001 From: Selwin Ong Date: Tue, 3 Sep 2013 08:03:50 +0700 Subject: [PATCH 7/7] Python 3 fixes for job dependency stuff. --- rq/job.py | 2 +- rq/queue.py | 2 +- tests/test_job.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rq/job.py b/rq/job.py index 7eaf5325..a10c340b 100644 --- a/rq/job.py +++ b/rq/job.py @@ -318,7 +318,7 @@ class Job(object): self.timeout = int(obj.get('timeout')) if obj.get('timeout') else None self.result_ttl = int(obj.get('result_ttl')) if obj.get('result_ttl') else None # noqa self._status = as_text(obj.get('status') if obj.get('status') else None) - self._dependency_id = obj.get('dependency_id', None) + self._dependency_id = as_text(obj.get('dependency_id', None)) self.meta = unpickle(obj.get('meta')) if obj.get('meta') else {} def dump(self): diff --git a/rq/queue.py b/rq/queue.py index 79442ac9..e9f3c7f9 100644 --- a/rq/queue.py +++ b/rq/queue.py @@ -240,7 +240,7 @@ class Queue(object): """Enqueues all jobs in the waitlist and clears it""" # TODO: can probably be pipelined while True: - job_id = self.connection.lpop(job.waitlist_key) + job_id = as_text(self.connection.lpop(job.waitlist_key)) if job_id is None: break waitlisted_job = Job.fetch(job_id, connection=self.connection) diff --git a/tests/test_job.py b/tests/test_job.py index 13ef3687..bc70d5e4 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -7,6 +7,7 @@ try: from cPickle import loads except ImportError: from pickle import loads +from rq.compat import as_text from rq.job import Job, get_current_job from rq.exceptions import NoSuchJobError, UnpickleError from rq.queue import Queue @@ -288,4 +289,4 @@ class TestJob(RQTestCase): job._dependency_id = 'id' job.save() job.register_dependency() - self.assertEqual(self.testconn.lpop('rq:job:id:waitlist'), job.id) + self.assertEqual(as_text(self.testconn.lpop('rq:job:id:waitlist')), job.id)