From 2da957a68da218db0ecc75786d66c2128cff400e Mon Sep 17 00:00:00 2001
From: Nikita Romaniuk <nikkonrom@gmail.com>
Date: Tue, 20 Oct 2020 02:57:03 +0300
Subject: [PATCH] scheduler: now operates with chunks of jobs (#1355)

* scheduler: now operates with chunks of jobs

* scheduler: set default chunk_size for ScheduledJobRegistry.get_jobs_to_schedule

* scheduler: fixed missing indent

* scheduler: added test for get_jobs_to_schedule() with chunk_size parameter

* scheduler: fixed test for passing python 3.5 (no f-strings)

* scheduler: fixed chunk_size in test make it lighter to run
---
 rq/registry.py          |  4 ++--
 rq/scheduler.py         | 11 +++++------
 tests/test_scheduler.py | 20 +++++++++++++++++---
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/rq/registry.py b/rq/registry.py
index 579d0c5c..5a5c078b 100644
--- a/rq/registry.py
+++ b/rq/registry.py
@@ -286,11 +286,11 @@ class ScheduledJobRegistry(BaseRegistry):
         score = timestamp if timestamp is not None else current_timestamp()
         return connection.zremrangebyscore(self.key, 0, score)
 
-    def get_jobs_to_schedule(self, timestamp=None):
+    def get_jobs_to_schedule(self, timestamp=None, chunk_size=1000):
         """Remove jobs whose timestamp is in the past from registry."""
         score = timestamp if timestamp is not None else current_timestamp()
         return [as_text(job_id) for job_id in
-                self.connection.zrangebyscore(self.key, 0, score)]
+                self.connection.zrangebyscore(self.key, 0, score, start=0, num=chunk_size)]
 
     def get_scheduled_time(self, job_or_id):
         """Returns datetime (UTC) at which job is scheduled to be enqueued"""
diff --git a/rq/scheduler.py b/rq/scheduler.py
index b55f728a..4d84b48a 100644
--- a/rq/scheduler.py
+++ b/rq/scheduler.py
@@ -28,7 +28,6 @@ setup_loghandlers(
 
 
 class RQScheduler(object):
-
     # STARTED: scheduler has been started but sleeping
     # WORKING: scheduler is in the midst of scheduling jobs
     # STOPPED: scheduler is in stopped condition
@@ -137,11 +136,11 @@ class RQScheduler(object):
             queue = Queue(registry.name, connection=self.connection)
 
             with self.connection.pipeline() as pipeline:
-                # This should be done in bulk
-                for job_id in job_ids:
-                    job = Job.fetch(job_id, connection=self.connection)
-                    queue.enqueue_job(job, pipeline=pipeline)
-                registry.remove_jobs(timestamp)
+                jobs = Job.fetch_many(job_ids, connection=self.connection)
+                for job in jobs:
+                    if job is not None:
+                        queue.enqueue_job(job, pipeline=pipeline)
+                        registry.remove(job, pipeline=pipeline)
                 pipeline.execute()
         self._status = self.Status.STARTED
 
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
index 480e993e..13c68e27 100644
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -35,6 +35,21 @@ class TestScheduledJobRegistry(RQTestCase):
         self.assertEqual(registry.get_jobs_to_enqueue(timestamp + 20),
                          ['foo', 'bar'])
 
+    def test_get_jobs_to_schedule_with_chunk_size(self):
+        """Max amount of jobs returns by get_jobs_to_schedule() equal to chunk_size"""
+        queue = Queue(connection=self.testconn)
+        registry = ScheduledJobRegistry(queue=queue)
+        timestamp = current_timestamp()
+        chunk_size = 5
+
+        for index in range(0, chunk_size * 2):
+            self.testconn.zadd(registry.key, {'foo_{}'.format(index): 1})
+
+        self.assertEqual(len(registry.get_jobs_to_schedule(timestamp, chunk_size)),
+                         chunk_size)
+        self.assertEqual(len(registry.get_jobs_to_schedule(timestamp, chunk_size * 2)),
+                         chunk_size * 2)
+
     def test_get_scheduled_time(self):
         """get_scheduled_time() returns job's scheduled datetime"""
         queue = Queue(connection=self.testconn)
@@ -87,7 +102,7 @@ class TestScheduledJobRegistry(RQTestCase):
             with mock_tz, mock_day, mock_atz:
                 registry.schedule(job, datetime(2019, 1, 1))
                 self.assertEqual(self.testconn.zscore(registry.key, job.id),
-                                1546300800 + 18000)  # 2019-01-01 UTC in Unix timestamp
+                                 1546300800 + 18000)  # 2019-01-01 UTC in Unix timestamp
 
             # second, time.daylight != 0 (in DST)
             # mock the sitatuoin for American/New_York not in DST (UTC - 4)
@@ -100,8 +115,7 @@ class TestScheduledJobRegistry(RQTestCase):
             with mock_tz, mock_day, mock_atz:
                 registry.schedule(job, datetime(2019, 1, 1))
                 self.assertEqual(self.testconn.zscore(registry.key, job.id),
-                                1546300800 + 14400)  # 2019-01-01 UTC in Unix timestamp
-
+                                 1546300800 + 14400)  # 2019-01-01 UTC in Unix timestamp
 
             # Score is always stored in UTC even if datetime is in a different tz
             tz = timezone(timedelta(hours=7))