diff --git a/checkin_notes b/checkin_notes index 55bbeca1e7..52c9135576 100644 --- a/checkin_notes +++ b/checkin_notes @@ -7713,3 +7713,52 @@ David 01 Nov 2010 client/ cpu_sched.cpp + +David 01 Nov 2010 + - client: fix problems with job scheduling policy. + Old: job scheduling has 2 phases. + In the first phase (schedule_cpus()) we make a list of jobs, + with deadline-miss and high-STD jobs first. + Keep track of the RAM used, + and skip jobs that would exceed available RAM. + Stop scanning when the # of CPUs used by jobs in the list + exceeds the # of actual CPUs. + + In the 2nd phase (enforce_schedule()), we add currently running jobs + (which may be in the middle of a time slice) to the list, + and reorder to give priority to such jobs, + and possibly also to multi-thread jobs. + We then run and/or preempt jobs, keeping track of RAM used. + + Problems: + - suppose we add an EDF 1-CPU job to the list, then a MT job. + We'll stop at that point because #CPUs is exceeded. + But enforce_schedule() won't run the MT job, + and CPUs will be idle. + - Because the list may be reordered, skipping jobs based + on RAM is not correct, and may cause deadlines to be missed. + + New: + - when making the job list, keep track of #CPUs used + by MT jobs and non-MT jobs separately. + Stop the scan only if the non-MT count exceeds #CPUs. + This ensures that we have enough jobs to use all the CPUs, + even if the MT jobs can't be run for whatever reason. + - don't skip jobs because of RAM usage + - skip MT jobs if the MT CPU count is at least #CPUs + + Notes: + - ignoring RAM usage in phase 1 can cause idleness in some cases, + e.g. suppose there are 4 GB of RAM and the list has + jobs that use 3 GB, but there are also some jobs that use 1 GB. + I'm not sure how to fix this. + - Maybe the 2-phase approach is not a good idea. + We did it this way for efficiency, + so that we don't have to recompute the job list + each time a job checkpoints. 
+ But this is probably not a concern, + and I like the idea of a simpler approach, + e.g. reducing the policy to a single comparison function. + + client/ + cpu_sched.cpp diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp index 9cff03fb78..76a1b372f3 100644 --- a/client/cpu_sched.cpp +++ b/client/cpu_sched.cpp @@ -71,14 +71,14 @@ using std::list; // struct PROC_RESOURCES { int ncpus; - double ncpus_used; - double ram_left; + double ncpus_used_st; // #CPUs of GPU or single-thread jobs + double ncpus_used_mt; // #CPUs of multi-thread jobs COPROCS coprocs; // should we stop scanning jobs? // inline bool stop_scan_cpu() { - return ncpus_used >= ncpus; + return ncpus_used_st >= ncpus; } inline bool stop_scan_coproc(int rsc_type) { @@ -94,9 +94,7 @@ struct PROC_RESOURCES { if (rp->schedule_backoff > gstate.now) return false; if (rp->uses_coprocs()) { if (gpu_suspend_reason) return false; - if (sufficient_coprocs( - *rp->avp, log_flags.cpu_sched_debug) - ) { + if (sufficient_coprocs(*rp->avp, log_flags.cpu_sched_debug)) { return true; } else { if (log_flags.cpu_sched_debug) { @@ -106,10 +104,10 @@ struct PROC_RESOURCES { } return false; } + } else if (rp->avp->avg_ncpus > 1) { + return (ncpus_used_mt + rp->avp->avg_ncpus < ncpus); } else { - // otherwise, only if CPUs are available - // - return (ncpus_used < ncpus); + return (ncpus_used_st < ncpus); } } @@ -119,7 +117,13 @@ struct PROC_RESOURCES { reserve_coprocs( *rp->avp, log_flags.cpu_sched_debug, "cpu_sched_debug" ); - ncpus_used += rp->avp->avg_ncpus; + if (rp->uses_coprocs()) { + ncpus_used_st += rp->avp->avg_ncpus; + } else if (rp->avp->avg_ncpus > 1) { + ncpus_used_mt += rp->avp->avg_ncpus; + } else { + ncpus_used_st += rp->avp->avg_ncpus; + } } bool sufficient_coprocs(APP_VERSION& av, bool log_flag) { @@ -409,7 +413,9 @@ RESULT* first_coproc_result(int rsc_type) { return best; } -// Return earliest-deadline result for given resource type. 
+// Return earliest-deadline result for given resource type; +// return only results projected to miss their deadline, +// or from projects with extreme DCF // static RESULT* earliest_deadline_result(int rsc_type) { RESULT *best_result = NULL; @@ -624,7 +630,6 @@ bool CLIENT_STATE::possibly_schedule_cpus() { } // Check whether the job can be run: -// - it will fit in RAM // - we have enough shared-mem segments (old Mac problem) // If so, update proc_rsc and anticipated debts, and return true // @@ -633,20 +638,6 @@ static bool schedule_if_possible( const char* description ) { if (atp) { - // see if it fits in available RAM - // - if (atp->procinfo.working_set_size_smoothed > proc_rsc.ram_left) { - if (log_flags.cpu_sched_debug) { - msg_printf(rp->project, MSG_INFO, - "[cpu_sched] %s working set too large: %.2fMB", - rp->name, atp->procinfo.working_set_size_smoothed/MEGA - ); - } - atp->too_large = true; - return false; - } - atp->too_large = false; - if (gstate.retry_shmem_time > gstate.now) { if (atp->app_client_shm.shm == NULL) { if (log_flags.cpu_sched_debug) { @@ -660,17 +651,6 @@ static bool schedule_if_possible( } atp->needs_shmem = false; } - proc_rsc.ram_left -= atp->procinfo.working_set_size_smoothed; - } else { - if (rp->avp->max_working_set_size > proc_rsc.ram_left) { - if (log_flags.cpu_sched_debug) { - msg_printf(rp->project, MSG_INFO, - "[cpu_sched] %s projected working set too large: %.2fMB", - rp->name, rp->avp->max_working_set_size/MEGA - ); - } - return false; - } } if (log_flags.cpu_sched_debug) { @@ -761,8 +741,8 @@ void CLIENT_STATE::schedule_cpus() { bool can_run; proc_rsc.ncpus = ncpus; - proc_rsc.ncpus_used = 0; - proc_rsc.ram_left = available_ram(); + proc_rsc.ncpus_used_st = 0; + proc_rsc.ncpus_used_mt = 0; proc_rsc.coprocs.clone(host_info.coprocs, false); if (log_flags.cpu_sched_debug) { @@ -819,9 +799,17 @@ void CLIENT_STATE::schedule_cpus() { ordered_scheduled_results.clear(); + // first, add GPU jobs + add_coproc_jobs(RSC_TYPE_CUDA, 
proc_rsc); add_coproc_jobs(RSC_TYPE_ATI, proc_rsc); + // then add CPU jobs. + // Note: the jobs that actually get run are not necessarily + // an initial segment of this list; + // e.g. a multithread job may not get run because it has + // a high-priority single-thread job ahead of it. + // choose CPU jobs from projects with CPU deadline misses // #ifdef SIM @@ -874,6 +862,9 @@ static inline bool in_ordered_scheduled_results(ACTIVE_TASK* atp) { // if find a MT job J, and X < ncpus, move J before all non-MT jobs // But don't promote a MT job ahead of a job in EDF // +// This is needed because there may always be a 1-CPU job +// in the middle of its time-slice, and MT jobs could starve. +// static void promote_multi_thread_jobs(vector& runnable_jobs) { double cpus_used = 0; vector::iterator first_non_mt = runnable_jobs.end();