diff --git a/checkin_notes b/checkin_notes index 3579cfd1ec..dc5a085cf9 100644 --- a/checkin_notes +++ b/checkin_notes @@ -4344,3 +4344,15 @@ David 6 May 2009 client/ gui_rpc_server_ops.cpp cpu_sched.cpp + +David 6 May 2009 + - feeder: add -appids option: lets you specify which apps to + get jobs for (default is all). + Useful if you're mixing locality and regular scheduling. + - a little E@h-specific stuff + From Bernd Machenschalk. + + sched/ + feeder.cpp + handle_request.cpp + sched_send.cpp diff --git a/sched/feeder.cpp b/sched/feeder.cpp index 0da6647cbe..5d6917f0ba 100644 --- a/sched/feeder.cpp +++ b/sched/feeder.cpp @@ -29,6 +29,7 @@ // recommended if using HR with multiple schedulers // [ -sleep_interval x ] sleep x seconds if nothing to do // [ -allapps ] interleave results from all applications uniformly +// [ -appids a1{,a2} ] get work only for appids a1,... (comma-separated list) // [ -purge_stale x ] remove work items from the shared memory segment // that have been there for longer then x minutes // but haven't been assigned @@ -632,6 +633,7 @@ int main(int argc, char** argv) { int i, retval; void* p; char path[256]; + char* appids=NULL; unlink(REREAD_DB_FILENAME); @@ -660,6 +662,10 @@ int main(int argc, char** argv) { order_clause = "order by r1.priority desc, r1.workunitid"; } else if (!strcmp(argv[i], "-purge_stale")) { purge_stale_time = atoi(argv[++i])*60; + } else if (!strcmp(argv[i], "-appids")) { + strcat(mod_select_clause, " and workunit.appid in ("); + strcat(mod_select_clause, argv[++i]); + strcat(mod_select_clause, ")"); } else if (!strcmp(argv[i], "-mod")) { int n = atoi(argv[++i]); int j = atoi(argv[++i]); @@ -678,11 +684,6 @@ int main(int argc, char** argv) { } } -#ifdef EINSTEIN_AT_HOME - // don't read locality scheduling workunits into the feeder - strcat(mod_select_clause, " and workunit.name not like \"%\\_\\_%\" "); -#endif - log_messages.printf(MSG_NORMAL, "Starting\n"); show_version(); diff --git a/sched/handle_request.cpp
b/sched/handle_request.cpp index e4259fd786..5f96f4bfe1 100644 --- a/sched/handle_request.cpp +++ b/sched/handle_request.cpp @@ -65,6 +65,12 @@ static bool find_host_by_other(DB_USER& user, HOST req_host, DB_HOST& host) { char buf[2048]; char dn[512], ip[512], os[512], pm[512]; +#ifdef EINSTEIN_AT_HOME + // This is to prevent GRID hosts that manipulate their hostids from flooding E@H's DB with slow queries + if ((user.id == 282952) || (user.id == 243543)) + return false; +#endif + // Only check if the fields are populated if (strlen(req_host.domain_name) && strlen(req_host.last_ip_addr) && strlen(req_host.os_name) && strlen(req_host.p_model)) { strcpy(dn, req_host.domain_name); @@ -77,7 +83,8 @@ static bool find_host_by_other(DB_USER& user, HOST req_host, DB_HOST& host) { escape_string(pm, 512); sprintf(buf, - "where userid=%d and id>%d and domain_name='%s' and last_ip_addr = '%s' and os_name = '%s' and p_model = '%s' and m_nbytes = %lf order by id desc", user.id, req_host.id, dn, ip, os, pm, req_host.m_nbytes + "where userid=%d and id>%d and domain_name='%s' and last_ip_addr = '%s' and os_name = '%s' and p_model = '%s'" + " and m_nbytes = %lf order by id desc", user.id, req_host.id, dn, ip, os, pm, req_host.m_nbytes ); if (!host.enumerate(buf)) { host.end_enumerate(); diff --git a/sched/sched_send.cpp b/sched/sched_send.cpp index d7670c9783..9b1bdb7304 100644 --- a/sched/sched_send.cpp +++ b/sched/sched_send.cpp @@ -127,8 +127,8 @@ inline void get_max_wus_in_progress_multiplier() { } static const char* find_user_friendly_name(int appid) { - APP* app = ssp->lookup_app(appid); - if (app) return app->user_friendly_name; + APP* app = ssp->lookup_app(appid); + if (app) return app->user_friendly_name; return "deprecated application"; } @@ -312,16 +312,16 @@ static void get_prefs_info() { pos = str.find("", pos) + 1; } - if (parse_bool(buf,"allow_non_preferred_apps", flag)) { - g_wreq->allow_non_preferred_apps = flag; + if (parse_bool(buf,"allow_non_preferred_apps", 
flag)) { + g_wreq->allow_non_preferred_apps = flag; } - if (parse_bool(buf,"allow_beta_work", flag)) { + if (parse_bool(buf,"allow_beta_work", flag)) { g_wreq->allow_beta_work = flag; - } - if (parse_bool(buf,"no_gpus", flag)) { + } + if (parse_bool(buf,"no_gpus", flag)) { g_wreq->no_gpus = flag; } - if (parse_bool(buf,"no_cpu", flag)) { + if (parse_bool(buf,"no_cpu", flag)) { g_wreq->no_cpu = flag; } } @@ -342,7 +342,7 @@ static void get_host_info() { double expavg_time = g_reply->host.expavg_time; update_average(0, 0, CREDIT_HALF_LIFE, expavg_credit, expavg_time); - // Platforms other then Windows, Linux and Intel Macs need a + // Platforms other than Windows, Linux and Intel Macs need a // larger set of computers to be marked reliable // double multiplier = 1.0; @@ -351,9 +351,9 @@ static void get_host_info() { || (strstr(g_reply->host.os_name,"Darwin") && !(strstr(g_reply->host.p_vendor,"Power Macintosh")) )) { - multiplier = 1.0; + multiplier = 1.0; } else { - multiplier = 1.8; + multiplier = 1.8; } if ( @@ -385,7 +385,7 @@ bool app_not_selected(WORKUNIT& wu) { if (g_wreq->preferred_apps.size() == 0) return false; for (i=0; i<g_wreq->preferred_apps.size(); i++) { if (wu.appid == g_wreq->preferred_apps[i].appid) { - g_wreq->preferred_apps[i].work_available = true; + g_wreq->preferred_apps[i].work_available = true; return false; } } @@ -898,22 +898,22 @@ int add_result_to_reply( // if (config.reliable_on_priority && result.priority >= config.reliable_on_priority && config.reliable_reduced_delay_bound > 0.01 ) { - double reduced_delay_bound = delay_bound*config.reliable_reduced_delay_bound; - double est_wallclock_duration = estimate_duration(wu, *bavp); + double reduced_delay_bound = delay_bound*config.reliable_reduced_delay_bound; + double est_wallclock_duration = estimate_duration(wu, *bavp); // Check to see how reasonable this reduced time is.
// Increase it to twice the estimated delay bound // if all the following apply: // - // 1) Twice the estimate is longer then the reduced delay bound - // 2) Twice the estimate is less then the original delay bound - // 3) Twice the estimate is less then the twice the reduced delay bound - if (est_wallclock_duration*2 > reduced_delay_bound + // 1) Twice the estimate is longer than the reduced delay bound + // 2) Twice the estimate is less than the original delay bound + // 3) Twice the estimate is less than twice the reduced delay bound + if (est_wallclock_duration*2 > reduced_delay_bound && est_wallclock_duration*2 < delay_bound && est_wallclock_duration*2 < delay_bound*config.reliable_reduced_delay_bound*2 ) { - reduced_delay_bound = est_wallclock_duration*2; + reduced_delay_bound = est_wallclock_duration*2; } - delay_bound = (int) reduced_delay_bound; + delay_bound = (int) reduced_delay_bound; } result.report_deadline = result.sent_time + delay_bound; @@ -1131,23 +1131,23 @@ static void explain_to_user() { // Tell the user about applications with no work // for (i=0; i<g_wreq->preferred_apps.size(); i++) { - if (!g_wreq->preferred_apps[i].work_available) { - APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid); - // don't write message if the app is deprecated - if (app != NULL) { - char explanation[256]; - sprintf(explanation, "No work is available for %s", + if (!g_wreq->preferred_apps[i].work_available) { + APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid); + // don't write message if the app is deprecated + if (app != NULL) { + char explanation[256]; + sprintf(explanation, "No work is available for %s", find_user_friendly_name(g_wreq->preferred_apps[i].appid) ); - g_reply->insert_message(USER_MESSAGE(explanation, "high")); - } - } + g_reply->insert_message(USER_MESSAGE(explanation, "high")); + } + } } // Tell the user about applications they didn't qualify for // for (i=0; i<g_wreq->no_work_messages.size(); i++){ -
g_reply->insert_message(g_wreq->no_work_messages.at(i)); + g_reply->insert_message(g_wreq->no_work_messages.at(i)); } if (g_wreq->no_allowed_apps_available) { g_reply->insert_message( @@ -1461,11 +1461,19 @@ void send_work() { if (config.locality_scheduler_fraction > 0) { if (drand() < config.locality_scheduler_fraction) { - send_work_locality(); - send_work_old(); + if (config.debug_locality) + log_messages.printf(MSG_NORMAL, "[mixed] sending locality work first\n"); + send_work_locality(); + if (config.debug_locality) + log_messages.printf(MSG_NORMAL, "[mixed] sending non-locality work second\n"); + send_work_old(); } else { - send_work_old(); - send_work_locality(); + if (config.debug_locality) + log_messages.printf(MSG_NORMAL, "[mixed] sending non-locality work first\n"); + send_work_old(); + if (config.debug_locality) + log_messages.printf(MSG_NORMAL, "[mixed] sending locality work second\n"); + send_work_locality(); } } else if (config.locality_scheduling) { send_work_locality();