diff --git a/checkin_notes b/checkin_notes index adc3c45d49..22fdd5cbbb 100644 --- a/checkin_notes +++ b/checkin_notes @@ -2123,3 +2123,28 @@ David Mar 9 2008 AdvancedFrame.cpp lib/ util.C + +David Mar 10 2008 + - server: fix botched checkin from 7 March. + The new field (workunit.rsc_bandwidth_bound) + goes at the END of the record. + Always do it this way! + - make_work: after creating a batch of new WUs, + we were waiting 60 sec for the transitioner to + create the results for them + (so that our next count of unsent results would be correct). + This is bogus; if e.g. the transitioner isn't running, + we'll never get the results, and we'll keep creating WUs forever. + + Instead: explicitly wait for there to be results for + the last WU from the batch just created. + + - scheduler: parse <allow_non_preferred_apps>, <allow_beta_work> correctly. + + db/ + schema.sql + html/ops/ + db_update.php + sched/ + make_work.C + sched_send.C diff --git a/db/schema.sql b/db/schema.sql index 294fe724b7..80ffb397b6 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -192,7 +192,6 @@ create table workunit ( rsc_fpops_bound double not null, rsc_memory_bound double not null, rsc_disk_bound double not null, - rsc_bandwidth_bound double not null, need_validate smallint not null, canonical_resultid integer not null, canonical_credit double not null, @@ -210,6 +209,7 @@ create table workunit ( max_success_results integer not null, result_template_file varchar(63) not null, priority integer not null, + rsc_bandwidth_bound double not null, mod_time timestamp, primary key (id) ) engine=InnoDB; diff --git a/html/ops/db_update.php b/html/ops/db_update.php index 7c67258362..f21d51dac4 100755 --- a/html/ops/db_update.php +++ b/html/ops/db_update.php @@ -555,15 +555,24 @@ function update_2_18_2008() { "); } +// If you haven't done 3_7, skip both of the following: +// function update_3_7_2008() { do_query("alter table workunit add column rsc_bandwidth_bound double not null after rsc_disk_bound"); } +function update_3_7_undo_2008() { + 
do_query("alter table workunit drop column rsc_bandwidth_bound"); +} + +function update_3_10_2008() { + do_query("alter table workunit add column rsc_bandwidth_bound double not null"); +} // modify the following to call the function you want. // Make sure you do all needed functions, in order. // (Look at your DB structure using "explain" queries to see // which ones you need). -//update_2_18_2008(); +update_3_10_2008(); ?> diff --git a/sched/make_work.C b/sched/make_work.C index 04bd3c8755..25d9bf5221 100644 --- a/sched/make_work.C +++ b/sched/make_work.C @@ -145,6 +145,27 @@ void make_new_wu(DB_WORKUNIT& original_wu, char* starting_xml, int start_time) { ); } +// wait for the transitioner to create a result for the given WU. +// This keeps us from getting infinitely far ahead of the transitioner +// (e.g. if the transitioner isn't running) +// +void wait_for_results(int wu_id) { + DB_RESULT result; + int count, retval; + char buf[256]; + + sprintf(buf, "where workunitid=%d", wu_id); + while (1) { + retval = result.count(count, buf); + if (retval) { + log_messages.printf(MSG_CRITICAL, "result.count: %d\n", retval); + exit(1); + } + if (count > 0) return; + sleep(10); + } +} + void make_work(vector &wu_names) { int retval, start_time=time(0); char keypath[256]; @@ -218,6 +239,7 @@ void make_work(vector &wu_names) { int results_needed = cushion - unsent_results; + int new_wu_id = 0; while (1) { DB_WORKUNIT& wu = wus[index++]; if (index == nwu_names) index=0; @@ -229,14 +251,14 @@ void make_work(vector &wu_names) { total_wus++; } make_new_wu(wu, wu.xml_doc, start_time); + new_wu_id = wu.id; results_needed -= wu.target_nresults; if (results_needed <= 0) break; } if (one_pass) break; - // wait a while for the transitioner to make results - // - sleep(60); + + wait_for_results(new_wu_id); } } diff --git a/sched/sched_send.C b/sched/sched_send.C index 52eaa9a879..7a3702e0f9 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -287,6 +287,7 @@ static int 
get_host_info(SCHEDULER_REPLY& reply) { std::string str; unsigned int pos = 0; int temp_int; + bool flag; extract_venue(reply.user.project_prefs, reply.host.venue, buf); str = buf; @@ -303,11 +304,11 @@ static int get_host_info(SCHEDULER_REPLY& reply) { pos = str.find("", pos) + 1; } - if (parse_int(buf,"", temp_int)) { - reply.wreq.host_info.allow_non_preferred_apps = true; + if (parse_bool(buf,"", flag)) { + reply.wreq.host_info.allow_non_preferred_apps = flag; } - if (parse_int(buf,"", temp_int)) { - reply.wreq.host_info.allow_beta_work = true; + if (parse_bool(buf,"", flag)) { + reply.wreq.host_info.allow_beta_work = flag; } // Decide whether or not this computer is a 'reliable' computer