diff --git a/checkin_notes b/checkin_notes index 269786972e..f29e1834a1 100755 --- a/checkin_notes +++ b/checkin_notes @@ -6045,3 +6045,31 @@ Korpela 2003/09/03 api/gutil.C configure.ac + +David Sept 3 2003 + - Changed the way a WU's resources usage is described. + There are now separate estimates and bounds for FP ops. + Nothing about integer ops. + Disk and memory numbers are bounds. + + This changes the client/server RPC format and the DB format, + so it will require a major version number increment + + client/ + app.C + client_state.C + client_types.C,h + http.C + net_xfer.C + db/ + boinc_db.C,h + schema.sql + lib/ + crypt.C,h + sched/ + handle_request.C + test/ + test_rsc.py + testbase.py + tools/ + create_work.C diff --git a/client/app.C b/client/app.C index 3d5b588700..6f35d7d7dd 100644 --- a/client/app.C +++ b/client/app.C @@ -124,8 +124,8 @@ int ACTIVE_TASK::init(RESULT* rp) { wup = rp->wup; app_version = wup->avp; max_cpu_time = gstate.estimate_cpu_time(*rp->wup)*2; - max_disk_usage = rp->wup->rsc_disk; - max_mem_usage = rp->wup->rsc_memory; + max_disk_usage = rp->wup->rsc_disk_bound; + max_mem_usage = rp->wup->rsc_memory_bound; return 0; } diff --git a/client/client_state.C b/client/client_state.C index 4b76f858c3..20fe9fc279 100644 --- a/client/client_state.C +++ b/client/client_state.C @@ -225,7 +225,7 @@ int CLIENT_STATE::init() { if (show_projects) { printf("projects:\n"); for (i=0; imaster_url, projects[i]->project_name ); } @@ -236,8 +236,9 @@ int CLIENT_STATE::init() { PROJECT* project = lookup_project(detach_project_url); if (project) { detach_project(project); + msg_printf(project, MSG_INFO, "detached from %s\n", detach_project_url); } else { - printf("project %s not found\n", detach_project_url); + msg_printf(NULL, MSG_ERROR, "project %s not found\n", detach_project_url); } exit(0); } @@ -246,9 +247,9 @@ int CLIENT_STATE::init() { PROJECT* project = lookup_project(reset_project_url); if (project) { reset_project(project); - msg_printf(project, MSG_INFO, "Project has been reset"); + msg_printf(project, MSG_INFO, "Project %s has been reset", reset_project_url); } else { - printf("project %s not found\n", reset_project_url); + msg_printf(NULL, MSG_ERROR, "project %s not found\n", reset_project_url); } exit(0); } @@ -258,7 +259,7 @@ int CLIENT_STATE::init() { if (project) { project->sched_rpc_pending = true; } else { - printf("project %s not found\n", update_prefs_url); + msg_printf(NULL, MSG_ERROR, "project %s not found\n", update_prefs_url); } } @@ -520,27 +521,24 @@ int CLIENT_STATE::current_disk_usage(double& size) { double CLIENT_STATE::estimate_cpu_time(WORKUNIT& wu) { double x; - x = wu.rsc_fpops/host_info.p_fpops; - x += wu.rsc_iops/host_info.p_iops; + x = wu.rsc_fpops_est/host_info.p_fpops; return x; } -inline double force_fraction(double f) -{ +inline double force_fraction(double f) { if (f < 0) return 0; if (f > 1) return 1; return f; } -double CLIENT_STATE::get_percent_done(RESULT* result) -{ +double CLIENT_STATE::get_percent_done(RESULT* result) { ACTIVE_TASK* atp = active_tasks.lookup_result(result); return atp ? force_fraction(atp->fraction_done) : 0.0; } // returns true if start_hour == end_hour or start_hour <= now < end_hour -inline bool now_between_two_hours(int start_hour, int end_hour) -{ +// +inline bool now_between_two_hours(int start_hour, int end_hour) { if (start_hour == end_hour) { // always work return true; diff --git a/client/client_types.C b/client/client_types.C index 03d7c7bd93..e40e296e56 100644 --- a/client/client_types.C +++ b/client/client_types.C @@ -672,10 +672,10 @@ int WORKUNIT::parse(FILE* in) { project = NULL; // Default these to very large values (1 week on a 1 cobblestone machine) // so we don't keep asking the server for more work - rsc_fpops = 1e9*SECONDS_PER_DAY*7; - rsc_iops = 1e9*SECONDS_PER_DAY*7; - rsc_memory = 4e9*SECONDS_PER_DAY*7; - rsc_disk = 1024*1024*1024; // 1 GB + rsc_fpops_est = 1e9*SECONDS_PER_DAY*7; + rsc_fpops_bound = 4e9*SECONDS_PER_DAY*7; + rsc_memory_bound = 1e8; + rsc_disk_bound = 1e9; while (fgets(buf, 256, in)) { if (match_tag(buf, "")) return 0; else if (parse_str(buf, "", name, sizeof(name))) continue; @@ -683,10 +683,10 @@ int WORKUNIT::parse(FILE* in) { else if (parse_int(buf, "", version_num)) continue; else if (parse_str(buf, "", command_line, sizeof(command_line))) continue; else if (parse_str(buf, "", env_vars, sizeof(env_vars))) continue; - else if (parse_double(buf, "", rsc_fpops)) continue; - else if (parse_double(buf, "", rsc_iops)) continue; - else if (parse_double(buf, "", rsc_memory)) continue; - else if (parse_double(buf, "", rsc_disk)) continue; + else if (parse_double(buf, "", rsc_fpops_est)) continue; + else if (parse_double(buf, "", rsc_fpops_bound)) continue; + else if (parse_double(buf, "", rsc_memory_bound)) continue; + else if (parse_double(buf, "", rsc_disk_bound)) continue; else if (match_tag(buf, "")) { file_ref.parse(in); input_files.push_back(file_ref); @@ -707,19 +707,19 @@ int WORKUNIT::write(FILE* out) { " %d\n" " %s\n" " %s\n" - " %f\n" - " %f\n" - " %f\n" - " %f\n", + " %f\n" + " %f\n" + " %f\n" + " %f\n", name, app_name, version_num, command_line, env_vars, - rsc_fpops, - rsc_iops, - rsc_memory, - rsc_disk + rsc_fpops_est, + rsc_fpops_bound, + rsc_memory_bound, + rsc_disk_bound ); for (i=0; iopen_server(); + retval = htp->open_server(); + if (retval) { + htp->http_op_state = HTTP_STATE_DONE; + htp->http_op_retval = retval; + } break; } if ((htp->hrh.status/100)*100 != HTTP_STATUS_OK) { diff --git a/client/net_xfer.C b/client/net_xfer.C index 65dfd1f0ab..749107ea79 100644 --- a/client/net_xfer.C +++ b/client/net_xfer.C @@ -73,7 +73,7 @@ typedef int socklen_t; typedef size_t socklen_t; #endif -int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) { +int NET_XFER::get_ip_addr(char *hostname, int &ip_addr) { hostent* hep; #ifdef _WIN32 @@ -121,7 +121,7 @@ int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) { switch (h_errno) { case HOST_NOT_FOUND: - sprintf(msg+n, "(authoritative answer not found)"); + sprintf(msg+n, "(host not found)"); break; case NO_DATA: sprintf(msg+n, "(valid name, no data record of requested type)"); @@ -130,7 +130,7 @@ int NET_XFER::get_ip_addr( char *hostname, int &ip_addr ) { sprintf(msg+n, "(a nonrecoverable error occurred)"); break; case TRY_AGAIN: - sprintf(msg+n, "(nonauthoritative host not found, or server failure)"); + sprintf(msg+n, "(host not found or server failure)"); break; } diff --git a/db/boinc_db.C b/db/boinc_db.C index 9592ce91d7..cf2dbf60bd 100644 --- a/db/boinc_db.C +++ b/db/boinc_db.C @@ -414,7 +414,8 @@ void DB_WORKUNIT::db_print(char* buf){ sprintf(buf, "id=%d, create_time=%d, appid=%d, " "name='%s', xml_doc='%s', batch=%d, " - "rsc_fpops=%.15e, rsc_iops=%.15e, rsc_memory=%.15e, rsc_disk=%.15e, " + "rsc_fpops_est=%.15e, rsc_fpops_bound=%.15e, " + "rsc_memory_bound=%.15e, rsc_disk_bound=%.15e, " "need_validate=%d, " "canonical_resultid=%d, canonical_credit=%.15e, " "transition_time=%d, delay_bound=%d, " @@ -425,7 +426,7 @@ void DB_WORKUNIT::db_print(char* buf){ "result_template='%s'", id, create_time, appid, name, xml_doc, batch, - rsc_fpops, rsc_iops, rsc_memory, rsc_disk, + rsc_fpops_est, rsc_fpops_bound, rsc_memory_bound, rsc_disk_bound, need_validate, canonical_resultid, canonical_credit, transition_time, delay_bound, @@ -449,10 +450,10 @@ void DB_WORKUNIT::db_parse(MYSQL_ROW &r) { strcpy2(name, r[i++]); strcpy2(xml_doc, r[i++]); batch = atoi(r[i++]); - rsc_fpops = atof(r[i++]); - rsc_iops = atof(r[i++]); - rsc_memory = atof(r[i++]); - rsc_disk = atof(r[i++]); + rsc_fpops_est = atof(r[i++]); + rsc_fpops_bound = atof(r[i++]); + rsc_memory_bound = atof(r[i++]); + rsc_disk_bound = atof(r[i++]); need_validate = atoi(r[i++]); canonical_resultid = atoi(r[i++]); canonical_credit = atof(r[i++]); diff --git a/db/boinc_db.h b/db/boinc_db.h index 1fcdb87d80..4a9538eb3d 100755 --- a/db/boinc_db.h +++ b/db/boinc_db.h @@ -265,24 +265,19 @@ struct WORKUNIT { char name[256]; char xml_doc[MAX_BLOB_SIZE]; int batch; - double rsc_fpops; // estimated # of FP operations - double rsc_iops; // estimated # of integer operations - // The above two items are used for 2 purposes: - // 1) to estimate how long a result will take on a host - // for scheduling purposes; - // 2) to calculate an upper bound on the CPU time for a result - // before it is aborted. - // Currently this is twice the estimated CPU time. - // At some point we might want to have separate "max rsc" fields - double rsc_memory; // estimated size of RAM working set (bytes) + double rsc_fpops_est; // estimated # of FP operations + // used to estimate how long a result will take on a host + double rsc_fpops_bound; // upper bound on # of FP ops + // used to calculate an upper bound on the CPU time for a result + // before it is aborted. + double rsc_memory_bound; // upper bound on RAM working set (bytes) // currently used only by scheduler to screen hosts // At some point, could use as runtime limit - double rsc_disk; // estimated amount of disk needed (bytes) + double rsc_disk_bound; // upper bound on amount of disk needed (bytes) // (including input, output and temp files, but NOT the app) - // This is used for 2 purposes: + // used for 2 purposes: // 1) for scheduling (don't send this WU to a host w/ insuff. disk) - // 2) upper bound (abort task if it uses more than this disk) - // At some point we might want to have separate "max" fields + // 2) abort task if it uses more than this disk bool need_validate; // this WU has at least 1 result in // validate state = NEED_CHECK int canonical_resultid; // ID of canonical result, or zero diff --git a/db/schema.sql b/db/schema.sql index f7cd578475..9a46ef9e88 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -150,10 +150,10 @@ create table workunit ( name varchar(254) not null, xml_doc blob, batch integer not null, - rsc_fpops double not null, - rsc_iops double not null, - rsc_memory double not null, - rsc_disk double not null, + rsc_fpops_est double not null, + rsc_fpops_bound double not null, + rsc_memory_bound double not null, + rsc_disk_bound double not null, need_validate smallint not null, canonical_resultid integer not null, canonical_credit double not null, diff --git a/lib/crypt.h b/lib/crypt.h index a32f7bbb58..0574e07348 100644 --- a/lib/crypt.h +++ b/lib/crypt.h @@ -50,7 +50,6 @@ struct DATA_BLOCK { int print_hex_data(FILE* f, DATA_BLOCK&); int sprint_hex_data(char* p, DATA_BLOCK&); int scan_hex_data(FILE* f, DATA_BLOCK&); -int sscan_hex_data(char* p, DATA_BLOCK&); int print_key_hex(FILE*, KEY* key, int len); int scan_key_hex(FILE*, KEY* key, int len); int sscan_key_hex(char*, KEY* key, int len); diff --git a/sched/handle_request.C b/sched/handle_request.C index 925a7a38fe..3729d96985 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -54,10 +54,8 @@ const double HOST_ACTIVE_FRAC_MIN = 0.5; // inline double estimate_duration(WORKUNIT& wu, HOST& host) { if (host.p_fpops <= 0) host.p_fpops = 1e9; - if (host.p_iops <= 0) host.p_iops = 1e9; - if (wu.rsc_fpops <= 0) wu.rsc_fpops = 1e12; - if (wu.rsc_iops <= 0) wu.rsc_iops = 1e12; - return wu.rsc_fpops/host.p_fpops + wu.rsc_iops/host.p_iops; + if (wu.rsc_fpops_est <= 0) wu.rsc_fpops_est = 1e12; + return wu.rsc_fpops_est/host.p_fpops; } // estimate the amount of real time for this WU based on active_frac and @@ -74,17 +72,17 @@ inline double estimate_wallclock_duration(WORKUNIT& wu, HOST& host) { // return true if the WU can be executed on the host // bool wu_is_feasible(WORKUNIT& wu, HOST& host) { - if(host.d_free && wu.rsc_disk > host.d_free) { + if(host.d_free && wu.rsc_disk_bound > host.d_free) { log_messages.printf( SchedMessages::DEBUG, "[WU#%d %s] needs %f disk; [HOST#%d] has %f\n", - wu.id, wu.name, wu.rsc_disk, host.id, host.d_free + wu.id, wu.name, wu.rsc_disk_bound, host.id, host.d_free ); return false; } - if (host.m_nbytes && wu.rsc_memory > host.m_nbytes) { + if (host.m_nbytes && wu.rsc_memory_bound > host.m_nbytes) { log_messages.printf( SchedMessages::DEBUG, "[WU#%d %s] needs %f mem; [HOST#%d] has %f\n", - wu.id, wu.name, wu.rsc_memory, host.id, host.m_nbytes + wu.id, wu.name, wu.rsc_memory_bound, host.id, host.m_nbytes ); return false; } @@ -132,16 +130,16 @@ int insert_wu_tags(WORKUNIT& wu, APP& app) { char buf[256]; sprintf(buf, - " %f\n" - " %f\n" - " %f\n" - " %f\n" + " %f\n" + " %f\n" + " %f\n" + " %f\n" " %s\n" " %s\n", - wu.rsc_fpops, - wu.rsc_iops, - wu.rsc_memory, - wu.rsc_disk, + wu.rsc_fpops_est, + wu.rsc_fpops_bound, + wu.rsc_memory_bound, + wu.rsc_disk_bound, wu.name, app.name ); diff --git a/test/test_rsc.py b/test/test_rsc.py index 16eed9b720..a5b4605d27 100755 --- a/test/test_rsc.py +++ b/test/test_rsc.py @@ -9,7 +9,7 @@ from test_uc import * class WorkTooBig(WorkUC): def __init__(self): WorkUC.__init__(self) - self.rsc_disk = 1000000000000 # 1 TB + self.rsc_disk_bound = 1000000000000 # 1 TB class ResultUnsent: def __init__(self): diff --git a/test/testbase.py b/test/testbase.py index 1b96ff30fa..77e9cfb11e 100644 --- a/test/testbase.py +++ b/test/testbase.py @@ -524,10 +524,10 @@ class Host: class Work: def __init__(self, redundancy, **kwargs): self.input_files = [] - self.rsc_iops = 1.8e12 - self.rsc_fpops = 1e13 - self.rsc_memory = 1e7 - self.rsc_disk = 1e7 + self.rsc_fpops_est = 1e13 + self.rsc_fpops_bound = 4e13 + self.rsc_memory_bound = 1e7 + self.rsc_disk_bound = 1e7 self.delay_bound = 86400 if not isinstance(redundancy, int): raise TypeError @@ -576,9 +576,10 @@ class Work: download_url = project.download_url, keyfile = os.path.join(project.key_dir,'upload_private'), appname = self.app.name, - rsc_iops = self.rsc_iops, - rsc_fpops = self.rsc_fpops, - rsc_disk = self.rsc_disk, + rsc_fpops_est = self.rsc_fpops_est, + rsc_fpops_bound = self.rsc_fpops_bound, + rsc_disk_bound = self.rsc_disk_bound, + rsc_memory_bound = self.rsc_memory_bound, wu_template = self.wu_template, result_template = self.result_template, min_quorum = self.min_quorum, diff --git a/tools/create_work.C b/tools/create_work.C index df135b9e0a..8f446e7965 100644 --- a/tools/create_work.C +++ b/tools/create_work.C @@ -28,10 +28,10 @@ // [ -download_url x ] // [ -download_dir x ] // [ -keyfile path ] -// -rsc_fpops n -// -rsc_iops n -// -rsc_memory n -// -rsc_disk n +// -rsc_fpops_est n +// -rsc_fpops_bound n +// -rsc_memory_bound n +// -rsc_disk_bound n // -delay_bound x // [ -min_quorum x ] // [ -target_nresults x ] @@ -119,14 +119,14 @@ int main(int argc, char** argv) { strcpy(wu_template_file, argv[++i]); } else if (!strcmp(argv[i], "-result_template")) { strcpy(result_template_file, argv[++i]); - } else if (!strcmp(argv[i], "-rsc_fpops")) { - wu.rsc_fpops = atof(argv[++i]); - } else if (!strcmp(argv[i], "-rsc_iops")) { - wu.rsc_iops = atof(argv[++i]); - } else if (!strcmp(argv[i], "-rsc_memory")) { - wu.rsc_memory = atof(argv[++i]); - } else if (!strcmp(argv[i], "-rsc_disk")) { - wu.rsc_disk = atof(argv[++i]); + } else if (!strcmp(argv[i], "-rsc_fpops_est")) { + wu.rsc_fpops_est = atof(argv[++i]); + } else if (!strcmp(argv[i], "-rsc_fpops_bound")) { + wu.rsc_fpops_bound = atof(argv[++i]); + } else if (!strcmp(argv[i], "-rsc_memory_bound")) { + wu.rsc_memory_bound = atof(argv[++i]); + } else if (!strcmp(argv[i], "-rsc_disk_bound")) { + wu.rsc_disk_bound = atof(argv[++i]); } else if (!strcmp(argv[i], "-keyfile")) { strcpy(keyfile, argv[++i]); } else if (!strcmp(argv[i], "-delay_bound")) {