From 4bc558ffc891b94f45543e76d53cd138486cc091 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 4 Oct 2005 21:44:58 +0000 Subject: [PATCH] disk usage fixes svn path=/trunk/boinc/; revision=8442 --- checkin_notes | 66 ++++++++++++++++++++++++++++++++++ client/client_state.h | 6 ++-- client/cs_prefs.C | 5 ++- client/cs_scheduler.C | 65 +++++++++++++++++++++++----------- client/hostinfo_unix.C | 15 ++++---- db/boinc_db.h | 30 +++++++++++----- lib/gui_rpc_client_ops.C | 76 ---------------------------------------- lib/hostinfo.C | 27 +------------- lib/hostinfo.h | 15 ++++---- sched/handle_request.C | 2 ++ sched/sched_send.C | 72 ++++++++++++++++++------------------- sched/server_types.C | 59 +++++++++++++++++++------------ sched/server_types.h | 18 +++------- 13 files changed, 234 insertions(+), 222 deletions(-) diff --git a/checkin_notes b/checkin_notes index 996b67e16c..12cbf3d6c0 100755 --- a/checkin_notes +++ b/checkin_notes @@ -12665,3 +12665,69 @@ Bruce 4 October 2005 sched/ file_upload_handler.C +David 4 October 2005 + - Fix problems with disk space management code, which can cause + - violation of user disk usage prefs + - server sends clients lots of results + even though client has zero free disk space, + and results will immediately error out. + OLD: + client: scheduler requests include disk space info + (d_total and d_free), + but they're the values at client startup, + which may be way out of date! + (e.g. free space may now be zero) + server: + Expected in request message. + This was removed a long time ago. + The formula for computing max additional disk usage was wrong. + NEW: + client: scheduler requests includes + - up-to-date values for d_total and d_free + - up-to-date values for BOINC and project disk usage: + , + server: + Parse new scheduler request fields. + If new fields are present, + calculate max additional disk usage based on them. 
+ - Change to scheduler RPC requests: + OLD: + is sent, but it's actually + potentially runnable resource share fraction. + (Didn't matter; it's not currently used by server) + NEW: + (total resource share fraction) + (runnable resource share fraction) + (potentially runnable resource share fraction) + ... are all sent. + Future versions of the server may use them for + disk and/or CPU allocation> + - Commented out or deleted some unused code, e.g. all references to + p_fpop_err, p_iop_err, p_membw_err + + NOTES: + 1) If old client talks to new server, + only the "min free" preference will be enforced. + 2) If old or new client talks to old server, + no preferences are reliably enforced + 3) If new client talks to new server, + all preferences are enforced, + BUT project resource shares are not enforced. + E.g. one project might hog all the disk space + even though its resource share is small and + other projects need space. + Need to figure out how to address this. + + client/ + client_state.h + cs_prefs.C + cs_scheduler.C + hostinfo_unix.C + db/ + boinc_db.h + lib/ + gui_rpc_client_ops.C + hostinfo.C,h + sched/ + sched_send.C + server_types.C,h diff --git a/client/client_state.h b/client/client_state.h index 507c1dea05..ac4590aa4b 100644 --- a/client/client_state.h +++ b/client/client_state.h @@ -218,9 +218,6 @@ private: bool garbage_collect(); bool garbage_collect_always(); bool update_results(); - double total_resource_share(); - double runnable_resource_share(); - double potentially_runnable_resource_share(); // --------------- cs_account.C: public: @@ -236,6 +233,9 @@ private: private: void adjust_debts(); bool must_schedule_cpus; + double total_resource_share(); + double runnable_resource_share(); + double potentially_runnable_resource_share(); public: void request_schedule_cpus(const char*); // Reschedule CPUs ASAP. 
Called when: diff --git a/client/cs_prefs.C b/client/cs_prefs.C index 6a92c80c18..46b22f7fa8 100644 --- a/client/cs_prefs.C +++ b/client/cs_prefs.C @@ -60,6 +60,7 @@ void CLIENT_STATE::install_global_prefs() { set_ncpus(); } +#if 0 // Return the maximum allowed disk usage as determined by user preferences. // There are three different settings in the prefs; // return the least of the three. @@ -75,6 +76,7 @@ int CLIENT_STATE::allowed_disk_usage(double& size) { if (size < 0) size = 0; return 0; } +#endif int CLIENT_STATE::project_disk_usage(PROJECT* p, double& size) { char buf[256]; @@ -99,6 +101,7 @@ int CLIENT_STATE::total_disk_usage(double& size) { return dir_size(".", size); } +#if 0 int CLIENT_STATE::allowed_project_disk_usage(double& size) { double other_disk_used; double total_disk_available; @@ -112,7 +115,7 @@ int CLIENT_STATE::allowed_project_disk_usage(double& size) { size = total_disk_available - other_disk_used; return 0; } - +#endif // returns true if start_hour == end_hour or start_hour <= now < end_hour // diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 2665b45580..7adc8c5808 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -210,28 +210,38 @@ PROJECT* CLIENT_STATE::next_project_need_work() { return p_prospect; } -// Write a scheduler request to a disk file -// (later sent to the scheduling server) +// Write a scheduler request to a disk file, +// to be sent to a scheduling server // int CLIENT_STATE::make_scheduler_request(PROJECT* p) { char buf[1024]; - - get_sched_request_filename(*p, buf); - FILE* f = boinc_fopen(buf, "wb"); MIOFILE mf; unsigned int i; RESULT* rp; int retval; -#if 0 - double free, possible; -#endif + double disk_total, disk_project; + get_sched_request_filename(*p, buf); + FILE* f = boinc_fopen(buf, "wb"); + + double trs = total_resource_share(); + double rrs = runnable_resource_share(); double prrs = potentially_runnable_resource_share(); - double resource_share_fraction; - if (prrs) { - 
resource_share_fraction = p->resource_share / prrs; + double resource_share_fraction, rrs_fraction, prrs_fraction; + if (trs) { + resource_share_fraction = p->resource_share / trs; } else { - resource_share_fraction = 1; // TODO - fix + resource_share_fraction = 1; + } + if (rrs) { + rrs_fraction = p->resource_share / rrs; + } else { + rrs_fraction = 1; + } + if (prrs) { + prrs_fraction = p->resource_share / prrs; + } else { + prrs_fraction = 1; } if (!f) return ERR_FOPEN; @@ -247,6 +257,8 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { " %d\n" " %f\n" " %f\n" + " %f\n" + " %f\n" " %f\n" " %f\n", p->authenticator, @@ -258,6 +270,8 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { core_client_release, p->work_request, resource_share_fraction, + rrs_fraction, + prrs_fraction, time_until_work_done(p, proj_min_results(p, prrs)-1, prrs), p->duration_correction_factor ); @@ -270,14 +284,6 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { } fprintf(f, " \n"); } -#if 0 - anything_free(free); - fprintf(f, " %f\n", free); - total_potential_offender(p, possible); - fprintf(f, " %f\n", possible); - total_potential_self(p, possible); - fprintf(f, " %f\n", possible); -#endif if (strlen(p->code_sign_key)) { fprintf(f, " \n%s\n", p->code_sign_key); } @@ -325,8 +331,27 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) { if (retval) return retval; retval = net_stats.write(mf); if (retval) return retval; + + // update disk usage, and write host info + // + get_filesystem_info(host_info.d_total, host_info.d_free); retval = host_info.write(mf); if (retval) return retval; + + // get and write disk usage + // + total_disk_usage(disk_total); + project_disk_usage(p, disk_project); + fprintf(f, + " \n" + " %f\n" + " %f\n" + " \n", + disk_total, disk_project + ); + + // report results + // p->nresults_returned = 0; for (i=0; i")) return 0; - else if (parse_int(buf, "", timezone)) continue; - else if (parse_str(buf, "", domain_name, sizeof(domain_name))) 
continue; - else if (parse_str(buf, "", ip_addr, sizeof(ip_addr))) continue; - else if (parse_int(buf, "", p_ncpus)) continue; - else if (parse_str(buf, "", p_vendor, sizeof(p_vendor))) continue; - else if (parse_str(buf, "", p_model, sizeof(p_model))) continue; - else if (parse_double(buf, "", p_fpops)) { - // fix foolishness that could result in negative value here - // - if (p_fpops < 0) p_fpops = -p_fpops; - continue; - } - else if (parse_double(buf, "", p_iops)) { - if (p_iops < 0) p_iops = -p_iops; - continue; - } - else if (parse_double(buf, "", p_membw)) { - if (p_membw < 0) p_membw = -p_membw; - continue; - } - else if (parse_int(buf, "", p_fpop_err)) continue; - else if (parse_int(buf, "", p_iop_err)) continue; - else if (parse_int(buf, "", p_membw_err)) continue; - else if (parse_double(buf, "", p_calculated)) continue; - else if (parse_str(buf, "", os_name, sizeof(os_name))) continue; - else if (parse_str(buf, "", os_version, sizeof(os_version))) continue; - else if (parse_double(buf, "", m_nbytes)) continue; - else if (parse_double(buf, "", m_cache)) continue; - else if (parse_double(buf, "", m_swap)) continue; - else if (parse_double(buf, "", d_total)) continue; - else if (parse_double(buf, "", d_free)) continue; - } - return 0; -} - -void HOST_INFO::clear() { - timezone = 0; - strcpy(domain_name, ""); - strcpy(serialnum, ""); - strcpy(ip_addr, ""); - p_ncpus = 0; - strcpy(p_vendor, ""); - strcpy(p_model, ""); - p_fpops = 0.0; - p_iops = 0.0; - p_membw = 0.0; - p_fpop_err = 0; - p_iop_err = 0; - p_membw_err = 0; - p_calculated = 0.0; - strcpy(os_name, ""); - strcpy(os_version, ""); - m_nbytes = 0.0; - m_cache = 0.0; - m_swap = 0.0; - d_total = 0.0; - d_free = 0.0; -} - -#endif - CC_STATE::CC_STATE() { clear(); } diff --git a/lib/hostinfo.C b/lib/hostinfo.C index 0de2fbf1e1..42b93afdd4 100644 --- a/lib/hostinfo.C +++ b/lib/hostinfo.C @@ -41,10 +41,8 @@ HOST_INFO::HOST_INFO() { clear_host_info(); } -// Reset the host info struct to default values -// 
void HOST_INFO::clear_host_info() { - timezone = 0; // local STANDARD time - UTC time (in seconds) + timezone = 0; strcpy(domain_name, ""); strcpy(serialnum, ""); strcpy(ip_addr, ""); @@ -56,9 +54,6 @@ void HOST_INFO::clear_host_info() { p_fpops = 0; p_iops = 0; p_membw = 0; - p_fpop_err = 0; - p_iop_err = 0; - p_membw_err = 0; p_calculated = 0; strcpy(os_name, ""); @@ -72,8 +67,6 @@ void HOST_INFO::clear_host_info() { d_free = 0; } -// Parse the host information, usually from the client state XML file -// int HOST_INFO::parse(MIOFILE& in) { char buf[256]; @@ -101,9 +94,6 @@ int HOST_INFO::parse(MIOFILE& in) { if (p_membw < 0) p_membw = -p_membw; continue; } - else if (parse_int(buf, "", p_fpop_err)) continue; - else if (parse_int(buf, "", p_iop_err)) continue; - else if (parse_int(buf, "", p_membw_err)) continue; else if (parse_double(buf, "", p_calculated)) continue; else if (parse_str(buf, "", os_name, sizeof(os_name))) continue; else if (parse_str(buf, "", os_version, sizeof(os_version))) continue; @@ -131,9 +121,6 @@ int HOST_INFO::write(MIOFILE& out) { " %f\n" " %f\n" " %f\n" - " %d\n" - " %d\n" - " %d\n" " %f\n" " %s\n" " %s\n" @@ -153,9 +140,6 @@ int HOST_INFO::write(MIOFILE& out) { p_fpops, p_iops, p_membw, - p_fpop_err, - p_iop_err, - p_membw_err, p_calculated, os_name, os_version, @@ -182,9 +166,6 @@ int HOST_INFO::parse_cpu_benchmarks(FILE* in) { else if (parse_double(buf, "", p_fpops)) continue; else if (parse_double(buf, "", p_iops)) continue; else if (parse_double(buf, "", p_membw)) continue; - else if (parse_int(buf, "", p_fpop_err)) continue; - else if (parse_int(buf, "", p_iop_err)) continue; - else if (parse_int(buf, "", p_membw_err)) continue; else if (parse_double(buf, "", p_calculated)) continue; else if (parse_double(buf, "", m_cache)) continue; } @@ -197,18 +178,12 @@ int HOST_INFO::write_cpu_benchmarks(FILE* out) { " %f\n" " %f\n" " %f\n" - " %d\n" - " %d\n" - " %d\n" " %f\n" " %f\n" "\n", p_fpops, p_iops, p_membw, - p_fpop_err, - 
p_iop_err, - p_membw_err, p_calculated, m_cache ); diff --git a/lib/hostinfo.h b/lib/hostinfo.h index 3eddcf8bd2..47c1f898e3 100644 --- a/lib/hostinfo.h +++ b/lib/hostinfo.h @@ -20,12 +20,18 @@ #ifndef _HOSTINFO_ #define _HOSTINFO_ -#include "miofile.h" - +// Description of a host's hardware and software. +// This is used a few places: +// - it's part of the client's state file, client_state.xml +// - it's passed in the reply to the get_host_info GUI RPC +// - it's included in scheduler RPC requests +// // Other host-specific info is kept in // TIME_STATS (on/connected/active fractions) // NET_STATS (average network bandwidths) +#include "miofile.h" + class HOST_INFO { public: int timezone; // local STANDARD time - UTC time (in seconds) @@ -40,10 +46,7 @@ public: double p_fpops; double p_iops; double p_membw; - int p_fpop_err; - int p_iop_err; - int p_membw_err; - double p_calculated; //needs to be initialized to zero + double p_calculated; // when benchmarks were last run, or zero char os_name[256]; char os_version[256]; diff --git a/sched/handle_request.C b/sched/handle_request.C index 830feab9b5..74f7338c41 100644 --- a/sched/handle_request.C +++ b/sched/handle_request.C @@ -341,6 +341,8 @@ static int modify_host_struct(SCHEDULER_REQUEST& sreq, HOST& host) { host.m_swap = sreq.host.m_swap; host.d_total = sreq.host.d_total; host.d_free = sreq.host.d_free; + host.d_boinc_used_total = sreq.host.d_boinc_used_total; + host.d_boinc_used_project = sreq.host.d_boinc_used_project; host.n_bwup = sreq.host.n_bwup; host.n_bwdown = sreq.host.n_bwdown; if (strlen(sreq.host.host_cpid)) { diff --git a/sched/sched_send.C b/sched/sched_send.C index 5d0f9f8c72..611cdbef52 100644 --- a/sched/sched_send.C +++ b/sched/sched_send.C @@ -107,10 +107,12 @@ int get_app_version( return 0; } -// compute the max additional disk usage we can impose on the host +// Compute the max additional disk usage we can impose on the host. 
+// Depending on the client version, it can either send us +// - d_total and d_free (pre 4 oct 2005) +// - the above plus d_boinc_used_total and d_boinc_used_project // double max_allowable_disk(SCHEDULER_REQUEST& req, SCHEDULER_REPLY& reply) { -#if 1 HOST host = req.host; GLOBAL_PREFS prefs = req.global_prefs; double x1, x2, x3, x; @@ -121,29 +123,43 @@ double max_allowable_disk(SCHEDULER_REQUEST& req, SCHEDULER_REPLY& reply) { if (prefs.disk_max_used_pct == 0) prefs.disk_max_used_pct = 10; // min_free_gb can be zero - // default values for BOINC disk usage (project and total) is zero - // - // no defaults for total/free disk space (host.d_total, d_free) - // if they're zero, project will get no work. + // if they're zero, client will get no work. // - x1 = prefs.disk_max_used_gb*1e9 - req.total_disk_usage; - x2 = host.d_total*prefs.disk_max_used_pct/100.; - x3 = host.d_free - prefs.disk_min_free_gb*1e9; // may be negative + if (host.d_boinc_used_total) { + // The post 4 oct 2005 case. + // Compute the max allowable additional disk usage based on prefs + // + x1 = prefs.disk_max_used_gb*1e9 - host.d_boinc_used_total; + x2 = host.d_total*prefs.disk_max_used_pct/100. + - host.d_boinc_used_total; + x3 = host.d_free - prefs.disk_min_free_gb*1e9; // may be negative + x = min(x1, min(x2, x3)); - x = min(x1, min(x2, x3)); - - // keep track of which bound is the most stringent - // - if (x==x1) { - reply.disk_limits.max_used = x; - } else if (x==x2) { - reply.disk_limits.max_frac = x; + // see which bound is the most stringent + // + if (x==x1) { + reply.disk_limits.max_used = x; + } else if (x==x2) { + reply.disk_limits.max_frac = x; + } else { + reply.disk_limits.min_free = x; + } } else { + // here we don't know how much space BOINC is using. + // so we're kinda screwed. + // All we can do is assume that BOINC is using zero space. + // We can't honor the max_used for max_used_pct preferences. + // We can only honor the min_free pref. 
+ // + x = host.d_free - prefs.disk_min_free_gb*1e9; // may be negative reply.disk_limits.min_free = x; + x1 = x2 = x3 = 0; } + + //if (true) { if (x < 0) { log_messages.printf( SCHED_MSG_LOG::MSG_NORMAL, @@ -153,8 +169,8 @@ double max_allowable_disk(SCHEDULER_REQUEST& req, SCHEDULER_REPLY& reply) { ); log_messages.printf( SCHED_MSG_LOG::MSG_NORMAL, - "req.total_disk_usage %f host.d_total %f host.d_free %f\n", - req.total_disk_usage, host.d_total, host.d_free + "host.d_total %f host.d_free %f host.d_boinc_used_total %f\n", + host.d_total, host.d_free, host.d_boinc_used_total ); log_messages.printf( SCHED_MSG_LOG::MSG_NORMAL, @@ -163,24 +179,6 @@ double max_allowable_disk(SCHEDULER_REQUEST& req, SCHEDULER_REPLY& reply) { ); } return x; -#else - double x1, x2, x3; - - HOST host = req.host; - - x1 = req.project_disk_free; - x2 = req.potentially_free_offender; - x3 = req.potentially_free_self; - - if (x1 < 0) { - log_messages.printf( - SCHED_MSG_LOG::MSG_NORMAL, - "req.project_disk_free_gb %f\n", - x1 - ); - } - return max(max(x1,x2), x3); -#endif } // if a host has active_frac < 0.1, assume 0.1 so we don't deprive it of work. 
diff --git a/sched/server_types.C b/sched/server_types.C index 36b15d2044..c7ad851eef 100644 --- a/sched/server_types.C +++ b/sched/server_types.C @@ -109,6 +109,8 @@ int SCHEDULER_REQUEST::parse(FILE* fin) { rpc_seqno = 0; work_req_seconds = 0; resource_share_fraction = 1.0; + rrs_fraction = 1.0; + prrs_fraction = 1.0; estimated_delay = 0; strcpy(global_prefs_xml, ""); strcpy(code_sign_key, ""); @@ -143,21 +145,11 @@ int SCHEDULER_REQUEST::parse(FILE* fin) { else if (parse_int(buf, "", core_client_release)) continue; else if (parse_double(buf, "", work_req_seconds)) continue; else if (parse_double(buf, "", resource_share_fraction)) continue; + else if (parse_double(buf, "", rrs_fraction)) continue; + else if (parse_double(buf, "", prrs_fraction)) continue; else if (parse_double(buf, "", estimated_delay)) continue; else if (parse_double(buf, "", host.duration_correction_factor)) continue; -// ROMW: Added these back in since we have 3.x clients who still want -// want to send us the older style for determining disk usage. -// TODO: Remove the two lines below when the 4.x way of doing things -// is completely implemented. 
- else if (parse_double(buf, "", project_disk_usage)) continue; - else if (parse_double(buf, "", total_disk_usage)) continue; - -#if 0 - else if (parse_double(buf, "", project_disk_free)) continue; - else if (parse_double(buf, "", potentially_free_offender)) continue; - else if (parse_double(buf, "", potentially_free_self)) continue; -#endif else if (match_tag(buf, "")) { strcpy(global_prefs_xml, "\n"); while (fgets(buf, 256, fin)) { @@ -179,6 +171,10 @@ int SCHEDULER_REQUEST::parse(FILE* fin) { host.parse_net_stats(fin); continue; } + else if (match_tag(buf, "")) { + host.parse_disk_usage(fin); + continue; + } else if (match_tag(buf, "")) { result.parse_from_client(fin); results.push_back(result); @@ -254,10 +250,10 @@ int SCHEDULER_REQUEST::write(FILE* fout) { " %d\n" " %.15f\n" " %.15f\n" + " %.15f\n" + " %.15f\n" " %.15f\n" " %s\n" - " %.15f\n" - " %.15f\n" " %s\n", authenticator, platform_name, @@ -269,10 +265,10 @@ int SCHEDULER_REQUEST::write(FILE* fout) { rpc_seqno, work_req_seconds, resource_share_fraction, + rrs_fraction, + prrs_fraction, estimated_delay, code_sign_key, - total_disk_usage, - project_disk_usage, anonymous_platform?"true":"false" ); @@ -720,8 +716,6 @@ int RESULT::parse_from_client(FILE* fin) { return ERR_XML_PARSE; } -// TODO: put the benchmark errors into the DB -// int HOST::parse(FILE* fin) { char buf[256]; @@ -751,12 +745,13 @@ int HOST::parse(FILE* fin) { else if (parse_double(buf, "", d_free)) continue; else if (parse_double(buf, "", n_bwup)) continue; else if (parse_double(buf, "", n_bwdown)) continue; - // following four lines can be eliminated with a later version of - // the core client. 
- else if (parse_int(buf, "", trash_int)) continue; - else if (parse_int(buf, "", trash_int)) continue; - else if (parse_int(buf, "", trash_int)) continue; else if (parse_double(buf, "", trash_double)) continue; + + // parse deprecated fields to avoid error messages + else if (match_tag(buf, "")) continue; + else if (match_tag(buf, "")) continue; + else if (match_tag(buf, "")) continue; + else { log_messages.printf(SCHED_MSG_LOG::MSG_NORMAL, "HOST::parse(): unrecognized: %s\n", buf @@ -805,6 +800,24 @@ int HOST::parse_net_stats(FILE* fin) { return ERR_XML_PARSE; } +int HOST::parse_disk_usage(FILE* fin) { + char buf[256]; + + while (fgets(buf, 256, fin)) { + if (match_tag(buf, "")) return 0; + else if (parse_double(buf, "", d_boinc_used_total)) continue; + else if (parse_double(buf, "", d_boinc_used_project)) continue; + else { + log_messages.printf( + SCHED_MSG_LOG::MSG_NORMAL, + "HOST::parse_disk_usage(): unrecognized: %s\n", + buf + ); + } + } + return ERR_XML_PARSE; +} + void GLOBAL_PREFS::parse(char* buf, char* venue) { char buf2[LARGE_BLOB_SIZE]; diff --git a/sched/server_types.h b/sched/server_types.h index 08a8ef67c8..56d0dc0153 100644 --- a/sched/server_types.h +++ b/sched/server_types.h @@ -116,6 +116,11 @@ struct SCHEDULER_REQUEST { double work_req_seconds; // in "normalized CPU seconds" (see work_req.php) double resource_share_fraction; + // this project's fraction of total resource share + double rrs_fraction; + // ... of runnable resource share + double prrs_fraction; + // ... of potentially runnable resource share double estimated_delay; // how many wall-clock seconds will elapse before // host will begin any new work for this project @@ -123,19 +128,6 @@ struct SCHEDULER_REQUEST { char global_prefs_xml[LARGE_BLOB_SIZE]; char code_sign_key[4096]; -// ROMW: Added these back in since we have 3.x clients who still want -// want to send us the older style for determining disk usage. 
-// TODO: Remove the two lines below when the 4.x way of doing things -// is completely implemented. - double total_disk_usage; - double project_disk_usage; - -#if 0 - double project_disk_free; - double potentially_free_offender; - double potentially_free_self; -#endif - bool anonymous_platform; std::vector client_app_versions; GLOBAL_PREFS global_prefs;