// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2012 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . #ifdef _WIN32 #include "boinc_win.h" #else #include "config.h" #include #endif #include "str_replace.h" #include "parse.h" #include "client_msgs.h" #include "client_state.h" #include "log_flags.h" #include "result.h" int RESULT::parse_name(XML_PARSER& xp, const char* end_tag) { safe_strcpy(name, ""); while (!xp.get_tag()) { if (xp.match_tag(end_tag)) return 0; if (xp.parse_str("name", name, sizeof(name))) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] RESULT::parse_name(): unrecognized: %s\n", xp.parsed_tag ); } xp.skip_unexpected(); } return ERR_XML_PARSE; } void RESULT::clear() { safe_strcpy(name, ""); safe_strcpy(wu_name, ""); received_time = 0; report_deadline = 0; version_num = 0; safe_strcpy(plan_class, ""); safe_strcpy(platform, ""); avp = NULL; output_files.clear(); ready_to_report = false; completed_time = 0; got_server_ack = false; final_cpu_time = 0; final_elapsed_time = 0; final_peak_working_set_size = 0; final_peak_swap_size = 0; final_peak_disk_usage = 0; final_bytes_sent = 0; final_bytes_received = 0; #ifdef SIM peak_flop_count = 0; #endif fpops_per_cpu_sec = 0; fpops_cumulative = 0; intops_per_cpu_sec = 0; intops_cumulative = 0; _state = RESULT_NEW; exit_status = 0; stderr_out.clear(); suspended_via_gui = false; coproc_missing = false; report_immediately = false; not_started = false; name_md5.clear(); index = 0; app = NULL; wup = NULL; project = NULL; rrsim_flops_left = 0; rrsim_finish_delay = 0; rrsim_flops = 0; rrsim_done = false; already_selected = false; rr_sim_misses_deadline = false; unfinished_time_slice = false; seqno = 0; edf_scheduled = false; safe_strcpy(resources, ""); schedule_backoff = 0; safe_strcpy(schedule_backoff_reason, ""); } // parse a element from scheduling server. // int RESULT::parse_server(XML_PARSER& xp) { FILE_REF file_ref; int retval; clear(); while (!xp.get_tag()) { if (xp.match_tag("/result")) return 0; if (xp.parse_str("name", name, sizeof(name))) continue; if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; if (xp.parse_double("report_deadline", report_deadline)) continue; if (xp.parse_str("platform", platform, sizeof(platform))) continue; if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; if (xp.parse_int("version_num", version_num)) continue; if (xp.match_tag("file_ref")) { retval = file_ref.parse(xp); if (retval) { msg_printf(0, MSG_INFO, "can't parse file_ref in result: %s", boincerror(retval) ); return retval; } output_files.push_back(file_ref); continue; } if (xp.parse_bool("report_immediately", report_immediately)) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", xp.parsed_tag ); } xp.skip_unexpected(); } return ERR_XML_PARSE; } // parse a element from state file // int RESULT::parse_state(XML_PARSER& xp) { FILE_REF file_ref; int retval; clear(); while (!xp.get_tag()) { if (xp.match_tag("/result")) { // set state to something reasonable in case of bad state file // if (got_server_ack || ready_to_report) { switch (state()) { case RESULT_NEW: case RESULT_FILES_DOWNLOADING: case RESULT_FILES_DOWNLOADED: case RESULT_FILES_UPLOADING: set_state(RESULT_FILES_UPLOADED, "RESULT::parse_state"); break; } } return 0; } if (xp.parse_str("name", name, sizeof(name))) continue; if (xp.parse_str("wu_name", wu_name, sizeof(wu_name))) continue; if (xp.parse_double("received_time", received_time)) continue; if (xp.parse_double("report_deadline", report_deadline)) { continue; } if (xp.match_tag("file_ref")) { retval = file_ref.parse(xp); if (retval) { msg_printf(0, MSG_INFO, "can't parse file_ref in result: %s", boincerror(retval) ); return retval; } #ifndef SIM output_files.push_back(file_ref); #endif continue; } if (xp.parse_double("final_cpu_time", final_cpu_time)) continue; if (xp.parse_double("final_elapsed_time", final_elapsed_time)) continue; if (xp.parse_double("final_peak_working_set_size", final_peak_working_set_size)) continue; if (xp.parse_double("final_peak_swap_size", final_peak_swap_size)) continue; if (xp.parse_double("final_peak_disk_usage", final_peak_disk_usage)) continue; if (xp.parse_double("final_bytes_sent", final_bytes_sent)) continue; if (xp.parse_double("final_bytes_received", final_bytes_received)) continue; if (xp.parse_int("exit_status", exit_status)) continue; if (xp.parse_bool("got_server_ack", got_server_ack)) continue; if (xp.parse_bool("ready_to_report", ready_to_report)) continue; if (xp.parse_double("completed_time", completed_time)) continue; if (xp.parse_bool("suspended_via_gui", suspended_via_gui)) continue; if (xp.parse_bool("report_immediately", report_immediately)) continue; if (xp.parse_int("state", _state)) continue; if (xp.parse_string("stderr_out", stderr_out)) continue; if (xp.parse_double("fpops_per_cpu_sec", fpops_per_cpu_sec)) continue; if (xp.parse_double("fpops_cumulative", fpops_cumulative)) continue; if (xp.parse_double("intops_per_cpu_sec", intops_per_cpu_sec)) continue; if (xp.parse_double("intops_cumulative", intops_cumulative)) continue; if (xp.parse_str("platform", platform, sizeof(platform))) continue; if (xp.parse_str("plan_class", plan_class, sizeof(plan_class))) continue; if (xp.parse_int("version_num", version_num)) continue; if (log_flags.unparsed_xml) { msg_printf(0, MSG_INFO, "[unparsed_xml] RESULT::parse(): unrecognized: %s\n", xp.parsed_tag ); } xp.skip_unexpected(); } return ERR_XML_PARSE; } // write result descriptor to state file, GUI RPC reply, or sched request // int RESULT::write(MIOFILE& out, bool to_server) { unsigned int i; FILE_INFO* fip; int n, retval; out.printf( "\n" " %s\n" " %f\n" " %f\n" " %d\n" " %d\n" " %s\n" " %d\n", name, final_cpu_time, final_elapsed_time, exit_status, state(), platform, version_num ); if (strlen(plan_class)) { out.printf(" %s\n", plan_class); } if (fpops_per_cpu_sec) { out.printf(" %f\n", fpops_per_cpu_sec); } if (fpops_cumulative) { out.printf(" %f\n", fpops_cumulative); } if (intops_per_cpu_sec) { out.printf(" %f\n", intops_per_cpu_sec); } if (intops_cumulative) { out.printf(" %f\n", intops_cumulative); } if (final_peak_working_set_size) { out.printf( " %.0f\n", final_peak_working_set_size ); } if (final_peak_swap_size) { out.printf( " %.0f\n", final_peak_swap_size ); } if (final_peak_disk_usage) { out.printf( " %.0f\n", final_peak_disk_usage ); } if (final_bytes_sent) { out.printf( " %.0f\n", final_bytes_sent ); } if (final_bytes_received) { out.printf( " %.0f\n", final_bytes_received ); } if (to_server) { out.printf( " %d\n", wup->version_num ); } n = (int)stderr_out.length(); if (n || to_server) { out.printf("\n"); // the following is here so that it gets recorded on server // (there's no core_client_version field of result table) // if (to_server) { out.printf( "%d.%d.%d\n", gstate.core_client_version.major, gstate.core_client_version.minor, gstate.core_client_version.release ); } if (n) { out.printf("\n"); } out.printf("\n"); } if (to_server) { for (i=0; iuploaded) { retval = fip->write(out, true); if (retval) return retval; } } } else { if (got_server_ack) out.printf(" \n"); if (ready_to_report) out.printf(" \n"); if (completed_time) out.printf(" %f\n", completed_time); if (suspended_via_gui) out.printf(" \n"); if (report_immediately) out.printf(" \n"); out.printf( " %s\n" " %f\n" " %f\n", wu_name, report_deadline, received_time ); for (i=0; i\n"); return 0; } #ifndef SIM static const char* cpu_string(double ncpus) { return (ncpus==1)?"CPU":"CPUs"; } int RESULT::write_gui(MIOFILE& out) { out.printf( "\n" " %s\n" " %s\n" " %s\n" " %d\n" " %s\n" " %s\n" " %f\n" " %f\n" " %d\n" " %d\n" " %f\n" " %f\n" " %f\n", name, wu_name, platform, version_num, plan_class, project->master_url, final_cpu_time, final_elapsed_time, exit_status, state(), report_deadline, received_time, estimated_runtime_remaining() ); if (got_server_ack) out.printf(" \n"); if (ready_to_report) out.printf(" \n"); if (completed_time) out.printf(" %f\n", completed_time); if (suspended_via_gui) out.printf(" \n"); if (project->suspended_via_gui) out.printf(" \n"); if (report_immediately) out.printf(" \n"); if (edf_scheduled) out.printf(" \n"); if (coproc_missing) out.printf(" \n"); if (schedule_backoff > gstate.now) { out.printf(" \n"); if (strlen(schedule_backoff_reason)) { out.printf( " %s\n", schedule_backoff_reason ); } } if (avp->needs_network && gstate.network_suspended) out.printf(" \n"); ACTIVE_TASK* atp = gstate.active_tasks.lookup_result(this); if (atp) { atp->write_gui(out); } if (!strlen(resources)) { // only need to compute this string once // if (avp->gpu_usage.rsc_type) { if (avp->gpu_usage.usage == 1) { snprintf(resources, sizeof(resources), "%.3g %s + 1 %s", avp->avg_ncpus, cpu_string(avp->avg_ncpus), rsc_name_long(avp->gpu_usage.rsc_type) ); } else { snprintf(resources, sizeof(resources), "%.3g %s + %.3g %ss", avp->avg_ncpus, cpu_string(avp->avg_ncpus), avp->gpu_usage.usage, rsc_name_long(avp->gpu_usage.rsc_type) ); } } else if (avp->missing_coproc) { snprintf(resources, sizeof(resources), "%.3g %s + %.12s GPU (missing)", avp->avg_ncpus, cpu_string(avp->avg_ncpus), avp->missing_coproc_name ); } else if (!project->non_cpu_intensive && (avp->avg_ncpus != 1)) { snprintf(resources, sizeof(resources), "%.3g %s", avp->avg_ncpus, cpu_string(avp->avg_ncpus) ); } else { safe_strcpy(resources, " "); } } if (strlen(resources)>1) { char buf[256]; safe_strcpy(buf, ""); if (atp && atp->scheduler_state == CPU_SCHED_SCHEDULED) { if (avp->gpu_usage.rsc_type) { COPROC& cp = coprocs.coprocs[avp->gpu_usage.rsc_type]; if (cp.count > 1) { // if there are multiple GPUs of this type, // show the user which one(s) are being used // int n = (int)ceil(avp->gpu_usage.usage); safe_strcpy(buf, n>1?" (devices ":" (device "); for (int i=0; i 0) { safe_strcat(buf, ", "); } safe_strcat(buf, buf2); } safe_strcat(buf, ")"); } } } out.printf( " %s%s\n", resources, buf ); } out.printf("\n"); return 0; } #endif // Returns true if the result's output files are all either // successfully uploaded or have unrecoverable errors // bool RESULT::is_upload_done() { unsigned int i; FILE_INFO* fip; int retval; for (i=0; iuploadable()) { if (fip->had_failure(retval)) continue; if (!fip->uploaded) { return false; } } } return true; } // resets all FILE_INFO's in result to uploaded = false // void RESULT::clear_uploaded_flags() { unsigned int i; FILE_INFO* fip; for (i=0; iuploaded = false; } } bool RESULT::is_not_started() { if (computing_done()) return false; if (gstate.active_tasks.lookup_result(this)) return false; return true; } // return true if some file needed by this result (input or application) // is downloading and backed off // bool RESULT::some_download_stalled() { #ifndef SIM unsigned int i; FILE_INFO* fip; PERS_FILE_XFER* pfx; bool some_file_missing = false; for (i=0; iinput_files.size(); i++) { fip = wup->input_files[i].file_info; if (fip->status != FILE_PRESENT) some_file_missing = true; pfx = fip->pers_file_xfer; if (pfx && pfx->next_request_time > gstate.now) { return true; } } for (i=0; iapp_files.size(); i++) { fip = avp->app_files[i].file_info; if (fip->status != FILE_PRESENT) some_file_missing = true; pfx = fip->pers_file_xfer; if (pfx && pfx->next_request_time > gstate.now) { return true; } } if (some_file_missing && !project->download_backoff.ok_to_transfer()) { return true; } #endif return false; } FILE_REF* RESULT::lookup_file(FILE_INFO* fip) { for (unsigned int i=0; irsc_fpops_est, name, final_elapsed_time, exit_status ); fclose(f); } // abort a result that's not currently running // void RESULT::abort_inactive(int status) { if (state() >= RESULT_COMPUTE_ERROR) return; set_state(RESULT_ABORTED, "RESULT::abort_inactive"); exit_status = status; } // whether this task can be run right now // bool RESULT::runnable() { if (suspended_via_gui) return false; if (project->suspended_via_gui) return false; if (state() != RESULT_FILES_DOWNLOADED) return false; if (coproc_missing) return false; if (schedule_backoff > gstate.now) return false; if (avp->needs_network && gstate.file_xfers_suspended) { // check file_xfers_suspended rather than network_suspended; // the latter remains false for a period after GUI RPCs // return false; } return true; } // whether this task should be included in RR simulation // Like runnable, except downloading backoff is OK // Schedule-backoff is not OK; // we should be able to get GPU jobs from project A // even if project B has backed-off jobs. // bool RESULT::nearly_runnable() { if (suspended_via_gui) return false; if (project->suspended_via_gui) return false; switch (state()) { case RESULT_FILES_DOWNLOADED: case RESULT_FILES_DOWNLOADING: break; default: return false; } if (coproc_missing) return false; if (schedule_backoff > gstate.now) return false; return true; } // Return true if the result is waiting for its files to download, // and nothing prevents this from happening soon // bool RESULT::downloading() { if (suspended_via_gui) return false; if (project->suspended_via_gui) return false; if (state() > RESULT_FILES_DOWNLOADING) return false; if (some_download_stalled()) return false; return true; } double RESULT::estimated_runtime_uncorrected() { return wup->rsc_fpops_est/avp->flops; } // estimate how long a result will take on this host // double RESULT::estimated_runtime() { double x = estimated_runtime_uncorrected(); if (!project->dont_use_dcf) { x *= project->duration_correction_factor; } return x; } double RESULT::estimated_runtime_remaining() { if (computing_done()) return 0; ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(this); if (app->non_cpu_intensive) { if (atp && atp->fraction_done>0) { double est_dur = atp->fraction_done_elapsed_time / atp->fraction_done; double x = est_dur - atp->elapsed_time; if (x <= 0) x = 1; return x; } return 0; } if (atp) { #ifdef SIM return sim_flops_left/avp->flops; #else return atp->est_dur() - atp->elapsed_time; #endif } return estimated_runtime(); } // Results must be complete early enough to report before the report deadline. // Not all hosts are connected all of the time. // double RESULT::computation_deadline() { return report_deadline - ( gstate.work_buf_min() // Seconds that the host will not be connected to the Internet + DEADLINE_CUSHION ); } static const char* result_state_name(int val) { switch (val) { case RESULT_NEW: return "NEW"; case RESULT_FILES_DOWNLOADING: return "FILES_DOWNLOADING"; case RESULT_FILES_DOWNLOADED: return "FILES_DOWNLOADED"; case RESULT_COMPUTE_ERROR: return "COMPUTE_ERROR"; case RESULT_FILES_UPLOADING: return "FILES_UPLOADING"; case RESULT_FILES_UPLOADED: return "FILES_UPLOADED"; case RESULT_ABORTED: return "ABORTED"; } return "Unknown"; } void RESULT::set_state(int val, const char* where) { _state = val; if (log_flags.task_debug) { msg_printf(project, MSG_INFO, "[task] result state=%s for %s from %s", result_state_name(val), name, where ); } } void add_old_result(RESULT& r) { while (!old_results.empty()) { OLD_RESULT& ores = *old_results.begin(); if (ores.create_time < gstate.now - 3600) { old_results.pop_front(); } else { break; } } OLD_RESULT ores; safe_strcpy(ores.project_url, r.project->master_url); safe_strcpy(ores.result_name, r.name); safe_strcpy(ores.app_name, r.app->name); ores.elapsed_time = r.final_elapsed_time; ores.cpu_time = r.final_cpu_time; ores.completed_time = r.completed_time; ores.create_time = gstate.now; ores.exit_status = r.exit_status; old_results.push_back(ores); } void print_old_results(MIOFILE& mf) { mf.printf("\n"); deque::iterator i = old_results.begin(); while (i != old_results.end()) { OLD_RESULT& ores = *i; mf.printf( " \n" " %s\n" " %s\n" " %s\n" " %d\n" " %f\n" " %f\n" " %f\n" " %f\n" " \n", ores.project_url, ores.result_name, ores.app_name, ores.exit_status, ores.elapsed_time, ores.cpu_time, ores.completed_time, ores.create_time ); ++i; } mf.printf("\n"); } std::deque old_results;