// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.

// Declarations of the client's core data types
// (URL lists, projects, app versions, workunits, results, run modes).
//
// If you change anything, make sure you also change:
// client_types.C  (to write and parse it)
// client_state.C  (to cross-link objects)
//

#ifndef _CLIENT_TYPES_
#define _CLIENT_TYPES_

#include "cpp.h"

#if !defined(_WIN32) || defined(__CYGWIN32__)
// NOTE(review): the header names of the next two #include directives
// appear to have been lost from this copy (text in angle brackets stripped).
// Restore them from a pristine copy before building.
#include
#include
#endif

#include "md5_file.h"
#include "cert_sig.h"
#include "hostinfo.h"
#include "coproc.h"
#include "miofile.h"
#include "common_defs.h"
#include "cc_config.h"

#include "rr_sim.h"
#include "work_fetch.h"
#include "cs_notice.h"
#include "cs_trickle.h"

#ifdef SIM
#include "sim.h"
#endif

// sizes (in bytes) used for fixed-size buffers, e.g. code_sign_key below
#define MAX_FILE_INFO_LEN   4096
#define MAX_SIGNATURE_LEN   4096
#define MAX_KEY_LEN         4096

#define MAX_COPROCS_PER_JOB 8
    // max # of instances of a GPU that a job can use

extern int rsc_index(const char*);
extern const char* rsc_name(int);
extern COPROCS coprocs;

struct FILE_INFO;
struct ASYNC_VERIFY;

// represents a list of URLs (e.g. to download a file)
// and a current position in that list
//
struct URL_LIST {
    std::vector urls;
        // NOTE(review): the element type is missing in this copy;
        // add() pushes std::string values, so presumably
        // a vector of std::string - confirm against a pristine copy
    int start_index;
    int current_index;

    // NOTE(review): the constructor body is empty, so start_index and
    // current_index are not initialized here; clear() sets both to -1.
    URL_LIST(){};

    // empty the list and reset both indices to -1
    void clear() {
        urls.clear();
        start_index = -1;
        current_index = -1;
    }
    // true if the list contains no URLs
    bool empty() {return urls.empty();}
    const char* get_init_url();
    const char* get_next_url();
    const char* get_current_url(FILE_INFO&);
    // append a URL to the end of the list
    inline void add(std::string url) {
        urls.push_back(url);
    }
    // starts by clearing this list; the remainder of the body is not visible
    void replace(URL_LIST& ul) {
        clear();
        for (unsigned int i=0; i
        // NOTE(review): the text from this point up to "proj_feeds" appears
        // to have been lost from this copy (everything following a '<'
        // seems to have been stripped). The rest of replace(), the end of
        // URL_LIST, and whatever declarations followed are not visible here.
        // Restore from a pristine copy before building.
 proj_feeds;

    // returns project_name if it is non-empty, otherwise master_url
    inline char *get_project_name() {
        if (strlen(project_name)) {
            return project_name;
        } else {
            return master_url;
        }
    }
};
    // NOTE(review): the struct closed above has lost its header in this copy;
    // presumably it is PROJ_AM (the base of PROJECT below) - confirm.

// Describes a project the client is attached to.
// Extends PROJ_AM with account-file items, client-state items,
// scheduler RPC bookkeeping, and scheduling/work-fetch state.
//
struct PROJECT : PROJ_AM {
    // the following items come from the account file
    // They are a function only of the user and the project
    //
    char authenticator[256];
        // user's authenticator on this project
    std::string project_prefs;
        // without the enclosing tags.
        // May include elements
        // (NOTE(review): tag/element names appear to be missing
        // from this comment and the one on project_specific_prefs)
        // This field is used only briefly: between handling a
        // scheduler RPC reply and writing the account file
    std::string project_specific_prefs;
        // without enclosing tags
        // Does not include elements
    std::string gui_urls;
        // GUI URLs, with enclosing tags
    double resource_share;
        // project's resource share relative to other projects.
    double resource_share_frac;
        // fraction of RS of non-suspended, compute-intensive projects

    // the following are from the user's project prefs
    //
    bool no_rsc_pref[MAX_RSC];

    // derived from GPU exclusions in cc_config.xml;
    // disable work fetch if all instances excluded
    //
    bool no_rsc_config[MAX_RSC];

    // the following are from the project itself
    // (or derived from app version list if anonymous platform)
    //
    bool no_rsc_apps[MAX_RSC];

    // the following are from the account manager, if any
    //
    bool no_rsc_ams[MAX_RSC];

    // the following set dynamically
    //
    bool rsc_defer_sched[MAX_RSC];
        // This project has a GPU job for which there's insufficient video RAM.
        // Don't fetch more jobs of this type; they might have same problem

    char host_venue[256];
        // logically, this belongs in the client state file
        // rather than the account file.
        // But we need it in the latter in order to parse prefs.
    bool using_venue_specific_prefs;

    // the following items come from client_state.xml
    // They may depend on the host as well as user and project
    // NOTE: if you add anything, add it to copy_state_fields() also!!!
    //
    std::vector scheduler_urls;
        // where to find scheduling servers
        // NOTE(review): element type missing in this copy
    char symstore[256];
        // URL of symbol server (Windows)
    char user_name[256];
    char team_name[256];
    char email_hash[MD5_LEN];
    char cross_project_id[MD5_LEN];
    double cpid_time;
    double user_total_credit;
    double user_expavg_credit;
    double user_create_time;
    int userid;
    int teamid;
    int hostid;
    double host_total_credit;
    double host_expavg_credit;
    double host_create_time;
    double ams_resource_share;
        // resource share according to AMS; overrides project
        // -1 means not specified by AMS

    // stuff related to scheduler RPCs and master fetch
    //
    int rpc_seqno;
    int nrpc_failures;
        // # of consecutive times we've failed to contact all scheduling servers
    int master_fetch_failures;
    double min_rpc_time;
        // earliest time to contact any server of this project (or zero)
    void set_min_rpc_time(double future_time, const char* reason);
    double next_rpc_time;
        // if nonzero, specifies a time when another scheduler RPC
        // should be done (as requested by server).
        // An RPC could be done sooner than this.
    bool waiting_until_min_rpc_time();
        // returns true if min_rpc_time > now
    bool master_url_fetch_pending;
        // need to fetch and parse the master URL
    int sched_rpc_pending;
        // we need to do a scheduler RPC, for various possible reasons:
        // user request, propagate host CPID, time-based, etc.
        // Reasons are enumerated in lib/common_defs.h
    bool possibly_backed_off;
        // we need to call request_work_fetch() when a project
        // transitions from being backed off to not.
        // This (slightly misnamed) keeps track of whether this
        // may still need to be done for given project
    bool trickle_up_pending;
        // have trickle up to send
    double last_rpc_time;
        // when last RPC finished

    // Other stuff

    bool anonymous_platform;
        // app_versions.xml file found in project dir;
        // use those apps rather than getting from server
    bool non_cpu_intensive;
        // All this project's apps are non-CPU-intensive.
        // Apps can also be individually marked as NCI
    bool verify_files_on_app_start;
        // Check app version and input files on app startup,
        // to make sure they haven't been tampered with.
        // This provides only the illusion of security.
    bool use_symlinks;
    double disk_usage;
        // computed by get_disk_usages()
    double disk_share;
        // computed by get_disk_shares();

    // items sent in scheduler replies, requesting that
    // various things be sent in the next request
    //
    int send_time_stats_log;
        // if nonzero, send time stats log from that point on
    int send_job_log;
        // if nonzero, send this project's job log from that point on
    bool send_full_workload;

    bool suspended_via_gui;
    bool dont_request_more_work;
        // Return work, but don't request more
        // Used for a clean exit to a project,
        // or if a user wants to pause doing work for the project
    bool attached_via_acct_mgr;
    bool detach_when_done;
        // when no results for this project, detach it.
    bool ended;
        // project has ended; advise user to detach
    char code_sign_key[MAX_KEY_LEN];
    std::vector user_files;
    std::vector project_files;
        // files not specific to apps or work - e.g. icons
        // NOTE(review): element types of user_files and project_files
        // are missing in this copy
    int parse_preferences_for_user_files();
    int parse_project_files(XML_PARSER&, bool delete_existing_symlinks);
    void write_project_files(MIOFILE&);
    void link_project_files(bool recreate_symlink_files);
    int write_symlink_for_project_file(FILE_INFO*);
    double project_files_downloaded_time;
        // when last project file download finished
    void update_project_files_downloaded_time();
        // called when a project file download finishes.
        // If it's the last one, set project_files_downloaded_time to now

    double duration_correction_factor;
        // Multiply by this when estimating the CPU time of a result
        // (based on FLOPs estimated and benchmarks).
        // This is dynamically updated in a way that maintains an upper bound.
        // it goes down slowly but if a new estimate X is larger,
        // the factor is set to X.
        //
        // Deprecated - current server logic handles this,
        // and this should go to 1.
        // But we need to keep it around for older projects
    void update_duration_correction_factor(ACTIVE_TASK*);

    // fields used by CPU scheduler and work fetch
    // everything from here on applies only to CPU intensive projects

    bool can_request_work();
        // not suspended and not deferred and not no more work
    bool runnable(int rsc_type);
        // has a runnable result using the given resource type
    bool downloading();
        // has a result in downloading state
    bool potentially_runnable();
        // runnable or contactable or downloading
    bool nearly_runnable();
        // runnable or downloading
    bool overworked();
        // the project has used too much CPU time recently
    bool some_download_stalled();
        // a download is backed off
    bool some_result_suspended();
    double last_upload_start;
        // the last time an upload was started.
        // Used for "work fetch deferral" mechanism:
        // don't request work from a project if an upload started
        // in last X minutes and is still active
    bool uploading();
    bool has_results();

    struct RESULT *next_runnable_result;
        // the next result to run for this project
    int nuploading_results;
        // number of results in UPLOADING state
        // Don't start new results if this exceeds 2*ncpus.
    bool too_many_uploading_results;

    // scheduling (work fetch and job scheduling)
    //
    double sched_priority;
    void compute_sched_priority();

    // stuff for RR sim
    //
    double rr_sim_cpu_share;
    bool rr_sim_active;
    int ncoprocs_excluded[MAX_RSC];
        // number of excluded instances per processor type

    // Ordering by descending sched_priority:
    // this project compares "less than" p when its priority is higher.
    bool operator<(const PROJECT& p) {
        return sched_priority > p.sched_priority;
    }

    // stuff related to work fetch
    //
    RSC_PROJECT_WORK_FETCH rsc_pwf[MAX_RSC];
    PROJECT_WORK_FETCH pwf;
    inline void reset() {
        for (int i=0; i
        // NOTE(review): the text from this point up to "trickle_up_ops"
        // appears to have been lost from this copy; the body of reset()
        // and any declarations that followed it are not visible here.
 trickle_up_ops;

    PROJECT();
    ~PROJECT(){}
    void init();
    void copy_state_fields(PROJECT&);
    int write_account_file();
    int parse_account(FILE*);
    int parse_account_file_venue();
    int parse_account_file();
    int parse_state(XML_PARSER&);
    int write_state(MIOFILE&, bool gui_rpc=false);

    // statistics of the last x days
    std::vector statistics;
        // NOTE(review): element type missing in this copy
    int parse_statistics(MIOFILE&);
    int parse_statistics(FILE*);
    int write_statistics(MIOFILE&, bool gui_rpc=false);
    int write_statistics_file();

    void suspend();
    void resume();
    void abort_not_started();
        // abort unstarted jobs

    // clear AMS-related fields
    // (the visible part clears attached_via_acct_mgr and
    // sets ams_resource_share to -1, i.e. "not specified by AMS")
    inline void detach_ams() {
        attached_via_acct_mgr = false;
        ams_resource_share = -1;
        for (int i=0; i
        // NOTE(review): the text from this point up to "app_files" appears
        // to have been lost from this copy: the rest of detach_ams(),
        // the end of PROJECT, and the beginning of the struct whose
        // members follow (APP_VERSION, judging by its constructor below).
 app_files;
    int ref_cnt;
    char graphics_exec_path[512];
    char graphics_exec_file[256];
    double max_working_set_size;
        // max working set of tasks using this app version.
        // unstarted jobs using this app version are assumed
        // to use this much RAM,
        // so that we don't run a long sequence of jobs,
        // each of which turns out not to fit in available RAM
    bool missing_coproc;
    double missing_coproc_usage;
    char missing_coproc_name[256];
    bool dont_throttle;

    int index;  // temp var for make_scheduler_request()
#ifdef SIM
    bool dont_use;
#endif

    APP_VERSION(){}
    ~APP_VERSION(){}
    int parse(XML_PARSER&);
    int write(MIOFILE&, bool write_file_info = true);
    bool had_download_failure(int& failnum);
    void get_file_errors(std::string&);
    void clear_errors();
    int api_major_version();
    // true if this app version's gpu_usage.rsc_type equals rt
    inline bool uses_coproc(int rt) {
        return (gpu_usage.rsc_type == rt);
    }
    // the resource type recorded in gpu_usage
    inline int rsc_type() {
        return gpu_usage.rsc_type;
    }
};

// A workunit: its name and app name, command line, input files,
// resource estimates/bounds, and links to its project and app.
//
struct WORKUNIT {
    char name[256];
    char app_name[256];
    int version_num;
        // Deprecated, but need to keep around to let people revert
        // to versions before multi-platform support
    std::string command_line;
    std::vector input_files;
        // NOTE(review): element type missing in this copy
    PROJECT* project;
    APP* app;
    int ref_cnt;
    double rsc_fpops_est;
    double rsc_fpops_bound;
    double rsc_memory_bound;
    double rsc_disk_bound;

    WORKUNIT(){}
    ~WORKUNIT(){}
    int parse(XML_PARSER&);
    int write(MIOFILE&);
    bool had_download_failure(int& failnum);
    void get_file_errors(std::string&);
    void clear_errors();
};

// A result (job instance): identity, deadline, output files, state,
// reporting status, and temporaries used by the schedulers.
//
struct RESULT {
    char name[256];
    char wu_name[256];
    double received_time;   // when we got this from server
    double report_deadline;
    int version_num;        // identifies the app used
    char plan_class[64];
    char platform[256];
    APP_VERSION* avp;
    std::vector output_files;
        // NOTE(review): element type missing in this copy
    bool ready_to_report;
        // we're ready to report this result to the server;
        // either computation is done and all the files have been uploaded
        // or there was an error
    double completed_time;
        // time when ready_to_report was set
    bool got_server_ack;
        // we've received the ack for this result from the server
    double final_cpu_time;
    double final_elapsed_time;
#ifdef SIM
    double peak_flop_count;
    double sim_flops_left;
#endif

    // the following are nonzero if reported by app
    double fpops_per_cpu_sec;
    double fpops_cumulative;
    double intops_per_cpu_sec;
    double intops_cumulative;

    int _state;
        // state of this result: see lib/result_state.h
    inline int state() { return _state; }
    // sets the flag only; completed_time is not touched here
    inline void set_ready_to_report() {
        ready_to_report = true;
    }
    void set_state(int, const char*);
    int exit_status;
        // return value from the application
    std::string stderr_out;
        // the concatenation of:
        //
        // - if report_result_error() is called for this result:
        //   x
        //   x
        //   x
        // - if called in FILES_DOWNLOADED state:
        //   x
        // - if called in NEW state:
        //   x for each failed download
        // - if called in COMPUTE_DONE state:
        //   x for each failed upload
        //
        // - X, where X is the app's stderr output
        //
        // NOTE(review): the XML tag names that wrapped each "x"/"X" above
        // appear to have been stripped from this copy of the comment.
    bool suspended_via_gui;
    bool coproc_missing;
        // a coproc needed by this job is missing
        // (e.g. because user removed their GPU board).
    bool report_immediately;
    bool not_started;   // temp for CPU sched

    APP* app;
    WORKUNIT* wup;
    PROJECT* project;

    RESULT(){}
    ~RESULT(){}
    void clear();
    int parse_server(XML_PARSER&);
    int parse_state(XML_PARSER&);
    int parse_name(XML_PARSER&, const char* end_tag);
    int write(MIOFILE&, bool to_server);
    int write_gui(MIOFILE&);
    bool is_upload_done();    // files uploaded?
    void clear_uploaded_flags();
    FILE_REF* lookup_file(FILE_INFO*);
    FILE_INFO* lookup_file_logical(const char*);
    void abort_inactive(int);
        // abort the result if it hasn't started computing yet
        // Called only for results with no active task
        // (otherwise you need to abort the active task)
    void append_log_record();

    // stuff related to CPU scheduling

    bool is_not_started();
    double estimated_duration();
    double estimated_duration_uncorrected();
    double estimated_time_remaining();
    // In SIM builds: sim_flops_left.
    // Otherwise: estimated remaining time multiplied by avp->flops.
    inline double estimated_flops_remaining() {
#ifdef SIM
        return sim_flops_left;
#else
        return estimated_time_remaining()*avp->flops;
#endif
    }

    // true if the state is RESULT_COMPUTE_ERROR or any later state,
    // or if the result is already marked ready to report
    inline bool computing_done() {
        if (state() >= RESULT_COMPUTE_ERROR) return true;
        if (ready_to_report) return true;
        return false;
    }
    bool runnable();
        // downloaded, not finished, not suspended, project not suspended
    bool nearly_runnable();
        // downloading or downloaded,
        // not finished, suspended, project not suspended
    bool downloading();
        // downloading, not downloaded, not suspended, project not suspended
    bool some_download_stalled();
        // some input or app file is downloading, and backed off
        // i.e. it may be a long time before we can run this result

    // true if the app version's gpu_usage.rsc_type is nonzero
    inline bool uses_coprocs() {
        return (avp->gpu_usage.rsc_type != 0);
    }
    // the resource type recorded in the app version's gpu_usage
    inline int resource_type() {
        return avp->gpu_usage.rsc_type;
    }
    // true if either the project or the app is marked non-CPU-intensive
    inline bool non_cpu_intensive() {
        if (project->non_cpu_intensive) return true;
        if (app->non_cpu_intensive) return true;
        return false;
    }
    // true if the job is non-CPU-intensive,
    // or its app version is marked dont_throttle
    inline bool dont_throttle() {
        if (non_cpu_intensive()) return true;
        if (avp->dont_throttle) return true;
        return false;
    }

    // temporaries used in CLIENT_STATE::rr_simulation():
    double rrsim_flops_left;
    double rrsim_finish_delay;
    double rrsim_flops;
    bool rrsim_done;

    bool already_selected;
        // used to keep cpu scheduler from scheduling a result twice
        // transient; used only within schedule_cpus()
    double computation_deadline();
        // report deadline - prefs.work_buf_min - time slice
    bool rr_sim_misses_deadline;

    // temporaries used in enforce_schedule():
    bool unfinished_time_slice;
    int seqno;

    bool edf_scheduled;
        // temporary used to tell GUI that this result is deadline-scheduled

    int coproc_indices[MAX_COPROCS_PER_JOB];
        // keep track of coprocessor reservations
    char resources[256];
        // textual description of resources used
    double schedule_backoff;
        // don't try to schedule until this time
        // (wait for free GPU RAM)
};

// represents an always/auto/never value, possibly temporarily overridden
//
struct RUN_MODE {
    int perm_mode;
    int temp_mode;
    int prev_mode;
    double temp_timeout;
    RUN_MODE();
    void set(int mode, double duration);
    void set_prev(int mode);
    int get_perm();
    int get_prev();
    int get_current();
    double delay();
};

// a platform supported by the client.
//
struct PLATFORM {
    std::string name;
};

#endif