// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2008 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . // If you change anything, make sure you also change: // client_types.C (to write and parse it) // client_state.C (to cross-link objects) // #ifndef _CLIENT_TYPES_ #define _CLIENT_TYPES_ #include "cpp.h" #if !defined(_WIN32) || defined(__CYGWIN32__) #include #include #endif #include "md5_file.h" #include "cert_sig.h" #include "hostinfo.h" #include "coproc.h" #include "miofile.h" #include "common_defs.h" #include "rr_sim.h" #include "work_fetch.h" #include "cs_notice.h" #define MAX_FILE_INFO_LEN 4096 #define MAX_SIGNATURE_LEN 4096 #define MAX_KEY_LEN 4096 #define MAX_COPROCS_PER_JOB 8 // If the status is neither of these two, // it will be an error code defined in error_numbers.h, // indicating an unrecoverable error in the upload or download of the file, // or that the file was too big and was deleted // #define FILE_NOT_PRESENT 0 #define FILE_PRESENT 1 struct FILE_INFO { char name[256]; char md5_cksum[33]; double max_nbytes; double nbytes; double upload_offset; bool generated_locally; // file is produced by app int status; bool executable; // change file protections to make executable bool uploaded; // file has been uploaded bool upload_when_present; bool sticky; // don't delete unless instructed to do so bool report_on_rpc; // include this in each scheduler request /// if not in use, delete even if sticky is true /// don't report to server even if report_on_rpc is true bool marked_for_delete; // server requested delete; bool signature_required; // true iff associated with app version bool is_user_file; bool is_project_file; bool is_auto_update_file; /// for output files: gzip file when done, and append .gz to its name bool gzip_when_done; /// nonzero if in the process of being up/downloaded class PERS_FILE_XFER* pers_file_xfer; /// for upload files (to authenticate) RESULT* result; PROJECT* project; int ref_cnt; std::vector urls; int start_url; int current_url; /// if the file_info is signed (for uploadable files) /// this is the text that is signed /// Otherwise it is the FILE_INFO's XML descriptor /// (without enclosing tags) char signed_xml[MAX_FILE_INFO_LEN]; /// ... and this is the signature char xml_signature[MAX_SIGNATURE_LEN]; /// if the file itself is signed (for executable files) /// this is the signature char file_signature[MAX_SIGNATURE_LEN]; /// if permanent error occurs during file xfer, it's recorded here std::string error_msg; CERT_SIGS* cert_sigs; FILE_INFO(); ~FILE_INFO(); void reset(); int set_permissions(); int parse(MIOFILE&, bool from_server); int write(MIOFILE&, bool to_server); int write_gui(MIOFILE&); /// attempt to delete the underlying file int delete_file(); const char* get_init_url(); const char* get_next_url(); const char* get_current_url(); bool had_failure(int& failnum); void failure_message(std::string&); int merge_info(FILE_INFO&); int verify_file(bool, bool); bool verify_file_certs(); /// gzip file and add .gz to name int gzip(); }; /// Describes a connection between a file and a workunit, result, or application /// In the first two cases, /// the app will either use open() or fopen() to access the file /// (in which case "open_name" is the name it will use) /// or the app will be connected by the given fd (in which case fd is nonzero) struct FILE_REF { /// physical name char file_name[256]; /// logical name char open_name[256]; bool main_program; FILE_INFO* file_info; /// if true, core client will copy the file instead of linking bool copy_file; /// for output files: app may not generate file; /// don't treat as error if file is missing. bool optional; int parse(MIOFILE&); int write(MIOFILE&); }; // file xfer backoff state for a project and direction (up/down) // if file_xfer_failures exceeds FILE_XFER_FAILURE_LIMIT, // we switch from a per-file to a project-wide backoff policy // (separately for the up/down directions) // NOTE: this refers to transient failures, not permanent. // #define FILE_XFER_FAILURE_LIMIT 3 struct FILE_XFER_BACKOFF { int file_xfer_failures; // count of consecutive failures double next_xfer_time; // when to start trying again bool ok_to_transfer(); void file_xfer_failed(PROJECT*); void file_xfer_succeeded(); FILE_XFER_BACKOFF() { file_xfer_failures = 0; next_xfer_time = 0; } }; /// statistics at a specific day struct DAILY_STATS { double user_total_credit; double user_expavg_credit; double host_total_credit; double host_expavg_credit; double day; void clear(); DAILY_STATS() {clear();} int parse(FILE*); }; bool operator < (const DAILY_STATS&, const DAILY_STATS&); struct PROJECT { // the following items come from the account file // They are a function only of the user and the project // /// url of site that contains scheduler tags for this project char master_url[256]; /// user's authenticator on this project char authenticator[256]; /// without the enclosing tags. /// May include elements /// This field is used only briefly: between handling a /// scheduler RPC reply and writing the account file std::string project_prefs; /// without enclosing tags /// Does not include elements std::string project_specific_prefs; /// GUI URLs, with enclosing tags std::string gui_urls; /// project's resource share relative to other projects. double resource_share; // the following are the user's project prefs bool no_cpu_pref; bool no_cuda_pref; bool no_ati_pref; // the following are from the project itself bool no_cpu_apps; bool no_cuda_apps; bool no_ati_apps; // the following set dynamically bool cuda_low_mem; bool ati_low_mem; /// logically, this belongs in the client state file /// rather than the account file. /// But we need it in the latter in order to parse prefs. char host_venue[256]; bool using_venue_specific_prefs; // the following items come from client_state.xml // They may depend on the host as well as user and project // NOTE: if you add anything, add it to copy_state_fields() also!!! // /// where to find scheduling servers std::vector scheduler_urls; /// descriptive. not unique char project_name[256]; /// URL of symbol server (Windows) char symstore[256]; char user_name[256]; char team_name[256]; char email_hash[MD5_LEN]; char cross_project_id[MD5_LEN]; double cpid_time; double user_total_credit; double user_expavg_credit; double user_create_time; int hostid; double host_total_credit; double host_expavg_credit; double host_create_time; double ams_resource_share; // resource share according to AMS; overrides project // -1 means not specified by AMS // stuff related to scheduler RPCs and master fetch // int rpc_seqno; /// # of consecutive times we've failed to contact all scheduling servers int nrpc_failures; int master_fetch_failures; /// earliest time to contact any server of this project (or zero) double min_rpc_time; void set_min_rpc_time(double future_time, const char* reason); /// if nonzero, specifies a time when another scheduler RPC /// should be done (as requested by server). /// An RPC could be done sooner than this. double next_rpc_time; /// returns true if min_rpc_time > now bool waiting_until_min_rpc_time(); /// need to fetch and parse the master URL bool master_url_fetch_pending; /// we need to do a scheduler RPC, for various possible reasons: /// user request, propagate host CPID, time-based, etc. /// Reasons are enumerated in scheduler_op.h int sched_rpc_pending; /// we need to call request_work_fetch() when a project /// transitions from being backed off to not. /// This (slightly misnamed) keeps track of whether this /// may still need to be done for given project bool possibly_backed_off; /// have trickle up to send bool trickle_up_pending; /// when last RPC finished double last_rpc_time; // Other stuff /// app_versions.xml file found in project dir; /// use those apps rather then getting from server bool anonymous_platform; bool non_cpu_intensive; bool verify_files_on_app_start; bool use_symlinks; // items send in scheduler replies, requesting that // various things be sent in the next request // /// send the list of permanent files associated with the project /// in the next scheduler reply bool send_file_list; /// if nonzero, send time stats log from that point on int send_time_stats_log; /// if nonzero, send this project's job log from that point on int send_job_log; bool suspended_via_gui; /// Return work, but don't request more /// Used for a clean exit to a project, /// or if a user wants to pause doing work for the project bool dont_request_more_work; bool attached_via_acct_mgr; /// when no results for this project, detach it. bool detach_when_done; /// project has ended; advise user to detach bool ended; char code_sign_key[MAX_KEY_LEN]; std::vector user_files; /// files not specific to apps or work - e.g. icons std::vector project_files; int parse_preferences_for_user_files(); int parse_project_files(MIOFILE&, bool delete_existing_symlinks); void write_project_files(MIOFILE&); void link_project_files(bool recreate_symlink_files); int write_symlink_for_project_file(FILE_INFO*); /// when last project file download finished double project_files_downloaded_time; /// called when a project file download finishes. /// If it's the last one, set project_files_downloaded_time to now void update_project_files_downloaded_time(); /// Multiply by this when estimating the CPU time of a result /// (based on FLOPs estimated and benchmarks). /// This is dynamically updated in a way that maintains an upper bound. /// it goes down slowly but if a new estimate X is larger, /// the factor is set to X. double duration_correction_factor; void update_duration_correction_factor(ACTIVE_TASK*); // fields used by CPU scheduler and work fetch // everything from here on applies only to CPU intensive projects /// not suspended and not deferred and not no more work bool can_request_work(); /// has a runnable result using the given resource type bool runnable(int rsc_type); /// has a result in downloading state bool downloading(); /// runnable or contactable or downloading bool potentially_runnable(); /// runnable or downloading bool nearly_runnable(); /// the project has used too much CPU time recently bool overworked(); /// a download is backed off bool some_download_stalled(); bool some_result_suspended(); RR_SIM_PROJECT_STATUS rr_sim_status; // temps used in CLIENT_STATE::rr_simulation(); /// the next result to run for this project struct RESULT *next_runnable_result; /// number of results in UPLOADING state /// Don't start new results if these exceeds 2*ncpus. int nuploading_results; bool too_many_uploading_results; // stuff related to work fetch // RSC_PROJECT_WORK_FETCH cpu_pwf; RSC_PROJECT_WORK_FETCH cuda_pwf; RSC_PROJECT_WORK_FETCH ati_pwf; PROJECT_WORK_FETCH pwf; inline void reset() { cpu_pwf.reset(); cuda_pwf.reset(); ati_pwf.reset(); } inline int deadlines_missed(int rsc_type) { switch(rsc_type) { case RSC_TYPE_CUDA: return cuda_pwf.deadlines_missed; case RSC_TYPE_ATI: return ati_pwf.deadlines_missed; } return cpu_pwf.deadlines_missed; } /// # of results being returned in current scheduler op int nresults_returned; /// get scheduler URL with random offset r const char* get_scheduler_url(int index, double r); /// temporary used when scanning projects bool checked; FILE_XFER_BACKOFF download_backoff; FILE_XFER_BACKOFF upload_backoff; inline FILE_XFER_BACKOFF& file_xfer_backoff(bool is_upload) { return is_upload?upload_backoff:download_backoff; } PROJECT(); ~PROJECT(){} void init(); void copy_state_fields(PROJECT&); char *get_project_name(); int write_account_file(); int parse_account(FILE*); int parse_account_file_venue(); int parse_account_file(); int parse_state(MIOFILE&); int write_state(MIOFILE&, bool gui_rpc=false); // statistic of the last x days std::vector statistics; int parse_statistics(MIOFILE&); int parse_statistics(FILE*); int write_statistics(MIOFILE&, bool gui_rpc=false); int write_statistics_file(); // feed-related std::vector proj_feeds; }; struct APP { char name[256]; char user_friendly_name[256]; PROJECT* project; int parse(MIOFILE&); int write(MIOFILE&); }; struct APP_VERSION { char app_name[256]; int version_num; char platform[256]; char plan_class[64]; char api_version[16]; double avg_ncpus; double max_ncpus; double ncudas; double natis; double gpu_ram; double flops; /// additional cmdline args char cmdline[256]; APP* app; PROJECT* project; std::vector app_files; int ref_cnt; char graphics_exec_path[512]; double max_working_set_size; // max working set of tasks using this app version. // temp var used in schedule_cpus() //double temp_dcf; APP_VERSION(){} ~APP_VERSION(){} int parse(MIOFILE&); int write(MIOFILE&, bool write_file_info = true); bool had_download_failure(int& failnum); void get_file_errors(std::string&); void clear_errors(); int api_major_version(); bool missing_coproc(); inline bool uses_coproc(int rsc_type) { switch (rsc_type) { case RSC_TYPE_CUDA: return (ncudas>0); case RSC_TYPE_ATI: return (natis>0); } return false; } inline int rsc_type() { if (ncudas>0) return RSC_TYPE_CUDA; if (natis>0) return RSC_TYPE_ATI; return RSC_TYPE_CPU; } }; struct WORKUNIT { char name[256]; char app_name[256]; /// Deprecated, but need to keep around to let people revert /// to versions before multi-platform support int version_num; std::string command_line; std::vector input_files; PROJECT* project; APP* app; int ref_cnt; double rsc_fpops_est; double rsc_fpops_bound; double rsc_memory_bound; double rsc_disk_bound; WORKUNIT(){} ~WORKUNIT(){} int parse(MIOFILE&); int write(MIOFILE&); bool had_download_failure(int& failnum); void get_file_errors(std::string&); void clear_errors(); }; struct RESULT { char name[256]; char wu_name[256]; double received_time; // when we got this from server double report_deadline; int version_num; // identifies the app used char plan_class[64]; char platform[256]; APP_VERSION* avp; std::vector output_files; /// we're ready to report this result to the server; /// either computation is done and all the files have been uploaded /// or there was an error bool ready_to_report; /// time when ready_to_report was set double completed_time; /// we've received the ack for this result from the server bool got_server_ack; double final_cpu_time; double final_elapsed_time; // the following are nonzero if reported by app double fpops_per_cpu_sec; double fpops_cumulative; double intops_per_cpu_sec; double intops_cumulative; /// state of this result: see lib/result_state.h int _state; inline int state() { return _state; } void set_state(int, const char*); /// return value from the application int exit_status; /// the concatenation of: /// /// - if report_result_error() is called for this result: /// x /// x /// x /// - if called in FILES_DOWNLOADED state: /// x /// - if called in NEW state: /// x for each failed download /// - if called in COMPUTE_DONE state: /// x for each failed upload /// /// - X, where X is the app's stderr output std::string stderr_out; bool suspended_via_gui; bool coproc_missing; // a coproc needed by this job is missing // (e.g. because user removed their GPU board). APP* app; /// this may be NULL after result is finished WORKUNIT* wup; PROJECT* project; RESULT(){} ~RESULT(){} void clear(); int parse_server(MIOFILE&); int parse_state(MIOFILE&); int parse_name(FILE*, const char* end_tag); int write(MIOFILE&, bool to_server); int write_gui(MIOFILE&); bool is_upload_done(); // files uploaded? void clear_uploaded_flags(); FILE_REF* lookup_file(FILE_INFO*); FILE_INFO* lookup_file_logical(const char*); /// abort the result if it hasn't started computing yet /// Called only for results with no active task /// (otherwise you need to abort the active task) void abort_inactive(int); void append_log_record(); // stuff related to CPU scheduling double estimated_duration(bool for_work_fetch); double estimated_duration_uncorrected(); double estimated_time_remaining(bool for_work_fetch); inline double estimated_flops_remaining() { return estimated_time_remaining(false)*avp->flops; } inline bool computing_done() { if (state() >= RESULT_COMPUTE_ERROR) return true; if (ready_to_report) return true; return false; } bool not_started(); /// downloaded, not finished, not suspended, project not suspended bool runnable(); /// downloading or downloaded, /// not finished, suspended, project not suspended bool nearly_runnable(); /// downloading, not downloaded, not suspended, project not suspended bool downloading(); /// some input or app file is downloading, and backed off /// i.e. it may be a long time before we can run this result bool some_download_stalled(); inline bool uses_cuda() { return (avp->ncudas > 0); } inline bool uses_ati() { return (avp->natis > 0); } inline bool uses_coprocs() { if (avp->ncudas > 0) return true; if (avp->natis > 0) return true; return false; } // temporaries used in CLIENT_STATE::rr_simulation(): double rrsim_flops_left; double rrsim_finish_delay; double rrsim_flops; /// used to keep cpu scheduler from scheduling a result twice /// transient; used only within schedule_cpus() bool already_selected; /// report deadline - prefs.work_buf_min - time slice double computation_deadline(); bool rr_sim_misses_deadline; // temporaries used in enforce_schedule(): bool unfinished_time_slice; int seqno; /// temporary used to tell GUI that this result is deadline-scheduled bool edf_scheduled; int coproc_indices[MAX_COPROCS_PER_JOB]; // keep track of coprocessor reservations char resources[256]; // textual description of resources used bool insufficient_video_ram(); double schedule_backoff; // don't try to schedule until this time // (wait for free video RAM) }; /// represents an always/auto/never value, possibly temporarily overridden class MODE { private: int perm_mode; int temp_mode; double temp_timeout; public: MODE(); void set(int mode, double duration); int get_perm(); int get_current(); double delay(); }; /// a platform supported by the client. class PLATFORM { public: std::string name; }; #endif