// boinc/client/client_types.h

// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.

// If you change anything, make sure you also change:
// client_types.C         (to write and parse it)
// client_state.C  (to cross-link objects)
//

#ifndef _CLIENT_TYPES_
#define _CLIENT_TYPES_

#include "cpp.h"

#if !defined(_WIN32) || defined(__CYGWIN32__)
#include <cstdio>
#include <sys/time.h>
#endif

#include "md5_file.h"
#include "cert_sig.h"
#include "hostinfo.h"
#include "coproc.h"
#include "miofile.h"
#include "common_defs.h"

#include "rr_sim.h"
#include "work_fetch.h"
#include "cs_notice.h"

// Sizes of the fixed char buffers declared in this header.
//
// size of FILE_INFO::signed_xml
#define MAX_FILE_INFO_LEN   4096
// size of FILE_INFO::xml_signature and FILE_INFO::file_signature
#define MAX_SIGNATURE_LEN   4096
// size of PROJECT::code_sign_key
#define MAX_KEY_LEN         4096

// number of entries in RESULT::coproc_indices
#define MAX_COPROCS_PER_JOB 8

// File status values (presumably for FILE_INFO::status, declared below).
// If the status is neither of these two,
// it will be an error code defined in error_numbers.h,
// indicating an unrecoverable error in the upload or download of the file,
// or that the file was too big and was deleted
//
#define FILE_NOT_PRESENT    0
#define FILE_PRESENT        1

/// Description of a single file known to the client, together with its
/// transfer state, its list of URLs and its signatures.
struct FILE_INFO {
    char name[256];
        /// checksum string; presumably 32 hex digits plus terminator
        /// (TODO confirm against md5_file.h)
    char md5_cksum[33];
    double max_nbytes;
    double nbytes;
    double upload_offset;
    bool generated_locally; // file is produced by app
        /// FILE_NOT_PRESENT, FILE_PRESENT, or an error code
        /// (see the comment on those macros)
    int status;
    bool executable;        // change file protections to make executable
    bool uploaded;          // file has been uploaded
    bool upload_when_present;
    bool sticky;            // don't delete unless instructed to do so
    bool report_on_rpc;     // include this in each scheduler request
        /// server requested delete:
        /// if not in use, delete even if sticky is true;
        /// don't report to server even if report_on_rpc is true
    bool marked_for_delete;
    bool signature_required;    // true iff associated with app version
    bool is_user_file;
    bool is_project_file;
	bool is_auto_update_file;
        /// for output files: gzip file when done, and append .gz to its name
    bool gzip_when_done;
        /// nonzero if in the process of being up/downloaded
    class PERS_FILE_XFER* pers_file_xfer;
        /// for upload files (to authenticate)
    RESULT* result;
    PROJECT* project;
    int ref_cnt;
        /// URLs for this file; start_url and current_url are indices
        /// (presumably into this vector - see get_init_url() and
        /// get_next_url() in the implementation file)
    std::vector<std::string> urls;
    int start_url;
    int current_url;
        /// if the file_info is signed (for uploadable files)
        /// this is the text that is signed
        /// Otherwise it is the FILE_INFO's XML descriptor
        /// (without enclosing <file_info> tags)
    char signed_xml[MAX_FILE_INFO_LEN];
        /// ... and this is the signature
    char xml_signature[MAX_SIGNATURE_LEN];
        /// if the file itself is signed (for executable files)
        /// this is the signature
    char file_signature[MAX_SIGNATURE_LEN];
        /// if permanent error occurs during file xfer, it's recorded here
    std::string error_msg;
    CERT_SIGS* cert_sigs;

    FILE_INFO();
    ~FILE_INFO();
    void reset();
    int set_permissions();
        /// read / write the XML form; the flag tells whether the peer
        /// is the server
    int parse(MIOFILE&, bool from_server);
    int write(MIOFILE&, bool to_server);
    int write_gui(MIOFILE&);
        /// attempt to delete the underlying file
    int delete_file();
    const char* get_init_url();
    const char* get_next_url();
    const char* get_current_url();
    bool had_failure(int& failnum);
    void failure_message(std::string&);
    int merge_info(FILE_INFO&);
    int verify_file(bool, bool);
    bool verify_file_certs();
        /// gzip file and add .gz to name
    int gzip();
};

/// Describes a connection between a file and a workunit, result, or application

/// In the first two cases,
/// the app will either use open() or fopen() to access the file
/// (in which case "open_name" is the name it will use)
/// or the app will be connected by the given fd (in which case fd is nonzero)
/// NOTE(review): no fd member is declared in this struct,
/// so the remark about fd may be stale - confirm.

struct FILE_REF {
        /// physical name
    char file_name[256];
        /// logical name
    char open_name[256];
    bool main_program;
        /// the FILE_INFO this reference points at
    FILE_INFO* file_info;
        /// if true, core client will copy the file instead of linking
    bool copy_file;
        /// for output files: app may not generate file;
        /// don't treat as error if file is missing.
	bool optional;
    int parse(MIOFILE&);
    int write(MIOFILE&);
};

// Backoff state for file transfers, kept per project and per
// direction (upload/download).
// Once file_xfer_failures exceeds FILE_XFER_FAILURE_LIMIT,
// the policy switches from per-file to project-wide backoff
// (independently for each direction).
// NOTE: only transient failures are meant here, not permanent ones.
//
#define FILE_XFER_FAILURE_LIMIT 3
struct FILE_XFER_BACKOFF {
        // count of consecutive failures
    int file_xfer_failures;
        // when to start trying again
    double next_xfer_time;

        // start out with no failures and no pending delay
    FILE_XFER_BACKOFF() : file_xfer_failures(0), next_xfer_time(0) {}

    bool ok_to_transfer();
    void file_xfer_failed(PROJECT*);
    void file_xfer_succeeded();
};

/// Credit statistics for one specific day.

struct DAILY_STATS {
    double user_total_credit;
    double user_expavg_credit;
    double host_total_credit;
    double host_expavg_credit;
    double day;

        /// construction delegates to clear()
    DAILY_STATS() {
        clear();
    }
    void clear();
    int parse(FILE*);
};
    /// comparison of two records; defined outside this header
bool operator < (const DAILY_STATS&, const DAILY_STATS&);

/// State kept by the client for one project: account-file data,
/// client-state data, scheduler-RPC bookkeeping, project-level flags,
/// and the fields used by the CPU scheduler and work fetch.
struct PROJECT {
    // the following items come from the account file
    // They are a function only of the user and the project
    //
        /// url of site that contains scheduler tags for this project
    char master_url[256];
        /// user's authenticator on this project
    char authenticator[256];
        /// without the enclosing <project_preferences> tags.
        /// May include <venue> elements
        /// This field is used only briefly: between handling a
        /// scheduler RPC reply and writing the account file
    std::string project_prefs;
        /// without enclosing <project_specific> tags
        /// Does not include <venue> elements
    std::string project_specific_prefs;
        /// GUI URLs, with enclosing <gui_urls> tags
    std::string gui_urls;
        /// project's resource share relative to other projects.
    double resource_share;
        // the following are the user's project prefs
    bool no_cpu_pref;
    bool no_cuda_pref;
    bool no_ati_pref;
        // the following are from the project itself
    bool no_cpu_apps;
    bool no_cuda_apps;
    bool no_ati_apps;
        // the following are set dynamically
    bool cuda_low_mem;
    bool ati_low_mem;

        /// logically, this belongs in the client state file
        /// rather than the account file.
        /// But we need it in the latter in order to parse prefs.
    char host_venue[256];
    bool using_venue_specific_prefs;

    // the following items come from client_state.xml
    // They may depend on the host as well as user and project
    // NOTE: if you add anything, add it to copy_state_fields() also!!!
    //
        /// where to find scheduling servers
    std::vector<std::string> scheduler_urls;
        /// descriptive.  not unique
    char project_name[256];
        /// URL of symbol server (Windows)
    char symstore[256];
    char user_name[256];
    char team_name[256];
    char email_hash[MD5_LEN];
    char cross_project_id[MD5_LEN];
    double cpid_time;
    double user_total_credit;
    double user_expavg_credit;
    double user_create_time;
    int hostid;
    double host_total_credit;
    double host_expavg_credit;
    double host_create_time;
    double ams_resource_share;
        // resource share according to AMS; overrides project
        // -1 means not specified by AMS

    // stuff related to scheduler RPCs and master fetch
    //
    int rpc_seqno;
        /// # of consecutive times we've failed to contact all scheduling servers
    int nrpc_failures;
    int master_fetch_failures;
        /// earliest time to contact any server of this project (or zero)
    double min_rpc_time;
    void set_min_rpc_time(double future_time, const char* reason);
        /// if nonzero, specifies a time when another scheduler RPC
        /// should be done (as requested by server).
        /// An RPC could be done sooner than this.
    double next_rpc_time;
        /// returns true if min_rpc_time > now
    bool waiting_until_min_rpc_time();
        /// need to fetch and parse the master URL
    bool master_url_fetch_pending;
        /// we need to do a scheduler RPC, for various possible reasons:
        /// user request, propagate host CPID, time-based, etc.
        /// Reasons are enumerated in scheduler_op.h
    int sched_rpc_pending;
        /// we need to call request_work_fetch() when a project
        /// transitions from being backed off to not.
        /// This (slightly misnamed) keeps track of whether this
        /// may still need to be done for given project
    bool possibly_backed_off;
        /// have trickle up to send
    bool trickle_up_pending;
        /// when last RPC finished
    double last_rpc_time;

    // Other stuff

        /// app_versions.xml file found in project dir;
        /// use those apps rather than getting from server
    bool anonymous_platform;
    bool non_cpu_intensive;
    bool verify_files_on_app_start;
    bool use_symlinks;

    // items sent in scheduler replies, requesting that
    // various things be sent in the next request
    //
        /// send the list of permanent files associated with the project
        /// in the next scheduler reply
    bool send_file_list;
        /// if nonzero, send time stats log from that point on
    int send_time_stats_log;
        /// if nonzero, send this project's job log from that point on
    int send_job_log;

    bool suspended_via_gui;
        /// Return work, but don't request more
        /// Used for a clean exit to a project,
        /// or if a user wants to pause doing work for the project
    bool dont_request_more_work;
    bool attached_via_acct_mgr;
        /// when no results for this project, detach it.
    bool detach_when_done;
        /// project has ended; advise user to detach
    bool ended;
    char code_sign_key[MAX_KEY_LEN];
    std::vector<FILE_REF> user_files;
        /// files not specific to apps or work - e.g. icons
    std::vector<FILE_REF> project_files;
    int parse_preferences_for_user_files();
    int parse_project_files(MIOFILE&, bool delete_existing_symlinks);
    void write_project_files(MIOFILE&);
    void link_project_files(bool recreate_symlink_files);
    int write_symlink_for_project_file(FILE_INFO*);
        /// when last project file download finished
    double project_files_downloaded_time;
        /// called when a project file download finishes.
        /// If it's the last one, set project_files_downloaded_time to now
    void update_project_files_downloaded_time();

        /// Multiply by this when estimating the CPU time of a result
        /// (based on FLOPs estimated and benchmarks).
        /// This is dynamically updated in a way that maintains an upper bound.
        /// it goes down slowly but if a new estimate X is larger,
        /// the factor is set to X.
    double duration_correction_factor;
    void update_duration_correction_factor(ACTIVE_TASK*);

    // fields used by CPU scheduler and work fetch
    // everything from here on applies only to CPU intensive projects

        /// not suspended and not deferred and not no more work
    bool can_request_work();
        /// has a runnable result using the given resource type
    bool runnable(int rsc_type);
        /// has a result in downloading state
    bool downloading();
        /// runnable or contactable or downloading
    bool potentially_runnable();
        /// runnable or downloading
    bool nearly_runnable();
        /// the project has used too much CPU time recently
    bool overworked();
        /// a download is backed off
    bool some_download_stalled();
    bool some_result_suspended();

    RR_SIM_PROJECT_STATUS rr_sim_status;
        // temps used in CLIENT_STATE::rr_simulation();

        /// the next result to run for this project
    struct RESULT *next_runnable_result;
        /// number of results in UPLOADING state
        /// Don't start new results if this exceeds 2*ncpus.
    int nuploading_results;
    bool too_many_uploading_results;

    // stuff related to work fetch
    //
    RSC_PROJECT_WORK_FETCH cpu_pwf;
    RSC_PROJECT_WORK_FETCH cuda_pwf;
    RSC_PROJECT_WORK_FETCH ati_pwf;
    PROJECT_WORK_FETCH pwf;
        /// reset the three per-resource work-fetch records
        /// (cpu, cuda, ati); pwf itself is not touched here
    inline void reset() {
        cpu_pwf.reset();
        cuda_pwf.reset();
        ati_pwf.reset();
    }
        /// deadlines_missed value of the work-fetch record for rsc_type;
        /// any type other than RSC_TYPE_CUDA or RSC_TYPE_ATI
        /// yields the CPU record's value.
        /// Read-only, hence const.
    inline int deadlines_missed(int rsc_type) const {
        switch(rsc_type) {
        case RSC_TYPE_CUDA: return cuda_pwf.deadlines_missed;
        case RSC_TYPE_ATI: return ati_pwf.deadlines_missed;
        }
        return cpu_pwf.deadlines_missed;
    }

        /// # of results being returned in current scheduler op
    int nresults_returned;
        /// get scheduler URL with random offset r
    const char* get_scheduler_url(int index, double r);
        /// temporary used when scanning projects
    bool checked;

    FILE_XFER_BACKOFF download_backoff;
    FILE_XFER_BACKOFF upload_backoff;
        /// backoff state for the requested transfer direction
    inline FILE_XFER_BACKOFF& file_xfer_backoff(bool is_upload) {
        return is_upload?upload_backoff:download_backoff;
    }

    PROJECT();
    ~PROJECT(){}
    void init();
    void copy_state_fields(PROJECT&);
    char *get_project_name();
    int write_account_file();
    int parse_account(FILE*);
    int parse_account_file_venue();
    int parse_account_file();
    int parse_state(MIOFILE&);
    int write_state(MIOFILE&, bool gui_rpc=false);

    // statistic of the last x days
    std::vector<DAILY_STATS> statistics;
    int parse_statistics(MIOFILE&);
    int parse_statistics(FILE*);
    int write_statistics(MIOFILE&, bool gui_rpc=false);
    int write_statistics_file();

    // feed-related
    std::vector<RSS_FEED> proj_feeds;
};

/// An application, identified by name, belonging to a project.
struct APP {
    char name[256];
    char user_friendly_name[256];
        /// the project this application belongs to
    PROJECT* project;

    int parse(MIOFILE&);
    int write(MIOFILE&);
};

/// One version of an application, described by application name,
/// version number, platform and plan class, plus its resource usage
/// and the files that make it up.
/// NOTE: the default constructor is empty, so members of built-in type
/// are not initialized by it.
struct APP_VERSION {
    char app_name[256];
    int version_num;
    char platform[256];
    char plan_class[64];
    char api_version[16];
    double avg_ncpus;
    double max_ncpus;
        /// > 0 means this version uses a CUDA coprocessor
        /// (see uses_coproc() and rsc_type())
    double ncudas;
        /// > 0 means this version uses an ATI coprocessor
    double natis;
    double gpu_ram;
    double flops;
        /// additional cmdline args
    char cmdline[256];

    APP* app;
    PROJECT* project;
    std::vector<FILE_REF> app_files;
    int ref_cnt;
    char graphics_exec_path[512];
    double max_working_set_size;
        // max working set of tasks using this app version.
        // temp var used in schedule_cpus()
    //double temp_dcf;

    APP_VERSION(){}
    ~APP_VERSION(){}
    int parse(MIOFILE&);
    int write(MIOFILE&, bool write_file_info = true);
    bool had_download_failure(int& failnum);
    void get_file_errors(std::string&);
    void clear_errors();
    int api_major_version();
    bool missing_coproc();
        /// true if this version uses the coprocessor of the given type.
        /// Returns false for any type other than RSC_TYPE_CUDA / RSC_TYPE_ATI.
        /// Read-only, hence const.
    inline bool uses_coproc(int rsc_type) const {
        switch (rsc_type) {
        case RSC_TYPE_CUDA: return (ncudas>0);
        case RSC_TYPE_ATI: return (natis>0);
        }
        return false;
    }
        /// resource type of this version: CUDA is checked first,
        /// then ATI; otherwise RSC_TYPE_CPU.
        /// Read-only, hence const.
    inline int rsc_type() const {
        if (ncudas>0) return RSC_TYPE_CUDA;
        if (natis>0) return RSC_TYPE_ATI;
        return RSC_TYPE_CPU;
    }
};

/// A unit of work: its name, the application it is for, its command
/// line and input files, and its resource estimates and bounds.
/// NOTE: the default constructor is empty, so members of built-in type
/// are not initialized by it.
struct WORKUNIT {
    char name[256];
    char app_name[256];
        /// Deprecated, but need to keep around to let people revert
        /// to versions before multi-platform support
    int version_num;
    std::string command_line;
    std::vector<FILE_REF> input_files;
    PROJECT* project;
    APP* app;
    int ref_cnt;
        // resource estimates/bounds (fpops, memory, disk);
        // units are not stated in this header
    double rsc_fpops_est;
    double rsc_fpops_bound;
    double rsc_memory_bound;
    double rsc_disk_bound;

    WORKUNIT(){}
    ~WORKUNIT(){}
    int parse(MIOFILE&);
    int write(MIOFILE&);
    bool had_download_failure(int& failnum);
    void get_file_errors(std::string&);
    void clear_errors();
};

/// One result (job instance): identity, links to its workunit,
/// app version and project, its state and outcome,
/// and temporaries used by CPU scheduling.
/// NOTE: the default constructor is empty, so members of built-in type
/// are not initialized by it; a separate clear() is declared.
struct RESULT {
    char name[256];
    char wu_name[256];
    double received_time;   // when we got this from server
    double report_deadline;
    int version_num;        // identifies the app used
    char plan_class[64];
    char platform[256];
    APP_VERSION* avp;
    std::vector<FILE_REF> output_files;
        /// we're ready to report this result to the server;
        /// either computation is done and all the files have been uploaded
        /// or there was an error
    bool ready_to_report;
        /// time when ready_to_report was set
    double completed_time;
        /// we've received the ack for this result from the server
    bool got_server_ack;
    double final_cpu_time;
    double final_elapsed_time;

    // the following are nonzero if reported by app
    double fpops_per_cpu_sec;
    double fpops_cumulative;
    double intops_per_cpu_sec;
    double intops_cumulative;

        /// state of this result: see lib/result_state.h
    int _state;
        /// read accessor for _state (const: does not modify the result)
    inline int state() const { return _state; }
    void set_state(int, const char*);
        /// return value from the application
    int exit_status;
        /// the concatenation of:
        ///
        /// - if report_result_error() is called for this result:
        ///   <message>x</message>
        ///   <exit_status>x</exit_status>
        ///   <signal>x</signal>
        ///   - if called in FILES_DOWNLOADED state:
        ///     <couldnt_start>x</couldnt_start>
        ///   - if called in NEW state:
        ///     <download_error>x</download_error> for each failed download
        ///   - if called in COMPUTE_DONE state:
        ///     <upload_error>x</upload_error> for each failed upload
        ///
        /// - <stderr_txt>X</stderr_txt>, where X is the app's stderr output
    std::string stderr_out;
    bool suspended_via_gui;
    bool coproc_missing;
        // a coproc needed by this job is missing
        // (e.g. because user removed their GPU board).

    APP* app;
        /// this may be NULL after result is finished
    WORKUNIT* wup;
    PROJECT* project;

    RESULT(){}
    ~RESULT(){}
    void clear();
    int parse_server(MIOFILE&);
    int parse_state(MIOFILE&);
    int parse_name(FILE*, const char* end_tag);
    int write(MIOFILE&, bool to_server);
    int write_gui(MIOFILE&);
    bool is_upload_done();    // files uploaded?
    void clear_uploaded_flags();
    FILE_REF* lookup_file(FILE_INFO*);
    FILE_INFO* lookup_file_logical(const char*);
        /// abort the result if it hasn't started computing yet

        /// Called only for results with no active task
        /// (otherwise you need to abort the active task)
    void abort_inactive(int);
    void append_log_record();

    // stuff related to CPU scheduling

    double estimated_duration(bool for_work_fetch);
    double estimated_duration_uncorrected();
    double estimated_time_remaining(bool for_work_fetch);
        /// estimated_time_remaining(false) scaled by avp->flops.
        /// Dereferences avp without a NULL check.
    inline double estimated_flops_remaining() {
        return estimated_time_remaining(false)*avp->flops;
    }

        /// true if the state is RESULT_COMPUTE_ERROR or beyond,
        /// or if ready_to_report is set.
        /// Read-only, hence const.
    inline bool computing_done() const {
        if (state() >= RESULT_COMPUTE_ERROR) return true;
        if (ready_to_report) return true;
        return false;
    }
    bool not_started();
        /// downloaded, not finished, not suspended, project not suspended
    bool runnable();
        /// downloading or downloaded,
        /// not finished, suspended, project not suspended
        /// NOTE(review): the sibling comments say "not suspended";
        /// "suspended" above is presumably a typo - confirm.
    bool nearly_runnable();
        /// downloading, not downloaded, not suspended, project not suspended
    bool downloading();
        /// some input or app file is downloading, and backed off
        /// i.e. it may be a long time before we can run this result
    bool some_download_stalled();
        // The three coprocessor queries below read avp->ncudas / avp->natis
        // and dereference avp without a NULL check.
        // They are read-only, hence const.
    inline bool uses_cuda() const {
        return (avp->ncudas > 0);
    }
    inline bool uses_ati() const {
        return (avp->natis > 0);
    }
    inline bool uses_coprocs() const {
        if (avp->ncudas > 0) return true;
        if (avp->natis > 0) return true;
        return false;
    }

    // temporaries used in CLIENT_STATE::rr_simulation():
    double rrsim_flops_left;
    double rrsim_finish_delay;
    double rrsim_flops;

        /// used to keep cpu scheduler from scheduling a result twice
        /// transient; used only within schedule_cpus()
    bool already_selected;
        /// report deadline - prefs.work_buf_min - time slice
    double computation_deadline();
    bool rr_sim_misses_deadline;

    // temporaries used in enforce_schedule():
    bool unfinished_time_slice;
    int seqno;

        /// temporary used to tell GUI that this result is deadline-scheduled
    bool edf_scheduled;

    int coproc_indices[MAX_COPROCS_PER_JOB];
        // keep track of coprocessor reservations
    char resources[256];
        // textual description of resources used
    bool insufficient_video_ram();
    double schedule_backoff;
        // don't try to schedule until this time
        // (wait for free video RAM)
};

/// Represents an always/auto/never value, possibly temporarily overridden.
/// The state is private; it is changed through set() and read through
/// get_perm(), get_current() and delay().

class MODE {
private:
        // the permanent setting
    int perm_mode;
        // the temporary override
    int temp_mode;
        // presumably the time at which the temporary override ends -
        // confirm in the implementation file
    double temp_timeout;
public:
    MODE();
    void set(int mode, double duration);
    int get_perm();
    int get_current();
	double delay();
};

/// A platform supported by the client; holds only the platform's name.

class PLATFORM {
public:
    std::string name;
};

#endif