// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2020 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . #ifndef BOINC_CLIENT_STATE_H #define BOINC_CLIENT_STATE_H #define NEW_CPU_THROTTLE // do CPU throttling using a separate thread. // This makes it possible to throttle faster than the client's 1-sec poll period // NOTE: we can't actually do this because the runtime system's // poll period is currently 1 sec. #ifndef _WIN32 #include #include #include #endif using std::string; using std::vector; #include "coproc.h" #include "util.h" #ifdef NEW_CPU_THROTTLE #include "thread.h" #endif #include "acct_mgr.h" #include "acct_setup.h" #include "app.h" #include "client_types.h" #include "current_version.h" #include "file_xfer.h" #include "file_names.h" #include "gui_rpc_server.h" #include "gui_http.h" #include "project_init.h" #include "hostinfo.h" #include "miofile.h" #include "net_stats.h" #include "pers_file_xfer.h" #include "prefs.h" #include "project_list.h" #include "scheduler_op.h" #include "time_stats.h" #ifdef SIM #include "../sched/edf_sim.h" #endif #define WF_EST_FETCH_TIME 180 // Figure that fetching work (possibly requesting from several projects) // could take as long as this. // So start work fetch this long before an instance becomes idle, // in order to avoid idleness. // encapsulates the global variables of the core client. // If you add anything here, initialize it in the constructor // struct CLIENT_STATE { vector platforms; vector projects; // in alphabetical order, to improve display vector apps; vector file_infos; vector app_versions; vector workunits; vector results; // list of jobs, ordered by increasing arrival time PERS_FILE_XFER_SET* pers_file_xfers; HTTP_OP_SET* http_ops; FILE_XFER_SET* file_xfers; #ifndef SIM GUI_RPC_CONN_SET gui_rpcs; #endif GUI_HTTP gui_http; #ifdef ENABLE_AUTO_UPDATE AUTO_UPDATE auto_update; #endif LOOKUP_WEBSITE_OP lookup_website_op; GET_CURRENT_VERSION_OP get_current_version_op; GET_PROJECT_LIST_OP get_project_list_op; ACCT_MGR_OP acct_mgr_op; LOOKUP_LOGIN_TOKEN_OP lookup_login_token_op; CLIENT_TIME_STATS time_stats; GLOBAL_PREFS global_prefs; NET_STATS net_stats; ACTIVE_TASK_SET active_tasks; HOST_INFO host_info; // the following used only on Android DEVICE_STATUS device_status; double device_status_time; char language[16]; // ISO language code reported by GUI char client_brand[256]; // contents of client_brand.txt, e.g. "HTC Power to Give" // reported to scheduler VERSION_INFO core_client_version; string statefile_platform_name; int file_xfer_giveup_period; RUN_MODE cpu_run_mode; RUN_MODE gpu_run_mode; RUN_MODE network_run_mode; bool started_by_screensaver; bool check_all_logins; bool user_active; // there has been recent mouse/kbd input int cmdline_gui_rpc_port; bool show_projects; bool requested_exit; // we should exit now. Set when // - got a "quit" GUI RPC // - (Unix) got a HUP, INT, QUIT, TERM, or PWR signal // - (Win) got CTRL_LOGOFF, CTRL_C, CTRL_BREAK, etc. event // - (Mac) client was started from screensaver, // which has since exited bool os_requested_suspend; // we should suspend for OS reasonts (used on Win only). // Set when // - got BATTERY_LOW, SUSPEND, SERVICE_CONTROL_PAUSE double os_requested_suspend_time; bool cleanup_completed; bool in_abort_sequence; // Determine when it is safe to leave the quit_client() handler // and to finish cleaning up. char detach_project_url[256]; // stores URL for --detach_project option char reset_project_url[256]; // stores URL for --reset_project option char update_prefs_url[256]; // stores URL for --update_prefs option char main_host_venue[256]; // venue from project or AMS that gave us general prefs char attach_project_url[256]; char attach_project_auth[256]; bool exit_before_upload; // exit when about to upload a file bool run_test_app; // API test mode #ifndef _WIN32 gid_t boinc_project_gid; #endif #ifdef _WIN32 // vars so that the sysmon thread can write messages // bool have_sysmon_msg; char sysmon_msg[256]; #endif // backoff-related variables // int master_fetch_period; // fetch project's master URL (and stop doing scheduler RPCs) // if get this many successive RPC failures (default 10) int retry_cap; // cap project->nrpc_failures at this number int master_fetch_retry_cap; // after this many master-fetch failures, // move into a state in which we retry master fetch // at the frequency below int master_fetch_interval; // see above int sched_retry_delay_min; int sched_retry_delay_max; int pers_retry_delay_min; int pers_retry_delay_max; int pers_giveup; bool tasks_suspended; // Computing suspended for reason other than throttling int suspend_reason; bool tasks_throttled; // Computing suspended because of throttling bool network_suspended; // Don't use network. bool file_xfers_suspended; // Don't do file xfers (but allow other network activity). int network_suspend_reason; bool executing_as_daemon; // true if --daemon is on the commandline // this means we are running as a daemon on unix, // or as a service on Windows bool redirect_io; // redirect stdout, stderr to log files bool disable_graphics; // a condition has occurred in which we know graphics will // not be displayable, so GUIs shouldn't offer graphics. bool detach_console; bool launched_by_manager; bool run_by_updater; double now; bool clock_change; // system clock was recently decreased double last_wakeup_time; bool initialized; bool cant_write_state_file; // failed to write state file. // In this case we continue to run for 1 minute, // handling GUI RPCs but doing nothing else, // so that the Manager can tell the user what the problem is bool client_state_dirty; int old_major_version; int old_minor_version; int old_release; bool run_cpu_benchmarks; // if set, run benchmarks when possible int exit_after_app_start_secs; // if nonzero, exit this many seconds after starting an app double app_started; // when the most recent app was started bool cmdline_dir; // data dir was specified on cmdline // --------------- acct_mgr.cpp: ACCT_MGR_INFO acct_mgr_info; // --------------- acct_setup.cpp: PROJECT_INIT project_init; PROJECT_ATTACH project_attach; void new_version_check(bool force = false); void all_projects_list_check(); double new_version_check_time; double all_projects_list_check_time; // the time we last successfully fetched the project list bool autologin_in_progress; bool autologin_fetching_project_list; PROJECT_LIST project_list; void process_autologin(bool first); // --------------- current_version.cpp: string newer_version; string client_version_check_url; // --------------- client_state.cpp: CLIENT_STATE(); void show_host_info(); bool is_new_client(); int init(); bool poll_slow_events(); // Never blocks. // Returns true if it actually did something, // in which case it should be called again immediately. void do_io_or_sleep(double dt); bool time_to_exit(); PROJECT* lookup_project(const char*); APP* lookup_app(PROJECT*, const char*); FILE_INFO* lookup_file_info(PROJECT*, const char* name); RESULT* lookup_result(PROJECT*, const char*); WORKUNIT* lookup_workunit(PROJECT*, const char*); APP_VERSION* lookup_app_version( APP*, char* platform, int ver, char* plan_class ); int detach_project(PROJECT*); int report_result_error(RESULT&, const char* err_msg); int reset_project(PROJECT*, bool detaching); bool no_gui_rpc; bool gui_rpc_unix_domain; // do GUI RPC over Unix-domain sockets rather than TCP void start_abort_sequence(); bool abort_sequence_done(); int quit_activities(); int link_app(PROJECT*, APP*); int link_file_info(PROJECT*, FILE_INFO*); int link_file_ref(PROJECT*, FILE_REF*); int link_app_version(PROJECT*, APP_VERSION*); int link_workunit(PROJECT*, WORKUNIT*); int link_result(PROJECT*, RESULT*); void print_summary(); bool abort_unstarted_late_jobs(); bool garbage_collect(); bool garbage_collect_always(); bool update_results(); int nresults_for_project(PROJECT*); void check_clock_reset(); void clear_absolute_times(); void set_now(); void log_show_projects(); // --------------- cpu_sched.cpp: double total_resource_share(); double potentially_runnable_resource_share(); double nearly_runnable_resource_share(); double fetchable_resource_share(); double rec_interval_start; double total_cpu_time_this_rec_interval; bool must_enforce_cpu_schedule; bool must_schedule_cpus; bool must_check_work_fetch; void assign_results_to_projects(); RESULT* highest_prio_project_best_result(); void reset_rec_accounting(); bool schedule_cpus(); void make_run_list(vector&); bool enforce_run_list(vector&); void append_unfinished_time_slice(vector&); double runnable_resource_share(int); void adjust_rec(); double retry_shmem_time; // if we fail to start a task due to no shared-mem segments, // wait until at least this time to try running // another task that needs a shared-mem seg inline double work_buf_min() { double x = global_prefs.work_buf_min_days * 86400; if (x < WF_EST_FETCH_TIME) x = WF_EST_FETCH_TIME; return x; } inline double work_buf_additional() { return global_prefs.work_buf_additional_days *86400; } inline double work_buf_total() { double x = work_buf_min() + work_buf_additional(); if (x < 1) x = 1; return x; } void request_schedule_cpus(const char*); // Reschedule CPUs ASAP. // Called when: // - core client starts (CS::init()) // - an app exits (ATS::check_app_exited()) // - Tasks are killed (ATS::exit_tasks()) // - a result's input files finish downloading (CS::update_results()) // - an app fails to start (CS::schedule_cpus()) // - any project op is done via RPC (suspend/resume) // - any result op is done via RPC (suspend/resume) void set_ncpus(); // --------------- cs_account.cpp: int add_project( const char* master_url, const char* authenticator, const char* project_name, bool attached_via_acct_mgr ); int parse_account_files(); int parse_account_files_venue(); int parse_preferences_for_user_files(); int parse_statistics_files(); // should be move to a new file, but this will do it for testing // --------------- cs_apps.cpp: double get_fraction_done(RESULT* result); int input_files_available(RESULT*, bool, FILE_INFO** f=0); ACTIVE_TASK* lookup_active_task_by_result(RESULT*); int ncpus; // Act like there are this many CPUs. // By default this is the # of physical CPUs, // but it can be changed in two ways: // - type N in the config file // - type the max_ncpus_pct pref int latest_version(APP*, char*); int app_finished(ACTIVE_TASK&); bool start_apps(); bool handle_finished_apps(); void check_for_finished_jobs(); ACTIVE_TASK* get_task(RESULT*); // --------------- cs_benchmark.cpp: bool benchmarks_running; void check_if_need_benchmarks(); bool can_run_cpu_benchmarks(); void start_cpu_benchmarks(bool force = false); bool cpu_benchmarks_poll(); void abort_cpu_benchmarks(); bool cpu_benchmarks_done(); void cpu_benchmarks_set_defaults(); void print_benchmark_results(); // --------------- cs_cmdline.cpp: void parse_cmdline(int argc, char** argv); void parse_env_vars(); void do_cmdline_actions(); // --------------- cs_files.cpp: void check_file_existence(); RESULT* file_info_to_result(FILE_INFO*); bool start_new_file_xfer(PERS_FILE_XFER&); int make_project_dirs(); bool create_and_delete_pers_file_xfers(); // --------------- cs_platforms.cpp: const char* get_primary_platform(); void add_platform(const char*); void detect_platforms(); void write_platforms(PROJECT*, FILE*); bool is_supported_platform(const char*); // --------------- cs_prefs.cpp: double client_disk_usage; // disk usage not counting projects // computed by get_disk_usages() double total_disk_usage; // client plus projects int get_disk_usages(); void get_disk_shares(); double allowed_disk_usage(double boinc_total); int allowed_project_disk_usage(double&); void show_suspend_tasks_message(int reason); int resume_tasks(int reason=0); void read_global_prefs( const char* fname = GLOBAL_PREFS_FILE_NAME, const char* override_fname = GLOBAL_PREFS_OVERRIDE_FILE ); int save_global_prefs(const char* prefs, char* url, char* sched); double available_ram(); double max_available_ram(); int check_suspend_processing(); void check_suspend_network(); void install_global_prefs(); PROJECT* global_prefs_source_project(); void show_global_prefs_source(bool); // --------------- cs_scheduler.cpp: void request_work_fetch(const char*); // Called when: // - core client starts (CS::init()) // - task is completed or fails // - tasks are killed // - an RPC completes // - project suspend/detch/attach/reset GUI RPC // - result suspend/abort GUI RPC int make_scheduler_request(PROJECT*); int handle_scheduler_reply(PROJECT*, char* scheduler_url); SCHEDULER_OP* scheduler_op; PROJECT* next_project_master_pending(); PROJECT* next_project_sched_rpc_pending(); PROJECT* next_project_trickle_up_pending(); PROJECT* find_project_with_overdue_results(bool network_suspend_soon); bool had_or_requested_work; bool scheduler_rpc_poll(); // --------------- cs_statefile.cpp: void set_client_state_dirty(const char*); int parse_state_file(); int parse_state_file_aux(const char*); int write_state(MIOFILE&); int write_state_file(); int write_state_file_if_needed(); void check_anonymous(); int parse_app_info(PROJECT*, FILE*); int write_state_gui(MIOFILE&); int write_file_transfers_gui(MIOFILE&); int write_tasks_gui(MIOFILE&, bool); void sort_results(); void sort_projects_by_name(); // --------------- cs_trickle.cpp: int read_trickle_files(PROJECT*, FILE*); int remove_trickle_files(PROJECT*); int handle_trickle_down(PROJECT*, FILE*); // --------------- check_state.cpp: // stuff related to data-structure integrity checking // void check_project_pointer(PROJECT*); void check_app_pointer(APP*); void check_file_info_pointer(FILE_INFO*); void check_app_version_pointer(APP_VERSION*); void check_workunit_pointer(WORKUNIT*); void check_result_pointer(RESULT*); void check_pers_file_xfer_pointer(PERS_FILE_XFER*); void check_file_xfer_pointer(FILE_XFER*); void check_app(APP&); void check_file_info(FILE_INFO&); void check_file_ref(FILE_REF&); void check_app_version(APP_VERSION&); void check_workunit(WORKUNIT&); void check_result(RESULT&); void check_active_task(ACTIVE_TASK&); void check_pers_file_xfer(PERS_FILE_XFER&); void check_file_xfer(FILE_XFER&); void check_all(); void free_mem(); // --------------- work_fetch.cpp: int proj_min_results(PROJECT*, double); void check_project_timeout(); double overall_cpu_frac(); double overall_cpu_and_network_frac(); double overall_gpu_frac(); double time_until_work_done(PROJECT*, int, double); bool compute_work_requests(); void scale_duration_correction_factors(double); void generate_new_host_cpid(); void compute_nuploading_results(); #ifdef SIM double share_violation(); double monotony(); void handle_completed_results(PROJECT*); void get_workload(vector&); bool simulate_rpc(PROJECT*); #endif KEYWORDS keywords; }; extern CLIENT_STATE gstate; extern bool gpus_usable; // set to false if GPUs not usable because of remote desktop // or login situation (Windows) // return a random double in the range [MIN,min(e^n,MAX)) extern double calculate_exponential_backoff( int n, double MIN, double MAX ); #ifdef NEW_CPU_THROTTLE extern THREAD_LOCK client_mutex; extern THREAD throttle_thread; #endif //////// TIME-RELATED CONSTANTS //////////// //////// POLLING PERIODS #define POLL_INTERVAL 1.0 // the client will handle I/O (including GUI RPCs) // for up to POLL_INTERVAL seconds before calling poll_slow_events() // to call the polling functions #define GARBAGE_COLLECT_PERIOD 10 // how often to garbage collect #define TASK_POLL_PERIOD 1.0 #define UPDATE_RESULTS_PERIOD 1.0 #define HANDLE_FINISHED_APPS_PERIOD 1.0 #define BENCHMARK_POLL_PERIOD 1.0 #define PERS_FILE_XFER_START_PERIOD 1.0 #define PERS_FILE_XFER_POLL_PERIOD 1.0 #define SCHEDULER_RPC_POLL_PERIOD 5.0 #define FILE_XFER_POLL_PERIOD 1.0 #define GUI_HTTP_POLL_PERIOD 1.0 #define MEMORY_USAGE_PERIOD 10 // computer memory usage and check for exclusive apps this often //////// WORK FETCH #define WORK_FETCH_PERIOD 60 // see if we need to fetch work at least this often #define WF_MIN_BACKOFF_INTERVAL 600 #define WF_MAX_BACKOFF_INTERVAL 86400 // if we ask a project for work for a resource and don't get it, // we do exponential backoff. // This constant is an upper bound for this. // E.g., if we need GPU work, we'll end up asking once a day, // so if the project develops a GPU app, // we'll find out about it within a day. #define WF_UPLOAD_DEFER_INTERVAL 300 // if a project is uploading, // and the last upload started within this interval, // don't fetch work from it. // This allows the work fetch to be merged with the reporting of the // jobs that are currently uploading. #define RESULT_REPORT_IF_AT_LEAST_N 64 // If a project has at least this many ready-to-report tasks, report them. #define WF_MAX_RUNNABLE_JOBS 1000 // don't fetch work from a project if it has this many runnable jobs. // This is a failsafe mechanism to prevent infinite fetching //////// CPU SCHEDULING #define CPU_SCHED_PERIOD 60 // do CPU schedule at least this often #define REC_ADJUST_PERIOD CPU_SCHED_PERIOD // REC is adjusted at least this often, // since adjust_rec() is called from enforce_schedule() #define DEADLINE_CUSHION 0 // try to finish jobs this much in advance of their deadline /////// JOB CONTROL #define ABORT_TIMEOUT 60 // if we send app request, wait this long before killing it. // This gives it time to download symbol files (which can be several MB) // and write stack trace to stderr #define QUIT_TIMEOUT 60 // Same, for . // Should be large enough that apps can finalize // (e.g. write checkpoint file) in that time. // In Nov 2015 we increased it from 15 to 60 // because CERN's VBox apps take a long time to save state. #define MAX_STARTUP_TIME 10 // if app startup takes longer than this, quit loop #define MIN_TIME_BOUND 120. #define DEFAULT_TIME_BOUND (12*3600.) // if ACTIVE_TASK::max_elapsed_time is < MIN, set it to DEFAULT // This is a sanity check, so that bad values for // wup->rsc_fpops_bound or avp->flops won't cause jobs // to get aborted after a few seconds // The values are a bit arbitrary. #define FINISH_FILE_TIMEOUT 300 // if app process exists this long after writing finish file, abort it. // NOTE: this used to be 10 sec and it wasn't enough, // e.g. during heavy paging. //////// NETWORK #define CONNECT_ERROR_PERIOD 600.0 #define ALLOW_NETWORK_IF_RECENT_RPC_PERIOD 300 // if there has been a GUI RPC within this period // that requires network access (e.g. attach to project) // allow it even if setting is "no access" //////// MISC #define EXCLUSIVE_APP_WAIT 5 // if "exclusive app" feature used, // wait this long after app exits before restarting jobs #define DAILY_XFER_HISTORY_PERIOD 60 #define ACCT_MGR_MIN_BACKOFF 600 #define ACCT_MGR_MAX_BACKOFF 86400 // min/max account manager RPC backoff #define ANDROID_KEEPALIVE_TIMEOUT 30 // Android: if don't get a report_device_status() RPC from the GUI // in this interval, exit. // We rely on the GUI to report battery status. #ifndef ANDROID #define USE_NET_PREFS // use preferences obtained over the network // (i.e. through scheduler replies) // Don't do this on Android #endif #define NEED_NETWORK_MSG _("BOINC can't access Internet - check network connection or proxy configuration.") #endif