2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2003-07-25 20:26:38 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2003-07-25 20:26:38 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2002-04-30 22:22:54 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2003-05-02 23:29:57 +00:00
|
|
|
#ifndef _BOINC_DB_
|
|
|
|
#define _BOINC_DB_
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-09-26 05:57:10 +00:00
|
|
|
// Structures corresponding to database records.
|
|
|
|
// Some of these types have counterparts in client/types.h,
|
|
|
|
// but don't be deceived - client and server have different variants.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
// The parse and write functions are for use in scheduler RPC.
|
|
|
|
// They don't necessarily serialize the entire records.
|
|
|
|
|
2004-07-13 13:54:09 +00:00
|
|
|
#include <cstdio>
|
2004-07-01 20:24:00 +00:00
|
|
|
#include <vector>
|
2010-04-21 20:11:41 +00:00
|
|
|
#include <string.h>
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2003-08-31 01:21:31 +00:00
|
|
|
#include "db_base.h"
|
2009-11-04 21:23:56 +00:00
|
|
|
#include "average.h"
|
2011-08-10 17:11:08 +00:00
|
|
|
#include "parse.h"
|
2003-08-31 01:21:31 +00:00
|
|
|
|
2003-09-05 21:26:21 +00:00
|
|
|
extern DB_CONN boinc_db;
|
|
|
|
|
2003-12-07 18:58:08 +00:00
|
|
|
// Sizes of text buffers in memory, corresponding to database BLOBs.

// The following is for regular blobs, 64KB
//
#define BLOB_SIZE   65536

// The following are for "medium blobs",
// which are 16MB in the DB.
// The in-memory buffers declared with these sizes are 256KB (262144 bytes).
//
#define APP_VERSION_XML_BLOB_SIZE   262144
#define MSG_FROM_HOST_BLOB_SIZE     262144
#define MSG_TO_HOST_BLOB_SIZE       262144
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2004-08-06 20:02:00 +00:00
|
|
|
// Dummy name for file xfers
//
#define FILE_MOVER "move_file"
|
|
|
|
|
2008-03-28 18:00:27 +00:00
|
|
|
struct BEST_APP_VERSION;
|
2002-08-25 07:54:33 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
// A compilation target, i.e. an architecture/OS combination.
// Client will be sent applications only for platforms they support.
//
struct PLATFORM {
    int id;
    int create_time;
    char name[256];                 // e.g. "sparc-sun-solaris"
    char user_friendly_name[256];   // e.g. "SPARC Solaris 2.8"
    int deprecated;

    void clear();
};
|
|
|
|
|
|
|
|
// An application.
//
struct APP {
    int id;
    int create_time;
    char name[256];         // application name, preferably short
    int min_version;        // don't use app versions before this
    bool deprecated;
    char user_friendly_name[256];
    int homogeneous_redundancy;
    double weight;
        // tells the feeder what fraction of results
        // should come from this app
    bool beta;
    int target_nresults;
    double min_avg_pfc;
        // the weighted average of app_version.pfc.avg
        // over GPU or CPU versions, whichever is less.
        // Approximates (actual FLOPS)/wu.rsc_fpops_est
    bool host_scale_check;
        // use host scaling cautiously, to thwart cherry picking
    bool homogeneous_app_version;
        // do all instances of each job using the same app version

    int write(FILE*);
    void clear();
};
|
|
|
|
|
|
|
|
// A version of an application.
|
|
|
|
//
|
|
|
|
struct APP_VERSION {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-08-26 22:57:17 +00:00
|
|
|
int appid;
|
|
|
|
int version_num;
|
|
|
|
int platformid;
|
2008-03-31 16:19:45 +00:00
|
|
|
char xml_doc[APP_VERSION_XML_BLOB_SIZE];
|
2003-07-01 18:27:08 +00:00
|
|
|
// describes app files. format:
|
|
|
|
// <file_info>...</file_info>
|
|
|
|
// ...
|
2003-06-04 17:21:26 +00:00
|
|
|
// <app_version>
|
|
|
|
// <app_name>...</app_name>
|
|
|
|
// <version_num>x</version_num>
|
2007-09-21 18:10:54 +00:00
|
|
|
// <api_version>n.n.n</api_version>
|
2003-07-01 18:27:08 +00:00
|
|
|
// <file_ref>
|
|
|
|
// ...
|
2003-06-04 17:21:26 +00:00
|
|
|
// [<main_program/>]
|
2003-07-01 18:27:08 +00:00
|
|
|
// [<copy_file/>]
|
|
|
|
// </file_ref>
|
2003-06-04 17:21:26 +00:00
|
|
|
// </app_version>
|
2003-07-01 18:27:08 +00:00
|
|
|
//
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
// the following let you handle backwards-incompatible changes to
|
2002-04-30 22:22:54 +00:00
|
|
|
// the core client / app interface
|
|
|
|
//
|
2003-06-05 18:16:37 +00:00
|
|
|
int min_core_version; // min core version this app will run with
|
2002-04-30 22:22:54 +00:00
|
|
|
int max_core_version; // if <>0, max core version this will run with
|
2003-12-24 21:49:35 +00:00
|
|
|
bool deprecated;
|
2008-03-13 22:57:24 +00:00
|
|
|
char plan_class[256];
|
2010-03-29 22:28:20 +00:00
|
|
|
AVERAGE pfc;
|
2010-04-16 18:07:08 +00:00
|
|
|
// the stats of (claimed PFC)/wu.rsc_fpops_est
|
|
|
|
// If wu.rsc_fpops_est is accurate,
|
|
|
|
// this is the reciprocal of efficiency
|
2010-03-29 22:28:20 +00:00
|
|
|
double pfc_scale;
|
2010-04-16 18:07:08 +00:00
|
|
|
// PFC scaling factor for this app (or 0 if not enough data)
|
2011-05-21 06:22:15 +00:00
|
|
|
// The reciprocal of this version's efficiency, averaged over all jobs,
|
|
|
|
// relative to that of the most efficient version
|
2010-03-29 22:28:20 +00:00
|
|
|
double expavg_credit;
|
|
|
|
double expavg_time;
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2008-03-28 18:00:27 +00:00
|
|
|
// the following used by scheduler, not in DB
|
|
|
|
//
|
|
|
|
BEST_APP_VERSION* bavp;
|
2010-03-29 22:28:20 +00:00
|
|
|
|
|
|
|
// used by validator, not in DB
|
|
|
|
//
|
2010-04-02 19:10:37 +00:00
|
|
|
std::vector<double>pfc_samples;
|
|
|
|
std::vector<double>credit_samples;
|
|
|
|
std::vector<double>credit_times;
|
2008-03-28 18:00:27 +00:00
|
|
|
|
2005-02-16 23:17:43 +00:00
|
|
|
int write(FILE*);
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2010-04-21 19:33:20 +00:00
|
|
|
|
|
|
|
inline bool is_multithread() {
|
|
|
|
return (strstr(plan_class, "mt") != NULL);
|
|
|
|
}
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct USER {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-08-26 22:57:17 +00:00
|
|
|
char email_addr[256];
|
|
|
|
char name[256];
|
|
|
|
char authenticator[256];
|
|
|
|
char country[256];
|
|
|
|
char postal_code[256];
|
2002-04-30 22:22:54 +00:00
|
|
|
double total_credit;
|
2002-09-27 06:12:50 +00:00
|
|
|
double expavg_credit; // credit per second, recent average
|
|
|
|
double expavg_time; // when the above was computed
|
2008-03-31 16:19:45 +00:00
|
|
|
char global_prefs[BLOB_SIZE];
|
2003-03-19 21:01:32 +00:00
|
|
|
// global preferences, within <global_preferences> tag
|
2008-03-31 16:19:45 +00:00
|
|
|
char project_prefs[BLOB_SIZE];
|
2003-10-26 06:14:15 +00:00
|
|
|
// project preferences; format:
|
|
|
|
// <project_preferences>
|
|
|
|
// <resource_share>X</resource_share>
|
|
|
|
// <project_specific>
|
|
|
|
// ...
|
|
|
|
// </project_specific>
|
|
|
|
// <venue name="x">
|
|
|
|
// <resource_share>x</resource_share>
|
|
|
|
// <project_specific>
|
|
|
|
// ...
|
|
|
|
// </project_specific>
|
|
|
|
// </venue>
|
|
|
|
// ...
|
|
|
|
// </project_preferences>
|
2003-03-20 02:05:25 +00:00
|
|
|
int teamid; // team ID if any
|
2003-02-18 23:07:48 +00:00
|
|
|
char venue[256]; // home/work/school (default)
|
2003-03-20 02:05:25 +00:00
|
|
|
char url[256]; // user's web page if any
|
|
|
|
bool send_email;
|
|
|
|
bool show_hosts;
|
2003-10-24 20:13:50 +00:00
|
|
|
int posts; // number of messages posted (redundant)
|
2006-07-05 21:38:08 +00:00
|
|
|
// deprecated as of 9/2004 - forum_preferences.posts is used instead
|
2009-08-18 20:44:12 +00:00
|
|
|
// now used as salt for weak auth
|
2003-10-24 20:13:50 +00:00
|
|
|
|
|
|
|
// The following are specific to SETI@home;
|
|
|
|
// they record info about the user's involvement in a prior project
|
|
|
|
int seti_id; // ID in old DB
|
|
|
|
int seti_nresults; // number of WUs completed
|
|
|
|
int seti_last_result_time; // time of last result (UNIX)
|
2003-10-31 21:26:12 +00:00
|
|
|
double seti_total_cpu; // number of CPU seconds
|
2003-11-28 23:11:22 +00:00
|
|
|
char signature[256];
|
2006-07-05 21:38:08 +00:00
|
|
|
// deprecated as of 9/2004 - may be used as temp
|
2003-12-15 02:31:29 +00:00
|
|
|
bool has_profile;
|
2004-04-18 18:40:13 +00:00
|
|
|
char cross_project_id[256];
|
2005-08-07 01:33:15 +00:00
|
|
|
char passwd_hash[256];
|
2006-12-27 18:22:17 +00:00
|
|
|
bool email_validated; // deprecated
|
2005-08-25 23:07:58 +00:00
|
|
|
int donated;
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2002-07-11 20:35:05 +00:00
|
|
|
};
|
|
|
|
|
2002-12-24 03:03:45 +00:00
|
|
|
// values for TEAM.type
//
#define TEAM_TYPE_CLUB                  1
#define TEAM_TYPE_COMPANY               2
#define TEAM_TYPE_PRIMARY               3
#define TEAM_TYPE_SECONDARY             4
#define TEAM_TYPE_JUNIOR_COLLEGE        5
#define TEAM_TYPE_UNIVERSITY            6
#define TEAM_TYPE_GOVERNMENT            7
|
2002-07-11 20:35:05 +00:00
|
|
|
|
2003-12-12 21:10:39 +00:00
|
|
|
// invariants of teams:
|
|
|
|
// a team has > 0 members
|
|
|
|
|
2002-07-11 20:35:05 +00:00
|
|
|
struct TEAM {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-07-16 23:37:54 +00:00
|
|
|
int userid; // User ID of team founder
|
2002-08-26 22:57:17 +00:00
|
|
|
char name[256];
|
2002-07-16 23:37:54 +00:00
|
|
|
char name_lc[256]; // Team name in lowercase (used for searching)
|
2002-08-26 22:57:17 +00:00
|
|
|
char url[256];
|
|
|
|
int type; // Team type (see above)
|
|
|
|
char name_html[256];
|
2008-03-31 16:19:45 +00:00
|
|
|
char description[BLOB_SIZE];
|
2003-01-30 23:03:52 +00:00
|
|
|
int nusers; // UNDEFINED BY DEFAULT
|
2002-12-24 03:03:45 +00:00
|
|
|
char country[256];
|
2004-01-14 20:24:24 +00:00
|
|
|
double total_credit;
|
|
|
|
double expavg_credit;
|
|
|
|
double expavg_time;
|
2003-10-24 20:13:50 +00:00
|
|
|
|
2007-07-25 03:17:31 +00:00
|
|
|
int seti_id; // ID in another DB
|
|
|
|
// this is used to identify BOINC-wide teams
|
2006-12-06 20:59:29 +00:00
|
|
|
int ping_user; // user who asked to become founder
|
2007-07-23 20:30:30 +00:00
|
|
|
int ping_time; // when they asked.
|
|
|
|
// see html/inc/team.inc for more details
|
2006-12-06 20:59:29 +00:00
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct HOST {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-07-16 23:37:54 +00:00
|
|
|
int userid; // ID of user running this host
|
2005-04-07 20:46:25 +00:00
|
|
|
// If the host is "zombied" during merging of duplicate hosts,
|
|
|
|
// this field is set to zero and rpc_seqno is used to
|
|
|
|
// store the ID of the new host (kludge, but what the heck)
|
2002-04-30 22:22:54 +00:00
|
|
|
int rpc_seqno; // last seqno received from client
|
2005-05-12 00:32:03 +00:00
|
|
|
// also used as a "forwarding ID" for zombied hosts (see above)
|
2003-08-15 20:35:44 +00:00
|
|
|
int rpc_time; // time of last scheduler RPC
|
2002-08-28 21:50:51 +00:00
|
|
|
double total_credit;
|
2002-09-25 19:40:19 +00:00
|
|
|
double expavg_credit; // credit per second, recent average
|
|
|
|
double expavg_time; // last time the above was updated
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
// all remaining items are assigned by the client
|
2005-05-05 05:23:45 +00:00
|
|
|
int timezone; // local STANDARD time at host - UTC time
|
|
|
|
// (in seconds)
|
2002-04-30 22:22:54 +00:00
|
|
|
char domain_name[256];
|
2008-12-16 18:46:28 +00:00
|
|
|
char serialnum[256]; // textual description of coprocessors
|
2005-01-21 07:54:15 +00:00
|
|
|
char last_ip_addr[256]; // internal IP address as of last RPC
|
2003-07-01 20:37:09 +00:00
|
|
|
int nsame_ip_addr; // # of RPCs with same IP address
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2005-01-28 19:01:08 +00:00
|
|
|
double on_frac; // see client/time_stats.h
|
|
|
|
double connected_frac;
|
|
|
|
double active_frac;
|
2008-12-03 19:50:06 +00:00
|
|
|
double cpu_efficiency; // deprecated as of 6.4 client
|
2005-06-23 07:42:45 +00:00
|
|
|
double duration_correction_factor;
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-07-16 23:37:54 +00:00
|
|
|
int p_ncpus; // Number of CPUs on host
|
2002-08-26 22:57:17 +00:00
|
|
|
char p_vendor[256]; // Vendor name of CPU
|
|
|
|
char p_model[256]; // Model of CPU
|
2002-09-26 05:57:10 +00:00
|
|
|
double p_fpops; // measured floating point ops/sec of CPU
|
|
|
|
double p_iops; // measured integer ops/sec of CPU
|
|
|
|
double p_membw; // measured memory bandwidth (bytes/sec) of CPU
|
2003-07-01 20:37:09 +00:00
|
|
|
// The above are per CPU, not total
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-08-26 22:57:17 +00:00
|
|
|
char os_name[256]; // Name of operating system
|
|
|
|
char os_version[256]; // Version of operating system
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-08-26 22:57:17 +00:00
|
|
|
double m_nbytes; // Size of memory in bytes
|
2002-07-16 23:37:54 +00:00
|
|
|
double m_cache; // Size of CPU cache in bytes (L1 or L2?)
|
2002-08-26 22:57:17 +00:00
|
|
|
double m_swap; // Size of swap space in bytes
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
double d_total; // Total disk space on volume containing
|
|
|
|
// the BOINC client directory.
|
|
|
|
double d_free; // how much is free on that volume
|
|
|
|
|
|
|
|
// the following 2 items are reported in scheduler RPCs
|
|
|
|
// from clients w/ source Oct 4 2005 and later.
|
|
|
|
// NOTE: these items plus d_total and d_free are sufficient
|
|
|
|
// to avoid exceeding BOINC's limit on total disk space.
|
|
|
|
// But they are NOT sufficient to do resource-share-based
|
|
|
|
// disk space allocation.
|
|
|
|
// This needs to thought about.
|
|
|
|
//
|
2003-06-17 18:59:36 +00:00
|
|
|
double d_boinc_used_total;
|
2005-10-04 21:44:58 +00:00
|
|
|
// disk space being used in BOINC client dir,
|
|
|
|
// including all projects and BOINC itself
|
2003-06-17 18:59:36 +00:00
|
|
|
double d_boinc_used_project;
|
|
|
|
// amount being used for this project
|
2005-10-04 21:44:58 +00:00
|
|
|
|
|
|
|
// The following item is not used.
|
|
|
|
// It's redundant (server can compute based on other params and prefs)
|
|
|
|
//
|
|
|
|
double d_boinc_max; // max disk space that BOINC is allowed to use,
|
|
|
|
// reflecting user preferences
|
2002-12-26 17:24:33 +00:00
|
|
|
double n_bwup; // Average upload bandwidth, bytes/sec
|
|
|
|
double n_bwdown; // Average download bandwidth, bytes/sec
|
2003-06-18 20:52:17 +00:00
|
|
|
// The above are derived from actual
|
|
|
|
// file upload/download times, and may reflect
|
|
|
|
// factors other than network bandwidth
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2002-09-25 19:40:19 +00:00
|
|
|
double credit_per_cpu_sec;
|
2010-04-05 20:03:54 +00:00
|
|
|
// deprecated
|
2002-09-25 19:40:19 +00:00
|
|
|
|
2003-02-18 23:07:48 +00:00
|
|
|
char venue[256]; // home/work/school
|
2004-05-27 18:13:00 +00:00
|
|
|
int nresults_today; // results sent since midnight
|
2004-12-06 22:41:19 +00:00
|
|
|
double avg_turnaround; // recent average result turnaround time
|
2005-01-20 18:50:49 +00:00
|
|
|
char host_cpid[256]; // host cross-project ID
|
2005-01-21 07:54:15 +00:00
|
|
|
char external_ip_addr[256]; // IP address seen by scheduler
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
int _max_results_day;
|
2009-01-20 00:54:16 +00:00
|
|
|
// MRD is dynamically adjusted to limit work sent to bad hosts.
|
|
|
|
// The maximum # of results sent per day is
|
|
|
|
// max_results_day * (NCPUS + NCUDA * cuda_multiplier).
|
|
|
|
// 0 means uninitialized; set to config.daily_result_quota
|
2007-07-06 16:37:00 +00:00
|
|
|
// -1 means this host is blacklisted - don't return results
|
|
|
|
// or accept results or trickles; just send it an error message
|
2009-01-20 00:54:16 +00:00
|
|
|
// Otherwise it lies in the range 0 .. config.daily_result_quota
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
// DEPRECATED: only use is -1 means host is blacklisted
|
|
|
|
double _error_rate;
|
|
|
|
// dynamic estimate of fraction of results
|
|
|
|
// that fail validation
|
|
|
|
// DEPRECATED
|
2003-02-18 23:07:48 +00:00
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
// the following not in DB
|
2010-11-17 20:52:01 +00:00
|
|
|
char p_features[1024];
|
2010-11-17 23:19:07 +00:00
|
|
|
char virtualbox_version[256];
|
2011-12-30 09:43:58 +00:00
|
|
|
bool p_vm_extensions_disabled;
|
2007-01-12 17:42:29 +00:00
|
|
|
|
2011-08-10 17:11:08 +00:00
|
|
|
int parse(XML_PARSER&);
|
|
|
|
int parse_time_stats(XML_PARSER&);
|
|
|
|
int parse_net_stats(XML_PARSER&);
|
|
|
|
int parse_disk_usage(XML_PARSER&);
|
|
|
|
|
2004-01-19 01:12:53 +00:00
|
|
|
void fix_nans();
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
2002-12-17 19:00:43 +00:00
|
|
|
// values for file_delete state
//
#define FILE_DELETE_INIT        0
#define FILE_DELETE_READY       1
    // set to this value only when we believe all files are uploaded
#define FILE_DELETE_DONE        2
    // means the files were successfully deleted
#define FILE_DELETE_ERROR       3
    // Any error was returned while attempting to delete the file
|
2002-12-17 19:00:43 +00:00
|
|
|
|
|
|
|
// values for assimilate_state
//
#define ASSIMILATE_INIT         0
#define ASSIMILATE_READY        1
#define ASSIMILATE_DONE         2
|
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
// NOTE: there is no overall state for a WU (like done/not done)
|
2003-01-23 08:07:48 +00:00
|
|
|
// There's just a bunch of independent substates
|
|
|
|
// (file delete, assimilate, and states of results, error flags)
|
|
|
|
|
|
|
|
// bit fields of error_mask
// (each value is a distinct power of two, so they can be OR'd together)
//
#define WU_ERROR_COULDNT_SEND_RESULT            1
#define WU_ERROR_TOO_MANY_ERROR_RESULTS         2
#define WU_ERROR_TOO_MANY_SUCCESS_RESULTS       4
#define WU_ERROR_TOO_MANY_TOTAL_RESULTS         8
#define WU_ERROR_CANCELLED                      16
#define WU_ERROR_NO_CANONICAL_RESULT            32
|
2002-11-05 18:36:09 +00:00
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
struct WORKUNIT {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-11-07 19:31:34 +00:00
|
|
|
int appid; // associated app
|
|
|
|
char name[256];
|
2008-03-31 16:19:45 +00:00
|
|
|
char xml_doc[BLOB_SIZE];
|
2002-04-30 22:22:54 +00:00
|
|
|
int batch;
|
2009-08-31 19:35:46 +00:00
|
|
|
// projects can use this for any of several purposes:
|
|
|
|
// - group together related jobs so you can use a DB query
|
|
|
|
// to see if they're all done
|
|
|
|
// - defer deleting output files (see file_deleter.cpp)
|
|
|
|
// - GPUGRID: store the min # of processors needed for the job
|
|
|
|
// (see sched_customize.cpp)
|
2003-09-04 00:41:51 +00:00
|
|
|
double rsc_fpops_est; // estimated # of FP operations
|
|
|
|
// used to estimate how long a result will take on a host
|
|
|
|
double rsc_fpops_bound; // upper bound on # of FP ops
|
|
|
|
// used to calculate an upper bound on the CPU time for a result
|
|
|
|
// before it is aborted.
|
|
|
|
double rsc_memory_bound; // upper bound on RAM working set (bytes)
|
2003-05-21 23:23:42 +00:00
|
|
|
// currently used only by scheduler to screen hosts
|
|
|
|
// At some point, could use as runtime limit
|
2003-09-04 00:41:51 +00:00
|
|
|
double rsc_disk_bound; // upper bound on amount of disk needed (bytes)
|
2003-05-21 23:23:42 +00:00
|
|
|
// (including input, output and temp files, but NOT the app)
|
2003-09-04 00:41:51 +00:00
|
|
|
// used for 2 purposes:
|
2003-05-21 23:23:42 +00:00
|
|
|
// 1) for scheduling (don't send this WU to a host w/ insuff. disk)
|
2003-09-04 00:41:51 +00:00
|
|
|
// 2) abort task if it uses more than this disk
|
2002-09-25 19:40:19 +00:00
|
|
|
bool need_validate; // this WU has at least 1 result in
|
2003-01-23 08:07:48 +00:00
|
|
|
// validate state = NEED_CHECK
|
2002-09-25 19:40:19 +00:00
|
|
|
int canonical_resultid; // ID of canonical result, or zero
|
|
|
|
double canonical_credit; // credit that all correct results get
|
2010-04-05 20:03:54 +00:00
|
|
|
// TODO: deprecate and remove code
|
2003-08-15 20:35:44 +00:00
|
|
|
int transition_time; // when should transition_handler
|
|
|
|
// next check this WU?
|
|
|
|
// MAXINT if no need to check
|
2003-01-23 08:07:48 +00:00
|
|
|
int delay_bound; // determines result deadline,
|
|
|
|
// timeout check time
|
|
|
|
int error_mask; // bitmask of errors (see above)
|
2002-12-17 19:00:43 +00:00
|
|
|
int file_delete_state;
|
|
|
|
int assimilate_state;
|
2004-12-06 22:41:19 +00:00
|
|
|
int hr_class; // homogeneous redundancy class
|
|
|
|
// used to send redundant copies only to "similar" hosts
|
|
|
|
// (in terms of numerics, performance, or both)
|
2003-12-12 21:10:39 +00:00
|
|
|
double opaque; // project-specific; usually external ID
|
2003-08-15 20:35:44 +00:00
|
|
|
int min_quorum; // minimum quorum size
|
2009-09-16 03:10:22 +00:00
|
|
|
int target_nresults;
|
|
|
|
// try to get this many successful results
|
|
|
|
// may be > min_quorum to get consensus quicker or reflect loss rate
|
2003-08-15 20:35:44 +00:00
|
|
|
int max_error_results; // WU error if < #error results
|
|
|
|
int max_total_results; // WU error if < #total results
|
2004-11-22 19:17:13 +00:00
|
|
|
// (need this in case results are never returned)
|
2003-08-15 20:35:44 +00:00
|
|
|
int max_success_results; // WU error if < #success results
|
|
|
|
// without consensus (i.e. WU is nondeterministic)
|
2004-07-02 19:17:53 +00:00
|
|
|
char result_template_file[64];
|
2004-12-06 22:41:19 +00:00
|
|
|
int priority;
|
|
|
|
char mod_time[16];
|
2010-03-05 22:55:16 +00:00
|
|
|
double rsc_bandwidth_bound;
|
|
|
|
// send only to hosts with at least this much download bandwidth
|
|
|
|
int fileset_id;
|
2011-06-06 03:40:42 +00:00
|
|
|
int app_version_id;
|
|
|
|
// if app uses homogeneous_app_version,
|
|
|
|
// which version this job is committed to (0 if none)
|
2002-04-30 22:22:54 +00:00
|
|
|
|
|
|
|
// the following not used in the DB
|
|
|
|
char app_name[256];
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
2007-05-02 23:17:52 +00:00
|
|
|
// A (user, workunit) pair.
// NOTE(review): presumably records that the user was credited
// for the workunit - confirm against the code that uses it.
//
struct CREDITED_JOB {
    int userid;
    double workunitid;

    // the following not used in the DB
    void clear();
};
|
|
|
|
|
2011-07-25 21:45:53 +00:00
|
|
|
// WARNING: be very careful about changing any values,
// especially for a project already running -
// the database will become inconsistent

// values of result.server_state
//
//#define RESULT_SERVER_STATE_INACTIVE       1
#define RESULT_SERVER_STATE_UNSENT         2
#define RESULT_SERVER_STATE_IN_PROGRESS    4
#define RESULT_SERVER_STATE_OVER           5
    // we received a reply, timed out, or decided not to send.
    // Note: we could get a reply even after timing out.
|
|
|
|
|
2011-07-25 21:45:53 +00:00
|
|
|
// values of result.outcome
//
#define RESULT_OUTCOME_INIT             0
#define RESULT_OUTCOME_SUCCESS          1
#define RESULT_OUTCOME_COULDNT_SEND     2
#define RESULT_OUTCOME_CLIENT_ERROR     3
    // an error happened on the client
#define RESULT_OUTCOME_NO_REPLY         4
#define RESULT_OUTCOME_DIDNT_NEED       5
    // we created the result but didn't need to send it because
    // 1) we already got a canonical result for the WU, or
    // 2) the WU had an error
#define RESULT_OUTCOME_VALIDATE_ERROR   6
    // The outcome was initially SUCCESS,
    // but the validator had a permanent error reading a result file,
    // or the result file had a syntax error
#define RESULT_OUTCOME_CLIENT_DETACHED  7
    // we believe that the client detached
|
2003-01-23 08:07:48 +00:00
|
|
|
|
2011-07-25 21:45:53 +00:00
|
|
|
// values of result.validate_state
//
#define VALIDATE_STATE_INIT         0
#define VALIDATE_STATE_VALID        1
#define VALIDATE_STATE_INVALID      2
#define VALIDATE_STATE_NO_CHECK     3
    // WU had error, so we'll never get around to validating its results
    // This lets us avoid showing the claimed credit as "pending"
#define VALIDATE_STATE_INCONCLUSIVE 4
    // the validator looked at this result (as part of a check_set() call)
    // but didn't find a canonical result.
    // This needs to be distinct from INIT for the transitioner to decide
    // whether to trigger the validator
#define VALIDATE_STATE_TOO_LATE     5
    // The result arrived after the canonical result's files were deleted,
    // so we can't determine if it's valid
|
2002-09-25 19:40:19 +00:00
|
|
|
|
2008-02-21 00:47:50 +00:00
|
|
|
// values for ASSIGNMENT.target_type
//
#define ASSIGN_NONE     0
#define ASSIGN_HOST     1
#define ASSIGN_USER     2
#define ASSIGN_TEAM     3
|
|
|
|
|
2010-06-15 22:21:57 +00:00
|
|
|
// values for RESULT.app_version_id for anonymous platform
// (the negative literals are parenthesized so that the macros
// expand as a single operand inside larger expressions)
//
#define ANON_PLATFORM_UNKNOWN   (-1)    // relic of old scheduler
#define ANON_PLATFORM_CPU       (-2)
#define ANON_PLATFORM_NVIDIA    (-3)
#define ANON_PLATFORM_ATI       (-4)
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
struct RESULT {
|
|
|
|
int id;
|
2003-08-15 20:35:44 +00:00
|
|
|
int create_time;
|
2002-08-26 22:57:17 +00:00
|
|
|
int workunitid;
|
2003-01-23 08:07:48 +00:00
|
|
|
int server_state; // see above
|
|
|
|
int outcome; // see above; defined if server state OVER
|
2003-06-25 22:55:36 +00:00
|
|
|
int client_state; // phase that client contacted us in.
|
2003-10-16 18:10:56 +00:00
|
|
|
// if UPLOADED then outcome is success.
|
2003-06-25 22:55:36 +00:00
|
|
|
// error details are in stderr_out.
|
|
|
|
// The values for this field are defined
|
|
|
|
// in lib/result_state.h
|
2002-09-26 05:57:10 +00:00
|
|
|
int hostid; // host processing this result
|
2003-10-16 18:10:56 +00:00
|
|
|
int userid; // user processing this result
|
2003-08-15 20:35:44 +00:00
|
|
|
int report_deadline; // deadline for receiving result
|
|
|
|
int sent_time; // when result was sent to host
|
|
|
|
int received_time; // when result was received from host
|
2002-08-26 22:57:17 +00:00
|
|
|
char name[256];
|
2002-09-26 05:57:10 +00:00
|
|
|
double cpu_time; // CPU time used to complete result
|
2008-03-31 16:19:45 +00:00
|
|
|
char xml_doc_in[BLOB_SIZE]; // descriptions of output files
|
|
|
|
char xml_doc_out[BLOB_SIZE]; // MD5s of output files
|
|
|
|
char stderr_out[BLOB_SIZE]; // stderr output, if any
|
2002-04-30 22:22:54 +00:00
|
|
|
int batch;
|
2005-09-22 00:30:01 +00:00
|
|
|
int file_delete_state; // see above; values for file_delete_state
|
2002-09-25 19:40:19 +00:00
|
|
|
int validate_state;
|
2011-09-13 21:01:42 +00:00
|
|
|
double claimed_credit; // deprecated
|
2003-06-04 17:21:26 +00:00
|
|
|
double granted_credit; // == canonical credit of WU
|
2003-12-12 21:10:39 +00:00
|
|
|
double opaque; // project-specific; usually external ID
|
2003-06-04 17:21:26 +00:00
|
|
|
int random; // determines send order
|
2004-01-14 20:24:24 +00:00
|
|
|
int app_version_num; // version# of app (not core client)
|
2011-06-25 05:13:56 +00:00
|
|
|
// DEPRECATED - THIS DOESN'T DETERMINE VERSION ANY MORE
|
2003-11-11 20:49:07 +00:00
|
|
|
int appid; // copy of WU's appid
|
2003-12-23 19:21:52 +00:00
|
|
|
int exit_status; // application exit status, if any
|
2004-01-14 20:24:24 +00:00
|
|
|
int teamid;
|
2004-12-06 22:41:19 +00:00
|
|
|
int priority;
|
|
|
|
char mod_time[16];
|
2010-03-10 06:00:37 +00:00
|
|
|
double elapsed_time;
|
|
|
|
// AKA runtime; returned by 6.10+ clients
|
|
|
|
double flops_estimate;
|
2012-01-08 01:28:39 +00:00
|
|
|
// misnomer: actually the peak device FLOPS, returned by app_plan().
|
2010-03-10 00:33:31 +00:00
|
|
|
int app_version_id;
|
2010-03-10 06:00:37 +00:00
|
|
|
// ID of app version used to compute this
|
|
|
|
// 0 if unknown (relic of old scheduler)
|
2010-03-29 22:28:20 +00:00
|
|
|
// -1 anon platform, unknown resource type (relic)
|
|
|
|
// -2/-3/-4 anonymous platform (see variants above)
|
2011-09-16 16:43:15 +00:00
|
|
|
bool runtime_outlier;
|
|
|
|
// the validator tagged this as having an unusual elapsed time;
|
|
|
|
// don't include it in PFC or elapsed time statistics.
|
2002-04-30 22:22:54 +00:00
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
void clear();
|
2002-04-30 22:22:54 +00:00
|
|
|
};
|
|
|
|
|
2011-07-27 06:20:48 +00:00
|
|
|
struct BATCH {
|
|
|
|
int id;
|
|
|
|
int user_id;
|
|
|
|
// submitter
|
|
|
|
int create_time;
|
|
|
|
double logical_start_time;
|
|
|
|
double logical_end_time;
|
|
|
|
double est_completion_time;
|
|
|
|
// current estimate of completion time
|
|
|
|
int njobs;
|
|
|
|
// # of workunits
|
|
|
|
double fraction_done;
|
|
|
|
// based on workunits completed
|
|
|
|
int nerror_jobs;
|
|
|
|
// # of workunits with error
|
|
|
|
int state;
|
|
|
|
// see below
|
|
|
|
double completion_time;
|
|
|
|
// when state became >= COMPLETE
|
|
|
|
double credit_estimate;
|
|
|
|
// initial estimate of required credit, counting replicas
|
|
|
|
double credit_canonical;
|
|
|
|
// the sum of credits of canonical results
|
|
|
|
double credit_total;
|
|
|
|
// the sum of credits of all results
|
|
|
|
char name[256];
|
|
|
|
// user-assigned name; need not be unique
|
|
|
|
int app_id;
|
2011-07-30 22:42:05 +00:00
|
|
|
};
|
2011-07-27 06:20:48 +00:00
|
|
|
|
|
|
|
// values of batch.state
|
|
|
|
//
|
|
|
|
#define BATCH_STATE_INIT 0
|
|
|
|
#define BATCH_STATE_IN_PROGRESS 1
|
|
|
|
#define BATCH_STATE_COMPLETE 2
|
|
|
|
// "complete" means all workunits have either
|
|
|
|
// a canonical result or an error
|
|
|
|
#define BATCH_STATE_ABORTED 3
|
|
|
|
#define BATCH_STATE_CLEANED_UP 4
|
|
|
|
// input/output files can be deleted,
|
|
|
|
// result and workunit records can be purged.
|
|
|
|
|
2004-06-22 22:56:50 +00:00
|
|
|
// A message received from a host.
// "variety" is a project-defined message kind; "handled" records
// whether a message handler has processed the message.
struct MSG_FROM_HOST {
|
2004-01-04 06:48:40 +00:00
|
|
|
int id;
|
|
|
|
int create_time;
|
2004-05-12 21:21:09 +00:00
|
|
|
int hostid; // presumably the ID of the sending host -- confirm
|
2004-07-06 04:10:51 +00:00
|
|
|
char variety[256]; // project-defined; what kind of msg
|
|
|
|
bool handled; // message handler has processed this
|
2006-01-24 21:38:03 +00:00
|
|
|
char xml[MSG_FROM_HOST_BLOB_SIZE];
// message body buffer; presumably XML text -- confirm
|
2004-03-17 01:26:44 +00:00
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
2004-06-22 22:56:50 +00:00
|
|
|
// A message to be delivered to a host.
// The xml text is included in a scheduler reply;
// "handled" records that the scheduler has sent it.
struct MSG_TO_HOST {
|
2004-03-17 01:26:44 +00:00
|
|
|
int id;
|
|
|
|
int create_time;
|
|
|
|
int hostid; // presumably the ID of the destination host -- confirm
|
2004-07-06 04:10:51 +00:00
|
|
|
char variety[256]; // project-defined; what kind of msg
|
2004-03-17 01:26:44 +00:00
|
|
|
bool handled; // scheduler has sent this
|
2006-01-24 21:38:03 +00:00
|
|
|
char xml[MSG_TO_HOST_BLOB_SIZE]; // text to include in sched reply
|
2004-01-04 06:48:40 +00:00
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
2008-02-21 00:47:50 +00:00
|
|
|
// Record describing the assignment of a workunit to a target
// (a host, user, or team, selected by target_type and target_id).
struct ASSIGNMENT {
|
|
|
|
int id;
|
|
|
|
int create_time;
|
2011-11-16 19:47:40 +00:00
|
|
|
int target_id; // ID of target host, user, or team
|
|
|
|
int target_type; // none/host/user/team; one of the ASSIGN_* values
|
|
|
|
int multi; // 0 = single host, 1 = all hosts in set
|
2008-02-21 00:47:50 +00:00
|
|
|
int workunitid; // presumably the ID of the assigned workunit -- confirm
|
2011-11-16 19:47:40 +00:00
|
|
|
int resultid; // if not multi, the result ID
|
2008-02-21 00:47:50 +00:00
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
2004-07-01 20:24:00 +00:00
|
|
|
struct TRANSITIONER_ITEM {
|
2006-06-03 14:59:22 +00:00
|
|
|
int id; // WARNING: this is the WU ID
|
2004-07-01 18:43:36 +00:00
|
|
|
char name[256];
|
2004-12-06 22:41:19 +00:00
|
|
|
int appid;
|
|
|
|
int min_quorum;
|
2004-07-01 18:43:36 +00:00
|
|
|
bool need_validate;
|
2004-12-06 22:41:19 +00:00
|
|
|
int canonical_resultid;
|
|
|
|
int transition_time;
|
|
|
|
int delay_bound;
|
|
|
|
int error_mask;
|
|
|
|
int max_error_results;
|
|
|
|
int max_total_results;
|
|
|
|
int file_delete_state;
|
|
|
|
int assimilate_state;
|
|
|
|
int target_nresults;
|
2004-08-04 06:05:17 +00:00
|
|
|
char result_template_file[64];
|
2004-12-06 22:41:19 +00:00
|
|
|
int priority;
|
2007-05-29 23:41:31 +00:00
|
|
|
int hr_class;
|
2006-01-26 00:19:05 +00:00
|
|
|
int batch;
|
2011-06-06 03:40:42 +00:00
|
|
|
int app_version_id;
|
2006-06-03 14:59:22 +00:00
|
|
|
int res_id; // This is the RESULT ID
|
2004-07-02 17:53:31 +00:00
|
|
|
char res_name[256];
|
2004-12-06 22:41:19 +00:00
|
|
|
int res_report_deadline;
|
|
|
|
int res_server_state;
|
|
|
|
int res_outcome;
|
|
|
|
int res_validate_state;
|
|
|
|
int res_file_delete_state;
|
|
|
|
int res_sent_time;
|
2004-12-16 21:58:10 +00:00
|
|
|
int res_hostid;
|
2007-04-05 17:02:01 +00:00
|
|
|
int res_received_time;
|
2010-03-29 22:28:20 +00:00
|
|
|
int res_app_version_id;
|
2004-12-16 21:58:10 +00:00
|
|
|
|
2004-07-01 18:43:36 +00:00
|
|
|
void clear();
|
2004-07-01 20:24:00 +00:00
|
|
|
void parse(MYSQL_ROW&);
|
2004-07-01 18:43:36 +00:00
|
|
|
};
|
|
|
|
|
2010-03-29 22:28:20 +00:00
|
|
|
struct HOST_APP_VERSION {
|
|
|
|
int host_id;
|
|
|
|
int app_version_id;
|
|
|
|
// or for anon platform:
|
|
|
|
// 1000000*appid + 2 (CPU)
|
|
|
|
// 1000000*appid + 3 (NVIDIA)
|
|
|
|
// 1000000*appid + 4 (ATI)
|
|
|
|
AVERAGE pfc;
|
2010-04-16 18:07:08 +00:00
|
|
|
// the statistics of (claimed peak FLOPS)/wu.rsc_fpops_est
|
|
|
|
// If wu.rsc_fpops_est is accurate,
|
|
|
|
// this is roughly the reciprocal of efficiency
|
2010-03-29 22:28:20 +00:00
|
|
|
AVERAGE_VAR et;
|
2010-04-16 18:07:08 +00:00
|
|
|
// the statistics of (elapsed time)/wu.rsc_fpops_est
|
|
|
|
//
|
|
|
|
// for old clients (which don't report elapsed time)
|
|
|
|
// we use this for CPU time stats
|
2010-03-29 22:28:20 +00:00
|
|
|
int max_jobs_per_day;
|
2010-06-15 22:21:57 +00:00
|
|
|
// the actual limit is:
|
|
|
|
// for GPU versions:
|
|
|
|
// this times config.gpu_multiplier * #GPUs of this type
|
|
|
|
// for CPU versions:
|
|
|
|
// this times #CPUs
|
2010-03-29 22:28:20 +00:00
|
|
|
int n_jobs_today;
|
|
|
|
AVERAGE_VAR turnaround;
|
2010-04-16 18:07:08 +00:00
|
|
|
// the stats of turnaround time (received - sent)
|
2010-04-21 19:33:20 +00:00
|
|
|
int consecutive_valid;
|
|
|
|
// number of consecutive validated replicated results.
|
|
|
|
// reset to zero on timeouts, errors, invalid
|
2010-03-29 22:28:20 +00:00
|
|
|
|
|
|
|
void clear();
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
|
|
|
|
// not stored in the DB
|
|
|
|
bool reliable;
|
|
|
|
bool trusted;
|
|
|
|
bool daily_quota_exceeded;
|
2010-03-29 22:28:20 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct DB_HOST_APP_VERSION : public DB_BASE, public HOST_APP_VERSION {
|
|
|
|
DB_HOST_APP_VERSION(DB_CONN* p=0);
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
int update_scheduler(DB_HOST_APP_VERSION&);
|
|
|
|
int update_validator(DB_HOST_APP_VERSION&);
|
2010-03-29 22:28:20 +00:00
|
|
|
};
|
|
|
|
|
2009-06-23 21:45:22 +00:00
|
|
|
// A set of counters for one application (appid),
// together with the time they were last updated.
// NOTE(review): each counter name encodes a table, a column and a value
// (e.g. result_server_state_2); presumably it holds the number of rows of
// that table whose column has that value -- confirm against the code
// that fills this struct.
struct STATE_COUNTS {
|
|
|
|
int appid;
|
|
|
|
int last_update_time;
|
|
|
|
int result_server_state_2;
|
|
|
|
int result_server_state_4;
|
|
|
|
int result_file_delete_state_1;
|
|
|
|
int result_file_delete_state_2;
|
|
|
|
int result_server_state_5_and_file_delete_state_0;
|
|
|
|
int workunit_need_validate_1;
|
|
|
|
int workunit_assimilate_state_1;
|
|
|
|
int workunit_file_delete_state_1;
|
|
|
|
int workunit_file_delete_state_2;
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
struct DB_STATE_COUNTS : public DB_BASE, public STATE_COUNTS {
|
|
|
|
DB_STATE_COUNTS(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char *);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
// A (workunit, result) pair.
// DB_VALIDATOR_ITEM_SET::enumerate() fills a vector of these.
struct VALIDATOR_ITEM {
|
2004-10-08 23:07:59 +00:00
|
|
|
WORKUNIT wu;
|
|
|
|
RESULT res;
|
2004-10-08 22:41:33 +00:00
|
|
|
|
|
2004-12-06 22:41:19 +00:00
|
|
|
void clear();
// defined elsewhere; presumably resets wu and res -- confirm
|
|
|
|
void parse(MYSQL_ROW&);
// defined elsewhere; presumably fills wu and res from a query row.
// The expected column order is not visible in this header.
|
2004-10-08 22:41:33 +00:00
|
|
|
};
|
|
|
|
|
2003-06-04 17:21:26 +00:00
|
|
|
class DB_PLATFORM : public DB_BASE, public PLATFORM {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_PLATFORM(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_APP : public DB_BASE, public APP {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_APP(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_APP_VERSION : public DB_BASE, public APP_VERSION {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_APP_VERSION(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
2010-03-29 22:28:20 +00:00
|
|
|
void operator=(APP_VERSION& w) {APP_VERSION::operator=(w);}
|
2003-06-04 17:21:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
class DB_USER : public DB_BASE, public USER {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_USER(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(USER& r) {USER::operator=(r);}
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_TEAM : public DB_BASE, public TEAM {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_TEAM(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_HOST : public DB_BASE, public HOST {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_HOST(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
2010-03-29 22:28:20 +00:00
|
|
|
int update_diff_sched(HOST&);
|
|
|
|
int update_diff_validator(HOST&);
|
2011-09-13 21:01:42 +00:00
|
|
|
int fpops_percentile(double percentile, double& fpops);
|
|
|
|
// return the given percentile of p_fpops
|
2012-01-09 17:35:48 +00:00
|
|
|
int fpops_mean(double& mean);
|
|
|
|
int fpops_stddev(double& stddev);
|
2003-06-04 17:21:26 +00:00
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(HOST& r) {HOST::operator=(r);}
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_RESULT : public DB_BASE, public RESULT {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_RESULT(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
2011-02-23 21:22:45 +00:00
|
|
|
int mark_as_sent(int old_server_state, int report_grace_period);
|
2003-06-04 17:21:26 +00:00
|
|
|
void db_print(char*);
|
2004-07-21 21:50:25 +00:00
|
|
|
void db_print_values(char*);
|
2003-06-04 17:21:26 +00:00
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(RESULT& r) {RESULT::operator=(r);}
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_WORKUNIT : public DB_BASE, public WORKUNIT {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_WORKUNIT(DB_CONN* p=0);
|
2003-06-04 17:21:26 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
2003-09-21 21:00:25 +00:00
|
|
|
void operator=(WORKUNIT& w) {WORKUNIT::operator=(w);}
|
2003-06-04 17:21:26 +00:00
|
|
|
};
|
|
|
|
|
2007-05-02 23:17:52 +00:00
|
|
|
class DB_CREDITED_JOB : public DB_BASE, public CREDITED_JOB {
|
2007-05-02 18:51:51 +00:00
|
|
|
public:
|
2007-05-02 23:17:52 +00:00
|
|
|
DB_CREDITED_JOB(DB_CONN* p=0);
|
2007-05-02 18:51:51 +00:00
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
2007-05-02 23:17:52 +00:00
|
|
|
void operator=(CREDITED_JOB& wh) {CREDITED_JOB::operator=(wh);}
|
2007-05-02 18:51:51 +00:00
|
|
|
};
|
|
|
|
|
2004-06-22 22:56:50 +00:00
|
|
|
class DB_MSG_FROM_HOST : public DB_BASE, public MSG_FROM_HOST {
|
2004-03-17 01:26:44 +00:00
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_MSG_FROM_HOST(DB_CONN* p=0);
|
2004-03-17 01:26:44 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
2004-06-22 22:56:50 +00:00
|
|
|
class DB_MSG_TO_HOST : public DB_BASE, public MSG_TO_HOST {
|
2004-01-04 06:48:40 +00:00
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_MSG_TO_HOST(DB_CONN* p=0);
|
2004-01-04 06:48:40 +00:00
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
2008-02-21 00:47:50 +00:00
|
|
|
class DB_ASSIGNMENT : public DB_BASE, public ASSIGNMENT {
|
|
|
|
public:
|
|
|
|
DB_ASSIGNMENT(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW& row);
|
|
|
|
};
|
|
|
|
|
2004-07-01 20:24:00 +00:00
|
|
|
// The transitioner uses this to get (WU, result) pairs efficiently.
|
|
|
|
// Each call to enumerate() returns a list of the pairs for a single WU
|
|
|
|
//
|
|
|
|
class DB_TRANSITIONER_ITEM_SET : public DB_BASE_SPECIAL {
|
2004-07-01 18:43:36 +00:00
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_TRANSITIONER_ITEM_SET(DB_CONN* p=0);
|
2004-07-01 20:24:00 +00:00
|
|
|
TRANSITIONER_ITEM last_item;
|
2004-07-02 23:24:36 +00:00
|
|
|
int nitems_this_query;
|
2004-07-01 20:24:00 +00:00
|
|
|
|
|
|
|
int enumerate(
|
|
|
|
int transition_time,
|
|
|
|
int nresult_limit,
|
2005-08-11 23:53:24 +00:00
|
|
|
int wu_id_modulus,
|
|
|
|
int wu_id_remainder,
|
2004-07-01 20:24:00 +00:00
|
|
|
std::vector<TRANSITIONER_ITEM>& items
|
|
|
|
);
|
2004-07-02 19:45:33 +00:00
|
|
|
int update_result(TRANSITIONER_ITEM&);
|
2005-03-30 06:36:11 +00:00
|
|
|
int update_workunit(TRANSITIONER_ITEM&, TRANSITIONER_ITEM&);
|
2004-07-01 18:43:36 +00:00
|
|
|
};
|
|
|
|
|
2004-10-08 22:41:33 +00:00
|
|
|
// The validator uses this to get (WU, result) pairs efficiently.
|
|
|
|
// Each call to enumerate() returns a list of the pairs for a single WU
|
|
|
|
//
|
|
|
|
class DB_VALIDATOR_ITEM_SET : public DB_BASE_SPECIAL {
|
|
|
|
public:
|
|
|
|
DB_VALIDATOR_ITEM_SET(DB_CONN* p=0);
|
|
|
|
VALIDATOR_ITEM last_item;
|
|
|
|
int nitems_this_query;
|
|
|
|
|
|
|
|
int enumerate(
|
|
|
|
int appid,
|
|
|
|
int nresult_limit,
|
2005-02-22 20:12:31 +00:00
|
|
|
int wu_id_modulus,
|
|
|
|
int wu_id_remainder,
|
2004-10-08 22:41:33 +00:00
|
|
|
std::vector<VALIDATOR_ITEM>& items
|
|
|
|
);
|
|
|
|
int update_result(RESULT&);
|
|
|
|
int update_workunit(WORKUNIT&);
|
|
|
|
};
|
|
|
|
|
2008-07-22 23:36:55 +00:00
|
|
|
|
2004-07-02 22:48:33 +00:00
|
|
|
// used by the feeder and scheduler for outgoing work
|
|
|
|
//
|
|
|
|
// A workunit together with selected fields of one of its results.
// NOTE(review): the res_* fields presumably mirror the same-named
// RESULT fields (id, priority, server_state, report_deadline) -- confirm.
struct WORK_ITEM {
|
2004-07-03 16:57:28 +00:00
|
|
|
int res_id;
|
2007-04-05 17:02:01 +00:00
|
|
|
int res_priority;
|
2009-08-31 19:35:46 +00:00
|
|
|
int res_server_state;
|
|
|
|
double res_report_deadline;
// NOTE(review): a double here, whereas RESULT.report_deadline is an int
|
2004-07-03 16:57:28 +00:00
|
|
|
WORKUNIT wu;
|
2004-07-02 22:48:33 +00:00
|
|
|
void parse(MYSQL_ROW& row);
// defined elsewhere; presumably fills this item from a query row.
// The expected column order is not visible in this header.
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_WORK_ITEM : public WORK_ITEM, public DB_BASE_SPECIAL {
|
2007-06-22 23:48:37 +00:00
|
|
|
int start_id;
|
|
|
|
// when enumerate_all is used, keeps track of which ID to start from
|
2004-07-02 22:48:33 +00:00
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_WORK_ITEM(DB_CONN* p=0);
|
2005-09-22 00:15:43 +00:00
|
|
|
int enumerate(
|
2006-05-02 22:33:00 +00:00
|
|
|
int limit, const char* select_clause, const char* order_clause
|
2005-09-22 00:30:01 +00:00
|
|
|
);
|
2004-07-02 22:48:33 +00:00
|
|
|
// used by feeder
|
2007-06-22 23:48:37 +00:00
|
|
|
int enumerate_all(
|
|
|
|
int limit, const char* select_clause
|
|
|
|
);
|
|
|
|
// used by feeder when HR is used.
|
|
|
|
// Successive calls cycle through all results.
|
2004-07-02 22:48:33 +00:00
|
|
|
int read_result();
|
|
|
|
// used by scheduler to read result server state
|
|
|
|
int update();
|
|
|
|
// used by scheduler to update WU transition time
|
|
|
|
// and various result fields
|
|
|
|
};
|
|
|
|
|
2007-04-05 17:02:01 +00:00
|
|
|
// Used by the scheduler to send <result_abort> or <result_abort_if_not_started>
|
|
|
|
// messages if the result is no longer needed.
|
|
|
|
//
|
|
|
|
// One row describing a result, combining result-level and
// (judging by the names) workunit-level state.
struct IN_PROGRESS_RESULT {
|
|
|
|
char result_name[256];
|
|
|
|
int assimilate_state;
// presumably the workunit's assimilate_state -- confirm
|
|
|
|
int error_mask;
// presumably the workunit's error_mask -- confirm
|
|
|
|
int server_state;
// presumably the result's server_state -- confirm
|
|
|
|
int outcome;
// presumably the result's outcome -- confirm
|
|
|
|
void parse(MYSQL_ROW& row);
// defined elsewhere; presumably fills the fields from a query row
|
|
|
|
};
|
|
|
|
|
|
|
|
// Database accessor whose rows are IN_PROGRESS_RESULT records.
class DB_IN_PROGRESS_RESULT : public IN_PROGRESS_RESULT, public DB_BASE_SPECIAL {
|
|
|
|
public:
|
|
|
|
DB_IN_PROGRESS_RESULT(DB_CONN* p=0);
// p: DB connection to use; defaults to a null pointer
|
|
|
|
int enumerate(int hostid, const char* result_names);
// NOTE(review): presumably steps through the results of host "hostid"
// selected by "result_names"; the format of result_names and the
// return-value convention are defined in the implementation -- confirm
|
|
|
|
};
|
|
|
|
|
2004-12-05 23:52:17 +00:00
|
|
|
// Used by the scheduler to handle results reported by clients
|
|
|
|
// The read and the update of these results are combined
|
|
|
|
// into single SQL queries.
|
|
|
|
|
2004-07-06 18:30:22 +00:00
|
|
|
struct SCHED_RESULT_ITEM {
|
2004-12-05 23:52:17 +00:00
|
|
|
char queried_name[256]; // name as reported by client
|
2004-07-06 18:30:22 +00:00
|
|
|
int id;
|
|
|
|
char name[256];
|
|
|
|
int workunitid;
|
2008-07-22 23:36:55 +00:00
|
|
|
int appid;
|
2004-07-06 18:30:22 +00:00
|
|
|
int server_state;
|
2004-07-02 22:48:33 +00:00
|
|
|
int client_state;
|
2004-07-06 18:30:22 +00:00
|
|
|
int validate_state;
|
|
|
|
int outcome;
|
|
|
|
int hostid;
|
|
|
|
int userid;
|
2004-07-02 22:48:33 +00:00
|
|
|
int teamid;
|
2005-06-26 19:34:17 +00:00
|
|
|
int sent_time;
|
2004-07-06 18:30:22 +00:00
|
|
|
int received_time;
|
|
|
|
double cpu_time;
|
2008-03-31 16:19:45 +00:00
|
|
|
char xml_doc_out[BLOB_SIZE];
|
|
|
|
char stderr_out[BLOB_SIZE];
|
2004-07-06 18:30:22 +00:00
|
|
|
int app_version_num;
|
|
|
|
int exit_status;
|
2006-06-03 16:22:34 +00:00
|
|
|
int file_delete_state;
|
2009-09-03 20:26:31 +00:00
|
|
|
double elapsed_time;
|
- server: change the following from per-host to per-(host, app version):
- daily quota mechanism
- reliable mechanism (accelerated retries)
- "trusted" mechanism (adaptive replication)
- scheduler: enforce host scale probation only for apps with
host_scale_check set.
- validator: do scale probation on invalid results
(need this in addition to error and timeout cases)
- feeder: update app version scales every 10 min, not 10 sec
- back-end apps: support --foo as well as -foo for options
Notes:
- If you have, say, cuda, cuda23 and cuda_fermi plan classes,
a host will have separate quotas for each one.
That means it could error out on 100 jobs for cuda_fermi,
and when its quota goes to zero,
error out on 100 jobs for cuda23, etc.
This is intentional; there may be cases where one version
works but not the others.
- host.error_rate and host.max_results_day are deprecated
TODO:
- the values in the app table for limits on jobs in progress etc.
should override rather than config.xml.
Implementation notes:
scheduler:
process_request():
read all host_app_versions for host at start;
Compute "reliable" and "trusted" for each one.
write modified records at end
get_app_version():
add "reliable_only" arg; if set, use only reliable versions
skip over-quota versions
Multi-pass scheduling: if have at least one reliable version,
do a pass for jobs that need reliable,
and use only reliable versions.
Then clear best_app_versions cache.
Score-based scheduling: for need-reliable jobs,
it will pick the fastest version,
then give a score bonus if that version happens to be reliable.
When get back a successful result from client:
increase daily quota
When get back an error result from client:
impose scale probation
decrease daily quota if not aborted
Validator:
when handling a WU, create a vector of HOST_APP_VERSION
parallel to vector of RESULT.
Pass it to assign_credit_set().
Make copies of originals so we can update only modified ones
update HOST_APP_VERSION error rates
Transitioner:
decrease quota on timeout
svn path=/trunk/boinc/; revision=21181
2010-04-15 03:13:56 +00:00
|
|
|
int app_version_id;
|
2004-07-02 22:48:33 +00:00
|
|
|
|
2004-07-06 18:30:22 +00:00
|
|
|
void clear();
|
|
|
|
void parse(MYSQL_ROW& row);
|
2004-07-02 22:48:33 +00:00
|
|
|
};
|
|
|
|
|
2004-07-06 18:30:22 +00:00
|
|
|
class DB_SCHED_RESULT_ITEM_SET : public DB_BASE_SPECIAL {
|
|
|
|
public:
|
2004-10-04 23:23:57 +00:00
|
|
|
DB_SCHED_RESULT_ITEM_SET(DB_CONN* p=0);
|
2004-07-06 19:04:37 +00:00
|
|
|
std::vector<SCHED_RESULT_ITEM> results;
|
2004-07-06 18:30:22 +00:00
|
|
|
|
|
|
|
int add_result(char* result_name);
|
|
|
|
|
|
|
|
int enumerate();
|
2004-12-05 23:52:17 +00:00
|
|
|
// using a single SQL query, look up all the reported results,
|
|
|
|
// (based on queried_name)
|
|
|
|
// and fill in the rest of the entries in the results vector
|
2004-07-06 18:30:22 +00:00
|
|
|
|
2004-07-27 23:29:27 +00:00
|
|
|
int lookup_result(char* result_name, SCHED_RESULT_ITEM** result);
|
2004-07-06 18:30:22 +00:00
|
|
|
|
|
|
|
int update_result(SCHED_RESULT_ITEM& result);
|
2004-07-21 23:48:56 +00:00
|
|
|
int update_workunits();
|
2004-07-02 22:48:33 +00:00
|
|
|
};
|
|
|
|
|
2010-03-05 22:55:16 +00:00
|
|
|
// Record describing a file. DB_FILE derives from this.
struct FILE_ITEM {
|
|
|
|
int id;
|
|
|
|
char name[254];
|
|
|
|
char md5sum[34];
// presumably a hex MD5 digest (32 characters) plus terminator -- confirm
|
|
|
|
double size;
// units are not stated in this header; presumably bytes -- confirm
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_FILE : public DB_BASE, public FILE_ITEM {
|
|
|
|
public:
|
|
|
|
DB_FILE(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(FILE_ITEM& f) {FILE_ITEM::operator=(f);}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Record describing a named fileset. DB_FILESET derives from this
// and can look an instance up by name (select_by_name()).
struct FILESET_ITEM {
|
|
|
|
int id;
|
|
|
|
char name[254];
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_FILESET : public DB_BASE, public FILESET_ITEM {
|
|
|
|
public:
|
|
|
|
DB_FILESET(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(FILESET_ITEM& f) {FILESET_ITEM::operator=(f);}
|
|
|
|
|
|
|
|
// retrieve fileset instance (populate object)
|
|
|
|
int select_by_name(const char* name);
|
|
|
|
};
|
|
|
|
|
|
|
|
// A (fileset_id, file_id) pair. DB_FILESET_FILE derives from this.
// NOTE(review): presumably links FILESET_ITEM.id to FILE_ITEM.id -- confirm.
struct FILESET_FILE_ITEM {
|
|
|
|
int fileset_id;
|
|
|
|
int file_id;
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_FILESET_FILE : public DB_BASE, public FILESET_FILE_ITEM {
|
|
|
|
public:
|
|
|
|
DB_FILESET_FILE(DB_CONN* p=0);
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(FILESET_FILE_ITEM& tf) {FILESET_FILE_ITEM::operator=(tf);}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Scheduler trigger record attached to a fileset (fileset_id).
// DB_SCHED_TRIGGER derives from this.
// The four flags below carry the same names as the
// DB_SCHED_TRIGGER::STATE enumerators (state_need_work, ...);
// presumably update_single_state() sets one of them -- confirm.
struct SCHED_TRIGGER_ITEM {
|
|
|
|
int id;
|
|
|
|
int fileset_id;
|
|
|
|
bool need_work;
|
|
|
|
bool work_available;
|
|
|
|
bool no_work_available;
|
|
|
|
bool working_set_removal;
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_SCHED_TRIGGER : public DB_BASE, public SCHED_TRIGGER_ITEM {
|
|
|
|
public:
|
|
|
|
DB_SCHED_TRIGGER(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(SCHED_TRIGGER_ITEM& t) {SCHED_TRIGGER_ITEM::operator=(t);}
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
none = 0,
|
|
|
|
state_need_work = 1,
|
|
|
|
state_work_available = 2,
|
|
|
|
state_no_work_available = 3,
|
|
|
|
state_working_set_removal = 4
|
|
|
|
} STATE;
|
|
|
|
|
|
|
|
// retrieve trigger instance (populate object)
|
|
|
|
int select_unique_by_fileset_name(const char* fileset_name);
|
|
|
|
// set single trigger state
|
|
|
|
int update_single_state(const DB_SCHED_TRIGGER::STATE state, const bool value);
|
|
|
|
};
|
|
|
|
|
|
|
|
// Bundles a FILESET_ITEM with a SCHED_TRIGGER_ITEM.
// DB_FILESET_SCHED_TRIGGER_ITEM derives from this.
struct FILESET_SCHED_TRIGGER_ITEM {
|
|
|
|
FILESET_ITEM fileset;
|
|
|
|
SCHED_TRIGGER_ITEM trigger;
|
|
|
|
|
|
|
|
void clear();
// defined elsewhere; presumably resets both members -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_FILESET_SCHED_TRIGGER_ITEM : public DB_BASE_SPECIAL, public FILESET_SCHED_TRIGGER_ITEM {
|
|
|
|
public:
|
|
|
|
DB_FILESET_SCHED_TRIGGER_ITEM(DB_CONN* p=0);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
void operator=(FILESET_SCHED_TRIGGER_ITEM& fst) {FILESET_SCHED_TRIGGER_ITEM::operator=(fst);}
|
|
|
|
};
|
|
|
|
|
|
|
|
class DB_FILESET_SCHED_TRIGGER_ITEM_SET : public DB_BASE_SPECIAL {
|
|
|
|
public:
|
|
|
|
DB_FILESET_SCHED_TRIGGER_ITEM_SET(DB_CONN* p=0);
|
|
|
|
|
|
|
|
// select available triggers based on name and/or state
|
|
|
|
// -> name filter optional (set string, default NULL)
|
|
|
|
// -> pattern search optional (set use_regexp to true, default false)
|
|
|
|
// -> state filter optional (set state, default none)
|
|
|
|
// -> state_value (default true)
|
|
|
|
int select_by_name_state(
|
|
|
|
const char* fileset_name,
|
|
|
|
const bool use_regexp,
|
|
|
|
const DB_SCHED_TRIGGER::STATE state,
|
|
|
|
const bool state_value);
|
|
|
|
|
|
|
|
// check if given trigger (fileset name) is part of set and return position (1-indexed)
|
|
|
|
int contains_trigger(const char* fileset_name);
|
|
|
|
|
|
|
|
// storage vector
|
|
|
|
std::vector<DB_FILESET_SCHED_TRIGGER_ITEM> items;
|
|
|
|
};
|
|
|
|
|
2012-01-23 05:03:52 +00:00
|
|
|
// Record describing a VDA file. DB_VDA_FILE derives from this.
// NOTE(review): the per-field notes below are read off the field names
// only; confirm them against the code that uses this struct.
struct VDA_FILE {
|
|
|
|
int id;
|
|
|
|
char dir[256];
// presumably the directory holding the file
|
|
|
|
char name[256];
|
|
|
|
double size;
|
|
|
|
double chunk_size;
|
|
|
|
double created;
// presumably a creation timestamp
|
|
|
|
bool need_update;
|
2012-01-23 21:59:12 +00:00
|
|
|
bool inited;
|
2012-01-23 05:03:52 +00:00
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
// Associates a named chunk of a VDA file (vda_file_id) with a host (host_id).
// DB_VDA_CHUNK_HOST derives from this.
// NOTE(review): the meaning of the three bool flags and of transition_time
// is not stated in this header; confirm against the code that uses them.
struct VDA_CHUNK_HOST {
|
|
|
|
int vda_file_id;
|
|
|
|
int host_id; // zero if we're waiting for a host
|
|
|
|
char name[256];
|
|
|
|
bool present_on_host;
|
|
|
|
bool transfer_in_progress;
|
|
|
|
bool transfer_wait;
|
|
|
|
double transition_time;
|
|
|
|
void clear();
// defined elsewhere; presumably resets the fields -- confirm
|
|
|
|
};
|
|
|
|
|
|
|
|
struct DB_VDA_FILE : public DB_BASE, public VDA_FILE {
|
|
|
|
DB_VDA_FILE(DB_CONN* p=0);
|
|
|
|
int get_id();
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
|
|
|
struct DB_VDA_CHUNK_HOST : public DB_BASE, public VDA_CHUNK_HOST {
|
|
|
|
DB_VDA_CHUNK_HOST(DB_CONN* p=0);
|
|
|
|
void db_print(char*);
|
|
|
|
void db_parse(MYSQL_ROW &row);
|
|
|
|
};
|
|
|
|
|
2002-04-30 22:22:54 +00:00
|
|
|
#endif
|