Merge branch 'master' of ssh://isaac.ssl.berkeley.edu/boinc-v2

This commit is contained in:
Rom Walton 2014-11-24 13:23:57 -05:00
commit 28bb2a90c4
8 changed files with 85 additions and 41 deletions

View File

@ -27,13 +27,49 @@
bool have_max_concurrent = false;
// Parse the body of a <gpu_versions> element in app_config.xml.
// Recognized child elements:
//   <gpu_usage>: stored in gpu_gpu_usage unless it is <= 0
//   <cpu_usage>: stored in gpu_cpu_usage unless it is < 0
// A rejected value produces a user alert and is otherwise ignored;
// parsing continues with the next tag.
// Any other tag is reported only if log_flags.unparsed_xml is set.
//
// Returns 0 when </gpu_versions> is seen.
// If get_tag() fails before the closing tag is found,
// issues a notice and returns ERR_XML_PARSE.
//
int APP_CONFIG::parse_gpu_versions(XML_PARSER& xp, PROJECT* p) {
    double usage;
    for (;;) {
        if (xp.get_tag()) {
            break;
        }
        if (xp.match_tag("/gpu_versions")) {
            return 0;
        }

        bool recognized = true;
        if (xp.parse_double("gpu_usage", usage)) {
            if (usage <= 0) {
                msg_printf(p, MSG_USER_ALERT,
                    "gpu_usage must be positive in app_config.xml"
                );
            } else {
                gpu_gpu_usage = usage;
            }
        } else if (xp.parse_double("cpu_usage", usage)) {
            if (usage < 0) {
                msg_printf(p, MSG_USER_ALERT,
                    "cpu_usage must be non-negative in app_config.xml"
                );
            } else {
                gpu_cpu_usage = usage;
            }
        } else {
            recognized = false;
        }

        if (!recognized && log_flags.unparsed_xml) {
            msg_printf(p, MSG_INFO,
                "Unparsed line in app_config.xml: %s",
                xp.parsed_tag
            );
        }
    }

    // ran out of input without seeing the closing tag
    //
    msg_printf_notice(p, false, NULL,
        "missing </gpu_versions> in app_config.xml"
    );
    return ERR_XML_PARSE;
}
// In these parsing functions, if there's an error you must
// - generate a notice containing the string "app_config.xml"
// - return an error code
//
int APP_CONFIG::parse(XML_PARSER& xp, PROJECT* p) {
memset(this, 0, sizeof(APP_CONFIG));
double x;
while (!xp.get_tag()) {
if (xp.match_tag("/app")) return 0;
@ -43,29 +79,8 @@ int APP_CONFIG::parse(XML_PARSER& xp, PROJECT* p) {
continue;
}
if (xp.match_tag("gpu_versions")) {
while (!xp.get_tag()) {
if (xp.match_tag("/gpu_versions")) break;
if (xp.parse_double("gpu_usage", x)) {
if (x <= 0) {
msg_printf(p, MSG_USER_ALERT,
"gpu_usage must be positive in app_config.xml"
);
} else {
gpu_gpu_usage = x;
}
continue;
}
if (xp.parse_double("cpu_usage", x)) {
if (x < 0) {
msg_printf(p, MSG_USER_ALERT,
"cpu_usage must be non-negative in app_config.xml"
);
} else {
gpu_cpu_usage = x;
}
continue;
}
}
int retval = parse_gpu_versions(xp, p);
if (retval) return retval;
continue;
}
if (xp.parse_bool("fraction_done_exact", fraction_done_exact)) {

View File

@ -35,6 +35,7 @@ struct APP_CONFIG {
bool fraction_done_exact;
int parse(XML_PARSER&, PROJECT*);
int parse_gpu_versions(XML_PARSER&, PROJECT*);
};
struct APP_VERSION_CONFIG {

View File

@ -423,6 +423,15 @@ int CLIENT_STATE::init() {
coprocs.coprocs[j].type,
coprocs.coprocs[j].count
);
if (coprocs.coprocs[j].count > MAX_COPROC_INSTANCES) {
msg_printf(NULL, MSG_USER_ALERT,
"%d instances of %s specified in cc_config.xml; max is %d",
coprocs.coprocs[j].count,
coprocs.coprocs[j].type,
MAX_COPROC_INSTANCES
);
coprocs.coprocs[j].count = MAX_COPROC_INSTANCES;
}
}
if (!cc_config.no_gpus
#ifdef _WIN32

View File

@ -258,7 +258,7 @@ struct APP {
// Can also specify in client_state.xml (for client emulator)
int n_concurrent;
// temp during job scheduling, to enforce max_concurrent
int non_excluded_instances[MAX_RSC];
COPROC_INSTANCE_BITMAP non_excluded_instances[MAX_RSC];
// for each resource type, bitmap of the non-excluded instances
#ifdef SIM
double latency_bound;

View File

@ -304,9 +304,11 @@ void COPROCS::correlate_gpus(
cpu_opencls.clear();
}
// This is called from CLIENT_STATE::init() after adding NVIDIA, ATI and Intel GPUs
// This is called from CLIENT_STATE::init()
// after adding NVIDIA, ATI and Intel GPUs
// If we don't care about the order of GPUs in COPROCS::coprocs[],
// this code could be included at the end of COPROCS::correlate_gpus().
//
int COPROCS::add_other_coproc_types() {
int retval = 0;
@ -318,8 +320,10 @@ int COPROCS::add_other_coproc_types() {
}
COPROC c;
// For other device types other than NVIDIA, ATI or Intel GPU coprocessor.
// we put each instance into a separate other_opencls element, so count=1.
// For device types other than NVIDIA, ATI or Intel GPU,
// we put each instance into a separate other_opencls element,
// so count=1.
//
c.count = 1;
c.opencl_device_count = 1;
c.opencl_prop = other_opencls[i];

View File

@ -592,7 +592,9 @@ void process_gpu_exclusions() {
APP* app = gstate.apps[i];
for (int k=1; k<coprocs.n_rsc; k++) {
COPROC& cp = coprocs.coprocs[k];
app->non_excluded_instances[k] = (1<<cp.count)-1; // all 1's
for (int h=0; h<cp.count; h++) {
app->non_excluded_instances[k] |= ((COPROC_INSTANCE_BITMAP)1)<<h;
}
}
}
@ -600,18 +602,23 @@ void process_gpu_exclusions() {
p = gstate.projects[i];
for (int k=1; k<coprocs.n_rsc; k++) {
COPROC& cp = coprocs.coprocs[k];
int all_instances = (1<<cp.count)-1; // bitmap of 1 for all inst
COPROC_INSTANCE_BITMAP all_instances = 0;
// bitmap of 1 for all instances
//
for (int h=0; h<cp.count; h++) {
all_instances |= ((COPROC_INSTANCE_BITMAP)1)<<h;
}
for (j=0; j<cc_config.exclude_gpus.size(); j++) {
EXCLUDE_GPU& eg = cc_config.exclude_gpus[j];
if (!eg.type.empty() && (eg.type != cp.type)) continue;
if (strcmp(eg.url.c_str(), p->master_url)) continue;
int mask;
COPROC_INSTANCE_BITMAP mask;
if (eg.device_num >= 0) {
int index = cp.device_num_index(eg.device_num);
// exclusion may refer to nonexistent GPU
//
if (index < 0) continue;
mask = 1<<index;
mask = ((COPROC_INSTANCE_BITMAP)1)<<index;
} else {
mask = all_instances;
}
@ -652,7 +659,7 @@ void process_gpu_exclusions() {
//
p->rsc_pwf[k].ncoprocs_excluded = 0;
for (int b=0; b<cp.count; b++) {
int mask = 1<<b;
COPROC_INSTANCE_BITMAP mask = ((COPROC_INSTANCE_BITMAP)1)<<b;
for (a=0; a<gstate.apps.size(); a++) {
APP* app = gstate.apps[a];
if (app->project != p) continue;

View File

@ -64,12 +64,14 @@ inline void rsc_string(RESULT* rp, char* buf) {
// set "nused" bits of the source bitmap in the dest bitmap
//
static inline void set_bits(int src, double nused, int& dst) {
static inline void set_bits(
COPROC_INSTANCE_BITMAP src, double nused, COPROC_INSTANCE_BITMAP& dst
) {
// if all bits are already set, we're done
//
if ((src&dst) == src) return;
int bit = 1;
for (int i=0; i<32; i++) {
COPROC_INSTANCE_BITMAP bit = 1;
for (int i=0; i<MAX_COPROC_INSTANCES; i++) {
if (nused <= 0) break;
if (bit & src) {
dst |= bit;
@ -524,11 +526,14 @@ void RR_SIM::simulate() {
RSC_WORK_FETCH& rwf = rsc_work_fetch[i];
if (!rwf.has_exclusions) continue;
COPROC& cp = coprocs.coprocs[i];
int mask = (1<<cp.count)-1;
COPROC_INSTANCE_BITMAP mask = 0;
for (int j=0; j<cp.count; j++) {
mask |= ((COPROC_INSTANCE_BITMAP)1)<<j;
}
rwf.sim_excluded_instances = ~(rwf.sim_used_instances) & mask;
if (log_flags.rrsim_detail) {
msg_printf(0, MSG_INFO,
"[rrsim_detail] rsc %d: sim_used_inst %d mask %d sim_excluded_instances %d",
"[rrsim_detail] rsc %d: sim_used_inst %lld mask %lld sim_excluded_instances %lld",
i, rwf.sim_used_instances, mask, rwf.sim_excluded_instances
);
}

View File

@ -61,6 +61,9 @@ struct RSC_WORK_FETCH;
struct SCHEDULER_REPLY;
struct APP_VERSION;
// Bitmap with one bit per coprocessor instance:
// bit i is built as ((COPROC_INSTANCE_BITMAP)1)<<i and refers to instance i.
// NOTE(review): long long is a signed type. If instance 63 can occur
// (MAX_COPROC_INSTANCES is stated as 64 below), setting its bit shifts
// a 1 into the sign bit, which is not well-defined before C++20;
// an unsigned 64-bit type may be safer - TODO confirm.
typedef long long COPROC_INSTANCE_BITMAP;
// should be at least MAX_COPROC_INSTANCES (64) bits
// state per (resource, project) pair
//
struct RSC_PROJECT_WORK_FETCH {
@ -88,7 +91,7 @@ struct RSC_PROJECT_WORK_FETCH {
double nused_total; // sum of instances over all runnable jobs
int ncoprocs_excluded;
// number of excluded instances
int non_excluded_instances;
COPROC_INSTANCE_BITMAP non_excluded_instances;
// bitmap of non-excluded instances
// (i.e. instances this project's jobs can run on)
int deadlines_missed;
@ -200,10 +203,10 @@ struct RSC_WORK_FETCH {
// seconds of idle instances between now and now+work_buf_total()
double nidle_now;
double sim_nused;
int sim_used_instances;
COPROC_INSTANCE_BITMAP sim_used_instances;
// bitmap of instances used in simulation,
// taking into account GPU exclusions
int sim_excluded_instances;
COPROC_INSTANCE_BITMAP sim_excluded_instances;
// bitmap of instances not used (i.e. starved because of exclusion)
double total_fetchable_share;
// total RS of projects from which we could fetch jobs for this device