mirror of https://github.com/BOINC/boinc.git
Debug and finish starvation prevention for dynamic AMs.
- Add an official notion of dynamic AM. Conveyed in AM reply message, stored in AM account file. - Report accounting info only to dynamic AMs - Fix bug where account info file wasn't written for AMs using authenticators - In handling GUI RPC to do AM RPC, use info for current AM, e.g. if it's dynamic
This commit is contained in:
parent
99dd146bbb
commit
644f0ae6ba
|
@ -169,12 +169,6 @@ int ACCT_MGR_OP::do_rpc(ACCT_MGR_INFO& _ami, bool _via_gui) {
|
|||
" <detach_when_done>%d</detach_when_done>\n"
|
||||
" <ended>%d</ended>\n"
|
||||
" <resource_share>%f</resource_share>\n"
|
||||
" <cpu_ec>%f</cpu_ec>\n"
|
||||
" <cpu_time>%f</cpu_time>\n"
|
||||
" <gpu_ec>%f</gpu_ec>\n"
|
||||
" <gpu_time>%f</gpu_time>\n"
|
||||
" <njobs_success>%d</njobs_success>\n"
|
||||
" <njobs_error>%d</njobs_error>\n"
|
||||
" <disk_usage>%f</disk_usage>\n"
|
||||
" <disk_share>%f</disk_share>\n",
|
||||
p->master_url,
|
||||
|
@ -188,22 +182,28 @@ int ACCT_MGR_OP::do_rpc(ACCT_MGR_INFO& _ami, bool _via_gui) {
|
|||
p->detach_when_done?1:0,
|
||||
p->ended?1:0,
|
||||
p->resource_share,
|
||||
p->cpu_ec,
|
||||
p->cpu_time,
|
||||
p->gpu_ec,
|
||||
p->gpu_time,
|
||||
p->njobs_success,
|
||||
p->njobs_error,
|
||||
p->disk_usage,
|
||||
p->disk_share
|
||||
);
|
||||
|
||||
// send starvation-related info
|
||||
// send work and starvation-related info
|
||||
//
|
||||
if (ami.send_rec) {
|
||||
if (ami.dynamic) {
|
||||
fprintf(f,
|
||||
" <nrpc_failures>%d</nrpc_failures>",
|
||||
p->nrpc_failures
|
||||
" <nrpc_failures>%d</nrpc_failures>"
|
||||
" <cpu_ec>%f</cpu_ec>\n"
|
||||
" <cpu_time>%f</cpu_time>\n"
|
||||
" <gpu_ec>%f</gpu_ec>\n"
|
||||
" <gpu_time>%f</gpu_time>\n"
|
||||
" <njobs_success>%d</njobs_success>\n"
|
||||
" <njobs_error>%d</njobs_error>\n",
|
||||
p->nrpc_failures,
|
||||
p->cpu_ec,
|
||||
p->cpu_time,
|
||||
p->gpu_ec,
|
||||
p->gpu_time,
|
||||
p->njobs_success,
|
||||
p->njobs_error
|
||||
);
|
||||
for (int j=0; j<coprocs.n_rsc; j++) {
|
||||
if (p->sched_req_no_work[j]) {
|
||||
|
@ -387,6 +387,7 @@ int ACCT_MGR_OP::parse(FILE* f) {
|
|||
safe_strcpy(host_venue, "");
|
||||
safe_strcpy(ami.opaque, "");
|
||||
ami.no_project_notices = false;
|
||||
ami.dynamic = false;
|
||||
rss_feeds.clear();
|
||||
if (!xp.parse_start("acct_mgr_reply")) return ERR_XML_PARSE;
|
||||
while (!xp.get_tag()) {
|
||||
|
@ -406,6 +407,7 @@ int ACCT_MGR_OP::parse(FILE* f) {
|
|||
if (xp.parse_string("error", error_str)) continue;
|
||||
if (xp.parse_string("error_msg", error_str)) continue;
|
||||
if (xp.parse_double("repeat_sec", repeat_sec)) continue;
|
||||
if (xp.parse_bool("dynamic", ami.dynamic)) continue;
|
||||
if (xp.parse_string("message", message)) {
|
||||
msg_printf(NULL, MSG_INFO, "Account manager: %s", message.c_str());
|
||||
continue;
|
||||
|
@ -613,6 +615,7 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
|
|||
safe_strcpy(gstate.acct_mgr_info.password_hash, ami.password_hash);
|
||||
safe_strcpy(gstate.acct_mgr_info.authenticator, ami.authenticator);
|
||||
gstate.acct_mgr_info.no_project_notices = ami.no_project_notices;
|
||||
gstate.acct_mgr_info.dynamic = ami.dynamic;
|
||||
|
||||
// process projects
|
||||
//
|
||||
|
@ -840,7 +843,7 @@ int ACCT_MGR_INFO::write_info() {
|
|||
fclose(f);
|
||||
}
|
||||
|
||||
if (strlen(login_name)) {
|
||||
if (strlen(login_name) || strlen(authenticator)) {
|
||||
f = fopen(ACCT_MGR_LOGIN_FILENAME, "w");
|
||||
if (!f) {
|
||||
msg_printf(NULL, MSG_USER_ALERT,
|
||||
|
@ -870,11 +873,13 @@ int ACCT_MGR_INFO::write_info() {
|
|||
" <next_rpc_time>%f</next_rpc_time>\n"
|
||||
" <opaque>\n%s\n"
|
||||
" </opaque>\n"
|
||||
" <no_project_notices>%d</no_project_notices>\n",
|
||||
" <no_project_notices>%d</no_project_notices>\n"
|
||||
" <dynamic>%d</dynamic>\n",
|
||||
previous_host_cpid,
|
||||
next_rpc_time,
|
||||
opaque,
|
||||
no_project_notices?1:0
|
||||
no_project_notices?1:0,
|
||||
dynamic?1:0
|
||||
);
|
||||
user_keywords.write(f);
|
||||
fprintf(f,
|
||||
|
@ -906,6 +911,7 @@ void ACCT_MGR_INFO::clear() {
|
|||
first_starved = 0;
|
||||
starved_rpc_backoff = 0;
|
||||
starved_rpc_min_time = 0;
|
||||
dynamic = false;
|
||||
}
|
||||
|
||||
ACCT_MGR_INFO::ACCT_MGR_INFO() {
|
||||
|
@ -944,6 +950,7 @@ int ACCT_MGR_INFO::parse_login_file(FILE* p) {
|
|||
continue;
|
||||
}
|
||||
else if (xp.parse_bool("no_project_notices", no_project_notices)) continue;
|
||||
else if (xp.parse_bool("dynamic", dynamic)) continue;
|
||||
else if (xp.match_tag("user_keywords")) {
|
||||
retval = user_keywords.parse(xp);
|
||||
if (retval) {
|
||||
|
@ -1056,7 +1063,7 @@ bool ACCT_MGR_INFO::poll() {
|
|||
|
||||
// if not dynamic AM, we're done
|
||||
//
|
||||
if (!send_rec) {
|
||||
if (!dynamic) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -72,8 +72,10 @@ struct ACCT_MGR_INFO : PROJ_AM {
|
|||
// what login name and password they have been assigned
|
||||
|
||||
bool password_error;
|
||||
bool send_rec;
|
||||
// send REC in AM RPCs
|
||||
bool dynamic;
|
||||
// This AM dynamically decides what projects to assign.
|
||||
// - send EC in AM RPCs
|
||||
// - send starvation info if idle resources
|
||||
USER_KEYWORDS user_keywords;
|
||||
// user's yes/no keywords.
|
||||
// These are conveyed to projects in scheduler requests
|
||||
|
|
|
@ -945,6 +945,7 @@ static void handle_acct_mgr_rpc(GUI_RPC_CONN& grc) {
|
|||
bool use_config_file = false;
|
||||
bool bad_arg = false;
|
||||
bool url_found=false, name_found=false, password_found = false;
|
||||
ACCT_MGR_INFO ami;
|
||||
|
||||
while (!grc.xp.get_tag()) {
|
||||
if (grc.xp.parse_string("url", url)) {
|
||||
|
@ -970,10 +971,7 @@ static void handle_acct_mgr_rpc(GUI_RPC_CONN& grc) {
|
|||
"Not using account manager"
|
||||
);
|
||||
} else {
|
||||
url = gstate.acct_mgr_info.master_url;
|
||||
name = gstate.acct_mgr_info.login_name;
|
||||
password_hash = gstate.acct_mgr_info.password_hash;
|
||||
authenticator = gstate.acct_mgr_info.authenticator;
|
||||
ami = gstate.acct_mgr_info;
|
||||
}
|
||||
} else {
|
||||
bad_arg = !url_found || !name_found || !password_found;
|
||||
|
@ -986,7 +984,11 @@ static void handle_acct_mgr_rpc(GUI_RPC_CONN& grc) {
|
|||
// Remove 'hash:'
|
||||
password_hash = password.substr(5);
|
||||
}
|
||||
}
|
||||
safe_strcpy(ami.master_url, url.c_str());
|
||||
safe_strcpy(ami.login_name, name.c_str());
|
||||
safe_strcpy(ami.password_hash, password_hash.c_str());
|
||||
safe_strcpy(ami.authenticator, authenticator.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (bad_arg) {
|
||||
|
@ -997,11 +999,6 @@ static void handle_acct_mgr_rpc(GUI_RPC_CONN& grc) {
|
|||
){
|
||||
grc.mfout.printf("<error>attached to a different AM - detach first</error>\n");
|
||||
} else {
|
||||
ACCT_MGR_INFO ami;
|
||||
safe_strcpy(ami.master_url, url.c_str());
|
||||
safe_strcpy(ami.login_name, name.c_str());
|
||||
safe_strcpy(ami.password_hash, password_hash.c_str());
|
||||
safe_strcpy(ami.authenticator, authenticator.c_str());
|
||||
gstate.acct_mgr_op.do_rpc(ami, true);
|
||||
grc.mfout.printf("<success/>\n");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue