client: extensions, fixes to account manager functionality

when handling an AM reply:
- check URL signature only for projects being attached.
- clear dont_request_more_work, detach_when_done if not present in reply

Store disk usage and share in state file; report to AM
This commit is contained in:
David Anderson 2017-09-15 14:42:51 -07:00
parent 8e1a54621f
commit 9f5c247adf
4 changed files with 97 additions and 62 deletions

View File

@ -166,7 +166,9 @@ int ACCT_MGR_OP::do_rpc(
" <gpu_ec>%f</gpu_ec>\n" " <gpu_ec>%f</gpu_ec>\n"
" <gpu_time>%f</gpu_time>\n" " <gpu_time>%f</gpu_time>\n"
" <njobs_success>%d</njobs_success>\n" " <njobs_success>%d</njobs_success>\n"
" <njobs_error>%d</njobs_error>\n", " <njobs_error>%d</njobs_error>\n"
" <disk_usage>%f</disk_usage>\n"
" <disk_share>%f</disk_share>\n",
p->master_url, p->master_url,
p->project_name, p->project_name,
p->suspended_via_gui?1:0, p->suspended_via_gui?1:0,
@ -183,7 +185,9 @@ int ACCT_MGR_OP::do_rpc(
p->gpu_ec, p->gpu_ec,
p->gpu_time, p->gpu_time,
p->njobs_success, p->njobs_success,
p->njobs_error p->njobs_error,
p->disk_usage,
p->disk_share
); );
if (p->attached_via_acct_mgr) { if (p->attached_via_acct_mgr) {
fprintf(f, fprintf(f,
@ -455,23 +459,39 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
bool verified; bool verified;
PROJECT* pp; PROJECT* pp;
bool sig_ok; bool sig_ok;
bool got_error = false;
if (http_op_retval == 0) { // check for failures of HTTP OP, reply parse
//
if (http_op_retval) {
msg_printf(&ami, MSG_INFO, "AM RPC HTTP failure: %s",
boincerror(http_op_retval)
);
got_error = true;
} else {
FILE* f = fopen(ACCT_MGR_REPLY_FILENAME, "r"); FILE* f = fopen(ACCT_MGR_REPLY_FILENAME, "r");
if (f) { if (f) {
retval = parse(f); retval = parse(f);
if (retval) {
got_error = true;
msg_printf(&ami, MSG_INFO, "AM reply parse error");
}
fclose(f); fclose(f);
} else { } else {
retval = ERR_FOPEN; msg_printf(&ami, MSG_INFO, "AM reply file missing");
got_error = true;
} }
} else {
error_num = http_op_retval;
} }
// if no errors so far, check for errors from AM
//
if (!got_error) {
gstate.acct_mgr_info.password_error = false; gstate.acct_mgr_info.password_error = false;
if (error_num == ERR_BAD_PASSWD && !via_gui) { if (error_num == ERR_BAD_PASSWD && !via_gui) {
gstate.acct_mgr_info.password_error = true; gstate.acct_mgr_info.password_error = true;
} }
// Show error message from AM if available.
// check both error_str and error_num since an account manager may only // check both error_str and error_num since an account manager may only
// return a BOINC based error code for password failures or invalid // return a BOINC based error code for password failures or invalid
// email addresses // email addresses
@ -482,27 +502,18 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
_("Message from account manager"), _("Message from account manager"),
error_str.c_str() error_str.c_str()
); );
if (!error_num) { got_error = true;
error_num = ERR_XML_PARSE;
}
} else if (error_num) { } else if (error_num) {
if (error_num == http_op_retval) {
// if it was an HTTP error, don't notify the user;
// probably the acct mgr server is down
//
msg_printf(&ami, MSG_INFO,
"Account manager RPC failed: %s", boincerror(error_num)
);
} else {
msg_printf(&ami, MSG_USER_ALERT, msg_printf(&ami, MSG_USER_ALERT,
"%s: %s", "%s: %s",
_("Message from account manager"), _("Message from account manager"),
boincerror(error_num) boincerror(error_num)
); );
got_error = true;
} }
} }
if (error_num) { if (got_error) {
gstate.acct_mgr_info.next_rpc_time = gstate.acct_mgr_info.next_rpc_time =
gstate.now gstate.now
+ calculate_exponential_backoff( + calculate_exponential_backoff(
@ -513,6 +524,26 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
gstate.acct_mgr_info.nfailures++; gstate.acct_mgr_info.nfailures++;
return; return;
} }
// The RPC was successful
//
// Detach projects that are
// - detach_when_done
// - done
// - attached via AM
//
while (1) {
bool found = false;
for (i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
if (p->detach_when_done && !gstate.nresults_for_project(p) && p->attached_via_acct_mgr) {
gstate.detach_project(p);
found = true;
}
}
if (!found) break;
}
gstate.acct_mgr_info.nfailures = 0; gstate.acct_mgr_info.nfailures = 0;
msg_printf(NULL, MSG_INFO, "Account manager contact succeeded"); msg_printf(NULL, MSG_INFO, "Account manager contact succeeded");
@ -559,15 +590,6 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
// //
for (i=0; i<accounts.size(); i++) { for (i=0; i<accounts.size(); i++) {
AM_ACCOUNT& acct = accounts[i]; AM_ACCOUNT& acct = accounts[i];
retval = check_string_signature2(
acct.url.c_str(), acct.url_signature, ami.signing_key, verified
);
if (retval || !verified) {
msg_printf(NULL, MSG_INTERNAL_ERROR,
"Bad signature for URL %s", acct.url.c_str()
);
continue;
}
pp = gstate.lookup_project(acct.url.c_str()); pp = gstate.lookup_project(acct.url.c_str());
if (pp) { if (pp) {
if (acct.detach) { if (acct.detach) {
@ -603,12 +625,16 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
pp->attached_via_acct_mgr = true; pp->attached_via_acct_mgr = true;
if (acct.dont_request_more_work.present) { if (acct.dont_request_more_work.present) {
pp->dont_request_more_work = acct.dont_request_more_work.value; pp->dont_request_more_work = acct.dont_request_more_work.value;
} else {
pp->dont_request_more_work = false;
} }
if (acct.detach_when_done.present) { if (acct.detach_when_done.present) {
pp->detach_when_done = acct.detach_when_done.value; pp->detach_when_done = acct.detach_when_done.value;
if (pp->detach_when_done) { if (pp->detach_when_done) {
pp->dont_request_more_work = true; pp->dont_request_more_work = true;
} }
} else {
pp->detach_when_done = false;
} }
// initiate a scheduler RPC if requested by AMS // initiate a scheduler RPC if requested by AMS
@ -656,6 +682,15 @@ void ACCT_MGR_OP::handle_reply(int http_op_retval) {
} else { } else {
// here we don't already have the project. // here we don't already have the project.
// //
retval = check_string_signature2(
acct.url.c_str(), acct.url_signature, ami.signing_key, verified
);
if (retval || !verified) {
msg_printf(NULL, MSG_INTERNAL_ERROR,
"Bad signature for URL %s", acct.url.c_str()
);
continue;
}
if (acct.authenticator.empty()) { if (acct.authenticator.empty()) {
msg_printf(NULL, MSG_INFO, msg_printf(NULL, MSG_INFO,
"Account manager reply missing authenticator for %s", "Account manager reply missing authenticator for %s",

View File

@ -1444,29 +1444,25 @@ bool CLIENT_STATE::garbage_collect() {
// because detach_project() calls garbage_collect_always(), // because detach_project() calls garbage_collect_always(),
// and we need to avoid infinite recursion // and we need to avoid infinite recursion
// //
if (acct_mgr_info.using_am()) { while (1) {
bool found = false;
for (unsigned i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->detach_when_done && !nresults_for_project(p)) {
// If we're using an AM, // If we're using an AM,
// start an AM RPC rather than detaching the projects; // wait until the next successful RPC to detach project,
// the RPC completion handler will detach them. // so the AM will be informed of its work done.
// This way the AM will be informed of their work done.
// //
for (unsigned i=0; i<projects.size(); i++) { if (!p->attached_via_acct_mgr) {
PROJECT* p = projects[i]; msg_printf(p, MSG_INFO, "Detaching - no more tasks");
if (p->detach_when_done && !nresults_for_project(p)) {
acct_mgr_info.next_rpc_time = 0;
acct_mgr_info.poll();
break;
}
}
} else {
for (unsigned i=0; i<projects.size(); i++) {
PROJECT* p = projects[i];
if (p->detach_when_done && !nresults_for_project(p)) {
detach_project(p); detach_project(p);
action = true; action = true;
found = true;
} }
} }
} }
if (!found) break;
}
#endif #endif
return action; return action;
} }

View File

@ -332,6 +332,8 @@ int PROJECT::parse_state(XML_PARSER& xp) {
if (xp.parse_double("cpu_time", cpu_time)) continue; if (xp.parse_double("cpu_time", cpu_time)) continue;
if (xp.parse_double("gpu_ec", gpu_ec)) continue; if (xp.parse_double("gpu_ec", gpu_ec)) continue;
if (xp.parse_double("gpu_time", gpu_time)) continue; if (xp.parse_double("gpu_time", gpu_time)) continue;
if (xp.parse_double("disk_usage", disk_usage)) continue;
if (xp.parse_double("disk_share", disk_share)) continue;
#ifdef SIM #ifdef SIM
if (xp.match_tag("available")) { if (xp.match_tag("available")) {
available.parse(xp, "/available"); available.parse(xp, "/available");
@ -529,8 +531,10 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) {
" <cpu_ec>%f</cpu_ec>\n" " <cpu_ec>%f</cpu_ec>\n"
" <cpu_time>%f</cpu_time>\n" " <cpu_time>%f</cpu_time>\n"
" <gpu_ec>%f</gpu_ec>\n" " <gpu_ec>%f</gpu_ec>\n"
" <gpu_time>%f</gpu_time>\n", " <gpu_time>%f</gpu_time>\n"
cpu_ec, cpu_time, gpu_ec, gpu_time " <disk_usage>%f</disk_usage>\n"
" <disk_share>%f</disk_share>\n",
cpu_ec, cpu_time, gpu_ec, gpu_time, disk_usage, disk_share
); );
} }
out.printf( out.printf(

View File

@ -150,6 +150,10 @@ struct PROJECT : PROJ_AM {
// Reasons are enumerated in lib/common_defs.h // Reasons are enumerated in lib/common_defs.h
bool trickle_up_pending; bool trickle_up_pending;
// have trickle up to send // have trickle up to send
double disk_usage;
// computed by get_disk_usages()
double disk_share;
// computed by get_disk_shares();
/////// END OF ITEMS STORED IN client_state.xml /////// END OF ITEMS STORED IN client_state.xml
@ -171,10 +175,6 @@ struct PROJECT : PROJ_AM {
// to make sure they haven't been tampered with. // to make sure they haven't been tampered with.
// This provides only the illusion of security. // This provides only the illusion of security.
bool use_symlinks; bool use_symlinks;
double disk_usage;
// computed by get_disk_usages()
double disk_share;
// computed by get_disk_shares();
bool report_results_immediately; bool report_results_immediately;
// items sent in scheduler replies, // items sent in scheduler replies,