diff --git a/checkin_notes b/checkin_notes index bcb68c0ce1..f8cc073732 100755 --- a/checkin_notes +++ b/checkin_notes @@ -7292,3 +7292,28 @@ David 1 June 2005 am_create.php am_query.php login_action.php + +David 1 June 2005 + - Reorganization of scheduler RPC and master file fetch code + trying to get rid of bug where one project's scheduler URLs + end up in another project. + + The following conditions are either all true or all false: + 1) SCHEDULER_OP::cur_proj is nonzero + 2) SCHEDULER_OP::state is not IDLE + 3) SCHEDULER_OP::http_op is in HTTP_OP_SET. + + These become true in: + SCHEDULER_OP::start_rpc() + SCHEDULER_OP::init_master_fetch() + + These become false in: + SCHEDULER_OP::poll() (when an operation finishes) + SCHEDULER_OP::abort() (called when a project is reset or aborted) + + - Changed SCHEDULER_OP from struct to class and hid some of its data + + client/ + client_state.C + cs_scheduler.C + scheduler_op.C,h diff --git a/client/client_state.C b/client/client_state.C index 5bd345a565..140cd3e796 100644 --- a/client/client_state.C +++ b/client/client_state.C @@ -1189,12 +1189,7 @@ int CLIENT_STATE::reset_project(PROJECT* project) { // if we're in the middle of a scheduler op to the project, abort it // - if (scheduler_op->state != SCHEDULER_OP_STATE_IDLE - && scheduler_op->project == project - ) { - http_ops->remove(&scheduler_op->http_op); - scheduler_op->state = SCHEDULER_OP_STATE_IDLE; - } + scheduler_op->abort(project); // mark results as server-acked. // This will cause garbage_collect to delete them, diff --git a/client/cs_scheduler.C b/client/cs_scheduler.C index 451694d733..a4b4698223 100644 --- a/client/cs_scheduler.C +++ b/client/cs_scheduler.C @@ -141,6 +141,8 @@ PROJECT* CLIENT_STATE::next_project_sched_rpc_pending() { // 3) has master_url_fetch_pending == false // 4) has dont_request_more_work == false // +// TODO: finish this comment. What is "urgency"? 
+// PROJECT* CLIENT_STATE::next_project_need_work(PROJECT* old, int urgency) { PROJECT *p, *p_prospect = NULL; double work_on_prospect=0; diff --git a/client/scheduler_op.C b/client/scheduler_op.C index 0c207189a3..461a628ace 100644 --- a/client/scheduler_op.C +++ b/client/scheduler_op.C @@ -56,20 +56,20 @@ SCHEDULER_OP::SCHEDULER_OP(HTTP_OP_SET* h) { bool SCHEDULER_OP::check_master_fetch_start() { int retval; - project = gstate.next_project_master_pending(); - if (!project) return false; - retval = init_master_fetch(); + PROJECT* p = gstate.next_project_master_pending(); + if (!p) return false; + retval = init_master_fetch(p); if (retval) { - msg_printf(project, MSG_ERROR, + msg_printf(p, MSG_ERROR, "Couldn't read master page for %s: error %d", - project->get_project_name(), retval + p->get_project_name(), retval ); - if (project->tentative) { - msg_printf(project, MSG_ERROR, "Detaching from project - check for URL error"); - project_add_failed(project); + if (p->tentative) { + msg_printf(p, MSG_ERROR, "Detaching from project - check for URL error"); + project_add_failed(p); } else { - project->master_fetch_failures++; - backoff(project, "Master file fetch failed\n"); + p->master_fetch_failures++; + backoff(p, "Master file fetch failed\n"); } return false; } @@ -85,12 +85,12 @@ int SCHEDULER_OP::init_get_work(int urgency) { char err_msg[256]; must_get_work = true; - project = gstate.next_project_need_work(0, urgency); - if (project) { - retval = init_op_project(); + PROJECT* p = gstate.next_project_need_work(0, urgency); + if (p) { + retval = init_op_project(p); if (retval) { sprintf(err_msg, "init_op_project failed, error %d\n", retval); - backoff(project, err_msg); + backoff(p, err_msg); return retval; } } @@ -103,46 +103,48 @@ int SCHEDULER_OP::init_get_work(int urgency) { // int SCHEDULER_OP::init_return_results(PROJECT* p) { must_get_work = false; - project = p; - return init_op_project(); + return init_op_project(p); } -// try to initiate an RPC to the 
current project. +// try to initiate an RPC to the given project. // If there are multiple schedulers, start with a random one // -int SCHEDULER_OP::init_op_project() { +int SCHEDULER_OP::init_op_project(PROJECT* p) { int retval; char err_msg[256]; SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_OP); - scope_messages.printf("SCHEDULER_OP::init_op_project(): starting op for %s\n", project->master_url); + scope_messages.printf( + "SCHEDULER_OP::init_op_project(): starting op for %s\n", + p->master_url + ); // if project has no schedulers, // skip everything else and just get its master file. // - if (project->scheduler_urls.size() == 0) { - retval = init_master_fetch(); + if (p->scheduler_urls.size() == 0) { + retval = init_master_fetch(p); if (retval) { sprintf(err_msg, "Master fetch initialization failed: %d\n", retval ); - backoff(project, err_msg); + backoff(p, err_msg); } return retval; } url_index = 0; - retval = gstate.make_scheduler_request(project); + retval = gstate.make_scheduler_request(p); if (!retval) { - retval = start_rpc(); + retval = start_rpc(p); } if (retval) { sprintf(err_msg, "Scheduler request initialization to %s failed, error %d\n", - project->get_scheduler_url(url_index, url_random), retval + p->get_scheduler_url(url_index, url_random), retval ); - backoff(project, err_msg); + backoff(p, err_msg); } return retval; } @@ -213,73 +215,78 @@ void SCHEDULER_OP::backoff(PROJECT* p, const char *error_msg ) { // If successful, creates an HTTP_OP that must be polled // PRECONDITION: the request file has been created // -int SCHEDULER_OP::start_rpc() { +int SCHEDULER_OP::start_rpc(PROJECT* p) { int retval; char request_file[1024], reply_file[1024]; SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_OP); - safe_strcpy(scheduler_url, project->get_scheduler_url(url_index, url_random)); + safe_strcpy(scheduler_url, p->get_scheduler_url(url_index, url_random)); if (log_flags.sched_ops) { msg_printf( - project, 
MSG_INFO, + p, MSG_INFO, "Sending scheduler request to %s\n", scheduler_url ); msg_printf( - project, MSG_INFO, + p, MSG_INFO, "Requesting %.0f seconds of work, returning %d results\n", - project->work_request, project->nresults_returned + p->work_request, p->nresults_returned ); } - get_sched_request_filename(*project, request_file); - get_sched_reply_filename(*project, reply_file); + get_sched_request_filename(*p, request_file); + get_sched_reply_filename(*p, reply_file); scope_messages.printf_file(request_file, "req:"); http_op.set_proxy(&gstate.proxy_info); retval = http_op.init_post(scheduler_url, request_file, reply_file); if (retval) { - msg_printf(project, MSG_ERROR, + msg_printf(p, MSG_ERROR, "Scheduler request failed init_post(): %d", retval ); return retval; } retval = http_ops->insert(&http_op); if (retval) { - msg_printf(project, MSG_ERROR, + msg_printf(p, MSG_ERROR, "Scheduler request failed HTTP insert: %d", retval ); return retval; } - project->rpc_seqno++; + p->rpc_seqno++; + cur_proj = p; // remember what project we're talking to state = SCHEDULER_OP_STATE_RPC; return 0; } // initiate a fetch of a project's master URL file // -int SCHEDULER_OP::init_master_fetch() { +int SCHEDULER_OP::init_master_fetch(PROJECT* p) { int retval; char master_filename[256]; - get_master_filename(*project, master_filename); + get_master_filename(*p, master_filename); SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_OP); - scope_messages.printf("SCHEDULER_OP::init_master_fetch(): Fetching master file for %s\n", project->master_url); + scope_messages.printf( + "SCHEDULER_OP::init_master_fetch(): Fetching master file for %s\n", + p->master_url + ); http_op.set_proxy(&gstate.proxy_info); - retval = http_op.init_get(project->master_url, master_filename, true); + retval = http_op.init_get(p->master_url, master_filename, true); if (retval) return retval; retval = http_ops->insert(&http_op); if (retval) return retval; + cur_proj = p; state = 
SCHEDULER_OP_STATE_GET_MASTER; return 0; } // parse a master file. // -int SCHEDULER_OP::parse_master_file(vector<std::string> &urls) { +int SCHEDULER_OP::parse_master_file(PROJECT* p, vector<std::string> &urls) { char buf[256]; char master_filename[256]; std::string str; @@ -287,24 +294,24 @@ int SCHEDULER_OP::parse_master_file(vector<std::string> &urls) { SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_OP); - get_master_filename(*project, master_filename); + get_master_filename(*p, master_filename); f = boinc_fopen(master_filename, "r"); if (!f) { - msg_printf(project, MSG_ERROR, "Can't open master file\n"); + msg_printf(p, MSG_ERROR, "Can't open master file\n"); return ERR_FOPEN; } - project->scheduler_urls.clear(); + p->scheduler_urls.clear(); while (fgets(buf, 256, f)) { // allow for the possibility of > 1 tag per line here // (UMTS may collapse lines) // - char* p = buf; - while (p && parse_str(p, "<scheduler>", str)) { + char* q = buf; + while (q && parse_str(q, "<scheduler>", str)) { strip_whitespace(str); urls.push_back(str); - p = strstr(p, "</scheduler>"); - if (p) p += strlen("</scheduler>"); + q = strstr(q, "</scheduler>"); + if (q) q += strlen("</scheduler>"); } } fclose(f); @@ -323,15 +330,15 @@ int SCHEDULER_OP::parse_master_file(vector<std::string> &urls) { // transfer scheduler URLs to project. 
// Return true if any of them is new // -bool SCHEDULER_OP::update_urls(vector<std::string> &urls) { +bool SCHEDULER_OP::update_urls(PROJECT* p, vector<std::string> &urls) { unsigned int i, j; bool found, any_new; any_new = false; for (i=0; i<urls.size(); i++) { found = false; - for (j=0; j<project->scheduler_urls.size(); j++) { - if (urls[i] == project->scheduler_urls[j]) { + for (j=0; j<p->scheduler_urls.size(); j++) { + if (urls[i] == p->scheduler_urls[j]) { found = true; break; } @@ -339,9 +346,9 @@ bool SCHEDULER_OP::update_urls(vector<std::string> &urls) { if (!found) any_new = true; } - project->scheduler_urls.clear(); + p->scheduler_urls.clear(); for (i=0; i<urls.size(); i++) { - project->scheduler_urls.push_back(urls[i]); + p->scheduler_urls.push_back(urls[i]); } return any_new; @@ -353,8 +360,8 @@ bool SCHEDULER_OP::poll() { int retval, nresults; vector<std::string> urls; bool changed, scheduler_op_done; - bool action = false, err = false; - char err_msg[256], *err_url=NULL; + bool err = false; + char err_msg[256]; SCOPE_MSG_LOG scope_messages(log_messages, CLIENT_MSG_LOG::DEBUG_SCHED_OP); @@ -363,75 +370,64 @@ bool SCHEDULER_OP::poll() { // here we're fetching the master file for a project // if (http_op.http_op_state == HTTP_STATE_DONE) { - action = true; - project->master_url_fetch_pending = false; - gstate.set_client_state_dirty("master URL fetch done"); + state = SCHEDULER_OP_STATE_IDLE; + cur_proj->master_url_fetch_pending = false; http_ops->remove(&http_op); + gstate.set_client_state_dirty("master URL fetch done"); if (http_op.http_op_retval == 0) { scope_messages.printf( "SCHEDULER_OP::poll(): Got master file from %s; parsing\n", - project->master_url + cur_proj->master_url ); - retval = parse_master_file(urls); + retval = parse_master_file(cur_proj, urls); if (retval) { // master file parse failed. 
// - if (project->tentative) { - PROJECT* project_temp = project; - project = 0; // keep detach(0) from removing HTTP OP - state = SCHEDULER_OP_STATE_IDLE; // avoid double remove + if (cur_proj->tentative) { + PROJECT* project_temp = cur_proj; + cur_proj = 0; // keep detach(0) from removing HTTP OP project_add_failed(project_temp); err = true; } else { - project->master_fetch_failures++; - backoff(project, "Master file parse failed\n"); - err_url = project->master_url; + cur_proj->master_fetch_failures++; + backoff(cur_proj, "Master file parse failed\n"); } } else { - // everything succeeded. Clear error counters + // parse succeeded + // + cur_proj->master_fetch_failures = 0; + changed = update_urls(cur_proj, urls); + + // reenable scheduler RPCs if have new URLs // - changed = update_urls(urls); if (changed) { - project->min_rpc_time = 0; - project->nrpc_failures = 0; - project->master_fetch_failures = 0; + cur_proj->min_rpc_time = 0; + cur_proj->nrpc_failures = 0; } } } else { // master file fetch failed. // - project->master_fetch_failures++; - backoff(project, "Master file fetch failed\n"); - err_url = project->master_url; + cur_proj->master_fetch_failures++; + backoff(cur_proj, "Master file fetch failed\n"); } // Done with master file fetch. // If tentative project and don't have any schedulers, // it may be the wrong URL. 
notify the user // - if (!err && project->scheduler_urls.size() == 0) { - if (project->tentative) { - state = SCHEDULER_OP_STATE_IDLE; // avoid double remove - project_add_failed(project); + if (!err && cur_proj->scheduler_urls.size() == 0) { + if (cur_proj->tentative) { + project_add_failed(cur_proj); } else { - sprintf(err_msg, - "Could not contact any schedulers for %s.", - err_url - ); - msg_printf(project, MSG_ERROR, err_msg); - project->master_fetch_failures++; - backoff(project, err_msg); + sprintf(err_msg, "Master page has no schedulers"); + msg_printf(cur_proj, MSG_ERROR, err_msg); + cur_proj->master_fetch_failures++; + backoff(cur_proj, err_msg); } } - - // See if need to read master file for another project; - // if not, we're done for now - // - if (!check_master_fetch_start()) { - state = SCHEDULER_OP_STATE_IDLE; - scope_messages.printf("SCHEDULER_OP::poll(): return to idle state\n"); - } - + cur_proj = NULL; + return true; } break; case SCHEDULER_OP_STATE_RPC: @@ -440,13 +436,13 @@ bool SCHEDULER_OP::poll() { // scheduler_op_done = false; if (http_op.http_op_state == HTTP_STATE_DONE) { - action = true; + state = SCHEDULER_OP_STATE_IDLE; http_ops->remove(&http_op); if (http_op.http_op_retval) { if (log_flags.sched_ops) { - msg_printf(project, MSG_ERROR, + msg_printf(cur_proj, MSG_ERROR, "Scheduler request to %s failed\n", - project->get_scheduler_url(url_index, url_random) + cur_proj->get_scheduler_url(url_index, url_random) ); } @@ -454,53 +450,36 @@ bool SCHEDULER_OP::poll() { // while (1) { url_index++; - if (url_index == project->scheduler_urls.size()) break; - retval = start_rpc(); - if (!retval) break; + if (url_index == cur_proj->scheduler_urls.size()) break; + retval = start_rpc(cur_proj); + if (!retval) return true; } - if (url_index == project->scheduler_urls.size()) { - backoff(project, "No schedulers responded"); - if (must_get_work) { - int urgency = gstate.compute_work_requests(); - if (urgency != WORK_FETCH_DONT_NEED) { - project = 
gstate.next_project_need_work(project, urgency); - if (project) { - retval = init_op_project(); - } else { - scheduler_op_done = true; - } - scheduler_op_done = true; - } - } else { - scheduler_op_done = true; - } + if (url_index == cur_proj->scheduler_urls.size()) { + backoff(cur_proj, "No schedulers responded"); + scheduler_op_done = true; } } else { if (log_flags.sched_ops) { msg_printf( - project, MSG_INFO, + cur_proj, MSG_INFO, "Scheduler request to %s succeeded\n", - project->get_scheduler_url(url_index, url_random) + cur_proj->get_scheduler_url(url_index, url_random) ); } - retval = gstate.handle_scheduler_reply(project, scheduler_url, nresults); + retval = gstate.handle_scheduler_reply(cur_proj, scheduler_url, nresults); // if this was a tentative project and we didn't get user name, // the account ID must be bad. Tell the user. // - if (project->tentative) { - if (retval || strlen(project->user_name)==0) { - state = SCHEDULER_OP_STATE_IDLE; // avoid double remove - project_add_failed(project); + if (cur_proj->tentative) { + if (retval || strlen(cur_proj->user_name)==0) { + project_add_failed(cur_proj); } else { - project->tentative = false; - retval = project->write_account_file(); + cur_proj->tentative = false; + retval = cur_proj->write_account_file(); if (retval) { - project_add_failed(project); + project_add_failed(cur_proj); } -#if 0 - gstate.calc_all_proj_size(); -#endif } } else { switch (retval) { @@ -509,61 +488,33 @@ bool SCHEDULER_OP::poll() { // back off this project // if (must_get_work && nresults==0) { - backoff(project, "No work from project\n"); + backoff(cur_proj, "No work from project\n"); } else { - project->nrpc_failures = 0; + cur_proj->nrpc_failures = 0; } break; case ERR_PROJECT_DOWN: - backoff(project, "Project is down"); + backoff(cur_proj, "Project is down"); break; default: - backoff(project, "Can't parse scheduler reply"); + backoff(cur_proj, "Can't parse scheduler reply"); break; } } - - // if we didn't get all the work we 
needed, - // ask another project for work - // - if (must_get_work) { - int urgency = gstate.compute_work_requests(); - if (urgency != WORK_FETCH_DONT_NEED) { - project = gstate.next_project_need_work(project, urgency); - if (project) { - retval = init_op_project(); - } else { - scheduler_op_done = true; - } - } else { - scheduler_op_done = true; - } - } else { - scheduler_op_done = true; - } } + cur_proj = NULL; + return true; } - - // If no outstanding ops, see if need a master fetch - // - if (scheduler_op_done) { - project = gstate.next_project_master_pending(); - if (project) { - retval = init_master_fetch(); - if (retval) { - scope_messages.printf("SCHEDULER_OP::poll(): init_master_fetch failed.\n" ); - backoff(project, "Scheduler op: init_master_fetch failed.\n" ); - } - } else { - state = SCHEDULER_OP_STATE_IDLE; - scope_messages.printf("SCHEDULER_OP::poll(): return to idle state\n"); - } - } - break; - default: - break; } - return action; + return false; +} + +void SCHEDULER_OP::abort(PROJECT* p) { + if (state != SCHEDULER_OP_STATE_IDLE && cur_proj == p) { + gstate.http_ops->remove(&http_op); + state = SCHEDULER_OP_STATE_IDLE; + cur_proj = NULL; + } } SCHEDULER_REPLY::SCHEDULER_REPLY() { diff --git a/client/scheduler_op.h b/client/scheduler_op.h index 75c81f38a9..b2b90b90a5 100644 --- a/client/scheduler_op.h +++ b/client/scheduler_op.h @@ -61,30 +61,36 @@ #define SCHEDULER_OP_STATE_GET_MASTER 1 #define SCHEDULER_OP_STATE_RPC 2 -struct SCHEDULER_OP { - int state; +class SCHEDULER_OP { +private: int scheduler_op_retval; HTTP_OP http_op; HTTP_OP_SET* http_ops; - PROJECT* project; // project we're currently contacting + PROJECT* cur_proj; // project we're currently contacting char scheduler_url[256]; bool must_get_work; // true iff in get_work mode int url_index; // index within project's URL list +public: + int state; double url_random; // used to randomize order +public: SCHEDULER_OP(HTTP_OP_SET*); bool poll(); int init_get_work(int urgency); int 
init_return_results(PROJECT*); - int init_op_project(); - int init_master_fetch(); - int set_min_rpc_time(PROJECT*); - bool update_urls(std::vector<std::string> &urls); - int start_op(PROJECT*); + int init_master_fetch(PROJECT*); bool check_master_fetch_start(); void backoff(PROJECT* p, const char *error_msg); - int start_rpc(); - int parse_master_file(std::vector<std::string>&); + void abort(PROJECT*); + // if we're doing an op to this project, abort it +private: + int init_op_project(PROJECT*); + int set_min_rpc_time(PROJECT*); + bool update_urls(PROJECT*, std::vector<std::string> &urls); + int start_op(PROJECT*); + int start_rpc(PROJECT*); + int parse_master_file(PROJECT*, std::vector<std::string>&); }; struct USER_MESSAGE {