2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-08-04 03:50:04 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2005-08-04 03:50:04 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2005-08-04 03:50:04 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-08-04 03:50:04 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2005-08-04 03:50:04 +00:00
|
|
|
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check dont_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
// scheduler code related to sending "lost" work
|
|
|
|
// (i.e. results we sent to the host, but which they're not reporting)
|
|
|
|
//
|
|
|
|
// TODO:
|
|
|
|
// - make sure result is still needed (no canonical result yet)
|
|
|
|
// - don't send if project has been reset since first send;
|
|
|
|
// this result may have been the cause of reset
|
|
|
|
// (need to pass last reset time from client)
|
2005-08-04 03:50:04 +00:00
|
|
|
|
2009-03-19 16:35:35 +00:00
|
|
|
#include "config.h"
|
|
|
|
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstring>
|
|
|
|
#include <string>
|
|
|
|
|
2005-08-04 03:50:04 +00:00
|
|
|
#include "error_numbers.h"
|
|
|
|
|
2013-04-09 19:19:00 +00:00
|
|
|
#include "sched_check.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
#include "sched_config.h"
|
2012-11-08 07:43:43 +00:00
|
|
|
#include "sched_customize.h"
|
2009-03-19 16:35:35 +00:00
|
|
|
#include "sched_locality.h"
|
2013-04-09 19:19:00 +00:00
|
|
|
#include "sched_main.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
#include "sched_msgs.h"
|
|
|
|
#include "sched_send.h"
|
2009-03-19 16:35:35 +00:00
|
|
|
#include "sched_shmem.h"
|
2013-04-09 19:19:00 +00:00
|
|
|
#include "sched_types.h"
|
2009-03-19 16:35:35 +00:00
|
|
|
#include "sched_util.h"
|
|
|
|
#include "sched_version.h"
|
2005-08-04 03:58:00 +00:00
|
|
|
|
|
|
|
#include "sched_resend.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
#ifdef _USING_FCGI_
|
2008-09-09 19:10:42 +00:00
|
|
|
#include "boinc_fcgi.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
#endif
|
|
|
|
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check dont_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
// Assign a new deadline for the result;
|
|
|
|
// if it's not likely to complete by this time, return nonzero.
|
|
|
|
// TODO: EXPLAIN THE FORMULA FOR NEW DEADLINE
|
2005-08-04 03:50:04 +00:00
|
|
|
//
|
|
|
|
static int possibly_give_result_new_deadline(
|
2008-12-15 21:14:32 +00:00
|
|
|
DB_RESULT& result, WORKUNIT& wu, BEST_APP_VERSION& bav
|
2005-08-04 03:50:04 +00:00
|
|
|
) {
|
|
|
|
const double resend_frac = 0.5; // range [0, 1)
|
2007-11-30 23:02:55 +00:00
|
|
|
int now = time(0);
|
|
|
|
int result_report_deadline = now + (int)(resend_frac*(result.report_deadline - result.sent_time));
|
2005-08-04 03:50:04 +00:00
|
|
|
|
|
|
|
if (result_report_deadline < result.report_deadline) {
|
|
|
|
result_report_deadline = result.report_deadline;
|
|
|
|
}
|
2007-11-30 23:02:55 +00:00
|
|
|
if (result_report_deadline > now + wu.delay_bound) {
|
|
|
|
result_report_deadline = now + wu.delay_bound;
|
2005-08-04 03:50:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// If infeasible, return without modifying result
|
|
|
|
//
|
2008-12-15 21:14:32 +00:00
|
|
|
if (estimate_duration(wu, bav) > result_report_deadline-now) {
|
2008-08-19 03:00:17 +00:00
|
|
|
if (config.debug_resend) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] [RESULT#%u] [HOST#%d] not resending lost result: can't complete in time\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
result.id, g_reply->host.id
|
2008-08-19 03:00:17 +00:00
|
|
|
);
|
|
|
|
}
|
2005-08-04 03:50:04 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// update result with new report time and sent time
|
|
|
|
//
|
2008-08-19 03:00:17 +00:00
|
|
|
if (config.debug_resend) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] [RESULT#%u] [HOST#%d] %s report_deadline (resend lost work)\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
result.id, g_reply->host.id,
|
2008-08-19 03:00:17 +00:00
|
|
|
result_report_deadline==result.report_deadline?"NO update to":"Updated"
|
|
|
|
);
|
|
|
|
}
|
2007-11-30 23:02:55 +00:00
|
|
|
result.sent_time = now;
|
2005-08-04 03:50:04 +00:00
|
|
|
result.report_deadline = result_report_deadline;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-11-30 23:02:55 +00:00
|
|
|
// resend any jobs that:
|
|
|
|
// 1) we already sent to this host;
|
|
|
|
// 2) are still in progress (i.e. haven't timed out) and
|
|
|
|
// 3) aren't present on the host
|
|
|
|
// Return true if there were any such jobs
|
|
|
|
//
|
2008-12-19 18:14:02 +00:00
|
|
|
bool resend_lost_work() {
|
2011-06-06 03:40:42 +00:00
|
|
|
SCHED_DB_RESULT result;
|
2005-08-04 03:50:04 +00:00
|
|
|
std::vector<DB_RESULT>results;
|
|
|
|
unsigned int i;
|
|
|
|
char buf[256];
|
2007-11-30 23:02:55 +00:00
|
|
|
char warning_msg[256];
|
2005-08-04 03:50:04 +00:00
|
|
|
bool did_any = false;
|
2007-11-30 23:02:55 +00:00
|
|
|
int num_eligible_to_resend=0;
|
2005-08-04 03:50:04 +00:00
|
|
|
int num_resent=0;
|
2012-03-20 04:36:14 +00:00
|
|
|
BEST_APP_VERSION* bavp = NULL;
|
|
|
|
APP* app = NULL;
|
2005-08-04 03:50:04 +00:00
|
|
|
int retval;
|
|
|
|
|
|
|
|
sprintf(buf, " where hostid=%d and server_state=%d ",
|
2008-12-19 18:14:02 +00:00
|
|
|
g_reply->host.id, RESULT_SERVER_STATE_IN_PROGRESS
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
|
|
|
while (!result.enumerate(buf)) {
|
2010-05-11 19:50:14 +00:00
|
|
|
if (!work_needed(false)) {
|
|
|
|
result.end_enumerate();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2005-08-04 03:50:04 +00:00
|
|
|
bool found = false;
|
2008-12-19 18:14:02 +00:00
|
|
|
for (i=0; i<g_request->other_results.size(); i++) {
|
|
|
|
OTHER_RESULT& orp = g_request->other_results[i];
|
2009-06-01 22:15:14 +00:00
|
|
|
if (!strcmp(orp.name, result.name)) {
|
2005-08-04 03:50:04 +00:00
|
|
|
found = true;
|
2007-11-30 23:02:55 +00:00
|
|
|
break;
|
2005-08-04 03:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2007-11-30 23:02:55 +00:00
|
|
|
if (found) continue;
|
|
|
|
|
|
|
|
num_eligible_to_resend++;
|
2008-08-19 03:00:17 +00:00
|
|
|
if (config.debug_resend) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] [HOST#%d] found lost [RESULT#%u]: %s\n",
|
2008-12-19 18:14:02 +00:00
|
|
|
g_reply->host.id, result.id, result.name
|
2008-08-19 03:00:17 +00:00
|
|
|
);
|
|
|
|
}
|
2007-11-30 23:02:55 +00:00
|
|
|
|
|
|
|
DB_WORKUNIT wu;
|
2012-11-08 07:43:43 +00:00
|
|
|
bool can_resend = true;
|
2007-11-30 23:02:55 +00:00
|
|
|
retval = wu.lookup_id(result.workunitid);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[HOST#%d] can't resend - WU not found for [RESULT#%u]\n",
|
2008-12-19 18:14:02 +00:00
|
|
|
g_reply->host.id, result.id
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
2012-11-08 07:43:43 +00:00
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
if (can_resend) {
|
2012-03-20 04:36:14 +00:00
|
|
|
app = ssp->lookup_app(wu.appid);
|
2014-06-09 03:20:25 +00:00
|
|
|
if (!app) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"can't resend - app not found for [RESULT#%u]\n", result.id
|
|
|
|
);
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (can_resend && app->deprecated) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[RESULT#%u] can't resend - app is deprecated \n", result.id
|
|
|
|
);
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
if (can_resend && app_not_selected(app->id)) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[RESULT#%u] can't resend - app is not selected\n", result.id
|
|
|
|
);
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
if (can_resend) {
|
2012-11-08 07:43:43 +00:00
|
|
|
bavp = get_app_version(wu, true, false);
|
2012-03-20 04:36:14 +00:00
|
|
|
if (!bavp) {
|
2012-11-08 07:43:43 +00:00
|
|
|
if (config.debug_resend) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[HOST#%d] can't resend [RESULT#%u]: no app version for %s\n",
|
2012-11-08 07:43:43 +00:00
|
|
|
g_reply->host.id, result.id, app->name
|
|
|
|
);
|
|
|
|
}
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (can_resend && wu.error_mask) {
|
|
|
|
if (config.debug_resend) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] skipping [RESULT#%u]: WU error mask %d\n",
|
2012-11-08 07:43:43 +00:00
|
|
|
result.id, wu.error_mask
|
2012-03-20 04:36:14 +00:00
|
|
|
);
|
|
|
|
}
|
2012-11-08 07:43:43 +00:00
|
|
|
can_resend = false;
|
2007-11-30 23:02:55 +00:00
|
|
|
}
|
2012-11-08 07:43:43 +00:00
|
|
|
if (can_resend && wu.canonical_resultid) {
|
|
|
|
if (config.debug_resend) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] skipping [RESULT#%u]: already have canonical result\n",
|
2012-11-08 07:43:43 +00:00
|
|
|
result.id
|
|
|
|
);
|
|
|
|
}
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
if (can_resend && wu_is_infeasible_fast(
|
|
|
|
wu, result.server_state, result.priority, result.report_deadline,
|
|
|
|
*app, *bavp
|
|
|
|
)) {
|
2008-08-19 03:00:17 +00:00
|
|
|
if (config.debug_resend) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] skipping [RESULT#%u]: feasibility check failed\n",
|
2012-11-08 07:43:43 +00:00
|
|
|
result.id
|
2008-08-19 03:00:17 +00:00
|
|
|
);
|
|
|
|
}
|
2012-11-08 07:43:43 +00:00
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
if (can_resend && possibly_give_result_new_deadline(result, wu, *bavp)) {
|
|
|
|
if (config.debug_resend) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[resend] skipping [RESULT#%u]: deadline assignment failed\n",
|
2012-11-08 07:43:43 +00:00
|
|
|
result.id
|
|
|
|
);
|
|
|
|
}
|
|
|
|
can_resend = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we can't resend this job for any of the above reasons,
|
|
|
|
// make it time out so that the transitioner does the right thing.
|
|
|
|
//
|
|
|
|
if (!can_resend) {
|
2007-11-30 23:02:55 +00:00
|
|
|
result.report_deadline = time(0)-1;
|
2011-02-15 22:07:14 +00:00
|
|
|
retval = result.mark_as_sent(result.server_state, config.report_grace_period);
|
2005-08-04 03:50:04 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"resend_lost_work: can't update result deadline: %s\n",
|
|
|
|
boincerror(retval)
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2011-06-06 03:40:42 +00:00
|
|
|
retval = update_wu_on_send(
|
|
|
|
wu, result.report_deadline + config.report_grace_period,
|
|
|
|
*app, *bavp
|
2011-02-15 22:07:14 +00:00
|
|
|
);
|
2007-11-30 23:02:55 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2010-11-08 17:51:57 +00:00
|
|
|
"resend_lost_result: can't update WU transition time: %s\n",
|
|
|
|
boincerror(retval)
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
2007-11-30 23:02:55 +00:00
|
|
|
sprintf(warning_msg,
|
2010-03-03 19:29:23 +00:00
|
|
|
"Didn't resend lost task %s (expired)", result.name
|
2007-11-30 23:02:55 +00:00
|
|
|
);
|
2010-06-15 17:56:30 +00:00
|
|
|
g_reply->insert_message(warning_msg, "low");
|
2007-11-30 23:02:55 +00:00
|
|
|
} else {
|
2009-03-03 16:38:54 +00:00
|
|
|
retval = add_result_to_reply(result, wu, bavp, false);
|
2005-08-04 03:50:04 +00:00
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2013-06-19 17:15:08 +00:00
|
|
|
"[HOST#%d] failed to send [RESULT#%u]\n",
|
2008-12-19 18:14:02 +00:00
|
|
|
g_reply->host.id, result.id
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
2010-03-03 19:29:23 +00:00
|
|
|
sprintf(warning_msg, "Resent lost task %s", result.name);
|
2010-06-15 17:56:30 +00:00
|
|
|
g_reply->insert_message(warning_msg, "low");
|
2005-08-04 03:50:04 +00:00
|
|
|
num_resent++;
|
|
|
|
did_any = true;
|
2009-05-15 21:12:46 +00:00
|
|
|
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->njobs_sent >= config.max_wus_to_send) {
|
2009-05-15 21:12:46 +00:00
|
|
|
result.end_enumerate();
|
|
|
|
break;
|
|
|
|
}
|
2005-08-04 03:50:04 +00:00
|
|
|
}
|
|
|
|
}
|
2007-05-14 15:21:38 +00:00
|
|
|
|
2008-08-19 03:00:17 +00:00
|
|
|
if (num_eligible_to_resend && config.debug_resend) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-05-15 21:12:46 +00:00
|
|
|
"[resend] [HOST#%d] %d lost results, resent %d\n",
|
|
|
|
g_reply->host.id, num_eligible_to_resend, num_resent
|
2005-08-04 03:50:04 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
return did_any;
|
|
|
|
}
|
|
|
|
|
2005-09-13 09:01:56 +00:00
|
|
|
// Revision-identification string for this source file.
// NOTE(review): "$Id$" looks like a version-control keyword placeholder
// that is expanded on checkout - confirm it is still used by the build.
const char *BOINC_RCSID_3be23838b4="$Id$";
|