2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2004-07-09 21:17:42 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2004-07-09 21:17:42 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2009-06-01 22:15:14 +00:00
|
|
|
// scheduler code related to sending jobs.
|
|
|
|
// NOTE: there should be nothing here specific to particular
|
|
|
|
// scheduling policies (array scan, matchmaking, locality)
|
2005-02-10 06:21:58 +00:00
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-04-04 01:59:47 +00:00
|
|
|
#include <vector>
|
2008-02-25 18:05:04 +00:00
|
|
|
#include <list>
|
2004-04-04 01:59:47 +00:00
|
|
|
#include <string>
|
2004-06-24 08:49:37 +00:00
|
|
|
#include <ctime>
|
|
|
|
#include <cstdio>
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstring>
|
2005-02-02 18:13:00 +00:00
|
|
|
#include <stdlib.h>
|
2009-03-03 00:12:55 +00:00
|
|
|
#include <sys/time.h>
|
2005-02-10 06:21:58 +00:00
|
|
|
#include <unistd.h>
|
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
#include "error_numbers.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
#include "parse.h"
|
2006-07-11 21:56:53 +00:00
|
|
|
#include "util.h"
|
2007-02-21 16:26:51 +00:00
|
|
|
#include "str_util.h"
|
2009-01-13 23:06:02 +00:00
|
|
|
#include "synch.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
|
2009-08-10 04:49:02 +00:00
|
|
|
#include "sched_types.h"
|
2004-04-04 01:59:47 +00:00
|
|
|
#include "sched_shmem.h"
|
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
2009-08-10 04:49:02 +00:00
|
|
|
#include "sched_main.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
#include "sched_array.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "sched_msgs.h"
|
2007-05-31 18:14:45 +00:00
|
|
|
#include "sched_hr.h"
|
2007-06-20 22:34:06 +00:00
|
|
|
#include "hr.h"
|
2004-09-10 21:02:11 +00:00
|
|
|
#include "sched_locality.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
#include "sched_timezone.h"
|
2008-02-21 00:47:50 +00:00
|
|
|
#include "sched_assign.h"
|
2009-07-29 18:55:50 +00:00
|
|
|
#include "sched_customize.h"
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
#include "sched_send.h"
|
2005-02-10 06:21:58 +00:00
|
|
|
|
2004-07-03 21:38:22 +00:00
|
|
|
#ifdef _USING_FCGI_
|
2008-09-09 19:10:42 +00:00
|
|
|
#include "boinc_fcgi.h"
|
2004-07-03 21:38:22 +00:00
|
|
|
#endif
|
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps were CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
it will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
// Fallback RAM size (bytes), substituted when the host
// reports an impossible (non-positive) amount of memory.
//
const double DEFAULT_RAM_SIZE = 64e6;
|
2008-02-27 22:26:37 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// forward declaration; the definition is not in this part of the file
void send_work_matchmaker();

// NOTE(review): presumably the index of the "preferred app" user message;
// it is not referenced in the visible part of this file - confirm at use site
int preferred_app_message_index = 0;
|
|
|
|
|
2007-11-30 23:02:55 +00:00
|
|
|
const char* infeasible_string(int code) {
|
|
|
|
switch (code) {
|
|
|
|
case INFEASIBLE_MEM: return "Not enough memory";
|
|
|
|
case INFEASIBLE_DISK: return "Not enough disk";
|
|
|
|
case INFEASIBLE_CPU: return "CPU too slow";
|
|
|
|
case INFEASIBLE_APP_SETTING: return "App not selected";
|
|
|
|
case INFEASIBLE_WORKLOAD: return "Existing workload";
|
|
|
|
case INFEASIBLE_DUP: return "Already in reply";
|
|
|
|
case INFEASIBLE_HR: return "Homogeneous redundancy";
|
2008-03-07 21:13:01 +00:00
|
|
|
case INFEASIBLE_BANDWIDTH: return "Download bandwidth too low";
|
2007-11-30 23:02:55 +00:00
|
|
|
}
|
|
|
|
return "Unknown";
|
|
|
|
}
|
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
const double MIN_REQ_SECS = 0;
|
|
|
|
const double MAX_REQ_SECS = (28*SECONDS_IN_DAY);
|
2008-02-21 20:10:10 +00:00
|
|
|
|
2009-01-20 00:54:16 +00:00
|
|
|
// Upper bound on the number of CUDA devices we accept from a client;
// don't believe clients who claim they have more than this
//
const int MAX_CUDA_DEVS = 8;
|
|
|
|
|
2009-06-22 21:11:19 +00:00
|
|
|
// get limits on #jobs per day and per RPC, on in progress
|
|
|
|
//
|
|
|
|
void WORK_REQ::get_job_limits() {
|
|
|
|
int n;
|
|
|
|
n = g_reply->host.p_ncpus;
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_request->global_prefs.max_ncpus_pct && g_request->global_prefs.max_ncpus_pct < 100) {
|
2009-06-22 21:11:19 +00:00
|
|
|
n = (int)((n*g_request->global_prefs.max_ncpus_pct)/100.);
|
2009-06-01 22:15:14 +00:00
|
|
|
}
|
2009-06-22 21:11:19 +00:00
|
|
|
if (n > config.max_ncpus) n = config.max_ncpus;
|
|
|
|
if (n < 1) n = 1;
|
|
|
|
effective_ncpus = n;
|
2009-01-20 00:54:16 +00:00
|
|
|
|
2009-06-22 21:11:19 +00:00
|
|
|
n = 0;
|
2009-06-01 22:15:14 +00:00
|
|
|
COPROC* cp = g_request->coprocs.lookup("CUDA");
|
|
|
|
if (cp) {
|
2009-06-22 21:11:19 +00:00
|
|
|
n = cp->count;
|
|
|
|
if (n > MAX_CUDA_DEVS) n = MAX_CUDA_DEVS;
|
2008-10-21 23:16:07 +00:00
|
|
|
}
|
2009-06-22 21:11:19 +00:00
|
|
|
effective_ngpus = n;
|
2009-01-20 00:54:16 +00:00
|
|
|
|
2009-06-22 21:11:19 +00:00
|
|
|
int mult = effective_ncpus + config.gpu_multiplier * effective_ngpus;
|
2009-06-19 17:21:34 +00:00
|
|
|
|
|
|
|
if (config.max_wus_to_send) {
|
|
|
|
g_wreq->max_jobs_per_rpc = mult * config.max_wus_to_send;
|
|
|
|
} else {
|
|
|
|
g_wreq->max_jobs_per_rpc = 999999;
|
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2009-06-19 17:21:34 +00:00
|
|
|
if (config.daily_result_quota) {
|
|
|
|
if (g_reply->host.max_results_day == 0 || g_reply->host.max_results_day>config.daily_result_quota) {
|
|
|
|
g_reply->host.max_results_day = config.daily_result_quota;
|
|
|
|
}
|
|
|
|
g_wreq->max_jobs_per_day = mult * g_reply->host.max_results_day;
|
|
|
|
} else {
|
|
|
|
g_wreq->max_jobs_per_day = 999999;
|
2009-01-20 00:54:16 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2009-06-19 18:34:00 +00:00
|
|
|
if (config.max_wus_in_progress) {
|
2009-06-22 21:11:19 +00:00
|
|
|
g_wreq->max_jobs_on_host_cpu = config.max_wus_in_progress * effective_ncpus;
|
2009-06-19 18:34:00 +00:00
|
|
|
if (config.max_wus_in_progress_gpu) {
|
2009-06-22 21:11:19 +00:00
|
|
|
g_wreq->max_jobs_on_host_gpu = config.max_wus_in_progress_gpu * effective_ngpus;
|
2009-06-19 18:34:00 +00:00
|
|
|
g_wreq->max_jobs_on_host = g_wreq->max_jobs_on_host_cpu + g_wreq->max_jobs_on_host_gpu;
|
|
|
|
} else {
|
|
|
|
g_wreq->max_jobs_on_host_gpu = 999999;
|
|
|
|
g_wreq->max_jobs_on_host = g_wreq->max_jobs_on_host_cpu;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
g_wreq->max_jobs_on_host_cpu = 999999;
|
|
|
|
g_wreq->max_jobs_on_host = 999999;
|
|
|
|
if (config.max_wus_in_progress_gpu) {
|
2009-06-22 21:11:19 +00:00
|
|
|
g_wreq->max_jobs_on_host_gpu = config.max_wus_in_progress_gpu * effective_ngpus;
|
2009-06-19 18:34:00 +00:00
|
|
|
} else {
|
|
|
|
g_wreq->max_jobs_on_host_gpu = 999999;
|
|
|
|
}
|
|
|
|
}
|
2009-07-28 23:07:11 +00:00
|
|
|
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] effective_ncpus %d max_jobs_on_host_cpu %d max_jobs_on_host %d\n",
|
|
|
|
effective_ncpus, g_wreq->max_jobs_on_host_cpu, g_wreq->max_jobs_on_host
|
|
|
|
);
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] effective_ngpus %d max_jobs_on_host_gpu %d\n",
|
|
|
|
effective_ngpus, g_wreq->max_jobs_on_host_gpu
|
|
|
|
);
|
|
|
|
}
|
2005-07-28 10:13:30 +00:00
|
|
|
}
|
|
|
|
|
- server code: at some point I made a global var "SCHED_CONFIG config",
mostly so that the parse function could assume
that everything was initially zero.
However, various back-end functions pass around SCHED_CONFIG&
as an argument (also named "config").
This creates a shadow, which is always bad.
Worse is the possibility that some projects have back-end programs
that have a SCHED_CONFIG variable that's automatic,
and therefore isn't zero initially,
and therefore isn't parsing correctly.
To fix this, I changed the 2 vectors in SCHED_CONFIG into pointers,
and have the parse routine zero the structure.
I was tempted to remove the SCHED_CONFIG& args to back-end functions,
but this would have broken some projects' code.
I did, however, change the name from config to config_loc
to avoid shadowing.
Also fixed various other compiler warnings.
svn path=/trunk/boinc/; revision=15541
2008-07-02 17:24:53 +00:00
|
|
|
static const char* find_user_friendly_name(int appid) {
|
2009-05-06 21:52:50 +00:00
|
|
|
APP* app = ssp->lookup_app(appid);
|
|
|
|
if (app) return app->user_friendly_name;
|
2008-03-07 21:13:01 +00:00
|
|
|
return "deprecated application";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
// Compute the max additional disk usage we can impose on the host.
|
|
|
|
// Depending on the client version, it can either send us
|
|
|
|
// - d_total and d_free (pre 4 oct 2005)
|
|
|
|
// - the above plus d_boinc_used_total and d_boinc_used_project
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
double max_allowable_disk() {
|
|
|
|
HOST host = g_request->host;
|
|
|
|
GLOBAL_PREFS prefs = g_request->global_prefs;
|
2004-08-14 00:37:38 +00:00
|
|
|
double x1, x2, x3, x;
|
|
|
|
|
2005-12-17 04:53:48 +00:00
|
|
|
// defaults are from config.xml
|
|
|
|
// if not there these are used:
|
|
|
|
// -default_max_used_gb= 100
|
|
|
|
// -default_max_used_pct = 50
|
|
|
|
// -default_min_free_gb = .001
|
2004-08-14 00:37:38 +00:00
|
|
|
//
|
2006-10-06 18:52:50 +00:00
|
|
|
if (prefs.disk_max_used_gb == 0) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_max_used_gb = config.default_disk_max_used_gb;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
|
|
|
if (prefs.disk_max_used_pct == 0) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_max_used_pct = config.default_disk_max_used_pct;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
|
|
|
if (prefs.disk_min_free_gb < config.default_disk_min_free_gb) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_min_free_gb = config.default_disk_min_free_gb;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
2004-08-14 00:37:38 +00:00
|
|
|
|
|
|
|
// no defaults for total/free disk space (host.d_total, d_free)
|
2005-10-04 21:44:58 +00:00
|
|
|
// if they're zero, client will get no work.
|
2004-08-14 00:37:38 +00:00
|
|
|
//
|
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
if (host.d_boinc_used_total) {
|
|
|
|
// The post 4 oct 2005 case.
|
|
|
|
// Compute the max allowable additional disk usage based on prefs
|
|
|
|
//
|
2008-11-01 23:13:55 +00:00
|
|
|
x1 = prefs.disk_max_used_gb*GIGA - host.d_boinc_used_total;
|
2005-10-04 21:44:58 +00:00
|
|
|
x2 = host.d_total*prefs.disk_max_used_pct/100.
|
|
|
|
- host.d_boinc_used_total;
|
2008-11-01 23:13:55 +00:00
|
|
|
x3 = host.d_free - prefs.disk_min_free_gb*GIGA; // may be negative
|
2009-02-26 00:23:23 +00:00
|
|
|
x = std::min(x1, std::min(x2, x3));
|
2005-01-31 19:34:43 +00:00
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
// see which bound is the most stringent
|
|
|
|
//
|
|
|
|
if (x==x1) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.max_used = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
} else if (x==x2) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.max_frac = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.min_free = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
}
|
2005-02-06 21:26:21 +00:00
|
|
|
} else {
|
2005-10-04 21:44:58 +00:00
|
|
|
// here we don't know how much space BOINC is using.
|
|
|
|
// so we're kinda screwed.
|
|
|
|
// All we can do is assume that BOINC is using zero space.
|
|
|
|
// We can't honor the max_used for max_used_pct preferences.
|
|
|
|
// We can only honor the min_free pref.
|
|
|
|
//
|
2008-11-01 23:13:55 +00:00
|
|
|
x = host.d_free - prefs.disk_min_free_gb*GIGA; // may be negative
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.min_free = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
x1 = x2 = x3 = 0;
|
2005-02-10 20:31:11 +00:00
|
|
|
}
|
2005-01-31 19:34:43 +00:00
|
|
|
|
2004-08-14 00:37:38 +00:00
|
|
|
if (x < 0) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: disk_max_used_gb %.2fGB disk_max_used_pct %.2f disk_min_free_gb %.2fGB\n",
|
|
|
|
prefs.disk_max_used_gb/GIGA,
|
|
|
|
prefs.disk_max_used_pct,
|
|
|
|
prefs.disk_min_free_gb/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: host.d_total %.2fGB host.d_free %.2fGB host.d_boinc_used_total %.2fGB\n",
|
|
|
|
host.d_total/GIGA,
|
|
|
|
host.d_free/GIGA,
|
|
|
|
host.d_boinc_used_total/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: x1 %.2fGB x2 %.2fGB x3 %.2fGB x %.2fGB\n",
|
|
|
|
x1/GIGA, x2/GIGA, x3/GIGA, x/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->disk.set_insufficient(-x);
|
2008-11-26 20:37:11 +00:00
|
|
|
x = 0;
|
2004-08-14 00:37:38 +00:00
|
|
|
}
|
|
|
|
return x;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
2008-12-15 21:14:32 +00:00
|
|
|
// Estimated duration of the job given the app version's FLOPS
// (bav.host_usage.flops), with no scaling applied.
// A non-positive FLOP estimate in the workunit is replaced by 1e12.
//
static double estimate_duration_unscaled(WORKUNIT& wu, BEST_APP_VERSION& bav) {
    const double fpops = (wu.rsc_fpops_est <= 0) ? 1e12 : wu.rsc_fpops_est;
    return fpops/bav.host_usage.flops;
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_running_frac() {
|
2008-12-18 18:19:42 +00:00
|
|
|
double rf;
|
|
|
|
if (g_request->core_client_version<=419) {
|
|
|
|
rf = g_reply->host.on_frac;
|
|
|
|
} else {
|
|
|
|
rf = g_reply->host.active_frac * g_reply->host.on_frac;
|
|
|
|
}
|
|
|
|
|
|
|
|
// clamp running_frac and DCF to a reasonable range
|
|
|
|
//
|
|
|
|
if (rf > 1) {
|
2009-01-29 20:42:45 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL, "running_frac=%f; setting to 1\n", rf);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
rf = 1;
|
|
|
|
} else if (rf < .1) {
|
2009-01-29 20:42:45 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL, "running_frac=%f; setting to 0.1\n", rf);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
rf = .1;
|
|
|
|
}
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->running_frac = rf;
|
|
|
|
}
|
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_dcf() {
|
2009-01-29 20:42:45 +00:00
|
|
|
double dcf = g_reply->host.duration_correction_factor;
|
|
|
|
if (dcf > 10) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] DCF=%f; setting to 10\n", dcf
|
|
|
|
);
|
2008-12-18 18:19:42 +00:00
|
|
|
}
|
2009-01-29 20:42:45 +00:00
|
|
|
dcf = 10;
|
|
|
|
} else if (dcf < 0.1) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] DCF=%f; setting to 0.1\n", dcf
|
|
|
|
);
|
|
|
|
}
|
|
|
|
dcf = 0.1;
|
|
|
|
}
|
|
|
|
g_wreq->dcf = dcf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// estimate the amount of real time to complete this WU,
|
|
|
|
// taking into account active_frac etc.
|
|
|
|
// Note: don't factor in resource_share_fraction.
|
|
|
|
// The core client no longer necessarily does round-robin
|
|
|
|
// across all projects.
|
|
|
|
//
|
|
|
|
double estimate_duration(WORKUNIT& wu, BEST_APP_VERSION& bav) {
|
|
|
|
double edu = estimate_duration_unscaled(wu, bav);
|
|
|
|
double ed = edu/g_wreq->running_frac;
|
|
|
|
if (!config.ignore_dcf) {
|
|
|
|
ed *= g_wreq->dcf;
|
2005-06-23 07:42:45 +00:00
|
|
|
}
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-24 21:51:19 +00:00
|
|
|
"[send] est. duration for WU %d: unscaled %.2f scaled %.2f\n",
|
2009-01-15 20:23:20 +00:00
|
|
|
wu.id, edu, ed
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
return ed;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
2008-12-18 21:25:51 +00:00
|
|
|
static void get_prefs_info() {
|
2006-05-02 22:17:09 +00:00
|
|
|
char buf[8096];
|
2009-02-26 00:23:23 +00:00
|
|
|
std::string str;
|
2007-09-21 18:10:54 +00:00
|
|
|
unsigned int pos = 0;
|
|
|
|
int temp_int;
|
2008-03-10 17:03:15 +00:00
|
|
|
bool flag;
|
2006-10-22 00:42:44 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
extract_venue(g_reply->user.project_prefs, g_reply->host.venue, buf);
|
2007-12-24 21:34:21 +00:00
|
|
|
str = buf;
|
|
|
|
|
2006-10-22 00:42:44 +00:00
|
|
|
// scan user's project prefs for elements of the form <app_id>N</app_id>,
|
|
|
|
// indicating the apps they want to run.
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->preferred_apps.clear();
|
2007-09-21 18:10:54 +00:00
|
|
|
while (parse_int(str.substr(pos,str.length()-pos).c_str(), "<app_id>", temp_int)) {
|
2006-10-22 01:46:33 +00:00
|
|
|
APP_INFO ai;
|
|
|
|
ai.appid = temp_int;
|
2008-03-07 21:13:01 +00:00
|
|
|
ai.work_available = false;
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->preferred_apps.push_back(ai);
|
2006-10-22 00:42:44 +00:00
|
|
|
|
2007-09-21 18:10:54 +00:00
|
|
|
pos = str.find("<app_id>", pos) + 1;
|
|
|
|
}
|
2009-05-06 21:52:50 +00:00
|
|
|
if (parse_bool(buf,"allow_non_preferred_apps", flag)) {
|
|
|
|
g_wreq->allow_non_preferred_apps = flag;
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2009-05-06 21:52:50 +00:00
|
|
|
if (parse_bool(buf,"allow_beta_work", flag)) {
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->allow_beta_work = flag;
|
2009-05-06 21:52:50 +00:00
|
|
|
}
|
|
|
|
if (parse_bool(buf,"no_gpus", flag)) {
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->no_gpus = flag;
|
|
|
|
}
|
2009-05-06 21:52:50 +00:00
|
|
|
if (parse_bool(buf,"no_cpu", flag)) {
|
2009-03-18 21:14:44 +00:00
|
|
|
g_wreq->no_cpu = flag;
|
|
|
|
}
|
2008-12-18 21:25:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Find or compute various info about the host;
|
|
|
|
// this info affects which jobs are sent to the host.
|
|
|
|
//
|
|
|
|
static void get_host_info() {
|
2009-07-28 23:07:11 +00:00
|
|
|
double expavg_credit = g_reply->host.expavg_credit;
|
|
|
|
double expavg_time = g_reply->host.expavg_time;
|
|
|
|
update_average(0, 0, CREDIT_HALF_LIFE, expavg_credit, expavg_time);
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// Decide whether or not this computer is 'reliable'
|
2008-03-07 21:13:01 +00:00
|
|
|
// A computer is reliable if the following conditions are true
|
|
|
|
// (for those that are set in the config file)
|
|
|
|
// 1) The host average turnaround is less than the config
|
|
|
|
// max average turnaround
|
|
|
|
// 2) The host error rate is less then the config max error rate
|
|
|
|
// 3) The host results per day is equal to the config file value
|
2008-02-21 20:10:10 +00:00
|
|
|
|
2009-07-28 23:07:11 +00:00
|
|
|
// Platforms other than Windows, Linux and Intel Macs need a
|
2008-03-07 21:13:01 +00:00
|
|
|
// larger set of computers to be marked reliable
|
2008-02-21 20:10:10 +00:00
|
|
|
//
|
2008-03-07 21:13:01 +00:00
|
|
|
double multiplier = 1.0;
|
2008-12-16 16:29:54 +00:00
|
|
|
if (strstr(g_reply->host.os_name,"Windows")
|
|
|
|
|| strstr(g_reply->host.os_name,"Linux")
|
|
|
|
|| (strstr(g_reply->host.os_name,"Darwin")
|
|
|
|
&& !(strstr(g_reply->host.p_vendor,"Power Macintosh"))
|
2008-04-30 20:31:33 +00:00
|
|
|
)) {
|
2009-05-06 21:52:50 +00:00
|
|
|
multiplier = 1.0;
|
2006-10-22 00:42:44 +00:00
|
|
|
} else {
|
2009-05-06 21:52:50 +00:00
|
|
|
multiplier = 1.8;
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
|
|
|
|
2009-07-28 23:07:11 +00:00
|
|
|
if (g_reply->host.avg_turnaround > 0 && config.reliable_max_avg_turnaround) {
|
|
|
|
|
|
|
|
if (g_reply->host.avg_turnaround > config.reliable_max_avg_turnaround*multiplier) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] not reliable; avg_turn_hrs: %.3f\n",
|
|
|
|
g_reply->host.id, g_reply->host.avg_turnaround/3600
|
|
|
|
);
|
|
|
|
}
|
|
|
|
g_wreq->reliable = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (config.reliable_max_error_rate) {
|
|
|
|
if (g_reply->host.error_rate > config.reliable_max_error_rate*multiplier) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] not reliable; error rate: %.6f\n",
|
|
|
|
g_reply->host.id, g_reply->host.error_rate
|
|
|
|
);
|
|
|
|
}
|
|
|
|
g_wreq->reliable = false;
|
|
|
|
return;
|
|
|
|
}
|
2008-05-02 17:48:29 +00:00
|
|
|
}
|
2009-07-28 23:07:11 +00:00
|
|
|
if (config.daily_result_quota) {
|
|
|
|
if (g_reply->host.max_results_day < config.daily_result_quota) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] not reliable; max_result_day %d\n",
|
|
|
|
g_reply->host.id, g_reply->host.max_results_day
|
|
|
|
);
|
|
|
|
}
|
|
|
|
g_wreq->reliable = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
g_wreq->reliable = true;
|
2008-05-02 17:48:29 +00:00
|
|
|
if (config.debug_send) {
|
2009-07-28 23:07:11 +00:00
|
|
|
log_messages.printf(MSG_NORMAL, "[send] [HOST#%d] is reliable\n");
|
2006-10-22 00:42:44 +00:00
|
|
|
}
|
2006-05-02 22:17:09 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// Return true if the user has set application preferences,
|
|
|
|
// and this job is not for a selected app
|
2007-05-09 17:45:18 +00:00
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
bool app_not_selected(WORKUNIT& wu) {
|
2006-11-07 17:40:55 +00:00
|
|
|
unsigned int i;
|
2007-05-09 17:45:18 +00:00
|
|
|
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->preferred_apps.size() == 0) return false;
|
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
|
|
|
if (wu.appid == g_wreq->preferred_apps[i].appid) {
|
2009-05-06 21:52:50 +00:00
|
|
|
g_wreq->preferred_apps[i].work_available = true;
|
2008-04-30 20:31:33 +00:00
|
|
|
return false;
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
2008-04-30 20:31:33 +00:00
|
|
|
return true;
|
2006-10-22 00:42:44 +00:00
|
|
|
}
|
|
|
|
|
2007-08-16 17:33:41 +00:00
|
|
|
// see how much RAM we can use on this machine
|
|
|
|
//
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_mem_sizes() {
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->ram = g_reply->host.m_nbytes;
|
|
|
|
if (g_wreq->ram <= 0) g_wreq->ram = DEFAULT_RAM_SIZE;
|
|
|
|
g_wreq->usable_ram = g_wreq->ram;
|
2008-12-16 16:29:54 +00:00
|
|
|
double busy_frac = g_request->global_prefs.ram_max_used_busy_frac;
|
|
|
|
double idle_frac = g_request->global_prefs.ram_max_used_idle_frac;
|
2006-10-04 17:01:36 +00:00
|
|
|
double frac = 1;
|
|
|
|
if (busy_frac>0 && idle_frac>0) {
|
|
|
|
frac = std::max(busy_frac, idle_frac);
|
|
|
|
if (frac > 1) frac = 1;
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->usable_ram *= frac;
|
2006-10-04 17:01:36 +00:00
|
|
|
}
|
2007-08-16 17:33:41 +00:00
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the job's memory bound fits in the host's usable RAM.
// Returns 0 if it fits.
// Otherwise queues a "no work" message for the user,
// flags g_wreq->mem as insufficient, sets the reply delay to
// DELAY_NO_WORK_TEMP, and returns INFEASIBLE_MEM.
//
static inline int check_memory(WORKUNIT& wu) {
    double diff = wu.rsc_memory_bound - g_wreq->usable_ram;
    if (diff > 0) {
        char message[256];

        // bounded write: the app name's length isn't known here,
        // so truncate rather than overrun the buffer
        //
        snprintf(message, sizeof(message),
            "%s needs %0.2f MB RAM but only %0.2f MB is available for use.",
            find_user_friendly_name(wu.appid),
            wu.rsc_memory_bound/MEGA, g_wreq->usable_ram/MEGA
        );
        g_wreq->insert_no_work_message(message);

        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [WU#%d %s] needs %0.2fMB RAM; [HOST#%d] has %0.2fMB, %0.2fMB usable\n",
                wu.id, wu.name, wu.rsc_memory_bound/MEGA,
                g_reply->host.id, g_wreq->ram/MEGA, g_wreq->usable_ram/MEGA
            );
        }
        g_wreq->mem.set_insufficient(wu.rsc_memory_bound);
        g_reply->set_delay(DELAY_NO_WORK_TEMP);
        return INFEASIBLE_MEM;
    }
    return 0;
}
|
2005-02-08 19:54:10 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the job's disk bound fits in g_wreq->disk_available.
// Returns 0 if it fits.
// Otherwise queues a "no work" message for the user,
// flags g_wreq->disk as insufficient by the missing amount,
// and returns INFEASIBLE_DISK.
//
static inline int check_disk(WORKUNIT& wu) {
    double diff = wu.rsc_disk_bound - g_wreq->disk_available;
    if (diff > 0) {
        char message[256];

        // bounded write: the app name's length isn't known here,
        // so truncate rather than overrun the buffer
        //
        snprintf(message, sizeof(message),
            "%s needs %0.2fMB more disk space.  You currently have %0.2f MB available and it needs %0.2f MB.",
            find_user_friendly_name(wu.appid),
            diff/MEGA, g_wreq->disk_available/MEGA, wu.rsc_disk_bound/MEGA
        );
        g_wreq->insert_no_work_message(message);

        g_wreq->disk.set_insufficient(diff);
        return INFEASIBLE_DISK;
    }
    return 0;
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the host's measured download bandwidth (n_bwdown)
// meets the job's bandwidth bound.
// Returns 0 if the job has no bandwidth bound, if the host has no
// bandwidth measurement, or if the bound is met.
// Otherwise queues a "no work" message for the user,
// flags g_wreq->bandwidth as insufficient by the missing amount,
// and returns INFEASIBLE_BANDWIDTH.
//
static inline int check_bandwidth(WORKUNIT& wu) {
    if (wu.rsc_bandwidth_bound == 0) return 0;

    // if n_bwdown is zero, the host has never downloaded anything,
    // so skip this check
    //
    if (g_reply->host.n_bwdown == 0) return 0;

    double diff = wu.rsc_bandwidth_bound - g_reply->host.n_bwdown;
    if (diff > 0) {
        char message[256];

        // bounded write: the app name's length isn't known here,
        // so truncate rather than overrun the buffer
        //
        snprintf(message, sizeof(message),
            "%s requires %0.2f KB/sec download bandwidth.  Your computer has been measured at %0.2f KB/sec.",
            find_user_friendly_name(wu.appid),
            wu.rsc_bandwidth_bound/KILO, g_reply->host.n_bwdown/KILO
        );
        g_wreq->insert_no_work_message(message);

        g_wreq->bandwidth.set_insufficient(diff);
        return INFEASIBLE_BANDWIDTH;
    }
    return 0;
}
|
|
|
|
|
2008-07-15 21:43:45 +00:00
|
|
|
// Determine if the app is "hard",
|
|
|
|
// and we should send it only to high-end hosts.
|
|
|
|
// Currently this is specified by setting weight=-1;
|
|
|
|
// this is a kludge for SETI@home/Astropulse.
|
|
|
|
//
|
|
|
|
static inline bool hard_app(APP& app) {
|
|
|
|
return (app.weight == -1);
|
|
|
|
}
|
|
|
|
|
2009-01-30 21:25:24 +00:00
|
|
|
// Return the estimated delay of the resource the app version uses:
// the CUDA coprocessor if it uses CUDA devices,
// else the ATI coprocessor if it uses ATI devices,
// else the CPU.
//
static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
    if (bav.host_usage.ncudas) {
        return g_request->coproc_cuda->estimated_delay;
    }
    if (bav.host_usage.natis) {
        return g_request->coproc_ati->estimated_delay;
    }
    return g_request->cpu_estimated_delay;
}
|
|
|
|
|
|
|
|
// Add dt to the estimated delay of the resource the app version uses
// (CUDA coprocessor, else ATI coprocessor, else CPU);
// the resource is chosen in the same way as in get_estimated_delay().
//
static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
    if (bav.host_usage.ncudas) {
        g_request->coproc_cuda->estimated_delay += dt;
        return;
    }
    if (bav.host_usage.natis) {
        g_request->coproc_ati->estimated_delay += dt;
        return;
    }
    g_request->cpu_estimated_delay += dt;
}
|
|
|
|
|
2007-05-30 17:25:51 +00:00
|
|
|
static inline int check_deadline(
|
2008-12-15 21:14:32 +00:00
|
|
|
WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav
|
2007-05-30 17:25:51 +00:00
|
|
|
) {
|
2008-07-14 19:13:19 +00:00
|
|
|
if (config.ignore_delay_bound) return 0;
|
|
|
|
|
2005-02-16 23:17:43 +00:00
|
|
|
// skip delay check if host currently doesn't have any work
|
2008-08-12 19:06:35 +00:00
|
|
|
// and it's not a hard app.
|
2005-02-16 23:17:43 +00:00
|
|
|
// (i.e. everyone gets one result, no matter how slow they are)
|
|
|
|
//
|
2009-01-30 21:25:24 +00:00
|
|
|
if (get_estimated_delay(bav) == 0 && !hard_app(app)) return 0;
|
2008-07-14 19:13:19 +00:00
|
|
|
|
2008-08-12 19:06:35 +00:00
|
|
|
// if it's a hard app, don't send it to a host with no credit
|
|
|
|
//
|
2008-12-15 21:14:32 +00:00
|
|
|
if (hard_app(app) && g_reply->host.total_credit == 0) {
|
2008-08-12 19:06:35 +00:00
|
|
|
return INFEASIBLE_CPU;
|
|
|
|
}
|
|
|
|
|
2008-12-26 22:56:42 +00:00
|
|
|
if (config.workload_sim && g_request->have_other_results_list) {
|
|
|
|
double est_dur = estimate_duration(wu, bav);
|
|
|
|
if (g_reply->wreq.edf_reject_test(est_dur, wu.delay_bound)) {
|
|
|
|
return INFEASIBLE_WORKLOAD;
|
|
|
|
}
|
|
|
|
IP_RESULT candidate("", wu.delay_bound, est_dur);
|
|
|
|
strcpy(candidate.name, wu.name);
|
2009-06-22 21:11:19 +00:00
|
|
|
if (check_candidate(candidate, g_wreq->effective_ncpus, g_request->ip_results)) {
|
2008-12-26 22:56:42 +00:00
|
|
|
// it passed the feasibility test,
|
2009-06-22 21:11:19 +00:00
|
|
|
// but don't add it to the workload yet;
|
2008-12-26 22:56:42 +00:00
|
|
|
// wait until we commit to sending it
|
|
|
|
} else {
|
|
|
|
g_reply->wreq.edf_reject(est_dur, wu.delay_bound);
|
|
|
|
g_reply->wreq.speed.set_insufficient(0);
|
|
|
|
return INFEASIBLE_WORKLOAD;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
double ewd = estimate_duration(wu, bav);
|
|
|
|
if (hard_app(app)) ewd *= 1.3;
|
2009-01-30 21:25:24 +00:00
|
|
|
double est_completion_delay = get_estimated_delay(bav) + ewd;
|
2009-02-26 00:23:23 +00:00
|
|
|
double est_report_delay = std::max(est_completion_delay, g_request->global_prefs.work_buf_min());
|
2008-12-26 22:56:42 +00:00
|
|
|
double diff = est_report_delay - wu.delay_bound;
|
|
|
|
if (diff > 0) {
|
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-24 21:51:19 +00:00
|
|
|
"[send] [WU#%d] deadline miss %d > %d\n",
|
|
|
|
wu.id, (int)est_report_delay, wu.delay_bound
|
2008-12-26 22:56:42 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
g_reply->wreq.speed.set_insufficient(diff);
|
|
|
|
return INFEASIBLE_CPU;
|
2009-01-30 22:30:35 +00:00
|
|
|
} else {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-03-06 22:21:47 +00:00
|
|
|
"[send] [WU#%d] meets deadline: %.2f + %.2f < %d\n",
|
2009-01-30 22:30:35 +00:00
|
|
|
wu.id, get_estimated_delay(bav), ewd, wu.delay_bound
|
|
|
|
);
|
|
|
|
}
|
2004-07-15 18:54:17 +00:00
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
2007-05-30 17:25:51 +00:00
|
|
|
return 0;
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// Fast checks (no DB access) to see if the job can be sent to the host.
|
2007-05-30 17:25:51 +00:00
|
|
|
// Reasons why not include:
|
2007-05-09 17:45:18 +00:00
|
|
|
// 1) the host doesn't have enough memory;
|
|
|
|
// 2) the host doesn't have enough disk space;
|
|
|
|
// 3) based on CPU speed, resource share and estimated delay,
|
|
|
|
// the host probably won't get the result done within the delay bound
|
|
|
|
// 4) app isn't in user's "approved apps" list
|
|
|
|
//
|
2008-12-15 21:14:32 +00:00
|
|
|
int wu_is_infeasible_fast(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
|
2007-05-30 17:25:51 +00:00
|
|
|
int retval;
|
2007-05-31 18:14:45 +00:00
|
|
|
|
2009-07-29 18:34:27 +00:00
|
|
|
// project-specific check
|
|
|
|
//
|
|
|
|
if (wu_is_infeasible_custom(wu, app, bav)) {
|
|
|
|
return INFEASIBLE_CUSTOM;
|
|
|
|
}
|
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
// homogeneous redundancy, quick check
|
|
|
|
//
|
2007-10-04 21:55:37 +00:00
|
|
|
if (app_hr_type(app)) {
|
2008-12-15 21:14:32 +00:00
|
|
|
if (hr_unknown_platform_type(g_reply->host, app_hr_type(app))) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] [WU#%d %s] host is of unknown class in HR type %d\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
g_reply->host.id, wu.id, wu.name, app_hr_type(app)
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2007-10-04 21:55:37 +00:00
|
|
|
return INFEASIBLE_HR;
|
|
|
|
}
|
2008-12-19 18:14:02 +00:00
|
|
|
if (already_sent_to_different_platform_quick(wu, app)) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] [WU#%d %s] failed quick HR check: WU is class %d, host is class %d\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
g_reply->host.id, wu.id, wu.name, wu.hr_class, hr_class(g_request->host, app_hr_type(app))
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2007-05-31 18:14:45 +00:00
|
|
|
return INFEASIBLE_HR;
|
|
|
|
}
|
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
if (config.one_result_per_user_per_wu || config.one_result_per_host_per_wu) {
|
2008-12-19 18:14:02 +00:00
|
|
|
if (wu_already_in_reply(wu)) {
|
2007-05-31 18:14:45 +00:00
|
|
|
return INFEASIBLE_DUP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_memory(wu);
|
2007-05-30 17:25:51 +00:00
|
|
|
if (retval) return retval;
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_disk(wu);
|
2007-05-30 17:25:51 +00:00
|
|
|
if (retval) return retval;
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_bandwidth(wu);
|
2008-03-07 21:13:01 +00:00
|
|
|
if (retval) return retval;
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check done_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
// do this last because EDF sim uses some CPU
|
2008-12-26 22:56:42 +00:00
|
|
|
retval = check_deadline(wu, app, bav);
|
|
|
|
if (retval) return INFEASIBLE_WORKLOAD;
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2007-05-30 17:25:51 +00:00
|
|
|
return 0;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// insert "text" right after "after" in the given buffer
|
|
|
|
//
|
2005-02-16 23:17:43 +00:00
|
|
|
int insert_after(char* buffer, const char* after, const char* text) {
|
2004-04-04 01:59:47 +00:00
|
|
|
char* p;
|
2008-03-31 16:19:45 +00:00
|
|
|
char temp[BLOB_SIZE];
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2008-03-31 16:19:45 +00:00
|
|
|
if (strlen(buffer) + strlen(text) > BLOB_SIZE-1) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2008-08-19 03:00:17 +00:00
|
|
|
"insert_after: overflow: %d %d\n", strlen(buffer), strlen(text)
|
2005-09-26 23:28:48 +00:00
|
|
|
);
|
2004-04-04 01:59:47 +00:00
|
|
|
return ERR_BUFFER_OVERFLOW;
|
|
|
|
}
|
|
|
|
p = strstr(buffer, after);
|
|
|
|
if (!p) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2005-09-26 23:28:48 +00:00
|
|
|
"insert_after: %s not found in %s\n", after, buffer
|
|
|
|
);
|
2004-04-04 01:59:47 +00:00
|
|
|
return ERR_NULL;
|
|
|
|
}
|
|
|
|
p += strlen(after);
|
|
|
|
strcpy(temp, p);
|
|
|
|
strcpy(p, text);
|
|
|
|
strcat(p, temp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-02-06 21:26:21 +00:00
|
|
|
// add elements to WU's xml_doc,
|
|
|
|
// in preparation for sending it to a client
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
|
|
|
int insert_wu_tags(WORKUNIT& wu, APP& app) {
|
2008-03-31 16:19:45 +00:00
|
|
|
char buf[BLOB_SIZE];
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
sprintf(buf,
|
|
|
|
" <rsc_fpops_est>%f</rsc_fpops_est>\n"
|
|
|
|
" <rsc_fpops_bound>%f</rsc_fpops_bound>\n"
|
|
|
|
" <rsc_memory_bound>%f</rsc_memory_bound>\n"
|
|
|
|
" <rsc_disk_bound>%f</rsc_disk_bound>\n"
|
|
|
|
" <name>%s</name>\n"
|
|
|
|
" <app_name>%s</app_name>\n",
|
|
|
|
wu.rsc_fpops_est,
|
|
|
|
wu.rsc_fpops_bound,
|
|
|
|
wu.rsc_memory_bound,
|
|
|
|
wu.rsc_disk_bound,
|
|
|
|
wu.name,
|
|
|
|
app.name
|
|
|
|
);
|
|
|
|
return insert_after(wu.xml_doc, "<workunit>\n", buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// add the given workunit to a reply.
|
|
|
|
// Add the app and app_version to the reply also.
|
|
|
|
//
|
|
|
|
int add_wu_to_reply(
|
2008-03-27 18:25:29 +00:00
|
|
|
WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp
|
2004-04-04 01:59:47 +00:00
|
|
|
) {
|
|
|
|
int retval;
|
2005-02-02 18:13:00 +00:00
|
|
|
WORKUNIT wu2, wu3;
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2008-03-27 18:25:29 +00:00
|
|
|
APP_VERSION* avp = bavp->avp;
|
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
// add the app, app_version, and workunit to the reply,
|
|
|
|
// but only if they aren't already there
|
|
|
|
//
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
if (avp) {
|
2005-02-02 18:13:00 +00:00
|
|
|
APP_VERSION av2=*avp, *avp2=&av2;
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2009-03-02 23:47:11 +00:00
|
|
|
if (strlen(config.replace_download_url_by_timezone)) {
|
2008-12-19 18:14:02 +00:00
|
|
|
process_av_timezone(avp, av2);
|
2005-02-02 18:13:00 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_app_unique(*app);
|
2008-03-28 18:00:27 +00:00
|
|
|
av2.bavp = bavp;
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_app_version_unique(*avp2);
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] Sending app_version %s %d %d %s; %.2f GFLOPS\n",
|
2008-12-17 20:53:46 +00:00
|
|
|
g_reply->host.id, app->name,
|
|
|
|
avp2->platformid, avp2->version_num, avp2->plan_class,
|
2009-01-10 00:43:33 +00:00
|
|
|
bavp->host_usage.flops/1e9
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// add time estimate to reply
|
|
|
|
//
|
|
|
|
wu2 = wu; // make copy since we're going to modify its XML field
|
|
|
|
retval = insert_wu_tags(wu2, *app);
|
|
|
|
if (retval) {
|
2008-08-19 03:00:17 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL, "insert_wu_tags failed %d\n", retval);
|
2004-04-04 01:59:47 +00:00
|
|
|
return retval;
|
|
|
|
}
|
2008-12-19 18:14:02 +00:00
|
|
|
wu3 = wu2;
|
2009-03-02 23:47:11 +00:00
|
|
|
if (strlen(config.replace_download_url_by_timezone)) {
|
2008-12-19 18:14:02 +00:00
|
|
|
process_wu_timezone(wu2, wu3);
|
2005-02-02 18:13:00 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_workunit_unique(wu3);
|
2005-02-14 20:11:01 +00:00
|
|
|
|
|
|
|
// switch to tighter policy for estimating delay
|
|
|
|
//
|
2004-04-04 01:59:47 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add <name> and <wu_name> elements to the result's xml_doc_in,
// each spliced in right after the "<result>\n" tag
// (so <wu_name> ends up ahead of <name>).
//
// Returns 0 on success, ERR_BUFFER_OVERFLOW if a formatted element
// doesn't fit in the local buffer, or the error from insert_after().
//
int insert_name_tags(RESULT& result, WORKUNIT const& wu) {
    // Sized for a name plus the surrounding tag text.
    // NOTE(review): presumably the name fields are at most 256 bytes -
    // confirm; longer names are rejected rather than overrunning the buffer.
    //
    char buf[512];
    int n;
    int retval;

    n = snprintf(buf, sizeof(buf), "<name>%s</name>\n", result.name);
    if (n < 0 || n >= (int)sizeof(buf)) return ERR_BUFFER_OVERFLOW;
    retval = insert_after(result.xml_doc_in, "<result>\n", buf);
    if (retval) return retval;

    n = snprintf(buf, sizeof(buf), "<wu_name>%s</wu_name>\n", wu.name);
    if (n < 0 || n >= (int)sizeof(buf)) return ERR_BUFFER_OVERFLOW;
    retval = insert_after(result.xml_doc_in, "<result>\n", buf);
    if (retval) return retval;

    return 0;
}
|
|
|
|
|
|
|
|
// Add a <report_deadline> element, holding result.report_deadline,
// to the result's xml_doc_in right after the "<result>\n" tag.
// Returns the status from insert_after() (0 on success).
//
int insert_deadline_tag(RESULT& result) {
    char deadline_tag[256];

    sprintf(deadline_tag,
        "<report_deadline>%d</report_deadline>\n", result.report_deadline
    );
    return insert_after(result.xml_doc_in, "<result>\n", deadline_tag);
}
|
|
|
|
|
2005-08-04 03:50:04 +00:00
|
|
|
// Update the workunit's transition_time in the DB to x,
// unless the stored value is already earlier than x
// (i.e. the stored value becomes the smaller of the two).
// Returns the status from DB_WORKUNIT::update_field().
//
int update_wu_transition_time(WORKUNIT wu, time_t x) {
    const int new_time = (int)x;
    char clause[256];

    // SQL note: can't use min() here
    //
    sprintf(clause,
        "transition_time=if(transition_time<%d, transition_time, %d)",
        new_time, new_time
    );

    DB_WORKUNIT db_wu;
    db_wu.id = wu.id;
    return db_wu.update_field(clause);
}
|
|
|
|
|
|
|
|
// return true iff a result for same WU is already being sent
|
|
|
|
//
|
2008-12-19 18:14:02 +00:00
|
|
|
bool wu_already_in_reply(WORKUNIT& wu) {
|
2004-04-04 01:59:47 +00:00
|
|
|
unsigned int i;
|
2008-12-16 16:29:54 +00:00
|
|
|
for (i=0; i<g_reply->results.size(); i++) {
|
|
|
|
if (wu.id == g_reply->results[i].workunitid) {
|
2004-04-04 01:59:47 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Lock the semaphore identified by sema_key
// (thin wrapper around lock_semaphore()).
//
void lock_sema() {
    lock_semaphore(sema_key);
}
|
|
|
|
|
|
|
|
// Unlock the semaphore identified by sema_key
// (thin wrapper around unlock_semaphore()).
//
void unlock_sema() {
    unlock_semaphore(sema_key);
}
|
|
|
|
|
2005-02-06 21:26:21 +00:00
|
|
|
// return true if additional work is needed,
|
|
|
|
// and there's disk space left,
|
|
|
|
// and we haven't exceeded result per RPC limit,
|
|
|
|
// and we haven't exceeded results per day limit
|
|
|
|
//
|
2008-12-19 18:14:02 +00:00
|
|
|
bool work_needed(bool locality_sched) {
|
2005-02-09 20:06:15 +00:00
|
|
|
if (locality_sched) {
|
|
|
|
// if we've failed to send a result because of a transient condition,
|
|
|
|
// return false to preserve invariant
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->disk.insufficient || g_wreq->speed.insufficient || g_wreq->mem.insufficient || g_wreq->no_allowed_apps_available) {
|
2009-03-07 01:00:05 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] stopping work search - locality condition\n"
|
|
|
|
);
|
|
|
|
}
|
2005-02-09 20:06:15 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2005-02-26 00:24:37 +00:00
|
|
|
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_reply->host.nresults_today >= g_wreq->max_jobs_per_day) {
|
|
|
|
g_wreq->daily_result_quota_exceeded = true;
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] stopping work search - daily quota exceeded (%d>=%d)\n",
|
|
|
|
g_reply->host.nresults_today, g_wreq->max_jobs_per_day
|
|
|
|
);
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
return false;
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
2007-05-14 15:21:38 +00:00
|
|
|
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->njobs_on_host >= g_wreq->max_jobs_on_host) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] in-progress job limit exceeded; %d >= %d\n",
|
|
|
|
g_wreq->njobs_on_host, g_wreq->max_jobs_on_host
|
|
|
|
);
|
2007-05-14 15:21:38 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
g_wreq->max_jobs_on_host_exceeded = true;
|
|
|
|
return false;
|
2007-05-14 15:21:38 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
|
|
|
|
if (g_wreq->njobs_on_host_cpu >= g_wreq->max_jobs_on_host_cpu) {
|
|
|
|
g_wreq->clear_cpu_req();
|
|
|
|
g_wreq->max_jobs_on_host_cpu_exceeded = true;
|
|
|
|
}
|
|
|
|
if (g_wreq->njobs_on_host_gpu >= g_wreq->max_jobs_on_host_gpu) {
|
|
|
|
g_wreq->clear_gpu_req();
|
2009-06-22 21:11:19 +00:00
|
|
|
if (g_wreq->effective_ngpus) {
|
|
|
|
g_wreq->max_jobs_on_host_gpu_exceeded = true;
|
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (g_wreq->njobs_sent >= g_wreq->max_jobs_per_rpc) {
|
2009-03-07 01:00:05 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-06-01 22:15:14 +00:00
|
|
|
"[send] stopping work search - njobs %d >= max_jobs_per_rpc %d\n",
|
|
|
|
g_wreq->njobs_sent, g_wreq->max_jobs_per_rpc
|
2009-03-07 01:00:05 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
|
2009-01-23 22:52:35 +00:00
|
|
|
#if 0
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-08-17 17:07:38 +00:00
|
|
|
"work_needed: spec req %d sec to fill %.2f; CPU (%.2f, %.2f) CUDA (%.2f, %.2f) ATI(%.2f, %.2f)\n",
|
2009-01-23 22:52:35 +00:00
|
|
|
g_wreq->rsc_spec_request,
|
|
|
|
g_wreq->seconds_to_fill,
|
|
|
|
g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
|
2009-08-17 17:07:38 +00:00
|
|
|
g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
|
|
|
|
g_wreq->ati_req_secs, g_wreq->ati_req_instances
|
2009-01-23 22:52:35 +00:00
|
|
|
);
|
|
|
|
#endif
|
2009-01-10 00:43:33 +00:00
|
|
|
if (g_wreq->rsc_spec_request) {
|
2009-01-23 22:52:35 +00:00
|
|
|
if (g_wreq->need_cpu()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (g_wreq->need_cuda()) {
|
|
|
|
return true;
|
|
|
|
}
|
2009-08-17 17:07:38 +00:00
|
|
|
if (g_wreq->need_ati()) {
|
|
|
|
return true;
|
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
} else {
|
2009-01-23 22:52:35 +00:00
|
|
|
if (g_wreq->seconds_to_fill > 0) {
|
|
|
|
return true;
|
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2009-03-07 01:00:05 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL, "[send] don't need more work\n");
|
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
return false;
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
|
|
|
|
2009-03-03 16:38:54 +00:00
|
|
|
int add_result_to_reply(
|
|
|
|
DB_RESULT& result, WORKUNIT& wu, BEST_APP_VERSION* bavp,
|
|
|
|
bool locality_scheduling
|
|
|
|
) {
|
2004-09-10 00:41:48 +00:00
|
|
|
int retval;
|
2005-09-12 23:49:16 +00:00
|
|
|
bool resent_result = false;
|
2008-03-27 18:25:29 +00:00
|
|
|
APP* app = ssp->lookup_app(wu.appid);
|
2004-09-10 00:41:48 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = add_wu_to_reply(wu, *g_reply, app, bavp);
|
2004-09-10 00:41:48 +00:00
|
|
|
if (retval) return retval;
|
|
|
|
|
2005-08-04 00:12:50 +00:00
|
|
|
// in the scheduling locality case,
|
|
|
|
// reduce the available space by LESS than the workunit rsc_disk_bound,
|
|
|
|
// IF the host already has the file OR the file was not already sent.
|
2005-02-02 22:58:46 +00:00
|
|
|
//
|
2009-03-03 16:38:54 +00:00
|
|
|
if (!locality_scheduling || decrement_disk_space_locality(wu)) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->disk_available -= wu.rsc_disk_bound;
|
2005-02-02 22:58:46 +00:00
|
|
|
}
|
2004-09-10 00:41:48 +00:00
|
|
|
|
|
|
|
// update the result in DB
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
result.hostid = g_reply->host.id;
|
|
|
|
result.userid = g_reply->user.id;
|
2005-07-29 08:13:23 +00:00
|
|
|
result.sent_time = time(0);
|
2006-03-17 04:47:51 +00:00
|
|
|
int old_server_state = result.server_state;
|
2005-08-02 19:38:43 +00:00
|
|
|
|
2006-10-22 00:42:44 +00:00
|
|
|
int delay_bound = wu.delay_bound;
|
2005-07-28 22:21:46 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_IN_PROGRESS) {
|
2007-11-30 23:02:55 +00:00
|
|
|
// We are sending this result for the first time
|
|
|
|
//
|
2007-09-21 18:10:54 +00:00
|
|
|
// If the workunit needs reliable and is being sent to a reliable host,
|
|
|
|
// then shorten the delay bound by the percent specified
|
|
|
|
//
|
2008-03-07 21:13:01 +00:00
|
|
|
if (config.reliable_on_priority && result.priority >= config.reliable_on_priority && config.reliable_reduced_delay_bound > 0.01
|
|
|
|
) {
|
2009-05-06 21:52:50 +00:00
|
|
|
double reduced_delay_bound = delay_bound*config.reliable_reduced_delay_bound;
|
|
|
|
double est_wallclock_duration = estimate_duration(wu, *bavp);
|
2008-03-13 23:35:13 +00:00
|
|
|
// Check to see how reasonable this reduced time is.
|
2008-03-07 21:13:01 +00:00
|
|
|
// Increase it to twice the estimated delay bound
|
|
|
|
// if all the following apply:
|
2008-03-13 23:35:13 +00:00
|
|
|
//
|
2009-05-06 21:52:50 +00:00
|
|
|
// 1) Twice the estimate is longer then the reduced delay bound
|
|
|
|
// 2) Twice the estimate is less then the original delay bound
|
|
|
|
// 3) Twice the estimate is less then the twice the reduced delay bound
|
|
|
|
if (est_wallclock_duration*2 > reduced_delay_bound
|
2008-12-15 21:14:32 +00:00
|
|
|
&& est_wallclock_duration*2 < delay_bound
|
|
|
|
&& est_wallclock_duration*2 < delay_bound*config.reliable_reduced_delay_bound*2
|
|
|
|
) {
|
2009-05-06 21:52:50 +00:00
|
|
|
reduced_delay_bound = est_wallclock_duration*2;
|
2007-09-21 18:10:54 +00:00
|
|
|
}
|
2009-05-06 21:52:50 +00:00
|
|
|
delay_bound = (int) reduced_delay_bound;
|
2007-09-21 18:10:54 +00:00
|
|
|
}
|
|
|
|
|
2006-10-22 00:42:44 +00:00
|
|
|
result.report_deadline = result.sent_time + delay_bound;
|
2005-07-28 22:21:46 +00:00
|
|
|
result.server_state = RESULT_SERVER_STATE_IN_PROGRESS;
|
2005-08-04 00:12:50 +00:00
|
|
|
} else {
|
2007-11-30 23:02:55 +00:00
|
|
|
// Result was already sent to this host but was lost,
|
|
|
|
// so we are resending it.
|
2005-08-02 19:38:43 +00:00
|
|
|
//
|
2005-09-12 23:49:16 +00:00
|
|
|
resent_result = true;
|
2009-06-01 22:15:14 +00:00
|
|
|
|
2007-11-30 23:02:55 +00:00
|
|
|
// TODO: explain the following
|
|
|
|
//
|
2005-08-02 19:38:43 +00:00
|
|
|
if (result.report_deadline < result.sent_time) {
|
2005-08-04 00:12:50 +00:00
|
|
|
result.report_deadline = result.sent_time + 10;
|
|
|
|
}
|
2006-10-22 00:42:44 +00:00
|
|
|
if (result.report_deadline > result.sent_time + delay_bound) {
|
|
|
|
result.report_deadline = result.sent_time + delay_bound;
|
2005-08-04 00:12:50 +00:00
|
|
|
}
|
2005-08-02 19:38:43 +00:00
|
|
|
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [RESULT#%d] [HOST#%d] (resend lost work)\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
result.id, g_reply->host.id
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2005-07-28 22:21:46 +00:00
|
|
|
}
|
2006-03-17 04:47:51 +00:00
|
|
|
retval = result.mark_as_sent(old_server_state);
|
2008-10-27 21:23:07 +00:00
|
|
|
if (retval == ERR_DB_NOT_FOUND) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2006-03-17 14:31:25 +00:00
|
|
|
"[RESULT#%d] [HOST#%d]: CAN'T SEND, already sent to another host\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
result.id, g_reply->host.id
|
2006-03-17 14:31:25 +00:00
|
|
|
);
|
|
|
|
} else if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2005-05-09 20:20:33 +00:00
|
|
|
"add_result_to_reply: can't update result: %d\n", retval
|
|
|
|
);
|
|
|
|
}
|
2006-03-17 14:31:25 +00:00
|
|
|
if (retval) return retval;
|
2004-09-10 00:41:48 +00:00
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
double est_dur = estimate_duration(wu, *bavp);
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-10 00:43:33 +00:00
|
|
|
"[HOST#%d] Sending [RESULT#%d %s] (est. dur. %.2f seconds)\n",
|
|
|
|
g_reply->host.id, result.id, result.name, est_dur
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2004-09-10 00:41:48 +00:00
|
|
|
|
2005-07-14 10:02:20 +00:00
|
|
|
retval = update_wu_transition_time(wu, result.report_deadline);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2005-07-14 10:02:20 +00:00
|
|
|
"add_result_to_reply: can't update WU transition time: %d\n",
|
|
|
|
retval
|
|
|
|
);
|
|
|
|
return retval;
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// The following overwrites the result's xml_doc field.
|
|
|
|
// But that's OK cuz we're done with DB updates
|
|
|
|
//
|
|
|
|
retval = insert_name_tags(result, wu);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2007-02-15 21:11:05 +00:00
|
|
|
"add_result_to_reply: can't insert name tags: %d\n",
|
2005-05-09 20:20:33 +00:00
|
|
|
retval
|
2004-09-10 00:41:48 +00:00
|
|
|
);
|
2005-05-09 20:20:33 +00:00
|
|
|
return retval;
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
|
|
|
retval = insert_deadline_tag(result);
|
|
|
|
if (retval) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2007-02-15 21:11:05 +00:00
|
|
|
"add_result_to_reply: can't insert deadline tag: %d\n", retval
|
2004-09-10 00:41:48 +00:00
|
|
|
);
|
2005-05-09 20:20:33 +00:00
|
|
|
return retval;
|
2004-09-10 00:41:48 +00:00
|
|
|
}
|
2008-03-28 19:35:00 +00:00
|
|
|
result.bavp = bavp;
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_result(result);
|
2009-01-10 00:43:33 +00:00
|
|
|
if (g_wreq->rsc_spec_request) {
|
2009-03-05 00:10:16 +00:00
|
|
|
if (bavp->host_usage.ncudas) {
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->cuda_req_secs -= est_dur;
|
2009-03-05 00:10:16 +00:00
|
|
|
g_wreq->cuda_req_instances -= bavp->host_usage.ncudas;
|
2009-08-17 17:07:38 +00:00
|
|
|
} else if (bavp->host_usage.natis) {
|
|
|
|
g_wreq->ati_req_secs -= est_dur;
|
|
|
|
g_wreq->ati_req_instances -= bavp->host_usage.natis;
|
2009-01-10 00:43:33 +00:00
|
|
|
} else {
|
|
|
|
g_wreq->cpu_req_secs -= est_dur;
|
|
|
|
g_wreq->cpu_req_instances -= bavp->host_usage.avg_ncpus;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
g_wreq->seconds_to_fill -= est_dur;
|
|
|
|
}
|
2009-01-30 21:25:24 +00:00
|
|
|
update_estimated_delay(*bavp, est_dur);
|
2009-06-01 22:15:14 +00:00
|
|
|
g_wreq->njobs_sent++;
|
|
|
|
g_wreq->njobs_on_host++;
|
2009-06-02 05:12:06 +00:00
|
|
|
if (bavp->host_usage.ncudas > 0) {
|
2009-06-01 22:15:14 +00:00
|
|
|
g_wreq->njobs_on_host_gpu++;
|
2009-08-17 17:07:38 +00:00
|
|
|
} else if (bavp->host_usage.natis > 0) {
|
|
|
|
g_wreq->njobs_on_host_gpu++;
|
2009-06-01 22:15:14 +00:00
|
|
|
} else {
|
|
|
|
g_wreq->njobs_on_host_cpu++;
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (!resent_result) g_reply->host.nresults_today++;
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check done_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
|
|
|
|
// add this result to workload for simulation
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
if (config.workload_sim && g_request->have_other_results_list) {
|
2008-12-15 21:14:32 +00:00
|
|
|
IP_RESULT ipr ("", time(0)+wu.delay_bound, est_dur);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_request->ip_results.push_back(ipr);
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check done_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
}
|
2007-07-05 04:18:48 +00:00
|
|
|
|
2008-06-04 23:04:12 +00:00
|
|
|
// mark job as done if debugging flag is set;
|
|
|
|
// this is used by sched_driver.C (performance testing)
|
2007-07-05 04:18:48 +00:00
|
|
|
//
|
|
|
|
if (mark_jobs_done) {
|
|
|
|
DB_WORKUNIT dbwu;
|
|
|
|
char buf[256];
|
|
|
|
sprintf(buf,
|
|
|
|
"server_state=%d outcome=%d",
|
|
|
|
RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_SUCCESS
|
|
|
|
);
|
|
|
|
result.update_field(buf);
|
|
|
|
|
|
|
|
dbwu.id = wu.id;
|
- server code: at some point I made a global var "SCHED_CONFIG config",
mostly so that the parse function could assume
that everything was initially zero.
However, various back-end functions pass around SCHED_CONFIG&
as an argument (also named "config").
This creates a shadow, which is always bad.
Worse is the possibility that some projects have back-end programs
that have a SCHED_CONFIG variable that's automatic,
and therefore isn't zero initially,
and therefore isn't parsing correctly.
To fix this, I changed the 2 vectors in SCHED_CONFIG into pointers,
and have the parse routine zero the structure.
I was tempted to remove the SCHED_CONFIG& args to back-end functions,
but this would have broken some projects' code.
I did, however, change the name from config to config_loc
to avoid shadowing.
Also fixed various other compiler warnings.
svn path=/trunk/boinc/; revision=15541
2008-07-02 17:24:53 +00:00
|
|
|
sprintf(buf, "transition_time=%ld", time(0));
|
2007-07-05 04:18:48 +00:00
|
|
|
dbwu.update_field(buf);
|
|
|
|
|
|
|
|
}
|
2008-06-04 23:04:12 +00:00
|
|
|
|
|
|
|
// If we're sending an unreplicated job to an untrusted host,
|
|
|
|
// mark it as replicated
|
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
if (wu.target_nresults == 1 && app->target_nresults > 1) {
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->trust) {
|
2008-08-06 18:36:30 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [WU#%d] sending to trusted host, not replicating\n", wu.id
|
2008-08-06 18:36:30 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
DB_WORKUNIT dbwu;
|
|
|
|
char buf[256];
|
2008-11-07 04:13:08 +00:00
|
|
|
sprintf(buf,
|
|
|
|
"target_nresults=%d, min_quorum=%d, transition_time=%ld",
|
2008-08-06 18:36:30 +00:00
|
|
|
app->target_nresults, app->target_nresults, time(0)
|
2008-06-04 23:04:12 +00:00
|
|
|
);
|
2008-08-06 18:36:30 +00:00
|
|
|
dbwu.id = wu.id;
|
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [WU#%d] sending to untrusted host, replicating\n", wu.id
|
2008-08-06 18:36:30 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
retval = dbwu.update_field(buf);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"WU update failed: %d", retval
|
|
|
|
);
|
|
|
|
}
|
2008-06-04 23:04:12 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-09-10 00:41:48 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-04-26 23:34:38 +00:00
|
|
|
// send messages to user about why jobs were or weren't sent
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
static void explain_to_user() {
|
2007-08-16 17:33:41 +00:00
|
|
|
char helpful[512];
|
2008-04-30 20:31:33 +00:00
|
|
|
unsigned int i;
|
- server code: at some point I made a global var "SCHED_CONFIG config",
mostly so that the parse function could assume
that everything was initially zero.
However, various back-end functions pass around SCHED_CONFIG&
as an argument (also named "config").
This creates a shadow, which is always bad.
Worse is the possibility that some projects have back-end programs
that have a SCHED_CONFIG variable that's automatic,
and therefore isn't zero initially,
and therefore isn't parsing correctly.
To fix this, I changed the 2 vectors in SCHED_CONFIG into pointers,
and have the parse routine zero the structure.
I was tempted to remove the SCHED_CONFIG& args to back-end functions,
but this would have broken some projects' code.
I did, however, change the name from config to config_loc
to avoid shadowing.
Also fixed various other compiler warnings.
svn path=/trunk/boinc/; revision=15541
2008-07-02 17:24:53 +00:00
|
|
|
int j;
|
2007-09-21 18:10:54 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// If work was sent from apps the user did not select, explain.
|
|
|
|
// NOTE: this will have to be done differently with matchmaker scheduling
|
2008-03-07 21:13:01 +00:00
|
|
|
//
|
2008-08-14 22:06:51 +00:00
|
|
|
if (!config.locality_scheduling && !config.matchmaker) {
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->njobs_sent && !g_wreq->user_apps_only) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"No work can be sent for the applications you have selected",
|
|
|
|
"high"
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// Inform the user about applications with no work
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
|
|
|
if (!g_wreq->preferred_apps[i].work_available) {
|
|
|
|
APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid);
|
2008-08-14 22:06:51 +00:00
|
|
|
// don't write message if the app is deprecated
|
|
|
|
//
|
|
|
|
if (app) {
|
|
|
|
char explanation[256];
|
|
|
|
sprintf(explanation,
|
|
|
|
"No work is available for %s",
|
2008-12-18 21:25:51 +00:00
|
|
|
find_user_friendly_name(g_wreq->preferred_apps[i].appid)
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message( explanation, "high");
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// Tell the user about applications they didn't qualify for
|
|
|
|
//
|
|
|
|
for (j=0; j<preferred_app_message_index; j++){
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_message(g_wreq->no_work_messages.at(j));
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"You have selected to receive work from other applications if no work is available for the applications you selected",
|
|
|
|
"high"
|
2009-02-26 03:03:35 +00:00
|
|
|
);
|
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"Sending work from other applications", "high"
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2007-08-16 17:33:41 +00:00
|
|
|
// if client asked for work and we're not sending any, explain why
|
|
|
|
//
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->njobs_sent == 0) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_TEMP);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message("No work sent", "high");
|
2009-01-20 21:31:13 +00:00
|
|
|
|
|
|
|
// Tell the user about applications with no work
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
2009-05-06 21:52:50 +00:00
|
|
|
if (!g_wreq->preferred_apps[i].work_available) {
|
|
|
|
APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid);
|
|
|
|
// don't write message if the app is deprecated
|
|
|
|
if (app != NULL) {
|
|
|
|
char explanation[256];
|
|
|
|
sprintf(explanation, "No work is available for %s",
|
2008-12-18 21:25:51 +00:00
|
|
|
find_user_friendly_name(g_wreq->preferred_apps[i].appid)
|
2008-10-27 21:23:07 +00:00
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(explanation, "high");
|
2009-05-06 21:52:50 +00:00
|
|
|
}
|
|
|
|
}
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2009-01-20 21:31:13 +00:00
|
|
|
|
|
|
|
// Tell the user about applications they didn't qualify for
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
for (i=0; i<g_wreq->no_work_messages.size(); i++){
|
2009-05-06 21:52:50 +00:00
|
|
|
g_reply->insert_message(g_wreq->no_work_messages.at(i));
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->no_allowed_apps_available) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"No work available for the applications you have selected. Please check your settings on the web site.",
|
|
|
|
"high"
|
2006-05-02 22:17:09 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->speed.insufficient) {
|
|
|
|
if (g_request->core_client_version>419) {
|
2005-03-19 18:24:24 +00:00
|
|
|
sprintf(helpful,
|
|
|
|
"(won't finish in time) "
|
2008-05-20 21:47:02 +00:00
|
|
|
"BOINC runs %.1f%% of time, computation enabled %.1f%% of that",
|
2008-12-16 16:29:54 +00:00
|
|
|
100.0*g_reply->host.on_frac, 100.0*g_reply->host.active_frac
|
2005-03-19 18:24:24 +00:00
|
|
|
);
|
2007-08-16 17:33:41 +00:00
|
|
|
} else {
|
2005-03-19 18:24:24 +00:00
|
|
|
sprintf(helpful,
|
|
|
|
"(won't finish in time) "
|
2007-08-21 20:07:50 +00:00
|
|
|
"Computer available %.1f%% of time",
|
2008-12-16 16:29:54 +00:00
|
|
|
100.0*g_reply->host.on_frac
|
2005-03-19 18:24:24 +00:00
|
|
|
);
|
|
|
|
}
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(helpful, "high");
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->hr_reject_temp) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"(there was work but it was committed to other platforms)",
|
|
|
|
"high"
|
2004-05-18 18:33:01 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->hr_reject_perm) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"(your platform is not supported by this project)",
|
|
|
|
"high"
|
2005-11-30 22:52:23 +00:00
|
|
|
);
|
|
|
|
}
|
2009-01-20 21:31:13 +00:00
|
|
|
if (g_wreq->outdated_client) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
" (your BOINC client is old - please install current version)",
|
|
|
|
"high"
|
2004-04-04 01:59:47 +00:00
|
|
|
);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_PERM);
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2007-09-21 18:10:54 +00:00
|
|
|
"Not sending work because client is outdated\n"
|
2004-04-04 01:59:47 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->excessive_work_buf) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"(Your network connection interval is longer than WU deadline)",
|
|
|
|
"high"
|
2005-04-18 18:42:29 +00:00
|
|
|
);
|
|
|
|
}
|
2009-01-12 23:47:52 +00:00
|
|
|
if (g_wreq->no_gpus_prefs) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"GPU jobs are available, but your preferences are set to not accept them",
|
|
|
|
"low"
|
2009-01-12 23:47:52 +00:00
|
|
|
);
|
|
|
|
}
|
2009-03-18 21:14:44 +00:00
|
|
|
if (g_wreq->no_cpu_prefs) {
|
|
|
|
g_reply->insert_message(
|
2009-08-21 19:14:15 +00:00
|
|
|
"CPU jobs are available, but your preferences are set to not accept them",
|
|
|
|
"low"
|
2009-03-18 21:14:44 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->daily_result_quota_exceeded) {
|
2005-05-12 21:04:39 +00:00
|
|
|
struct tm *rpc_time_tm;
|
|
|
|
int delay_time;
|
|
|
|
|
2009-06-01 22:15:14 +00:00
|
|
|
sprintf(helpful, "(reached daily quota of %d tasks)",
|
|
|
|
g_wreq->max_jobs_per_day
|
2009-01-20 00:54:16 +00:00
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(helpful, "high");
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-06-01 22:15:14 +00:00
|
|
|
"Daily result quota %d exceeded for host %d\n",
|
|
|
|
g_wreq->max_jobs_per_day, g_reply->host.id
|
2004-05-27 18:13:00 +00:00
|
|
|
);
|
2005-05-12 21:04:39 +00:00
|
|
|
|
|
|
|
// set delay so host won't return until a random time in
|
2008-06-30 23:05:16 +00:00
|
|
|
// the first hour of the next day.
|
|
|
|
// This is to prevent a lot of hosts from flooding the scheduler
|
|
|
|
// with requests at the same time of day.
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
time_t t = g_reply->host.rpc_time;
|
2008-06-30 23:05:16 +00:00
|
|
|
rpc_time_tm = localtime(&t);
|
|
|
|
delay_time = (23 - rpc_time_tm->tm_hour) * 3600
|
|
|
|
+ (59 - rpc_time_tm->tm_min) * 60
|
|
|
|
+ (60 - rpc_time_tm->tm_sec)
|
|
|
|
+ (int)(3600*(double)rand()/(double)RAND_MAX);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(delay_time);
|
2004-05-27 18:13:00 +00:00
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->max_jobs_on_host_exceeded) {
|
|
|
|
sprintf(helpful, "(reached limit of %d tasks)",
|
|
|
|
g_wreq->max_jobs_on_host
|
2007-05-14 15:21:38 +00:00
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(helpful, "high");
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_CACHE);
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-06-01 22:15:14 +00:00
|
|
|
"host %d already has %d job(s) in progress\n",
|
|
|
|
g_reply->host.id, g_wreq->njobs_on_host
|
2007-05-14 15:21:38 +00:00
|
|
|
);
|
2009-06-01 22:15:14 +00:00
|
|
|
}
|
|
|
|
if (g_wreq->max_jobs_on_host_cpu_exceeded) {
|
|
|
|
sprintf(helpful, "(reached limit of %d CPU tasks)",
|
|
|
|
g_wreq->max_jobs_on_host_cpu
|
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(helpful, "high");
|
2009-06-01 22:15:14 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_CACHE);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-06-01 22:15:14 +00:00
|
|
|
"host %d already has %d CPU job(s) in progress\n",
|
|
|
|
g_reply->host.id, g_wreq->njobs_on_host_cpu
|
2008-05-23 16:13:30 +00:00
|
|
|
);
|
|
|
|
}
|
2009-06-01 22:15:14 +00:00
|
|
|
if (g_wreq->max_jobs_on_host_gpu_exceeded) {
|
|
|
|
sprintf(helpful, "(reached limit of %d GPU tasks)",
|
|
|
|
g_wreq->max_jobs_on_host_gpu
|
|
|
|
);
|
2009-08-21 19:14:15 +00:00
|
|
|
g_reply->insert_message(helpful, "high");
|
2009-06-01 22:15:14 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_CACHE);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-06-01 22:15:14 +00:00
|
|
|
"host %d already has %d GPU job(s) in progress\n",
|
|
|
|
g_reply->host.id, g_wreq->njobs_on_host_gpu
|
2008-05-23 16:13:30 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-06-04 23:04:12 +00:00
|
|
|
#define ER_MAX 0.05
|
|
|
|
// decide whether to unreplicated jobs to this host
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
void set_trust() {
|
|
|
|
g_wreq->trust = false;
|
|
|
|
if (g_reply->host.error_rate > ER_MAX) {
|
2008-09-17 23:35:16 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] set_trust: error rate %f > %f, don't trust\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.error_rate, ER_MAX
|
2008-09-17 23:35:16 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
double x = sqrt(g_reply->host.error_rate/ER_MAX);
|
|
|
|
if (drand() > x) g_wreq->trust = true;
|
2008-09-17 23:35:16 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] set_trust: random choice for error rate %f: %s\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.error_rate, g_wreq->trust?"yes":"no"
|
2008-09-17 23:35:16 +00:00
|
|
|
);
|
|
|
|
}
|
2008-06-04 23:04:12 +00:00
|
|
|
}
|
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
static double clamp_req_sec(double x) {
|
|
|
|
if (x < MIN_REQ_SECS) return MIN_REQ_SECS;
|
|
|
|
if (x > MAX_REQ_SECS) return MAX_REQ_SECS;
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
// decipher request type, fill in WORK_REQ
|
|
|
|
//
|
|
|
|
void send_work_setup() {
|
|
|
|
g_wreq->disk_available = max_allowable_disk();
|
|
|
|
get_mem_sizes();
|
|
|
|
get_running_frac();
|
|
|
|
get_dcf();
|
2009-06-22 21:11:19 +00:00
|
|
|
g_wreq->get_job_limits();
|
2009-03-05 23:08:53 +00:00
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->seconds_to_fill = clamp_req_sec(g_request->work_req_seconds);
|
|
|
|
g_wreq->cpu_req_secs = clamp_req_sec(g_request->cpu_req_secs);
|
|
|
|
g_wreq->cpu_req_instances = g_request->cpu_req_instances;
|
2009-01-24 21:51:19 +00:00
|
|
|
g_wreq->anonymous_platform = anonymous(g_request->platforms.list[0]);
|
|
|
|
|
2009-01-30 22:30:35 +00:00
|
|
|
if (g_request->coproc_cuda) {
|
2009-03-05 23:08:53 +00:00
|
|
|
g_wreq->cuda_req_secs = clamp_req_sec(g_request->coproc_cuda->req_secs);
|
|
|
|
g_wreq->cuda_req_instances = g_request->coproc_cuda->req_instances;
|
2009-01-30 22:30:35 +00:00
|
|
|
if (g_request->coproc_cuda->estimated_delay < 0) {
|
|
|
|
g_request->coproc_cuda->estimated_delay = g_request->cpu_estimated_delay;
|
2009-01-24 21:51:19 +00:00
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2009-08-17 17:07:38 +00:00
|
|
|
if (g_request->coproc_ati) {
|
|
|
|
g_wreq->ati_req_secs = clamp_req_sec(g_request->coproc_ati->req_secs);
|
|
|
|
g_wreq->ati_req_instances = g_request->coproc_ati->req_instances;
|
|
|
|
if (g_request->coproc_ati->estimated_delay < 0) {
|
|
|
|
g_request->coproc_ati->estimated_delay = g_request->cpu_estimated_delay;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (g_wreq->cpu_req_secs || g_wreq->cuda_req_secs || g_wreq->ati_req_secs) {
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->rsc_spec_request = true;
|
|
|
|
} else {
|
2009-01-13 00:56:12 +00:00
|
|
|
g_wreq->rsc_spec_request = false;
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-30 21:25:24 +00:00
|
|
|
"[send] CPU: req %.2f sec, %.2f instances; est delay %.2f\n",
|
|
|
|
g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
|
|
|
|
g_request->cpu_estimated_delay
|
2009-01-10 00:43:33 +00:00
|
|
|
);
|
2009-01-30 21:25:24 +00:00
|
|
|
if (g_request->coproc_cuda) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] CUDA: req %.2f sec, %.2f instances; est delay %.2f\n",
|
|
|
|
g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
|
|
|
|
g_request->coproc_cuda->estimated_delay
|
|
|
|
);
|
|
|
|
}
|
2009-08-17 17:07:38 +00:00
|
|
|
if (g_request->coproc_ati) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] ATI: req %.2f sec, %.2f instances; est delay %.2f\n",
|
|
|
|
g_wreq->ati_req_secs, g_wreq->ati_req_instances,
|
|
|
|
g_request->coproc_ati->estimated_delay
|
|
|
|
);
|
|
|
|
}
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] work_req_seconds: %.2f secs\n",
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->seconds_to_fill
|
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] %s matchmaker scheduling; %s EDF sim\n",
|
2008-05-23 16:13:30 +00:00
|
|
|
config.matchmaker?"Using":"Not using",
|
2008-05-02 17:48:29 +00:00
|
|
|
config.workload_sim?"Using":"Not using"
|
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] available disk %.2f GB, work_buf_min %d\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->disk_available/GIGA,
|
|
|
|
(int)g_request->global_prefs.work_buf_min()
|
2008-05-02 17:48:29 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-30 21:25:24 +00:00
|
|
|
"[send] active_frac %f on_frac %f DCF %f\n",
|
2008-12-18 18:19:42 +00:00
|
|
|
g_reply->host.active_frac,
|
|
|
|
g_reply->host.on_frac,
|
2009-01-30 21:25:24 +00:00
|
|
|
g_reply->host.duration_correction_factor
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2009-03-05 23:08:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void send_work() {
|
|
|
|
if (!g_wreq->rsc_spec_request && g_wreq->seconds_to_fill == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all_apps_use_hr && hr_unknown_platform(g_request->host)) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"Not sending work because unknown HR class\n"
|
|
|
|
);
|
|
|
|
g_wreq->hr_reject_perm = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
get_host_info();
|
|
|
|
get_prefs_info();
|
|
|
|
|
|
|
|
set_trust();
|
2008-04-26 23:34:38 +00:00
|
|
|
|
|
|
|
if (config.enable_assignment) {
|
2008-12-19 18:14:02 +00:00
|
|
|
if (send_assigned_jobs()) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_assignment) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[assign] [HOST#%d] sent assigned jobs\n", g_reply->host.id
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
if (config.workload_sim && g_request->have_other_results_list) {
|
2008-04-26 23:34:38 +00:00
|
|
|
init_ip_results(
|
2009-06-22 21:11:19 +00:00
|
|
|
g_request->global_prefs.work_buf_min(),
|
|
|
|
g_wreq->effective_ncpus, g_request->ip_results
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2009-03-03 00:12:55 +00:00
|
|
|
if (config.locality_scheduler_fraction > 0) {
|
|
|
|
if (drand() < config.locality_scheduler_fraction) {
|
2009-05-06 21:52:50 +00:00
|
|
|
if (config.debug_locality)
|
|
|
|
log_messages.printf(MSG_NORMAL, "[mixed] sending locality work first\n");
|
|
|
|
send_work_locality();
|
|
|
|
if (config.debug_locality)
|
|
|
|
log_messages.printf(MSG_NORMAL, "[mixed] sending non-locality work second\n");
|
|
|
|
send_work_old();
|
2009-03-03 00:12:55 +00:00
|
|
|
} else {
|
2009-05-06 21:52:50 +00:00
|
|
|
if (config.debug_locality)
|
|
|
|
log_messages.printf(MSG_NORMAL, "[mixed] sending non-locality work first\n");
|
|
|
|
send_work_old();
|
|
|
|
if (config.debug_locality)
|
|
|
|
log_messages.printf(MSG_NORMAL, "[mixed] sending locality work second\n");
|
|
|
|
send_work_locality();
|
2009-03-03 00:12:55 +00:00
|
|
|
}
|
|
|
|
} else if (config.locality_scheduling) {
|
2008-12-19 18:14:02 +00:00
|
|
|
send_work_locality();
|
2008-05-23 16:13:30 +00:00
|
|
|
} else if (config.matchmaker) {
|
2008-12-16 16:29:54 +00:00
|
|
|
send_work_matchmaker();
|
2008-05-23 16:13:30 +00:00
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
send_work_old();
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
explain_to_user();
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Revision-identification string for this file.
// NOTE(review): "$Id$" looks like a version-control keyword placeholder
// that is expanded on checkout - confirm against the repository setup.
const char *BOINC_RCSID_32dcd335e7 = "$Id$";
|