2008-08-06 18:36:30 +00:00
|
|
|
// This file is part of BOINC.
|
2005-01-20 23:22:22 +00:00
|
|
|
// http://boinc.berkeley.edu
|
2008-08-06 18:36:30 +00:00
|
|
|
// Copyright (C) 2008 University of California
|
2004-07-09 21:17:42 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
|
|
// under the terms of the GNU Lesser General Public License
|
|
|
|
// as published by the Free Software Foundation,
|
|
|
|
// either version 3 of the License, or (at your option) any later version.
|
2004-07-09 21:17:42 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// BOINC is distributed in the hope that it will be useful,
|
2005-01-20 23:22:22 +00:00
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
// See the GNU Lesser General Public License for more details.
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
2008-08-06 18:36:30 +00:00
|
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2008-04-26 23:34:38 +00:00
|
|
|
// scheduler code related to sending jobs
|
2005-02-10 06:21:58 +00:00
|
|
|
|
2005-11-21 18:34:44 +00:00
|
|
|
#include "config.h"
|
2004-04-04 01:59:47 +00:00
|
|
|
#include <vector>
|
2008-02-25 18:05:04 +00:00
|
|
|
#include <list>
|
2004-04-04 01:59:47 +00:00
|
|
|
#include <string>
|
2004-06-24 08:49:37 +00:00
|
|
|
#include <ctime>
|
|
|
|
#include <cstdio>
|
2008-02-27 23:26:38 +00:00
|
|
|
#include <cstring>
|
2005-02-02 18:13:00 +00:00
|
|
|
#include <stdlib.h>
|
2009-03-03 00:12:55 +00:00
|
|
|
#include <sys/time.h>
|
2005-02-10 06:21:58 +00:00
|
|
|
#include <unistd.h>
|
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
#include "error_numbers.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
#include "parse.h"
|
2006-07-11 21:56:53 +00:00
|
|
|
#include "util.h"
|
2007-02-21 16:26:51 +00:00
|
|
|
#include "str_util.h"
|
2009-01-13 23:06:02 +00:00
|
|
|
#include "synch.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
#include "server_types.h"
|
|
|
|
#include "sched_shmem.h"
|
|
|
|
#include "sched_config.h"
|
|
|
|
#include "sched_util.h"
|
|
|
|
#include "main.h"
|
2005-08-04 03:50:04 +00:00
|
|
|
#include "sched_array.h"
|
2004-04-08 08:15:23 +00:00
|
|
|
#include "sched_msgs.h"
|
2007-05-31 18:14:45 +00:00
|
|
|
#include "sched_hr.h"
|
2007-06-20 22:34:06 +00:00
|
|
|
#include "hr.h"
|
2004-09-10 21:02:11 +00:00
|
|
|
#include "sched_locality.h"
|
2005-02-10 20:31:11 +00:00
|
|
|
#include "sched_timezone.h"
|
2008-02-21 00:47:50 +00:00
|
|
|
#include "sched_assign.h"
|
2008-03-18 21:22:44 +00:00
|
|
|
#include "sched_plan.h"
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
#include "sched_send.h"
|
2005-02-10 06:21:58 +00:00
|
|
|
|
2004-07-03 21:38:22 +00:00
|
|
|
#ifdef _USING_FCGI_
|
2008-09-09 19:10:42 +00:00
|
|
|
#include "boinc_fcgi.h"
|
2004-07-03 21:38:22 +00:00
|
|
|
#endif
|
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps were CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
it will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
// if host sends us an impossible RAM size, use this instead
//
const double DEFAULT_RAM_SIZE = 64000000;

void send_work_matchmaker();

// index of the next "preferred app" message to show
// (presumably rotated across requests — confirm at use site)
//
int preferred_app_message_index=0;
|
|
|
|
|
2007-11-30 23:02:55 +00:00
|
|
|
const char* infeasible_string(int code) {
|
|
|
|
switch (code) {
|
|
|
|
case INFEASIBLE_MEM: return "Not enough memory";
|
|
|
|
case INFEASIBLE_DISK: return "Not enough disk";
|
|
|
|
case INFEASIBLE_CPU: return "CPU too slow";
|
|
|
|
case INFEASIBLE_APP_SETTING: return "App not selected";
|
|
|
|
case INFEASIBLE_WORKLOAD: return "Existing workload";
|
|
|
|
case INFEASIBLE_DUP: return "Already in reply";
|
|
|
|
case INFEASIBLE_HR: return "Homogeneous redundancy";
|
2008-03-07 21:13:01 +00:00
|
|
|
case INFEASIBLE_BANDWIDTH: return "Download bandwidth too low";
|
2007-11-30 23:02:55 +00:00
|
|
|
}
|
|
|
|
return "Unknown";
|
|
|
|
}
|
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
const double MIN_REQ_SECS = 0;
|
|
|
|
const double MAX_REQ_SECS = (28*SECONDS_IN_DAY);
|
2008-02-21 20:10:10 +00:00
|
|
|
|
2009-01-20 00:54:16 +00:00
|
|
|
const int MAX_CUDA_DEVS = 8;
|
|
|
|
// don't believe clients who claim they have more CUDA devices than this
|
|
|
|
|
|
|
|
// the # of CPUs in EDF simulation
|
2008-10-21 23:16:07 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
inline int effective_ncpus() {
|
|
|
|
int ncpus = g_reply->host.p_ncpus;
|
2008-02-21 20:10:10 +00:00
|
|
|
if (ncpus > config.max_ncpus) ncpus = config.max_ncpus;
|
|
|
|
if (ncpus < 1) ncpus = 1;
|
2009-01-20 00:54:16 +00:00
|
|
|
return ncpus;
|
|
|
|
}
|
|
|
|
|
|
|
|
// total_max_results_per is this multiplier times max_results_day
|
|
|
|
//
|
|
|
|
inline int max_results_day_multiplier() {
|
|
|
|
int n = g_reply->host.p_ncpus;
|
|
|
|
if (n > config.max_ncpus) n = config.max_ncpus;
|
|
|
|
if (n < 1) n = 1;
|
|
|
|
if (config.cuda_multiplier) {
|
2008-12-22 00:19:20 +00:00
|
|
|
COPROC* cp = g_request->coprocs.lookup("CUDA");
|
2009-01-20 00:54:16 +00:00
|
|
|
if (cp) {
|
|
|
|
int m = cp->count;
|
|
|
|
if (m > MAX_CUDA_DEVS) m = MAX_CUDA_DEVS;
|
2009-02-25 15:34:51 +00:00
|
|
|
//n += m*config.cuda_multiplier;
|
|
|
|
n = m*config.cuda_multiplier;
|
2008-10-21 23:16:07 +00:00
|
|
|
}
|
|
|
|
}
|
2009-01-20 00:54:16 +00:00
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
// scale factor for max_wus_in_progress
|
|
|
|
//
|
|
|
|
inline int max_wus_in_progress_multiplier() {
|
|
|
|
int n = g_reply->host.p_ncpus;
|
|
|
|
if (n > config.max_ncpus) n = config.max_ncpus;
|
|
|
|
if (n < 1) n = 1;
|
|
|
|
COPROC* cp = g_request->coprocs.lookup("CUDA");
|
|
|
|
if (cp) {
|
|
|
|
int m = cp->count;
|
|
|
|
if (m > MAX_CUDA_DEVS) m = MAX_CUDA_DEVS;
|
|
|
|
if (m > n) {
|
|
|
|
n = m;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return n;
|
2008-02-21 20:10:10 +00:00
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2009-03-06 22:21:47 +00:00
|
|
|
inline void dont_need_message(
|
|
|
|
const char* p, APP_VERSION* avp, CLIENT_APP_VERSION* cavp
|
|
|
|
) {
|
|
|
|
if (!config.debug_version_select) return;
|
|
|
|
if (avp) {
|
|
|
|
APP* app = ssp->lookup_app(avp->appid);
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] Don't need %s jobs, skipping version %d for %s (%s)\n",
|
|
|
|
p, avp->version_num, app->name, avp->plan_class
|
|
|
|
);
|
|
|
|
} else if (cavp) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] Don't need %s jobs, skipping anonymous version %d for %s (%s)\n",
|
|
|
|
p, cavp->version_num, cavp->app_name, cavp->plan_class
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
// for new-style requests, check that the app version uses a
|
|
|
|
// resource for which we need work
|
|
|
|
//
|
2009-03-06 22:21:47 +00:00
|
|
|
bool need_this_resource(
|
|
|
|
HOST_USAGE& host_usage, APP_VERSION* avp, CLIENT_APP_VERSION* cavp
|
|
|
|
) {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
if (g_wreq->rsc_spec_request) {
|
|
|
|
if (host_usage.ncudas) {
|
|
|
|
if (!g_wreq->need_cuda()) {
|
2009-03-06 22:21:47 +00:00
|
|
|
dont_need_message("CUDA", avp, cavp);
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!g_wreq->need_cpu()) {
|
2009-03-06 22:21:47 +00:00
|
|
|
dont_need_message("CPU", avp, cavp);
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
return false;;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
// scan through client's anonymous apps and pick the best one
|
|
|
|
//
|
|
|
|
CLIENT_APP_VERSION* get_app_version_anonymous(APP& app) {
|
2004-04-04 01:59:47 +00:00
|
|
|
unsigned int i;
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
CLIENT_APP_VERSION* best = NULL;
|
|
|
|
bool found = false;
|
|
|
|
char message[256];
|
2004-04-04 01:59:47 +00:00
|
|
|
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
for (i=0; i<g_request->client_app_versions.size(); i++) {
|
|
|
|
CLIENT_APP_VERSION& cav = g_request->client_app_versions[i];
|
|
|
|
if (strcmp(cav.app_name, app.name)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (cav.version_num < app.min_version) {
|
|
|
|
continue;
|
|
|
|
}
|
2009-03-06 22:21:47 +00:00
|
|
|
found = true;
|
|
|
|
if (!need_this_resource(cav.host_usage, NULL, &cav)) {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (best) {
|
|
|
|
if (cav.host_usage.flops > best->host_usage.flops) {
|
|
|
|
best = &cav;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
best = &cav;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
}
|
2009-03-06 00:20:27 +00:00
|
|
|
if (!best) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] Didn't find anonymous platform app for %s\n",
|
|
|
|
app.name
|
|
|
|
);
|
|
|
|
}
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
sprintf(message,
|
|
|
|
"Your app_info.xml file doesn't have a version of %s.",
|
|
|
|
app.user_friendly_name
|
|
|
|
);
|
|
|
|
g_wreq->insert_no_work_message(USER_MESSAGE(message, "high"));
|
|
|
|
}
|
|
|
|
return best;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
2008-03-27 18:25:29 +00:00
|
|
|
// return BEST_APP_VERSION for the given host, or NULL if none
|
2005-07-28 10:13:30 +00:00
|
|
|
//
|
2008-03-18 21:22:44 +00:00
|
|
|
//
|
2009-03-07 01:00:05 +00:00
|
|
|
BEST_APP_VERSION* get_app_version(WORKUNIT& wu, bool check_req) {
|
2005-07-28 10:13:30 +00:00
|
|
|
bool found;
|
2008-03-18 21:22:44 +00:00
|
|
|
unsigned int i;
|
2009-01-20 21:31:13 +00:00
|
|
|
int retval, j;
|
2008-03-27 18:25:29 +00:00
|
|
|
BEST_APP_VERSION* bavp;
|
2008-08-06 22:08:47 +00:00
|
|
|
char message[256];
|
2008-03-18 21:22:44 +00:00
|
|
|
|
2008-03-27 18:25:29 +00:00
|
|
|
// see if app is already in memoized array
|
|
|
|
//
|
2009-02-26 00:23:23 +00:00
|
|
|
std::vector<BEST_APP_VERSION*>::iterator bavi;
|
2009-01-27 21:18:06 +00:00
|
|
|
bavi = g_wreq->best_app_versions.begin();
|
|
|
|
while (bavi != g_wreq->best_app_versions.end()) {
|
|
|
|
bavp = *bavi;
|
2008-03-27 18:25:29 +00:00
|
|
|
if (bavp->appid == wu.appid) {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
if (!bavp->present) return NULL;
|
2009-01-10 00:43:33 +00:00
|
|
|
|
|
|
|
// if we previously chose a CUDA app but don't need more CUDA work,
|
2009-01-27 21:18:06 +00:00
|
|
|
// delete record, fall through, and find another version
|
2009-01-10 00:43:33 +00:00
|
|
|
//
|
2009-03-07 01:00:05 +00:00
|
|
|
if (check_req
|
|
|
|
&& g_wreq->rsc_spec_request
|
2009-03-05 00:10:16 +00:00
|
|
|
&& bavp->host_usage.ncudas > 0
|
2009-01-10 00:43:33 +00:00
|
|
|
&& !g_wreq->need_cuda()
|
|
|
|
) {
|
2009-03-07 01:00:05 +00:00
|
|
|
if (config.debug_version_select) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] have CUDA version but no more CUDA work needed\n"
|
|
|
|
);
|
|
|
|
}
|
2009-01-27 21:18:06 +00:00
|
|
|
g_wreq->best_app_versions.erase(bavi);
|
|
|
|
break;
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2009-03-16 15:53:35 +00:00
|
|
|
|
|
|
|
// same, CPU
|
|
|
|
//
|
|
|
|
if (check_req
|
|
|
|
&& g_wreq->rsc_spec_request
|
|
|
|
&& !bavp->host_usage.ncudas
|
|
|
|
&& !g_wreq->need_cpu()
|
|
|
|
) {
|
|
|
|
if (config.debug_version_select) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] have CPU version but no more CPU work needed\n"
|
|
|
|
);
|
|
|
|
}
|
|
|
|
g_wreq->best_app_versions.erase(bavi);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-03-27 18:25:29 +00:00
|
|
|
return bavp;
|
|
|
|
}
|
2009-01-27 21:18:06 +00:00
|
|
|
bavi++;
|
2008-03-27 18:25:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
APP* app = ssp->lookup_app(wu.appid);
|
2008-03-18 21:22:44 +00:00
|
|
|
if (!app) {
|
2008-12-16 16:29:54 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"WU refers to nonexistent app: %d\n", wu.appid
|
|
|
|
);
|
2008-03-27 18:25:29 +00:00
|
|
|
return NULL;
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
|
|
|
|
2008-03-27 18:25:29 +00:00
|
|
|
bavp = new BEST_APP_VERSION;
|
|
|
|
bavp->appid = wu.appid;
|
2009-01-24 21:51:19 +00:00
|
|
|
if (g_wreq->anonymous_platform) {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
CLIENT_APP_VERSION* cavp = get_app_version_anonymous(*app);
|
|
|
|
if (!cavp) {
|
|
|
|
bavp->present = false;
|
2008-03-27 18:25:29 +00:00
|
|
|
} else {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
bavp->present = true;
|
2009-03-07 01:00:05 +00:00
|
|
|
if (config.debug_version_select) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-03-07 01:00:05 +00:00
|
|
|
"[version] Found anonymous platform app for %s: plan class %s\n",
|
2009-03-06 00:20:27 +00:00
|
|
|
app->name, cavp->plan_class
|
2008-08-04 18:48:26 +00:00
|
|
|
);
|
|
|
|
}
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
bavp->host_usage = cavp->host_usage;
|
|
|
|
|
|
|
|
// if client didn't tell us about the app version,
|
|
|
|
// assume it uses 1 CPU
|
2008-05-06 04:20:32 +00:00
|
|
|
//
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
if (bavp->host_usage.flops == 0) {
|
|
|
|
bavp->host_usage.flops = g_reply->host.p_fpops;
|
|
|
|
}
|
|
|
|
if (bavp->host_usage.avg_ncpus == 0 && bavp->host_usage.ncudas == 0) {
|
|
|
|
bavp->host_usage.avg_ncpus = 1;
|
|
|
|
}
|
|
|
|
bavp->cavp = cavp;
|
2005-07-28 10:13:30 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->best_app_versions.push_back(bavp);
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
if (!bavp->present) return NULL;
|
2008-03-27 18:25:29 +00:00
|
|
|
return bavp;
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
|
|
|
|
2008-08-04 18:48:26 +00:00
|
|
|
// Go through the client's platforms.
|
2008-03-18 21:22:44 +00:00
|
|
|
// Scan the app versions for each platform.
|
|
|
|
// Find the one with highest expected FLOPS
|
|
|
|
//
|
2008-03-27 18:25:29 +00:00
|
|
|
bavp->host_usage.flops = 0;
|
|
|
|
bavp->avp = NULL;
|
2009-01-20 21:31:13 +00:00
|
|
|
bool no_version_for_platform = true;
|
2009-01-23 19:36:17 +00:00
|
|
|
int app_plan_reject = 0;
|
2008-12-16 16:29:54 +00:00
|
|
|
for (i=0; i<g_request->platforms.list.size(); i++) {
|
|
|
|
PLATFORM* p = g_request->platforms.list[i];
|
2008-03-18 21:22:44 +00:00
|
|
|
for (j=0; j<ssp->napp_versions; j++) {
|
|
|
|
HOST_USAGE host_usage;
|
|
|
|
APP_VERSION& av = ssp->app_versions[j];
|
|
|
|
if (av.appid != wu.appid) continue;
|
|
|
|
if (av.platformid != p->id) continue;
|
2009-01-20 21:31:13 +00:00
|
|
|
no_version_for_platform = false;
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_request->core_client_version < av.min_core_version) {
|
2008-08-19 03:00:17 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2008-08-04 23:30:37 +00:00
|
|
|
"outdated client version %d < min core version %d\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_request->core_client_version, av.min_core_version
|
2008-08-04 23:30:37 +00:00
|
|
|
);
|
2009-01-20 21:31:13 +00:00
|
|
|
g_wreq->outdated_client = true;
|
2008-03-18 21:22:44 +00:00
|
|
|
continue;
|
|
|
|
}
|
2008-07-16 04:25:11 +00:00
|
|
|
if (strlen(av.plan_class)) {
|
2009-01-20 21:31:13 +00:00
|
|
|
if (!g_request->client_cap_plan_class) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"client version %d lacks plan class capability\n",
|
|
|
|
g_request->core_client_version
|
|
|
|
);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
retval = app_plan(*g_request, av.plan_class, host_usage);
|
|
|
|
if (retval) {
|
2009-01-23 19:36:17 +00:00
|
|
|
app_plan_reject = retval;
|
2008-03-27 18:25:29 +00:00
|
|
|
continue;
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
host_usage.sequential_app(g_reply->host.p_fpops);
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
2009-01-13 00:56:12 +00:00
|
|
|
|
2009-03-18 21:14:44 +00:00
|
|
|
// skip versions for resources we don't need
|
|
|
|
//
|
2009-03-06 22:21:47 +00:00
|
|
|
if (!need_this_resource(host_usage, &av, NULL)) {
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
continue;
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2009-03-18 21:14:44 +00:00
|
|
|
|
|
|
|
// skip versions that go against resource prefs
|
|
|
|
//
|
|
|
|
if (host_usage.ncudas && g_wreq->no_gpus) {
|
|
|
|
if (config.debug_version_select) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] Skipping CUDA version - user prefs say no GPUS\n"
|
|
|
|
);
|
|
|
|
g_wreq->no_gpus_prefs = true;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!host_usage.ncudas && g_wreq->no_cpu) {
|
|
|
|
if (config.debug_version_select) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] Skipping CPU version - user prefs say no CPUs\n"
|
|
|
|
);
|
|
|
|
g_wreq->no_cpu_prefs = true;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pick the fastest version
|
|
|
|
//
|
2008-03-27 18:25:29 +00:00
|
|
|
if (host_usage.flops > bavp->host_usage.flops) {
|
|
|
|
bavp->host_usage = host_usage;
|
|
|
|
bavp->avp = &av;
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
2005-07-28 10:13:30 +00:00
|
|
|
}
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->best_app_versions.push_back(bavp);
|
2008-03-27 18:25:29 +00:00
|
|
|
if (bavp->avp) {
|
2008-03-18 21:22:44 +00:00
|
|
|
if (config.debug_version_select) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-03-06 22:21:47 +00:00
|
|
|
"[version] Best version of app %s is ID %d (%.2f GFLOPS)\n",
|
2008-05-09 20:54:52 +00:00
|
|
|
app->name, bavp->avp->id, bavp->host_usage.flops/1e9
|
2008-03-18 21:22:44 +00:00
|
|
|
);
|
|
|
|
}
|
2009-03-07 01:00:05 +00:00
|
|
|
bavp->present = true;
|
2008-03-27 18:25:29 +00:00
|
|
|
} else {
|
2009-01-20 21:31:13 +00:00
|
|
|
// Here if there's no app version we can use.
|
|
|
|
// Could be because:
|
|
|
|
// - none exists for platform
|
|
|
|
// - one exists for platform, but host lacks processor type
|
|
|
|
// - one exists for platform, but no work requested for processor type
|
|
|
|
// - one exists but requires newer client
|
|
|
|
// - one exists but plan function rejects this host
|
2008-03-27 18:25:29 +00:00
|
|
|
//
|
|
|
|
if (config.debug_version_select) {
|
2009-01-13 00:56:12 +00:00
|
|
|
for (i=0; i<g_request->platforms.list.size(); i++) {
|
|
|
|
PLATFORM* p = g_request->platforms.list[i];
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[version] no app version available: APP#%d (%s) PLATFORM#%d (%s) min_version %d\n",
|
2009-01-13 00:56:12 +00:00
|
|
|
app->id, app->name, p->id, p->name, app->min_version
|
|
|
|
);
|
|
|
|
}
|
2008-03-27 18:25:29 +00:00
|
|
|
}
|
2009-01-20 21:31:13 +00:00
|
|
|
if (no_version_for_platform) {
|
2009-01-12 23:47:52 +00:00
|
|
|
sprintf(message,
|
|
|
|
"%s is not available for your type of computer.",
|
|
|
|
app->user_friendly_name
|
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_wreq->insert_no_work_message(USER_MESSAGE(message, "high"));
|
2009-01-12 23:47:52 +00:00
|
|
|
}
|
2009-03-05 22:12:21 +00:00
|
|
|
const char* p = NULL;
|
2009-01-23 19:36:17 +00:00
|
|
|
switch (app_plan_reject) {
|
2009-02-17 03:16:25 +00:00
|
|
|
case PLAN_REJECT_CUDA_NO_DEVICE:
|
2009-01-20 21:31:13 +00:00
|
|
|
p = "Your computer has no CUDA device"; break;
|
2009-02-17 03:16:25 +00:00
|
|
|
case PLAN_REJECT_CUDA_VERSION:
|
2009-01-20 21:31:13 +00:00
|
|
|
p = "Your CUDA device has the wrong software version"; break;
|
2009-02-17 03:16:25 +00:00
|
|
|
case PLAN_REJECT_NVIDIA_DRIVER_VERSION:
|
|
|
|
p = "Your CUDA device has the wrong driver version"; break;
|
|
|
|
case PLAN_REJECT_CUDA_MEM:
|
2009-01-20 21:31:13 +00:00
|
|
|
p = "Your CUDA device has insufficient memory"; break;
|
2009-02-17 03:16:25 +00:00
|
|
|
case PLAN_REJECT_CUDA_SPEED:
|
2009-01-20 21:31:13 +00:00
|
|
|
p = "Your CUDA device is too slow"; break;
|
|
|
|
}
|
|
|
|
if (p) {
|
|
|
|
sprintf(message,
|
|
|
|
"Can't use CUDA app for %s: %s",
|
|
|
|
app->user_friendly_name, p
|
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_wreq->insert_no_work_message(USER_MESSAGE(message, "high"));
|
2009-01-20 21:31:13 +00:00
|
|
|
}
|
2008-04-23 23:34:26 +00:00
|
|
|
return NULL;
|
2008-03-18 21:22:44 +00:00
|
|
|
}
|
2008-03-27 18:25:29 +00:00
|
|
|
return bavp;
|
2005-07-28 10:13:30 +00:00
|
|
|
}
|
|
|
|
|
- server code: at some point I made a global var "SCHED_CONFIG config",
mostly so that the parse function could assume
that everything was initially zero.
However, various back-end functions pass around SCHED_CONFIG&
as an argument (also named "config").
This creates a shadow, which is always bad.
Worse is the possibility that some projects have back-end programs
that have a SCHED_CONFIG variable that's automatic,
and therefore isn't zero initially,
and therefore isn't parsing correctly.
To fix this, I changed the 2 vectors in SCHED_CONFIG into pointers,
and have the parse routine zero the structure.
I was tempted to remove the SCHED_CONFIG& args to back-end functions,
but this would have broken some projects' code.
I did, however, change the name from config to config_loc
to avoid shadowing.
Also fixed various other compiler warnings.
svn path=/trunk/boinc/; revision=15541
2008-07-02 17:24:53 +00:00
|
|
|
static const char* find_user_friendly_name(int appid) {
|
2008-03-07 21:13:01 +00:00
|
|
|
APP* app = ssp->lookup_app(appid);
|
|
|
|
if (app) return app->user_friendly_name;
|
|
|
|
return "deprecated application";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
// Compute the max additional disk usage we can impose on the host.
|
|
|
|
// Depending on the client version, it can either send us
|
|
|
|
// - d_total and d_free (pre 4 oct 2005)
|
|
|
|
// - the above plus d_boinc_used_total and d_boinc_used_project
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
double max_allowable_disk() {
|
|
|
|
HOST host = g_request->host;
|
|
|
|
GLOBAL_PREFS prefs = g_request->global_prefs;
|
2004-08-14 00:37:38 +00:00
|
|
|
double x1, x2, x3, x;
|
|
|
|
|
2005-12-17 04:53:48 +00:00
|
|
|
// defaults are from config.xml
|
|
|
|
// if not there these are used:
|
|
|
|
// -default_max_used_gb= 100
|
|
|
|
// -default_max_used_pct = 50
|
|
|
|
// -default_min_free_gb = .001
|
2004-08-14 00:37:38 +00:00
|
|
|
//
|
2006-10-06 18:52:50 +00:00
|
|
|
if (prefs.disk_max_used_gb == 0) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_max_used_gb = config.default_disk_max_used_gb;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
|
|
|
if (prefs.disk_max_used_pct == 0) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_max_used_pct = config.default_disk_max_used_pct;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
|
|
|
if (prefs.disk_min_free_gb < config.default_disk_min_free_gb) {
|
2005-12-17 04:53:48 +00:00
|
|
|
prefs.disk_min_free_gb = config.default_disk_min_free_gb;
|
2006-10-06 18:52:50 +00:00
|
|
|
}
|
2004-08-14 00:37:38 +00:00
|
|
|
|
|
|
|
// no defaults for total/free disk space (host.d_total, d_free)
|
2005-10-04 21:44:58 +00:00
|
|
|
// if they're zero, client will get no work.
|
2004-08-14 00:37:38 +00:00
|
|
|
//
|
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
if (host.d_boinc_used_total) {
|
|
|
|
// The post 4 oct 2005 case.
|
|
|
|
// Compute the max allowable additional disk usage based on prefs
|
|
|
|
//
|
2008-11-01 23:13:55 +00:00
|
|
|
x1 = prefs.disk_max_used_gb*GIGA - host.d_boinc_used_total;
|
2005-10-04 21:44:58 +00:00
|
|
|
x2 = host.d_total*prefs.disk_max_used_pct/100.
|
|
|
|
- host.d_boinc_used_total;
|
2008-11-01 23:13:55 +00:00
|
|
|
x3 = host.d_free - prefs.disk_min_free_gb*GIGA; // may be negative
|
2009-02-26 00:23:23 +00:00
|
|
|
x = std::min(x1, std::min(x2, x3));
|
2005-01-31 19:34:43 +00:00
|
|
|
|
2005-10-04 21:44:58 +00:00
|
|
|
// see which bound is the most stringent
|
|
|
|
//
|
|
|
|
if (x==x1) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.max_used = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
} else if (x==x2) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.max_frac = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.min_free = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
}
|
2005-02-06 21:26:21 +00:00
|
|
|
} else {
|
2005-10-04 21:44:58 +00:00
|
|
|
// here we don't know how much space BOINC is using.
|
|
|
|
// so we're kinda screwed.
|
|
|
|
// All we can do is assume that BOINC is using zero space.
|
|
|
|
// We can't honor the max_used for max_used_pct preferences.
|
|
|
|
// We can only honor the min_free pref.
|
|
|
|
//
|
2008-11-01 23:13:55 +00:00
|
|
|
x = host.d_free - prefs.disk_min_free_gb*GIGA; // may be negative
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->disk_limits.min_free = x;
|
2005-10-04 21:44:58 +00:00
|
|
|
x1 = x2 = x3 = 0;
|
2005-02-10 20:31:11 +00:00
|
|
|
}
|
2005-01-31 19:34:43 +00:00
|
|
|
|
2004-08-14 00:37:38 +00:00
|
|
|
if (x < 0) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: disk_max_used_gb %.2fGB disk_max_used_pct %.2f disk_min_free_gb %.2fGB\n",
|
|
|
|
prefs.disk_max_used_gb/GIGA,
|
|
|
|
prefs.disk_max_used_pct,
|
|
|
|
prefs.disk_min_free_gb/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: host.d_total %.2fGB host.d_free %.2fGB host.d_boinc_used_total %.2fGB\n",
|
|
|
|
host.d_total/GIGA,
|
|
|
|
host.d_free/GIGA,
|
|
|
|
host.d_boinc_used_total/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] No disk space available: x1 %.2fGB x2 %.2fGB x3 %.2fGB x %.2fGB\n",
|
|
|
|
x1/GIGA, x2/GIGA, x3/GIGA, x/GIGA
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->disk.set_insufficient(-x);
|
2008-11-26 20:37:11 +00:00
|
|
|
x = 0;
|
2004-08-14 00:37:38 +00:00
|
|
|
}
|
|
|
|
return x;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
2008-12-15 21:14:32 +00:00
|
|
|
// Raw runtime estimate for a job: FLOPs estimate divided by the
// app version's effective speed on this host.
// A missing or non-positive FLOPs estimate falls back to 1e12.
//
static double estimate_duration_unscaled(WORKUNIT& wu, BEST_APP_VERSION& bav) {
    double fpops = (wu.rsc_fpops_est > 0) ? wu.rsc_fpops_est : 1e12;
    return fpops/bav.host_usage.flops;
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_running_frac() {
|
2008-12-18 18:19:42 +00:00
|
|
|
double rf;
|
|
|
|
if (g_request->core_client_version<=419) {
|
|
|
|
rf = g_reply->host.on_frac;
|
|
|
|
} else {
|
|
|
|
rf = g_reply->host.active_frac * g_reply->host.on_frac;
|
|
|
|
}
|
|
|
|
|
|
|
|
// clamp running_frac and DCF to a reasonable range
|
|
|
|
//
|
|
|
|
if (rf > 1) {
|
2009-01-29 20:42:45 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL, "running_frac=%f; setting to 1\n", rf);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
rf = 1;
|
|
|
|
} else if (rf < .1) {
|
2009-01-29 20:42:45 +00:00
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL, "running_frac=%f; setting to 0.1\n", rf);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
rf = .1;
|
|
|
|
}
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->running_frac = rf;
|
|
|
|
}
|
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_dcf() {
|
2009-01-29 20:42:45 +00:00
|
|
|
double dcf = g_reply->host.duration_correction_factor;
|
|
|
|
if (dcf > 10) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] DCF=%f; setting to 10\n", dcf
|
|
|
|
);
|
2008-12-18 18:19:42 +00:00
|
|
|
}
|
2009-01-29 20:42:45 +00:00
|
|
|
dcf = 10;
|
|
|
|
} else if (dcf < 0.1) {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] DCF=%f; setting to 0.1\n", dcf
|
|
|
|
);
|
|
|
|
}
|
|
|
|
dcf = 0.1;
|
|
|
|
}
|
|
|
|
g_wreq->dcf = dcf;
|
|
|
|
}
|
|
|
|
|
|
|
|
// estimate the amount of real time to complete this WU,
|
|
|
|
// taking into account active_frac etc.
|
|
|
|
// Note: don't factor in resource_share_fraction.
|
|
|
|
// The core client no longer necessarily does round-robin
|
|
|
|
// across all projects.
|
|
|
|
//
|
|
|
|
double estimate_duration(WORKUNIT& wu, BEST_APP_VERSION& bav) {
|
|
|
|
double edu = estimate_duration_unscaled(wu, bav);
|
|
|
|
double ed = edu/g_wreq->running_frac;
|
|
|
|
if (!config.ignore_dcf) {
|
|
|
|
ed *= g_wreq->dcf;
|
2005-06-23 07:42:45 +00:00
|
|
|
}
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-24 21:51:19 +00:00
|
|
|
"[send] est. duration for WU %d: unscaled %.2f scaled %.2f\n",
|
2009-01-15 20:23:20 +00:00
|
|
|
wu.id, edu, ed
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-18 18:19:42 +00:00
|
|
|
return ed;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
2008-12-18 21:25:51 +00:00
|
|
|
static void get_prefs_info() {
|
2006-05-02 22:17:09 +00:00
|
|
|
char buf[8096];
|
2009-02-26 00:23:23 +00:00
|
|
|
std::string str;
|
2007-09-21 18:10:54 +00:00
|
|
|
unsigned int pos = 0;
|
|
|
|
int temp_int;
|
2008-03-10 17:03:15 +00:00
|
|
|
bool flag;
|
2006-10-22 00:42:44 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
extract_venue(g_reply->user.project_prefs, g_reply->host.venue, buf);
|
2007-12-24 21:34:21 +00:00
|
|
|
str = buf;
|
|
|
|
|
2006-10-22 00:42:44 +00:00
|
|
|
// scan user's project prefs for elements of the form <app_id>N</app_id>,
|
|
|
|
// indicating the apps they want to run.
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->preferred_apps.clear();
|
2007-09-21 18:10:54 +00:00
|
|
|
while (parse_int(str.substr(pos,str.length()-pos).c_str(), "<app_id>", temp_int)) {
|
2006-10-22 01:46:33 +00:00
|
|
|
APP_INFO ai;
|
|
|
|
ai.appid = temp_int;
|
2008-03-07 21:13:01 +00:00
|
|
|
ai.work_available = false;
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->preferred_apps.push_back(ai);
|
2006-10-22 00:42:44 +00:00
|
|
|
|
2007-09-21 18:10:54 +00:00
|
|
|
pos = str.find("<app_id>", pos) + 1;
|
|
|
|
}
|
2008-03-27 21:39:02 +00:00
|
|
|
if (parse_bool(buf,"allow_non_preferred_apps", flag)) {
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->allow_non_preferred_apps = flag;
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-03-27 21:39:02 +00:00
|
|
|
if (parse_bool(buf,"allow_beta_work", flag)) {
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->allow_beta_work = flag;
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-12-18 21:25:51 +00:00
|
|
|
if (parse_bool(buf,"no_gpus", flag)) {
|
|
|
|
g_wreq->no_gpus = flag;
|
|
|
|
}
|
2009-03-18 21:14:44 +00:00
|
|
|
if (parse_bool(buf,"no_cpu", flag)) {
|
|
|
|
g_wreq->no_cpu = flag;
|
|
|
|
}
|
2008-12-18 21:25:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Find or compute various info about the host;
|
|
|
|
// this info affects which jobs are sent to the host.
|
|
|
|
//
|
|
|
|
static void get_host_info() {
|
2008-04-30 20:31:33 +00:00
|
|
|
// Decide whether or not this computer is 'reliable'
|
2008-03-07 21:13:01 +00:00
|
|
|
// A computer is reliable if the following conditions are true
|
|
|
|
// (for those that are set in the config file)
|
|
|
|
// 1) The host average turnaround is less than the config
|
|
|
|
// max average turnaround
|
|
|
|
// 2) The host error rate is less then the config max error rate
|
|
|
|
// 3) The host results per day is equal to the config file value
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
double expavg_credit = g_reply->host.expavg_credit;
|
|
|
|
double expavg_time = g_reply->host.expavg_time;
|
2008-04-30 20:31:33 +00:00
|
|
|
update_average(0, 0, CREDIT_HALF_LIFE, expavg_credit, expavg_time);
|
2008-02-21 20:10:10 +00:00
|
|
|
|
2008-03-07 21:13:01 +00:00
|
|
|
// Platforms other then Windows, Linux and Intel Macs need a
|
|
|
|
// larger set of computers to be marked reliable
|
2008-02-21 20:10:10 +00:00
|
|
|
//
|
2008-03-07 21:13:01 +00:00
|
|
|
double multiplier = 1.0;
|
2008-12-16 16:29:54 +00:00
|
|
|
if (strstr(g_reply->host.os_name,"Windows")
|
|
|
|
|| strstr(g_reply->host.os_name,"Linux")
|
|
|
|
|| (strstr(g_reply->host.os_name,"Darwin")
|
|
|
|
&& !(strstr(g_reply->host.p_vendor,"Power Macintosh"))
|
2008-04-30 20:31:33 +00:00
|
|
|
)) {
|
2008-03-07 21:13:01 +00:00
|
|
|
multiplier = 1.0;
|
2006-10-22 00:42:44 +00:00
|
|
|
} else {
|
2008-03-07 21:13:01 +00:00
|
|
|
multiplier = 1.8;
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
|
|
|
|
2009-01-28 04:58:01 +00:00
|
|
|
if (
|
|
|
|
(g_reply->host.avg_turnaround > 0)
|
|
|
|
&& (config.reliable_max_avg_turnaround == 0 || g_reply->host.avg_turnaround < config.reliable_max_avg_turnaround*multiplier)
|
2008-12-16 16:29:54 +00:00
|
|
|
&& (config.reliable_max_error_rate == 0 || g_reply->host.error_rate < config.reliable_max_error_rate*multiplier)
|
|
|
|
&& (config.daily_result_quota == 0 || g_reply->host.max_results_day >= config.daily_result_quota)
|
2008-03-07 21:13:01 +00:00
|
|
|
) {
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->reliable = true;
|
2008-05-02 17:48:29 +00:00
|
|
|
}
|
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-28 04:58:01 +00:00
|
|
|
"[send] [HOST#%d] is%s reliable; OS: %s, error_rate: %.6f, avg_turn_hrs: %.3f max res/day %d\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id,
|
2008-12-18 21:25:51 +00:00
|
|
|
g_wreq->reliable?"":" not",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.os_name, g_reply->host.error_rate,
|
2009-01-28 04:58:01 +00:00
|
|
|
g_reply->host.avg_turnaround/3600,
|
|
|
|
g_reply->host.max_results_day
|
2008-05-02 17:48:29 +00:00
|
|
|
);
|
2006-10-22 00:42:44 +00:00
|
|
|
}
|
2006-05-02 22:17:09 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// Return true if the user has set application preferences,
|
|
|
|
// and this job is not for a selected app
|
2007-05-09 17:45:18 +00:00
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
bool app_not_selected(WORKUNIT& wu) {
|
2006-11-07 17:40:55 +00:00
|
|
|
unsigned int i;
|
2007-05-09 17:45:18 +00:00
|
|
|
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->preferred_apps.size() == 0) return false;
|
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
|
|
|
if (wu.appid == g_wreq->preferred_apps[i].appid) {
|
|
|
|
g_wreq->preferred_apps[i].work_available = true;
|
2008-04-30 20:31:33 +00:00
|
|
|
return false;
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
2008-04-30 20:31:33 +00:00
|
|
|
return true;
|
2006-10-22 00:42:44 +00:00
|
|
|
}
|
|
|
|
|
2007-08-16 17:33:41 +00:00
|
|
|
// see how much RAM we can use on this machine
|
|
|
|
//
|
2009-03-05 23:08:53 +00:00
|
|
|
static inline void get_mem_sizes() {
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->ram = g_reply->host.m_nbytes;
|
|
|
|
if (g_wreq->ram <= 0) g_wreq->ram = DEFAULT_RAM_SIZE;
|
|
|
|
g_wreq->usable_ram = g_wreq->ram;
|
2008-12-16 16:29:54 +00:00
|
|
|
double busy_frac = g_request->global_prefs.ram_max_used_busy_frac;
|
|
|
|
double idle_frac = g_request->global_prefs.ram_max_used_idle_frac;
|
2006-10-04 17:01:36 +00:00
|
|
|
double frac = 1;
|
|
|
|
if (busy_frac>0 && idle_frac>0) {
|
|
|
|
frac = std::max(busy_frac, idle_frac);
|
|
|
|
if (frac > 1) frac = 1;
|
2009-01-29 20:42:45 +00:00
|
|
|
g_wreq->usable_ram *= frac;
|
2006-10-04 17:01:36 +00:00
|
|
|
}
|
2007-08-16 17:33:41 +00:00
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the job's memory bound fits in the host's usable RAM.
// On failure: queue a user message, record the shortfall,
// ask the client to back off, and return INFEASIBLE_MEM.
// Returns 0 if the job fits.
//
static inline int check_memory(WORKUNIT& wu) {
    double diff = wu.rsc_memory_bound - g_wreq->usable_ram;
    if (diff > 0) {
        char message[256];
        // snprintf: the app name comes from the DB and could otherwise
        // overflow the fixed-size buffer (sprintf is unbounded)
        snprintf(message, sizeof(message),
            "%s needs %0.2f MB RAM but only %0.2f MB is available for use.",
            find_user_friendly_name(wu.appid),
            wu.rsc_memory_bound/MEGA, g_wreq->usable_ram/MEGA
        );
        g_wreq->insert_no_work_message(USER_MESSAGE(message,"high"));

        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [WU#%d %s] needs %0.2fMB RAM; [HOST#%d] has %0.2fMB, %0.2fMB usable\n",
                wu.id, wu.name, wu.rsc_memory_bound/MEGA,
                g_reply->host.id, g_wreq->ram/MEGA, g_wreq->usable_ram/MEGA
            );
        }
        g_wreq->mem.set_insufficient(wu.rsc_memory_bound);
        g_reply->set_delay(DELAY_NO_WORK_TEMP);
        return INFEASIBLE_MEM;
    }
    return 0;
}
|
2005-02-08 19:54:10 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the job's disk bound fits in the host's available disk.
// On failure: queue a user message, record the shortfall,
// and return INFEASIBLE_DISK. Returns 0 if the job fits.
//
static inline int check_disk(WORKUNIT& wu) {
    double diff = wu.rsc_disk_bound - g_wreq->disk_available;
    if (diff > 0) {
        char message[256];
        // snprintf: the app name comes from the DB and could otherwise
        // overflow the fixed-size buffer (sprintf is unbounded)
        snprintf(message, sizeof(message),
            "%s needs %0.2fMB more disk space. You currently have %0.2f MB available and it needs %0.2f MB.",
            find_user_friendly_name(wu.appid),
            diff/MEGA, g_wreq->disk_available/MEGA, wu.rsc_disk_bound/MEGA
        );
        g_wreq->insert_no_work_message(USER_MESSAGE(message,"high"));

        g_wreq->disk.set_insufficient(diff);
        return INFEASIBLE_DISK;
    }
    return 0;
}
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
// Check whether the host's measured download bandwidth meets
// the job's bandwidth bound (if any).
// On failure: queue a user message, record the shortfall,
// and return INFEASIBLE_BANDWIDTH. Returns 0 if OK.
//
static inline int check_bandwidth(WORKUNIT& wu) {
    if (wu.rsc_bandwidth_bound == 0) return 0;

    // if n_bwdown is zero, the host has never downloaded anything,
    // so skip this check
    //
    if (g_reply->host.n_bwdown == 0) return 0;

    double diff = wu.rsc_bandwidth_bound - g_reply->host.n_bwdown;
    if (diff > 0) {
        char message[256];
        // snprintf: the app name comes from the DB and could otherwise
        // overflow the fixed-size buffer (sprintf is unbounded)
        snprintf(message, sizeof(message),
            "%s requires %0.2f KB/sec download bandwidth. Your computer has been measured at %0.2f KB/sec.",
            find_user_friendly_name(wu.appid),
            wu.rsc_bandwidth_bound/KILO, g_reply->host.n_bwdown/KILO
        );
        g_wreq->insert_no_work_message(USER_MESSAGE(message,"high"));

        g_wreq->bandwidth.set_insufficient(diff);
        return INFEASIBLE_BANDWIDTH;
    }
    return 0;
}
|
|
|
|
|
2008-07-15 21:43:45 +00:00
|
|
|
// Determine if the app is "hard",
|
|
|
|
// and we should send it only to high-end hosts.
|
|
|
|
// Currently this is specified by setting weight=-1;
|
|
|
|
// this is a kludge for SETI@home/Astropulse.
|
|
|
|
//
|
|
|
|
static inline bool hard_app(APP& app) {
|
|
|
|
return (app.weight == -1);
|
|
|
|
}
|
|
|
|
|
2009-01-30 21:25:24 +00:00
|
|
|
// Return the client's estimated queueing delay for the resource
// (CUDA GPU or CPU) that this app version uses.
//
static inline double get_estimated_delay(BEST_APP_VERSION& bav) {
    return bav.host_usage.ncudas
        ? g_request->coproc_cuda->estimated_delay
        : g_request->cpu_estimated_delay;
}
|
|
|
|
|
|
|
|
// Add dt to the estimated delay of the resource
// (CUDA GPU or CPU) that this app version uses.
//
static inline void update_estimated_delay(BEST_APP_VERSION& bav, double dt) {
    if (bav.host_usage.ncudas) {
        g_request->coproc_cuda->estimated_delay += dt;
        return;
    }
    g_request->cpu_estimated_delay += dt;
}
|
|
|
|
|
2007-05-30 17:25:51 +00:00
|
|
|
static inline int check_deadline(
|
2008-12-15 21:14:32 +00:00
|
|
|
WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav
|
2007-05-30 17:25:51 +00:00
|
|
|
) {
|
2008-07-14 19:13:19 +00:00
|
|
|
if (config.ignore_delay_bound) return 0;
|
|
|
|
|
2005-02-16 23:17:43 +00:00
|
|
|
// skip delay check if host currently doesn't have any work
|
2008-08-12 19:06:35 +00:00
|
|
|
// and it's not a hard app.
|
2005-02-16 23:17:43 +00:00
|
|
|
// (i.e. everyone gets one result, no matter how slow they are)
|
|
|
|
//
|
2009-01-30 21:25:24 +00:00
|
|
|
if (get_estimated_delay(bav) == 0 && !hard_app(app)) return 0;
|
2008-07-14 19:13:19 +00:00
|
|
|
|
2008-08-12 19:06:35 +00:00
|
|
|
// if it's a hard app, don't send it to a host with no credit
|
|
|
|
//
|
2008-12-15 21:14:32 +00:00
|
|
|
if (hard_app(app) && g_reply->host.total_credit == 0) {
|
2008-08-12 19:06:35 +00:00
|
|
|
return INFEASIBLE_CPU;
|
|
|
|
}
|
|
|
|
|
2008-12-26 22:56:42 +00:00
|
|
|
if (config.workload_sim && g_request->have_other_results_list) {
|
|
|
|
double est_dur = estimate_duration(wu, bav);
|
|
|
|
if (g_reply->wreq.edf_reject_test(est_dur, wu.delay_bound)) {
|
|
|
|
return INFEASIBLE_WORKLOAD;
|
|
|
|
}
|
|
|
|
IP_RESULT candidate("", wu.delay_bound, est_dur);
|
|
|
|
strcpy(candidate.name, wu.name);
|
|
|
|
if (check_candidate(candidate, effective_ncpus(), g_request->ip_results)) {
|
|
|
|
// it passed the feasibility test,
|
|
|
|
// but don't add it the the workload yet;
|
|
|
|
// wait until we commit to sending it
|
|
|
|
} else {
|
|
|
|
g_reply->wreq.edf_reject(est_dur, wu.delay_bound);
|
|
|
|
g_reply->wreq.speed.set_insufficient(0);
|
|
|
|
return INFEASIBLE_WORKLOAD;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
double ewd = estimate_duration(wu, bav);
|
|
|
|
if (hard_app(app)) ewd *= 1.3;
|
2009-01-30 21:25:24 +00:00
|
|
|
double est_completion_delay = get_estimated_delay(bav) + ewd;
|
2009-02-26 00:23:23 +00:00
|
|
|
double est_report_delay = std::max(est_completion_delay, g_request->global_prefs.work_buf_min());
|
2008-12-26 22:56:42 +00:00
|
|
|
double diff = est_report_delay - wu.delay_bound;
|
|
|
|
if (diff > 0) {
|
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-24 21:51:19 +00:00
|
|
|
"[send] [WU#%d] deadline miss %d > %d\n",
|
|
|
|
wu.id, (int)est_report_delay, wu.delay_bound
|
2008-12-26 22:56:42 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
g_reply->wreq.speed.set_insufficient(diff);
|
|
|
|
return INFEASIBLE_CPU;
|
2009-01-30 22:30:35 +00:00
|
|
|
} else {
|
|
|
|
if (config.debug_send) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-03-06 22:21:47 +00:00
|
|
|
"[send] [WU#%d] meets deadline: %.2f + %.2f < %d\n",
|
2009-01-30 22:30:35 +00:00
|
|
|
wu.id, get_estimated_delay(bav), ewd, wu.delay_bound
|
|
|
|
);
|
|
|
|
}
|
2004-07-15 18:54:17 +00:00
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
2007-05-30 17:25:51 +00:00
|
|
|
return 0;
|
2007-05-09 17:45:18 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// Fast checks (no DB access) to see if the job can be sent to the host.
|
2007-05-30 17:25:51 +00:00
|
|
|
// Reasons why not include:
|
2007-05-09 17:45:18 +00:00
|
|
|
// 1) the host doesn't have enough memory;
|
|
|
|
// 2) the host doesn't have enough disk space;
|
|
|
|
// 3) based on CPU speed, resource share and estimated delay,
|
|
|
|
// the host probably won't get the result done within the delay bound
|
|
|
|
// 4) app isn't in user's "approved apps" list
|
|
|
|
//
|
2008-12-15 21:14:32 +00:00
|
|
|
int wu_is_infeasible_fast(WORKUNIT& wu, APP& app, BEST_APP_VERSION& bav) {
|
2007-05-30 17:25:51 +00:00
|
|
|
int retval;
|
2007-05-31 18:14:45 +00:00
|
|
|
|
|
|
|
// homogeneous redundancy, quick check
|
|
|
|
//
|
2007-10-04 21:55:37 +00:00
|
|
|
if (app_hr_type(app)) {
|
2008-12-15 21:14:32 +00:00
|
|
|
if (hr_unknown_platform_type(g_reply->host, app_hr_type(app))) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] [WU#%d %s] host is of unknown class in HR type %d\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
g_reply->host.id, wu.id, wu.name, app_hr_type(app)
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2007-10-04 21:55:37 +00:00
|
|
|
return INFEASIBLE_HR;
|
|
|
|
}
|
2008-12-19 18:14:02 +00:00
|
|
|
if (already_sent_to_different_platform_quick(wu, app)) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] [WU#%d %s] failed quick HR check: WU is class %d, host is class %d\n",
|
2008-12-15 21:14:32 +00:00
|
|
|
g_reply->host.id, wu.id, wu.name, wu.hr_class, hr_class(g_request->host, app_hr_type(app))
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2007-05-31 18:14:45 +00:00
|
|
|
return INFEASIBLE_HR;
|
|
|
|
}
|
|
|
|
}
|
2007-05-09 17:45:18 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
if (config.one_result_per_user_per_wu || config.one_result_per_host_per_wu) {
|
2008-12-19 18:14:02 +00:00
|
|
|
if (wu_already_in_reply(wu)) {
|
2007-05-31 18:14:45 +00:00
|
|
|
return INFEASIBLE_DUP;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_memory(wu);
|
2007-05-30 17:25:51 +00:00
|
|
|
if (retval) return retval;
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_disk(wu);
|
2007-05-30 17:25:51 +00:00
|
|
|
if (retval) return retval;
|
2008-12-16 16:29:54 +00:00
|
|
|
retval = check_bandwidth(wu);
|
2008-03-07 21:13:01 +00:00
|
|
|
if (retval) return retval;
|
- scheduler: add <workload_sim> config option.
If set, the scheduler will use EDF simulation,
together with the in-progress workload reported by the client,
to avoid sending results that
1) will miss their deadline, or
2) will cause an in-progress result to miss its deadline, or
3) will make an in-progress result miss its deadline
by more than is already predicted.
If this option is not set, or if the client request doesn't
include a workload description (i.e. the client is old)
use the existing approach, which assumes there's no workload.
NOTE: this is experimental. Production projects should not use it.
- EDF sim: write debug stuff to stderr instead of stdout
- Account manager:
- if an account is detach_when_done, set dont_request_more_work
- check done_request_more_work even for first-time projects
- update_uotd: generate a file for use by Google gadget
- user_links(): use full URLs (so can use in Google gadget)
client/
acct_mgr.C
work_fetch.C
html/
inc/
uotd.inc
util.inc
user/
uotd_gadget.php (new)
sched/
Makefile.am
edf_sim.C
sched_config.C,h
sched_resend.C
sched_send.C,h
server_types.C,h
svn path=/trunk/boinc/; revision=12639
2007-05-10 21:50:52 +00:00
|
|
|
|
2007-05-31 18:14:45 +00:00
|
|
|
// do this last because EDF sim uses some CPU
|
2008-12-26 22:56:42 +00:00
|
|
|
retval = check_deadline(wu, app, bav);
|
|
|
|
if (retval) return INFEASIBLE_WORKLOAD;
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2007-05-30 17:25:51 +00:00
|
|
|
return 0;
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// insert "text" right after "after" in the given buffer
|
|
|
|
//
|
2005-02-16 23:17:43 +00:00
|
|
|
int insert_after(char* buffer, const char* after, const char* text) {
|
2004-04-04 01:59:47 +00:00
|
|
|
char* p;
|
2008-03-31 16:19:45 +00:00
|
|
|
char temp[BLOB_SIZE];
|
2004-04-04 01:59:47 +00:00
|
|
|
|
2008-03-31 16:19:45 +00:00
|
|
|
if (strlen(buffer) + strlen(text) > BLOB_SIZE-1) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2008-08-19 03:00:17 +00:00
|
|
|
"insert_after: overflow: %d %d\n", strlen(buffer), strlen(text)
|
2005-09-26 23:28:48 +00:00
|
|
|
);
|
2004-04-04 01:59:47 +00:00
|
|
|
return ERR_BUFFER_OVERFLOW;
|
|
|
|
}
|
|
|
|
p = strstr(buffer, after);
|
|
|
|
if (!p) {
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_CRITICAL,
|
2005-09-26 23:28:48 +00:00
|
|
|
"insert_after: %s not found in %s\n", after, buffer
|
|
|
|
);
|
2004-04-04 01:59:47 +00:00
|
|
|
return ERR_NULL;
|
|
|
|
}
|
|
|
|
p += strlen(after);
|
|
|
|
strcpy(temp, p);
|
|
|
|
strcpy(p, text);
|
|
|
|
strcat(p, temp);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-02-06 21:26:21 +00:00
|
|
|
// add elements to WU's xml_doc,
|
|
|
|
// in preparation for sending it to a client
|
2004-04-04 01:59:47 +00:00
|
|
|
//
|
|
|
|
int insert_wu_tags(WORKUNIT& wu, APP& app) {
|
2008-03-31 16:19:45 +00:00
|
|
|
char buf[BLOB_SIZE];
|
2005-02-02 18:13:00 +00:00
|
|
|
|
2004-04-04 01:59:47 +00:00
|
|
|
sprintf(buf,
|
|
|
|
" <rsc_fpops_est>%f</rsc_fpops_est>\n"
|
|
|
|
" <rsc_fpops_bound>%f</rsc_fpops_bound>\n"
|
|
|
|
" <rsc_memory_bound>%f</rsc_memory_bound>\n"
|
|
|
|
" <rsc_disk_bound>%f</rsc_disk_bound>\n"
|
|
|
|
" <name>%s</name>\n"
|
|
|
|
" <app_name>%s</app_name>\n",
|
|
|
|
wu.rsc_fpops_est,
|
|
|
|
wu.rsc_fpops_bound,
|
|
|
|
wu.rsc_memory_bound,
|
|
|
|
wu.rsc_disk_bound,
|
|
|
|
wu.name,
|
|
|
|
app.name
|
|
|
|
);
|
|
|
|
return insert_after(wu.xml_doc, "<workunit>\n", buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// add the given workunit to a reply.
// Add the app and app_version to the reply also.
//
// NOTE(review): the "reply" parameter is not used in this body;
// the global g_reply is updated instead.  Presumably kept for
// interface compatibility - confirm against callers.
//
int add_wu_to_reply(
    WORKUNIT& wu, SCHEDULER_REPLY& reply, APP* app, BEST_APP_VERSION* bavp
) {
    int retval;
    WORKUNIT wu2, wu3;

    APP_VERSION* avp = bavp->avp;

    // add the app, app_version, and workunit to the reply,
    // but only if they aren't already there
    //
    if (avp) {
        // work on a copy so the shared APP_VERSION isn't modified
        //
        APP_VERSION av2=*avp, *avp2=&av2;

        // optionally rewrite download URLs per the host's timezone
        //
        if (strlen(config.replace_download_url_by_timezone)) {
            process_av_timezone(avp, av2);
        }

        g_reply->insert_app_unique(*app);
        av2.bavp = bavp;
        g_reply->insert_app_version_unique(*avp2);
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [HOST#%d] Sending app_version %s %d %d %s; %.2f GFLOPS\n",
                g_reply->host.id, app->name,
                avp2->platformid, avp2->version_num, avp2->plan_class,
                bavp->host_usage.flops/1e9
            );
        }
    }

    // add time estimate to reply
    //
    wu2 = wu;       // make copy since we're going to modify its XML field
    retval = insert_wu_tags(wu2, *app);
    if (retval) {
        log_messages.printf(MSG_CRITICAL, "insert_wu_tags failed %d\n", retval);
        return retval;
    }
    // second copy: process_wu_timezone() reads wu2 and writes wu3
    //
    wu3 = wu2;
    if (strlen(config.replace_download_url_by_timezone)) {
        process_wu_timezone(wu2, wu3);
    }

    g_reply->insert_workunit_unique(wu3);

    // switch to tighter policy for estimating delay
    //
    return 0;
}
|
|
|
|
|
|
|
|
// splice <name> and <wu_name> elements into the result's xml_doc_in,
// each inserted right after the opening <result> tag.
// Returns 0 on success, else the insert_after() error code.
//
int insert_name_tags(RESULT& result, WORKUNIT const& wu) {
    char tag[256];
    int rc;

    sprintf(tag, "<name>%s</name>\n", result.name);
    rc = insert_after(result.xml_doc_in, "<result>\n", tag);
    if (rc) return rc;

    sprintf(tag, "<wu_name>%s</wu_name>\n", wu.name);
    return insert_after(result.xml_doc_in, "<result>\n", tag);
}
|
|
|
|
|
|
|
|
// splice a <report_deadline> element into the result's xml_doc_in,
// right after the opening <result> tag.
// Returns 0 on success, else the insert_after() error code.
//
int insert_deadline_tag(RESULT& result) {
    char tag[256];
    sprintf(tag,
        "<report_deadline>%d</report_deadline>\n", result.report_deadline
    );
    return insert_after(result.xml_doc_in, "<result>\n", tag);
}
|
|
|
|
|
2005-08-04 03:50:04 +00:00
|
|
|
// move the WU's transition time earlier, to x, if x precedes
// the currently stored value (the comparison is done in SQL).
//
int update_wu_transition_time(WORKUNIT wu, time_t x) {
    char clause[256];
    DB_WORKUNIT dbwu;

    // SQL note: can't use min() here
    //
    sprintf(clause,
        "transition_time=if(transition_time<%d, transition_time, %d)",
        (int)x, (int)x
    );
    dbwu.id = wu.id;
    return dbwu.update_field(clause);
}
|
|
|
|
|
|
|
|
// return true iff a result for same WU is already being sent
|
|
|
|
//
|
2008-12-19 18:14:02 +00:00
|
|
|
bool wu_already_in_reply(WORKUNIT& wu) {
|
2004-04-04 01:59:47 +00:00
|
|
|
unsigned int i;
|
2008-12-16 16:29:54 +00:00
|
|
|
for (i=0; i<g_reply->results.size(); i++) {
|
|
|
|
if (wu.id == g_reply->results[i].workunitid) {
|
2004-04-04 01:59:47 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// acquire the semaphore identified by the global sema_key.
// NOTE(review): presumably serializes scheduler instances' access
// to shared state - confirm where sema_key is created.
//
void lock_sema() {
    lock_semaphore(sema_key);
}
|
|
|
|
|
|
|
|
// release the semaphore identified by the global sema_key
// (counterpart of lock_sema())
//
void unlock_sema() {
    unlock_semaphore(sema_key);
}
|
|
|
|
|
2005-02-06 21:26:21 +00:00
|
|
|
// return true if additional work is needed,
// and there's disk space left,
// and we haven't exceeded result per RPC limit,
// and we haven't exceeded results per day limit
//
// Side effects: may clamp g_reply->host.max_results_day, and sets
// g_wreq->total_max_results_day / daily_result_quota_exceeded /
// cache_size_exceeded as it evaluates the limits.
//
bool work_needed(bool locality_sched) {
    if (locality_sched) {
        // if we've failed to send a result because of a transient condition,
        // return false to preserve invariant
        //
        if (g_wreq->disk.insufficient || g_wreq->speed.insufficient || g_wreq->mem.insufficient || g_wreq->no_allowed_apps_available) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] stopping work search - locality condition\n"
                );
            }
            return false;
        }
    }

    // host.max_results_day is between 1 and config.daily_result_quota inclusive
    // wreq.daily_result_quota is between ncpus
    // and ncpus*host.max_results_day inclusive
    //
    if (config.daily_result_quota) {
        // clamp an unset (0) or out-of-range per-host quota to the
        // project-wide limit
        //
        if (g_reply->host.max_results_day == 0 || g_reply->host.max_results_day>config.daily_result_quota) {
            g_reply->host.max_results_day = config.daily_result_quota;
        }
        int mult = max_results_day_multiplier();
        g_wreq->total_max_results_day = mult*g_reply->host.max_results_day;
        if (g_reply->host.nresults_today >= g_wreq->total_max_results_day) {
            g_wreq->daily_result_quota_exceeded = true;
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] stopping work search - daily quota exceeded\n"
                );
            }
            return false;
        }
    }

    // limit on total jobs in progress on this host
    //
    if (config.max_wus_in_progress) {
        int mult = max_wus_in_progress_multiplier();
        if (g_wreq->nresults_on_host >= mult*config.max_wus_in_progress) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] in-progress job limit exceeded; %d >= %d*%d\n",
                    g_wreq->nresults_on_host, config.max_wus_in_progress, mult
                );
            }
            g_wreq->cache_size_exceeded = true;
            return false;
        }
    }
    // limit on jobs per scheduler RPC
    //
    if (g_wreq->nresults >= config.max_wus_to_send) {
        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] stopping work search - nresults %d >= max_wus_to_send %d\n",
                g_wreq->nresults, config.max_wus_to_send
            );
        }
        return false;
    }

#if 0
    log_messages.printf(MSG_NORMAL,
        "work_needed: spec req %d sec to fill %.2f; CPU (%.2f, %.2f) CUDA (%.2f, %.2f)\n",
        g_wreq->rsc_spec_request,
        g_wreq->seconds_to_fill,
        g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
        g_wreq->cuda_req_secs, g_wreq->cuda_req_instances
    );
#endif
    // finally: does the client actually want more work?
    // New clients request per-resource amounts; old clients
    // just request seconds of work.
    //
    if (g_wreq->rsc_spec_request) {
        if (g_wreq->need_cpu()) {
            return true;
        }
        if (g_wreq->need_cuda()) {
            return true;
        }
    } else {
        if (g_wreq->seconds_to_fill > 0) {
            return true;
        }
    }
    if (config.debug_send) {
        log_messages.printf(MSG_NORMAL, "[send] don't need more work\n");
    }
    return false;
}
|
|
|
|
|
2009-03-03 16:38:54 +00:00
|
|
|
// Add the given result (and its WU, app, app_version) to the reply,
// and update the DB to reflect that it's been sent:
// assign it to this host/user, set its deadline and server state,
// and decrement the client's remaining work request.
// Returns 0 on success, nonzero (and sends nothing) on DB error.
//
int add_result_to_reply(
    DB_RESULT& result, WORKUNIT& wu, BEST_APP_VERSION* bavp,
    bool locality_scheduling
) {
    int retval;
    bool resent_result = false;
    APP* app = ssp->lookup_app(wu.appid);

    retval = add_wu_to_reply(wu, *g_reply, app, bavp);
    if (retval) return retval;

    // in the scheduling locality case,
    // reduce the available space by LESS than the workunit rsc_disk_bound,
    // IF the host already has the file OR the file was not already sent.
    //
    if (!locality_scheduling || decrement_disk_space_locality(wu)) {
        g_wreq->disk_available -= wu.rsc_disk_bound;
    }

    // update the result in DB
    //
    result.hostid = g_reply->host.id;
    result.userid = g_reply->user.id;
    result.sent_time = time(0);
    // remember the prior state: mark_as_sent() uses it to detect
    // the result being grabbed by another scheduler instance
    //
    int old_server_state = result.server_state;

    int delay_bound = wu.delay_bound;
    if (result.server_state != RESULT_SERVER_STATE_IN_PROGRESS) {
        // We are sending this result for the first time
        //
        // If the workunit needs reliable and is being sent to a reliable host,
        // then shorten the delay bound by the percent specified
        //
        if (config.reliable_on_priority && result.priority >= config.reliable_on_priority && config.reliable_reduced_delay_bound > 0.01
        ) {
            double reduced_delay_bound = delay_bound*config.reliable_reduced_delay_bound;
            double est_wallclock_duration = estimate_duration(wu, *bavp);
            // Check to see how reasonable this reduced time is.
            // Increase it to twice the estimated delay bound
            // if all the following apply:
            //
            // 1) Twice the estimate is longer then the reduced delay bound
            // 2) Twice the estimate is less then the original delay bound
            // 3) Twice the estimate is less then the twice the reduced delay bound
            if (est_wallclock_duration*2 > reduced_delay_bound
                && est_wallclock_duration*2 < delay_bound
                && est_wallclock_duration*2 < delay_bound*config.reliable_reduced_delay_bound*2
            ) {
                reduced_delay_bound = est_wallclock_duration*2;
            }
            delay_bound = (int) reduced_delay_bound;
        }

        result.report_deadline = result.sent_time + delay_bound;
        result.server_state = RESULT_SERVER_STATE_IN_PROGRESS;
    } else {
        // Result was already sent to this host but was lost,
        // so we are resending it.
        //
        resent_result = true;

        // TODO: explain the following
        //
        if (result.report_deadline < result.sent_time) {
            result.report_deadline = result.sent_time + 10;
        }
        if (result.report_deadline > result.sent_time + delay_bound) {
            result.report_deadline = result.sent_time + delay_bound;
        }

        if (config.debug_send) {
            log_messages.printf(MSG_NORMAL,
                "[send] [RESULT#%d] [HOST#%d] (resend lost work)\n",
                result.id, g_reply->host.id
            );
        }
    }
    retval = result.mark_as_sent(old_server_state);
    if (retval == ERR_DB_NOT_FOUND) {
        // another scheduler instance claimed this result first
        //
        log_messages.printf(MSG_CRITICAL,
            "[RESULT#%d] [HOST#%d]: CAN'T SEND, already sent to another host\n",
            result.id, g_reply->host.id
        );
    } else if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "add_result_to_reply: can't update result: %d\n", retval
        );
    }
    if (retval) return retval;

    double est_dur = estimate_duration(wu, *bavp);
    if (config.debug_send) {
        log_messages.printf(MSG_NORMAL,
            "[HOST#%d] Sending [RESULT#%d %s] (est. dur. %.2f seconds)\n",
            g_reply->host.id, result.id, result.name, est_dur
        );
    }

    retval = update_wu_transition_time(wu, result.report_deadline);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "add_result_to_reply: can't update WU transition time: %d\n",
            retval
        );
        return retval;
    }

    // The following overwrites the result's xml_doc field.
    // But that's OK cuz we're done with DB updates
    //
    retval = insert_name_tags(result, wu);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "add_result_to_reply: can't insert name tags: %d\n",
            retval
        );
        return retval;
    }
    retval = insert_deadline_tag(result);
    if (retval) {
        log_messages.printf(MSG_CRITICAL,
            "add_result_to_reply: can't insert deadline tag: %d\n", retval
        );
        return retval;
    }
    result.bavp = bavp;
    g_reply->insert_result(result);

    // charge the sent job against the client's work request
    //
    if (g_wreq->rsc_spec_request) {
        if (bavp->host_usage.ncudas) {
            g_wreq->cuda_req_secs -= est_dur;
            g_wreq->cuda_req_instances -= bavp->host_usage.ncudas;
        } else {
            g_wreq->cpu_req_secs -= est_dur;
            g_wreq->cpu_req_instances -= bavp->host_usage.avg_ncpus;
        }
    } else {
        g_wreq->seconds_to_fill -= est_dur;
    }
    update_estimated_delay(*bavp, est_dur);
    g_wreq->nresults++;
    g_wreq->nresults_on_host++;
    // a resent job was already counted against today's quota
    //
    if (!resent_result) g_reply->host.nresults_today++;

    // add this result to workload for simulation
    //
    if (config.workload_sim && g_request->have_other_results_list) {
        IP_RESULT ipr ("", time(0)+wu.delay_bound, est_dur);
        g_request->ip_results.push_back(ipr);
    }

    // mark job as done if debugging flag is set;
    // this is used by sched_driver.C (performance testing)
    //
    if (mark_jobs_done) {
        DB_WORKUNIT dbwu;
        char buf[256];
        sprintf(buf,
            "server_state=%d outcome=%d",
            RESULT_SERVER_STATE_OVER, RESULT_OUTCOME_SUCCESS
        );
        result.update_field(buf);

        dbwu.id = wu.id;
        sprintf(buf, "transition_time=%ld", time(0));
        dbwu.update_field(buf);
    }

    // If we're sending an unreplicated job to an untrusted host,
    // mark it as replicated
    //
    if (wu.target_nresults == 1 && app->target_nresults > 1) {
        if (g_wreq->trust) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [WU#%d] sending to trusted host, not replicating\n", wu.id
                );
            }
        } else {
            DB_WORKUNIT dbwu;
            char buf[256];
            sprintf(buf,
                "target_nresults=%d, min_quorum=%d, transition_time=%ld",
                app->target_nresults, app->target_nresults, time(0)
            );
            dbwu.id = wu.id;
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [WU#%d] sending to untrusted host, replicating\n", wu.id
                );
            }
            retval = dbwu.update_field(buf);
            if (retval) {
                log_messages.printf(MSG_CRITICAL,
                    "WU update failed: %d", retval
                );
            }
        }
    }

    return 0;
}
|
|
|
|
|
2008-04-26 23:34:38 +00:00
|
|
|
// send messages to user about why jobs were or weren't sent
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
static void explain_to_user() {
|
2007-08-16 17:33:41 +00:00
|
|
|
char helpful[512];
|
2008-04-30 20:31:33 +00:00
|
|
|
unsigned int i;
|
- server code: at some point I made a global var "SCHED_CONFIG config",
mostly so that the parse function could assume
that everything was initially zero.
However, various back-end functions pass around SCHED_CONFIG&
as an argument (also named "config").
This creates a shadow, which is always bad.
Worse is the possibility that some projects have back-end programs
that have a SCHED_CONFIG variable that's automatic,
and therefore isn't zero initially,
and therefore isn't parsing correctly.
To fix this, I changed the 2 vectors in SCHED_CONFIG into pointers,
and have the parse routine zero the structure.
I was tempted to remove the SCHED_CONFIG& args to back-end functions,
but this would have broken some projects' code.
I did, however, change the name from config to config_loc
to avoid shadowing.
Also fixed various other compiler warnings.
svn path=/trunk/boinc/; revision=15541
2008-07-02 17:24:53 +00:00
|
|
|
int j;
|
2007-09-21 18:10:54 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// If work was sent from apps the user did not select, explain.
|
|
|
|
// NOTE: this will have to be done differently with matchmaker scheduling
|
2008-03-07 21:13:01 +00:00
|
|
|
//
|
2008-08-14 22:06:51 +00:00
|
|
|
if (!config.locality_scheduling && !config.matchmaker) {
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->nresults && !g_wreq->user_apps_only) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"No work can be sent for the applications you have selected",
|
|
|
|
"high"
|
|
|
|
)
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// Inform the user about applications with no work
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
|
|
|
if (!g_wreq->preferred_apps[i].work_available) {
|
|
|
|
APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid);
|
2008-08-14 22:06:51 +00:00
|
|
|
// don't write message if the app is deprecated
|
|
|
|
//
|
|
|
|
if (app) {
|
|
|
|
char explanation[256];
|
|
|
|
sprintf(explanation,
|
|
|
|
"No work is available for %s",
|
2008-12-18 21:25:51 +00:00
|
|
|
find_user_friendly_name(g_wreq->preferred_apps[i].appid)
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(explanation, "high")
|
|
|
|
);
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2008-08-14 22:06:51 +00:00
|
|
|
// Tell the user about applications they didn't qualify for
|
|
|
|
//
|
|
|
|
for (j=0; j<preferred_app_message_index; j++){
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->insert_message(g_wreq->no_work_messages.at(j));
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"You have selected to receive work from other applications if no work is available for the applications you selected",
|
|
|
|
"high"
|
|
|
|
)
|
|
|
|
);
|
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE("Sending work from other applications", "high")
|
2008-08-14 22:06:51 +00:00
|
|
|
);
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-08-14 22:06:51 +00:00
|
|
|
}
|
2008-03-07 21:13:01 +00:00
|
|
|
|
2007-08-16 17:33:41 +00:00
|
|
|
// if client asked for work and we're not sending any, explain why
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->nresults == 0) {
|
|
|
|
g_reply->set_delay(DELAY_NO_WORK_TEMP);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(USER_MESSAGE("No work sent", "high"));
|
2009-01-20 21:31:13 +00:00
|
|
|
|
|
|
|
// Tell the user about applications with no work
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
for (i=0; i<g_wreq->preferred_apps.size(); i++) {
|
|
|
|
if (!g_wreq->preferred_apps[i].work_available) {
|
|
|
|
APP* app = ssp->lookup_app(g_wreq->preferred_apps[i].appid);
|
2008-03-07 21:13:01 +00:00
|
|
|
// don't write message if the app is deprecated
|
2008-10-27 21:23:07 +00:00
|
|
|
if (app != NULL) {
|
2008-03-07 21:13:01 +00:00
|
|
|
char explanation[256];
|
2008-10-27 21:23:07 +00:00
|
|
|
sprintf(explanation, "No work is available for %s",
|
2008-12-18 21:25:51 +00:00
|
|
|
find_user_friendly_name(g_wreq->preferred_apps[i].appid)
|
2008-10-27 21:23:07 +00:00
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(USER_MESSAGE(explanation, "high"));
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-01-20 21:31:13 +00:00
|
|
|
|
|
|
|
// Tell the user about applications they didn't qualify for
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
for (i=0; i<g_wreq->no_work_messages.size(); i++){
|
|
|
|
g_reply->insert_message(g_wreq->no_work_messages.at(i));
|
2008-03-07 21:13:01 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->no_allowed_apps_available) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"No work available for the applications you have selected. Please check your settings on the web site.",
|
|
|
|
"high"
|
|
|
|
)
|
2006-05-02 22:17:09 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->speed.insufficient) {
|
|
|
|
if (g_request->core_client_version>419) {
|
2005-03-19 18:24:24 +00:00
|
|
|
sprintf(helpful,
|
|
|
|
"(won't finish in time) "
|
2008-05-20 21:47:02 +00:00
|
|
|
"BOINC runs %.1f%% of time, computation enabled %.1f%% of that",
|
2008-12-16 16:29:54 +00:00
|
|
|
100.0*g_reply->host.on_frac, 100.0*g_reply->host.active_frac
|
2005-03-19 18:24:24 +00:00
|
|
|
);
|
2007-08-16 17:33:41 +00:00
|
|
|
} else {
|
2005-03-19 18:24:24 +00:00
|
|
|
sprintf(helpful,
|
|
|
|
"(won't finish in time) "
|
2007-08-21 20:07:50 +00:00
|
|
|
"Computer available %.1f%% of time",
|
2008-12-16 16:29:54 +00:00
|
|
|
100.0*g_reply->host.on_frac
|
2005-03-19 18:24:24 +00:00
|
|
|
);
|
|
|
|
}
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(USER_MESSAGE(helpful, "high"));
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->hr_reject_temp) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"(there was work but it was committed to other platforms)",
|
|
|
|
"high"
|
|
|
|
)
|
2004-05-18 18:33:01 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->hr_reject_perm) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"(your platform is not supported by this project)",
|
|
|
|
"high"
|
|
|
|
)
|
2005-11-30 22:52:23 +00:00
|
|
|
);
|
|
|
|
}
|
2009-01-20 21:31:13 +00:00
|
|
|
if (g_wreq->outdated_client) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
" (your BOINC client is old - please install current version)",
|
|
|
|
"high"
|
|
|
|
)
|
2004-04-04 01:59:47 +00:00
|
|
|
);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_PERM);
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2007-09-21 18:10:54 +00:00
|
|
|
"Not sending work because client is outdated\n"
|
2004-04-04 01:59:47 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->excessive_work_buf) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"(Your network connection interval is longer than WU deadline)",
|
|
|
|
"high"
|
|
|
|
)
|
2005-04-18 18:42:29 +00:00
|
|
|
);
|
|
|
|
}
|
2009-01-12 05:28:36 +00:00
|
|
|
if (g_wreq->gpu_too_slow) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"Not sending CUDA jobs because slow GPUs can cause crashes on Windows",
|
|
|
|
"low"
|
|
|
|
)
|
2009-01-12 05:28:36 +00:00
|
|
|
);
|
|
|
|
}
|
2009-01-12 23:47:52 +00:00
|
|
|
if (g_wreq->no_gpus_prefs) {
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"CUDA (GPU) jobs are available, but your preferences are set to not accept them",
|
|
|
|
"low"
|
|
|
|
)
|
2009-01-12 23:47:52 +00:00
|
|
|
);
|
|
|
|
}
|
2009-03-18 21:14:44 +00:00
|
|
|
if (g_wreq->no_cpu_prefs) {
|
|
|
|
g_reply->insert_message(
|
|
|
|
USER_MESSAGE(
|
|
|
|
"CPU jobs are available, but your preferences are set to not accept them",
|
|
|
|
"low"
|
|
|
|
)
|
|
|
|
);
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->daily_result_quota_exceeded) {
|
2005-05-12 21:04:39 +00:00
|
|
|
struct tm *rpc_time_tm;
|
|
|
|
int delay_time;
|
|
|
|
|
2009-01-20 00:54:16 +00:00
|
|
|
sprintf(helpful, "(reached daily quota of %d results)",
|
|
|
|
g_wreq->total_max_results_day
|
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(USER_MESSAGE(helpful, "high"));
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2004-05-27 18:13:00 +00:00
|
|
|
"Daily result quota exceeded for host %d\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id
|
2004-05-27 18:13:00 +00:00
|
|
|
);
|
2005-05-12 21:04:39 +00:00
|
|
|
|
|
|
|
// set delay so host won't return until a random time in
|
2008-06-30 23:05:16 +00:00
|
|
|
// the first hour of the next day.
|
|
|
|
// This is to prevent a lot of hosts from flooding the scheduler
|
|
|
|
// with requests at the same time of day.
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
time_t t = g_reply->host.rpc_time;
|
2008-06-30 23:05:16 +00:00
|
|
|
rpc_time_tm = localtime(&t);
|
|
|
|
delay_time = (23 - rpc_time_tm->tm_hour) * 3600
|
|
|
|
+ (59 - rpc_time_tm->tm_min) * 60
|
|
|
|
+ (60 - rpc_time_tm->tm_sec)
|
|
|
|
+ (int)(3600*(double)rand()/(double)RAND_MAX);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(delay_time);
|
2004-05-27 18:13:00 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_wreq->cache_size_exceeded) {
|
2008-03-30 08:05:45 +00:00
|
|
|
sprintf(helpful, "(reached per-CPU limit of %d tasks)",
|
2007-05-14 15:21:38 +00:00
|
|
|
config.max_wus_in_progress
|
|
|
|
);
|
2009-02-26 03:03:35 +00:00
|
|
|
g_reply->insert_message(USER_MESSAGE(helpful, "high"));
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->set_delay(DELAY_NO_WORK_CACHE);
|
2008-02-21 21:00:58 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2008-04-26 23:34:38 +00:00
|
|
|
"host %d already has %d result(s) in progress\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id, g_wreq->nresults_on_host
|
2007-05-14 15:21:38 +00:00
|
|
|
);
|
|
|
|
}
|
2004-04-04 01:59:47 +00:00
|
|
|
}
|
|
|
|
}
|
2004-12-08 00:40:19 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
static void send_work_old() {
|
2008-12-22 00:10:02 +00:00
|
|
|
g_wreq->no_jobs_available = true;
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->beta_only = false;
|
|
|
|
g_wreq->user_apps_only = true;
|
2009-03-03 16:46:47 +00:00
|
|
|
g_wreq->infeasible_only = false;
|
2008-05-23 16:13:30 +00:00
|
|
|
|
|
|
|
// give top priority to results that require a 'reliable host'
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->reliable) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->reliable_only = true;
|
2008-12-19 18:14:02 +00:00
|
|
|
scan_work_array();
|
2008-05-23 16:13:30 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->reliable_only = false;
|
2008-05-23 16:13:30 +00:00
|
|
|
|
|
|
|
// give 2nd priority to results for a beta app
|
|
|
|
// (projects should load beta work with care,
|
|
|
|
// otherwise your users won't get production work done!
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->allow_beta_work) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->beta_only = true;
|
2008-05-23 16:13:30 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] will accept beta work. Scanning for beta work.\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id
|
2008-05-23 16:13:30 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-19 18:14:02 +00:00
|
|
|
scan_work_array();
|
2008-05-23 16:13:30 +00:00
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->beta_only = false;
|
2008-05-23 16:13:30 +00:00
|
|
|
|
|
|
|
// give next priority to results that were infeasible for some other host
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->infeasible_only = true;
|
2008-12-19 18:14:02 +00:00
|
|
|
scan_work_array();
|
2008-05-23 16:13:30 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->infeasible_only = false;
|
2008-12-19 18:14:02 +00:00
|
|
|
scan_work_array();
|
2008-05-23 16:13:30 +00:00
|
|
|
|
|
|
|
// If user has selected apps but will accept any,
|
|
|
|
// and we haven't found any jobs for selected apps, try others
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
if (!g_wreq->nresults && g_wreq->allow_non_preferred_apps ) {
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->user_apps_only = false;
|
|
|
|
preferred_app_message_index = g_wreq->no_work_messages.size();
|
2008-05-23 16:13:30 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] is looking for work from a non-preferred application\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id
|
2008-05-23 16:13:30 +00:00
|
|
|
);
|
|
|
|
}
|
2008-12-19 18:14:02 +00:00
|
|
|
scan_work_array();
|
2008-05-23 16:13:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-06-04 23:04:12 +00:00
|
|
|
#define ER_MAX 0.05
|
|
|
|
// decide whether to unreplicated jobs to this host
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
void set_trust() {
|
|
|
|
g_wreq->trust = false;
|
|
|
|
if (g_reply->host.error_rate > ER_MAX) {
|
2008-09-17 23:35:16 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] set_trust: error rate %f > %f, don't trust\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.error_rate, ER_MAX
|
2008-09-17 23:35:16 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
double x = sqrt(g_reply->host.error_rate/ER_MAX);
|
|
|
|
if (drand() > x) g_wreq->trust = true;
|
2008-09-17 23:35:16 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] set_trust: random choice for error rate %f: %s\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.error_rate, g_wreq->trust?"yes":"no"
|
2008-09-17 23:35:16 +00:00
|
|
|
);
|
|
|
|
}
|
2008-06-04 23:04:12 +00:00
|
|
|
}
|
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
static double clamp_req_sec(double x) {
|
|
|
|
if (x < MIN_REQ_SECS) return MIN_REQ_SECS;
|
|
|
|
if (x > MAX_REQ_SECS) return MAX_REQ_SECS;
|
|
|
|
return x;
|
|
|
|
}
|
|
|
|
|
2009-03-05 23:08:53 +00:00
|
|
|
// decipher request type, fill in WORK_REQ
|
|
|
|
//
|
|
|
|
void send_work_setup() {
|
|
|
|
g_wreq->disk_available = max_allowable_disk();
|
|
|
|
get_mem_sizes();
|
|
|
|
get_running_frac();
|
|
|
|
get_dcf();
|
|
|
|
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->seconds_to_fill = clamp_req_sec(g_request->work_req_seconds);
|
|
|
|
g_wreq->cpu_req_secs = clamp_req_sec(g_request->cpu_req_secs);
|
|
|
|
g_wreq->cpu_req_instances = g_request->cpu_req_instances;
|
2009-01-24 21:51:19 +00:00
|
|
|
g_wreq->anonymous_platform = anonymous(g_request->platforms.list[0]);
|
|
|
|
|
2009-01-30 22:30:35 +00:00
|
|
|
if (g_request->coproc_cuda) {
|
2009-03-05 23:08:53 +00:00
|
|
|
g_wreq->cuda_req_secs = clamp_req_sec(g_request->coproc_cuda->req_secs);
|
|
|
|
g_wreq->cuda_req_instances = g_request->coproc_cuda->req_instances;
|
2009-01-30 22:30:35 +00:00
|
|
|
if (g_request->coproc_cuda->estimated_delay < 0) {
|
|
|
|
g_request->coproc_cuda->estimated_delay = g_request->cpu_estimated_delay;
|
2009-01-24 21:51:19 +00:00
|
|
|
}
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
|
|
|
if (g_wreq->cpu_req_secs || g_wreq->cuda_req_secs) {
|
|
|
|
g_wreq->rsc_spec_request = true;
|
|
|
|
} else {
|
2009-01-13 00:56:12 +00:00
|
|
|
g_wreq->rsc_spec_request = false;
|
2009-01-10 00:43:33 +00:00
|
|
|
}
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-30 21:25:24 +00:00
|
|
|
"[send] CPU: req %.2f sec, %.2f instances; est delay %.2f\n",
|
|
|
|
g_wreq->cpu_req_secs, g_wreq->cpu_req_instances,
|
|
|
|
g_request->cpu_estimated_delay
|
2009-01-10 00:43:33 +00:00
|
|
|
);
|
2009-01-30 21:25:24 +00:00
|
|
|
if (g_request->coproc_cuda) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] CUDA: req %.2f sec, %.2f instances; est delay %.2f\n",
|
|
|
|
g_wreq->cuda_req_secs, g_wreq->cuda_req_instances,
|
|
|
|
g_request->coproc_cuda->estimated_delay
|
|
|
|
);
|
|
|
|
}
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] work_req_seconds: %.2f secs\n",
|
2009-01-10 00:43:33 +00:00
|
|
|
g_wreq->seconds_to_fill
|
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] %s matchmaker scheduling; %s EDF sim\n",
|
2008-05-23 16:13:30 +00:00
|
|
|
config.matchmaker?"Using":"Not using",
|
2008-05-02 17:48:29 +00:00
|
|
|
config.workload_sim?"Using":"Not using"
|
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-27 18:57:27 +00:00
|
|
|
"[send] available disk %.2f GB, work_buf_min %d\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->disk_available/GIGA,
|
|
|
|
(int)g_request->global_prefs.work_buf_min()
|
2008-05-02 17:48:29 +00:00
|
|
|
);
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2009-01-30 21:25:24 +00:00
|
|
|
"[send] active_frac %f on_frac %f DCF %f\n",
|
2008-12-18 18:19:42 +00:00
|
|
|
g_reply->host.active_frac,
|
|
|
|
g_reply->host.on_frac,
|
2009-01-30 21:25:24 +00:00
|
|
|
g_reply->host.duration_correction_factor
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2009-03-05 23:08:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void send_work() {
|
|
|
|
if (!g_wreq->rsc_spec_request && g_wreq->seconds_to_fill == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (all_apps_use_hr && hr_unknown_platform(g_request->host)) {
|
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"Not sending work because unknown HR class\n"
|
|
|
|
);
|
|
|
|
g_wreq->hr_reject_perm = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
get_host_info();
|
|
|
|
get_prefs_info();
|
|
|
|
|
|
|
|
set_trust();
|
2008-04-26 23:34:38 +00:00
|
|
|
|
|
|
|
if (config.enable_assignment) {
|
2008-12-19 18:14:02 +00:00
|
|
|
if (send_assigned_jobs()) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_assignment) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[assign] [HOST#%d] sent assigned jobs\n", g_reply->host.id
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
if (config.workload_sim && g_request->have_other_results_list) {
|
2008-04-26 23:34:38 +00:00
|
|
|
init_ip_results(
|
2008-12-16 16:29:54 +00:00
|
|
|
g_request->global_prefs.work_buf_min(), effective_ncpus(), g_request->ip_results
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2009-03-03 00:12:55 +00:00
|
|
|
if (config.locality_scheduler_fraction > 0) {
|
|
|
|
if (drand() < config.locality_scheduler_fraction) {
|
|
|
|
send_work_locality();
|
|
|
|
send_work_old();
|
|
|
|
} else {
|
|
|
|
send_work_old();
|
|
|
|
send_work_locality();
|
|
|
|
}
|
|
|
|
} else if (config.locality_scheduling) {
|
2008-12-19 18:14:02 +00:00
|
|
|
send_work_locality();
|
2008-05-23 16:13:30 +00:00
|
|
|
} else if (config.matchmaker) {
|
2008-12-16 16:29:54 +00:00
|
|
|
send_work_matchmaker();
|
2008-05-23 16:13:30 +00:00
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
send_work_old();
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
explain_to_user();
|
2008-04-26 23:34:38 +00:00
|
|
|
}
|
|
|
|
|
2008-05-23 16:13:30 +00:00
|
|
|
// Matchmaker scheduling code follows
|
2008-02-27 22:26:37 +00:00
|
|
|
|
2008-09-17 23:35:16 +00:00
|
|
|
struct JOB {
|
2008-02-27 22:26:37 +00:00
|
|
|
int index;
|
2008-04-30 20:31:33 +00:00
|
|
|
double score;
|
2008-02-27 22:26:37 +00:00
|
|
|
double est_time;
|
|
|
|
double disk_usage;
|
|
|
|
APP* app;
|
2008-04-26 23:34:38 +00:00
|
|
|
BEST_APP_VERSION* bavp;
|
2008-02-27 22:26:37 +00:00
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
bool get_score();
|
2008-02-27 22:26:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
struct JOB_SET {
|
|
|
|
double work_req;
|
|
|
|
double est_time;
|
|
|
|
double disk_usage;
|
|
|
|
double disk_limit;
|
2008-10-01 20:58:28 +00:00
|
|
|
int max_jobs;
|
2008-02-27 22:26:37 +00:00
|
|
|
std::list<JOB> jobs; // sorted high to low
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
JOB_SET() {
|
|
|
|
work_req = g_request->work_req_seconds;
|
2008-09-17 23:35:16 +00:00
|
|
|
est_time = 0;
|
|
|
|
disk_usage = 0;
|
2008-12-16 16:29:54 +00:00
|
|
|
disk_limit = g_wreq->disk_available;
|
2008-10-01 20:58:28 +00:00
|
|
|
max_jobs = config.max_wus_to_send;
|
2009-01-20 00:54:16 +00:00
|
|
|
int n;
|
2008-10-01 20:58:28 +00:00
|
|
|
|
|
|
|
if (config.daily_result_quota) {
|
2009-01-20 00:54:16 +00:00
|
|
|
int mult = max_results_day_multiplier();
|
2008-12-16 16:29:54 +00:00
|
|
|
if (g_reply->host.max_results_day == 0 || g_reply->host.max_results_day>config.daily_result_quota) {
|
|
|
|
g_reply->host.max_results_day = config.daily_result_quota;
|
2008-10-01 20:58:28 +00:00
|
|
|
}
|
2009-01-20 00:54:16 +00:00
|
|
|
g_wreq->total_max_results_day = mult*g_reply->host.max_results_day;
|
|
|
|
n = g_wreq->total_max_results_day - g_reply->host.nresults_today;
|
2008-10-01 20:58:28 +00:00
|
|
|
if (n < 0) n = 0;
|
|
|
|
if (n < max_jobs) max_jobs = n;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (config.max_wus_in_progress) {
|
2009-01-20 00:54:16 +00:00
|
|
|
int mult = max_wus_in_progress_multiplier();
|
|
|
|
n = config.max_wus_in_progress*mult - g_wreq->nresults_on_host;
|
2008-10-01 20:58:28 +00:00
|
|
|
if (n < 0) n = 0;
|
|
|
|
if (n < max_jobs) max_jobs = n;
|
|
|
|
}
|
2008-09-17 23:35:16 +00:00
|
|
|
}
|
2008-02-27 22:26:37 +00:00
|
|
|
void add_job(JOB&);
|
2008-04-30 20:31:33 +00:00
|
|
|
double higher_score_disk_usage(double);
|
|
|
|
double lowest_score();
|
2008-02-27 22:26:37 +00:00
|
|
|
inline bool request_satisfied() {
|
|
|
|
return est_time >= work_req;
|
|
|
|
}
|
2008-12-16 16:29:54 +00:00
|
|
|
void send();
|
2008-02-27 22:26:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// reread result from DB, make sure it's still unsent
|
|
|
|
// TODO: from here to add_result_to_reply()
|
|
|
|
// (which updates the DB record) should be a transaction
|
|
|
|
//
|
|
|
|
int read_sendable_result(DB_RESULT& result) {
|
|
|
|
int retval = result.lookup_id(result.id);
|
|
|
|
if (retval) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"[RESULT#%d] result.lookup_id() failed %d\n",
|
|
|
|
result.id, retval
|
|
|
|
);
|
|
|
|
return ERR_NOT_FOUND;
|
|
|
|
}
|
|
|
|
if (result.server_state != RESULT_SERVER_STATE_UNSENT) {
|
2008-04-26 23:34:38 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
2008-02-27 22:26:37 +00:00
|
|
|
"[RESULT#%d] expected to be unsent; instead, state is %d\n",
|
|
|
|
result.id, result.server_state
|
|
|
|
);
|
|
|
|
return ERR_BAD_RESULT_STATE;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2008-02-25 18:05:04 +00:00
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// compute a "score" for sending this job to this host.
|
2008-09-17 23:35:16 +00:00
|
|
|
// Return false if the WU is infeasible.
|
|
|
|
// Otherwise set est_time and disk_usage.
|
2008-02-25 18:05:04 +00:00
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
bool JOB::get_score() {
|
2008-02-25 18:05:04 +00:00
|
|
|
WORKUNIT wu;
|
|
|
|
int retval;
|
|
|
|
|
2008-02-27 22:26:37 +00:00
|
|
|
WU_RESULT& wu_result = ssp->wu_results[index];
|
2008-02-25 18:05:04 +00:00
|
|
|
wu = wu_result.workunit;
|
2008-05-01 22:11:08 +00:00
|
|
|
app = ssp->lookup_app(wu.appid);
|
2008-02-25 18:05:04 +00:00
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
score = 0;
|
2008-02-27 22:26:37 +00:00
|
|
|
|
2008-05-06 04:20:32 +00:00
|
|
|
// Find the app_version for the client's platform.
|
2008-02-25 18:05:04 +00:00
|
|
|
//
|
2009-03-07 01:00:05 +00:00
|
|
|
bavp = get_app_version(wu, true);
|
2008-05-06 20:09:07 +00:00
|
|
|
if (!bavp) return false;
|
2008-02-25 18:05:04 +00:00
|
|
|
|
2008-12-15 21:14:32 +00:00
|
|
|
retval = wu_is_infeasible_fast(wu, *app, *bavp);
|
2008-02-25 18:05:04 +00:00
|
|
|
if (retval) {
|
2008-04-26 23:34:38 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] [HOST#%d] [WU#%d %s] WU is infeasible: %s\n",
|
2008-12-16 16:29:54 +00:00
|
|
|
g_reply->host.id, wu.id, wu.name, infeasible_string(retval)
|
2008-04-26 23:34:38 +00:00
|
|
|
);
|
|
|
|
}
|
2008-05-06 20:09:07 +00:00
|
|
|
return false;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
score = 1;
|
|
|
|
|
2008-05-23 16:13:30 +00:00
|
|
|
// check if user has selected apps,
|
|
|
|
// and send beta work to beta users
|
2008-04-30 20:31:33 +00:00
|
|
|
//
|
2008-09-17 23:35:16 +00:00
|
|
|
if (app->beta && !config.distinct_beta_apps) {
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->allow_beta_work) {
|
2008-05-23 16:13:30 +00:00
|
|
|
score += 1;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
2008-12-16 16:29:54 +00:00
|
|
|
if (app_not_selected(wu)) {
|
2008-12-18 21:25:51 +00:00
|
|
|
if (!g_wreq->allow_non_preferred_apps) {
|
2008-05-06 20:09:07 +00:00
|
|
|
return false;
|
2008-05-23 16:13:30 +00:00
|
|
|
} else {
|
|
|
|
// Allow work to be sent, but it will not get a bump in its score
|
2008-04-30 20:31:33 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
score += 1;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
2008-04-30 20:31:33 +00:00
|
|
|
}
|
2008-05-23 16:13:30 +00:00
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// if job needs to get done fast, send to fast/reliable host
|
|
|
|
//
|
2008-12-18 21:25:51 +00:00
|
|
|
if (g_wreq->reliable && (wu_result.need_reliable)) {
|
2008-04-30 20:31:33 +00:00
|
|
|
score += 1;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// if job already committed to an HR class,
|
|
|
|
// try to send to host in that class
|
|
|
|
//
|
2008-02-25 18:05:04 +00:00
|
|
|
if (wu_result.infeasible_count) {
|
2008-04-30 20:31:33 +00:00
|
|
|
score += 1;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
2008-04-30 20:31:33 +00:00
|
|
|
|
2008-05-06 04:20:32 +00:00
|
|
|
// Favor jobs that will run fast
|
|
|
|
//
|
|
|
|
score += bavp->host_usage.flops/1e9;
|
|
|
|
|
2008-05-06 20:09:07 +00:00
|
|
|
// match large jobs to fast hosts
|
|
|
|
//
|
|
|
|
if (config.job_size_matching) {
|
2008-12-16 16:29:54 +00:00
|
|
|
double host_stdev = (g_reply->host.p_fpops - ssp->perf_info.host_fpops_mean)/ ssp->perf_info.host_fpops_stdev;
|
2008-05-06 20:09:07 +00:00
|
|
|
double diff = host_stdev - wu_result.fpops_size;
|
|
|
|
score -= diff*diff;
|
|
|
|
}
|
|
|
|
|
2008-05-06 04:20:32 +00:00
|
|
|
// TODO: If user has selected some apps but will accept jobs from others,
|
2008-04-30 20:31:33 +00:00
|
|
|
// try to send them jobs from the selected apps
|
|
|
|
//
|
2008-05-06 20:09:07 +00:00
|
|
|
|
2008-12-15 21:14:32 +00:00
|
|
|
est_time = estimate_duration(wu, *bavp);
|
2008-09-17 23:35:16 +00:00
|
|
|
disk_usage = wu.rsc_disk_bound;
|
2008-05-06 20:09:07 +00:00
|
|
|
return true;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-02-27 22:26:37 +00:00
|
|
|
// The expensive (DB-touching) feasibility checks:
// duplicate results for the same user/host, and homogeneous-redundancy
// platform commitment. Returns true if the job must NOT be sent.
//
bool wu_is_infeasible_slow(
    WU_RESULT& wu_result, SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply
) {
    char buf[256];
    int retval;
    int n;
    DB_RESULT result;

    if (config.one_result_per_user_per_wu) {
        // Don't send if we've already sent a result of this WU to this user.
        //
        sprintf(buf,
            "where workunitid=%d and userid=%d",
            wu_result.workunit.id, g_reply->user.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return true;
        }
        if (n>0) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] send_work: user %d already has %d result(s) for WU %d\n",
                    g_reply->user.id, n, wu_result.workunit.id
                );
            }
            return true;
        }
    } else if (config.one_result_per_host_per_wu) {
        // Don't send if we've already sent a result of this WU to this host.
        // Only needed when we don't enforce one result per user.
        //
        sprintf(buf,
            "where workunitid=%d and hostid=%d",
            wu_result.workunit.id, g_reply->host.id
        );
        retval = result.count(n, buf);
        if (retval) {
            log_messages.printf(MSG_CRITICAL,
                "send_work: can't get result count (%d)\n", retval
            );
            return true;
        }
        if (n>0) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] send_work: host %d already has %d result(s) for WU %d\n",
                    g_reply->host.id, n, wu_result.workunit.id
                );
            }
            return true;
        }
    }

    // homogeneous redundancy: reject if the WU is already committed
    // to a different platform class
    //
    APP* app = ssp->lookup_app(wu_result.workunit.appid);
    WORKUNIT wu = wu_result.workunit;
    if (app_hr_type(*app)) {
        if (already_sent_to_different_platform_careful(wu, *app)) {
            if (config.debug_send) {
                log_messages.printf(MSG_NORMAL,
                    "[send] [HOST#%d] [WU#%d %s] WU is infeasible (assigned to different platform)\n",
                    g_reply->host.id, wu.id, wu.name
                );
            }
            // Mark the workunit as infeasible.
            // This ensures that jobs already assigned to a platform
            // are processed first.
            //
            wu_result.infeasible_count++;
            return true;
        }
    }
    return false;
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
double JOB_SET::lowest_score() {
|
2008-02-27 22:26:37 +00:00
|
|
|
if (jobs.empty()) return 0;
|
2008-04-30 20:31:33 +00:00
|
|
|
return jobs.back().score;
|
2008-02-27 22:26:37 +00:00
|
|
|
}
|
2008-02-25 18:05:04 +00:00
|
|
|
|
2008-10-01 20:58:28 +00:00
|
|
|
// add the given job, and remove lowest-score jobs that
|
|
|
|
// - are in excess of work request
|
|
|
|
// - are in excess of per-request or per-day limits
|
|
|
|
// - cause the disk limit to be exceeded
|
2008-02-25 18:05:04 +00:00
|
|
|
//
|
|
|
|
void JOB_SET::add_job(JOB& job) {
|
|
|
|
while (!jobs.empty()) {
|
|
|
|
JOB& worst_job = jobs.back();
|
|
|
|
if (est_time + job.est_time - worst_job.est_time > work_req) {
|
|
|
|
est_time -= worst_job.est_time;
|
|
|
|
disk_usage -= worst_job.disk_usage;
|
|
|
|
jobs.pop_back();
|
2008-09-17 23:35:16 +00:00
|
|
|
ssp->wu_results[worst_job.index].state = WR_STATE_PRESENT;
|
2008-07-14 20:25:41 +00:00
|
|
|
} else {
|
|
|
|
break;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
while (!jobs.empty()) {
|
|
|
|
JOB& worst_job = jobs.back();
|
|
|
|
if (disk_usage + job.disk_usage > disk_limit) {
|
|
|
|
est_time -= worst_job.est_time;
|
|
|
|
disk_usage -= worst_job.disk_usage;
|
|
|
|
jobs.pop_back();
|
2008-09-17 23:35:16 +00:00
|
|
|
ssp->wu_results[worst_job.index].state = WR_STATE_PRESENT;
|
2008-07-14 20:25:41 +00:00
|
|
|
} else {
|
|
|
|
break;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
}
|
2008-10-01 20:58:28 +00:00
|
|
|
|
|
|
|
if (jobs.size() == max_jobs) {
|
|
|
|
JOB& worst_job = jobs.back();
|
|
|
|
jobs.pop_back();
|
|
|
|
ssp->wu_results[worst_job.index].state = WR_STATE_PRESENT;
|
|
|
|
}
|
|
|
|
|
2009-02-26 00:23:23 +00:00
|
|
|
std::list<JOB>::iterator i = jobs.begin();
|
2008-02-25 18:05:04 +00:00
|
|
|
while (i != jobs.end()) {
|
2008-04-30 20:31:33 +00:00
|
|
|
if (i->score < job.score) {
|
2008-02-25 18:05:04 +00:00
|
|
|
jobs.insert(i, job);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
if (i == jobs.end()) {
|
|
|
|
jobs.push_back(job);
|
|
|
|
}
|
|
|
|
est_time += job.est_time;
|
|
|
|
disk_usage += job.disk_usage;
|
2008-09-17 23:35:16 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
- scheduler: add support for anonymous-platform coproc apps.
Old: although the request message contained all info
about the app version (flops, coproc usage etc.)
the server ignored this info,
and assumed that all anonymous platform apps where CPU.
With 6.6 client, this could produce infinite work fetch:
- client uses anon platform, has coproc app
- client has idle CPU, requests CPU work
- scheduler sends it jobs, thinking they will be done by CPU app
- client asks for more work etc.
New: scheduler parses full info on anon platform app versions:
plan class, FLOPS, coprocs.
It uses this info to make scheduling decisions;
in particular, if the request is for CUDA work,
if will only send jobs that use a CUDA app version.
The <result> records it returns contain info
(plan_class) that tells the client which app_version to use.
This will work correctly even if the client has multiple app versions
for the same app (e.g., a CPU version and a GPU version)
svn path=/trunk/boinc/; revision=17506
2009-03-05 17:30:10 +00:00
|
|
|
"[send] added job to set. est_time %.2f disk_usage %.2fGB\n",
|
|
|
|
est_time, disk_usage/GIGA
|
2008-09-17 23:35:16 +00:00
|
|
|
);
|
|
|
|
}
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-04-30 20:31:33 +00:00
|
|
|
// return the disk usage of jobs above the given score
|
2008-02-25 18:05:04 +00:00
|
|
|
//
|
2008-04-30 20:31:33 +00:00
|
|
|
double JOB_SET::higher_score_disk_usage(double v) {
|
2008-02-25 18:05:04 +00:00
|
|
|
double sum = 0;
|
2009-02-26 00:23:23 +00:00
|
|
|
std::list<JOB>::iterator i = jobs.begin();
|
2008-02-25 18:05:04 +00:00
|
|
|
while (i != jobs.end()) {
|
2008-04-30 20:31:33 +00:00
|
|
|
if (i->score < v) break;
|
2008-02-25 18:05:04 +00:00
|
|
|
sum += i->disk_usage;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return sum;
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
void JOB_SET::send() {
|
2008-02-27 22:26:37 +00:00
|
|
|
WORKUNIT wu;
|
|
|
|
DB_RESULT result;
|
|
|
|
int retval;
|
|
|
|
|
2009-02-26 00:23:23 +00:00
|
|
|
std::list<JOB>::iterator i = jobs.begin();
|
2008-02-27 22:26:37 +00:00
|
|
|
while (i != jobs.end()) {
|
2008-07-15 22:44:36 +00:00
|
|
|
JOB& job = *(i++);
|
2008-02-27 22:26:37 +00:00
|
|
|
WU_RESULT wu_result = ssp->wu_results[job.index];
|
|
|
|
ssp->wu_results[job.index].state = WR_STATE_EMPTY;
|
|
|
|
wu = wu_result.workunit;
|
|
|
|
result.id = wu_result.resultid;
|
|
|
|
retval = read_sendable_result(result);
|
2008-07-15 22:44:36 +00:00
|
|
|
if (!retval) {
|
2009-03-03 16:38:54 +00:00
|
|
|
add_result_to_reply(result, wu, job.bavp, false);
|
2008-07-15 22:44:36 +00:00
|
|
|
}
|
2008-02-27 22:26:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-12-16 16:29:54 +00:00
|
|
|
void send_work_matchmaker() {
|
2008-10-27 21:23:07 +00:00
|
|
|
int i, slots_locked=0, slots_nonempty=0;
|
2008-12-16 16:29:54 +00:00
|
|
|
JOB_SET jobs;
|
2008-05-23 16:13:30 +00:00
|
|
|
int min_slots = config.mm_min_slots;
|
|
|
|
if (!min_slots) min_slots = ssp->max_wu_results/2;
|
|
|
|
int max_slots = config.mm_max_slots;
|
|
|
|
if (!max_slots) max_slots = ssp->max_wu_results;
|
2008-02-25 18:05:04 +00:00
|
|
|
int max_locked = 10;
|
|
|
|
|
|
|
|
lock_sema();
|
|
|
|
i = rand() % ssp->max_wu_results;
|
2008-10-01 20:58:28 +00:00
|
|
|
|
|
|
|
// scan through the job cache, maintaining a JOB_SET of jobs
|
|
|
|
// that we can send to this client, ordered by score.
|
|
|
|
//
|
2008-07-17 20:58:42 +00:00
|
|
|
for (int slots_scanned=0; slots_scanned<max_slots; slots_scanned++) {
|
2008-02-25 18:05:04 +00:00
|
|
|
i = (i+1) % ssp->max_wu_results;
|
|
|
|
WU_RESULT& wu_result = ssp->wu_results[i];
|
|
|
|
switch (wu_result.state) {
|
|
|
|
case WR_STATE_EMPTY:
|
|
|
|
continue;
|
|
|
|
case WR_STATE_PRESENT:
|
2008-10-27 21:23:07 +00:00
|
|
|
slots_nonempty++;
|
2008-02-25 18:05:04 +00:00
|
|
|
break;
|
|
|
|
default:
|
2008-10-27 21:23:07 +00:00
|
|
|
slots_nonempty++;
|
2008-09-17 23:35:16 +00:00
|
|
|
if (wu_result.state == g_pid) break;
|
2008-02-25 18:05:04 +00:00
|
|
|
slots_locked++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2008-02-27 22:26:37 +00:00
|
|
|
JOB job;
|
|
|
|
job.index = i;
|
2008-10-01 20:58:28 +00:00
|
|
|
|
|
|
|
// get score for this job, and skip it if it fails quick check.
|
|
|
|
// NOTE: the EDF check done in get_score()
|
|
|
|
// includes only in-progress jobs.
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
if (!job.get_score()) {
|
2008-05-06 20:09:07 +00:00
|
|
|
continue;
|
|
|
|
}
|
2008-05-02 17:48:29 +00:00
|
|
|
if (config.debug_send) {
|
2009-01-15 20:23:20 +00:00
|
|
|
log_messages.printf(MSG_NORMAL,
|
|
|
|
"[send] score for %s: %f\n", wu_result.workunit.name, job.score
|
2008-05-02 17:48:29 +00:00
|
|
|
);
|
|
|
|
}
|
2008-10-01 20:58:28 +00:00
|
|
|
|
2008-05-23 16:13:30 +00:00
|
|
|
if (job.score > jobs.lowest_score() || !jobs.request_satisfied()) {
|
2008-07-17 20:58:42 +00:00
|
|
|
ssp->wu_results[i].state = g_pid;
|
2008-02-25 18:05:04 +00:00
|
|
|
unlock_sema();
|
2008-12-16 16:29:54 +00:00
|
|
|
if (wu_is_infeasible_slow(wu_result, *g_request, *g_reply)) {
|
2008-09-17 23:35:16 +00:00
|
|
|
// if we can't use this job, put it back in pool
|
|
|
|
//
|
2008-02-25 18:05:04 +00:00
|
|
|
lock_sema();
|
2008-09-17 23:35:16 +00:00
|
|
|
ssp->wu_results[i].state = WR_STATE_PRESENT;
|
2008-02-25 18:05:04 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
lock_sema();
|
2008-02-27 22:26:37 +00:00
|
|
|
jobs.add_job(job);
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-02-27 22:26:37 +00:00
|
|
|
if (jobs.request_satisfied() && slots_scanned>=min_slots) break;
|
2008-02-25 18:05:04 +00:00
|
|
|
}
|
|
|
|
|
2008-10-27 21:23:07 +00:00
|
|
|
if (!slots_nonempty) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"Job cache is empty - check feeder\n"
|
|
|
|
);
|
2008-12-16 16:29:54 +00:00
|
|
|
g_wreq->no_jobs_available = true;
|
2008-10-27 21:23:07 +00:00
|
|
|
}
|
|
|
|
|
2008-10-01 20:58:28 +00:00
|
|
|
// TODO: trim jobs from tail of list until we pass the EDF check
|
|
|
|
//
|
2008-12-16 16:29:54 +00:00
|
|
|
jobs.send();
|
2008-02-25 18:05:04 +00:00
|
|
|
unlock_sema();
|
|
|
|
if (slots_locked > max_locked) {
|
|
|
|
log_messages.printf(MSG_CRITICAL,
|
|
|
|
"Found too many locked slots (%d>%d) - increase array size",
|
|
|
|
slots_locked, max_locked
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-01-02 18:29:53 +00:00
|
|
|
// Version-control keyword marker; "$Id$" is presumably expanded by
// CVS/SVN at checkout to identify this file's revision. Referenced
// nowhere at runtime; kept so the string is embedded in the binary.
const char *BOINC_RCSID_32dcd335e7 = "$Id$";
|