- client: if a GPU job is blocked because there is not enough available video RAM,

don't fetch more jobs of that resource type (CUDA or ATI) from that project

svn path=/trunk/boinc/; revision=20817
This commit is contained in:
David Anderson 2010-03-10 06:00:37 +00:00
parent 0ad0886df3
commit 4f77556c74
8 changed files with 57 additions and 20 deletions

View File

@ -1771,3 +1771,16 @@ David 9 Mar 2010
credit_test.cpp
lib/
average.cpp,h
David 9 Mar 2010
- client: if a GPU job is blocked because there is not enough available video RAM,
don't fetch more jobs of that resource type (CUDA or ATI) from that project
db/
boinc_db.h
sched/
credit_test.cpp
client/
client_types.cpp,h
work_fetch.cpp
cpu_sched.cpp

View File

@ -71,6 +71,8 @@ void PROJECT::init() {
no_cpu_apps = false;
no_cuda_apps = false;
no_ati_apps = false;
cuda_low_mem = false;
ati_low_mem = false;
strcpy(host_venue, "");
using_venue_specific_prefs = false;
scheduler_urls.clear();

View File

@ -209,6 +209,9 @@ struct PROJECT {
bool no_cpu_apps;
bool no_cuda_apps;
bool no_ati_apps;
// the following two flags are set dynamically by CLIENT_STATE::enforce_schedule()
bool cuda_low_mem;
bool ati_low_mem;
/// logically, this belongs in the client state file
/// rather than the account file.

View File

@ -1258,6 +1258,11 @@ bool CLIENT_STATE::enforce_schedule() {
//
assign_coprocs(runnable_jobs);
for (i=0; i<projects.size(); i++) {
projects[i]->cuda_low_mem = false;
projects[i]->ati_low_mem = false;
}
// prune jobs that don't fit in RAM or that exceed CPU usage limits.
// Mark the rest as SCHEDULED
//
@ -1274,6 +1279,11 @@ bool CLIENT_STATE::enforce_schedule() {
//
if (!atp || !atp->process_exists()) {
if (rp->insufficient_video_ram()) {
if (rp->uses_cuda()) {
rp->project->cuda_low_mem = true;
} else {
rp->project->ati_low_mem = true;
}
rp->schedule_backoff = now + 300; // try again in 5 minutes
request_schedule_cpus("insufficient GPU RAM");
continue;

View File

@ -128,9 +128,11 @@ bool RSC_PROJECT_WORK_FETCH::compute_may_have_work(PROJECT* p, int rsc_type) {
break;
case RSC_TYPE_CUDA:
if (p->no_cuda_pref) return false;
if (p->cuda_low_mem) return false;
break;
case RSC_TYPE_ATI:
if (p->no_ati_pref) return false;
if (p->ati_low_mem) return false;
break;
}
return (backoff_time < gstate.now);

View File

@ -498,13 +498,18 @@ struct RESULT {
int teamid;
int priority;
char mod_time[16];
// the following 3 fields returned by 6.10+ clients
double elapsed_time; // AKA runtime
double flops_estimate; // as returned by app_plan()
double elapsed_time;
// AKA runtime; returned by 6.10+ clients
double flops_estimate;
// misnomer: actually the peak device FLOPS,
// returned by app_plan()
// An adjusted version of this is sent to clients.
int app_version_id;
// ID of app versions used to compute this
// 0 if not reported (pre-6.10 client)
// -1 if anonymous platform
// ID of app version used to compute this
// 0 if unknown (relic of old scheduler)
// -1 if anonymous platform CPU
// -2 if anonymous platform NVIDIA
// -3 if anonymous platform ATI
// the following used by the scheduler, but not stored in the DB
//

View File

@ -261,21 +261,17 @@ function is_dev($v) {
return (strstr($v["status"], "Development") != null);
}
$url_base = "http://boinc.ssl.berkeley.edu/dl/";
//$url_base = "http://mirror.worldcommunitygrid.org/mirror/";
// note: rand() is inclusive
//switch(rand(0,2)) {
switch(rand(0,4)) {
//switch(0) {
//case 0: $url_base = "http://boinc.berkeley.edu/dl/"; break;
//case 0: $url_base = "http://boincdl3.ssl.berkeley.edu/mirror/"; break;
//case 1: $url_base = "http://einstein.ligo.caltech.edu/download/boinc/dl/"; break;
//case 2: $url_base = "http://einstein.aei.mpg.de/download/boinc/dl/"; break;
case 0: $url_base = "http://boincdl3.ssl.berkeley.edu/mirror/"; break;
case 1: $url_base = "http://einstein.ligo.caltech.edu/download/boinc/dl/"; break;
case 2: $url_base = "http://einstein.aei.mpg.de/download/boinc/dl/"; break;
//case 1: $url_base = "http://morel.mit.edu/download/boinc/dl/"; break;
//case 2: $url_base = "http://einstein.aei.mpg.de/download/boinc/dl/"; break;
//case 3: $url_base = "http://einstein.astro.gla.ac.uk/download/boinc/dl/"; break;
case 3: $url_base = "http://einstein.astro.gla.ac.uk/download/boinc/dl/"; break;
case 4: $url_base = "http://einstein-dl.phys.uwm.edu/download/boinc/dl/"; break;
//case 4: $url_base = "http://albert.gravity.psu.edu/download/boinc/dl/"; break;
//case 6: $url_base = "http://einstein.phys.uwm.edu/download/boinc/dl/"; break;
//}
}
?>

View File

@ -226,7 +226,7 @@ void update_av_scales() {
// and update data structures.
// Return false if the PFC is an average.
//
bool get_pfc(RESULT& r, double& pfc) {
bool get_pfc(RESULT& r, WORKUNIT& wu, double& pfc) {
APP_VERSION* avp = NULL;
DB_HOST host;
int rsc_type;
@ -249,7 +249,7 @@ bool get_pfc(RESULT& r, double& pfc) {
printf(" skipping: anon platform\n");
return false;
} else {
pfc = r.elapsed_time * r.flops_estimate;
pfc = (r.elapsed_time * r.flops_estimate)/wu.rsc_fpops_est;
avp = lookup_av(r.app_version_id);
printf(" sec: %.0f GFLOPS: %.0f PFC: %.0fG raw credit: %.2f\n",
r.elapsed_time, r.flops_estimate/1e9, pfc/1e9, pfc*COBBLESTONE_SCALE
@ -341,9 +341,15 @@ int main(int argc, char** argv) {
printf("%d) result %d WU %d host %d\n",
n, r.id, r.workunitid, r.hostid
);
DB_WORKUNIT wu;
retval = wu.lookup_id(r.workunitid);
if (retval) {
printf(" No WU!\n");
continue;
}
double pfc;
if (!get_pfc(r, pfc)) {
if (!get_pfc(r, wu, pfc)) {
continue;
}
double new_claimed_credit = pfc * COBBLESTONE_SCALE;