boinc/sched/sched_locality.C

457 lines
13 KiB
C

// The contents of this file are subject to the BOINC Public License
// Version 1.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://boinc.berkeley.edu/license_1.0.txt
//
// Software distributed under the License is distributed on an "AS IS"
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
// License for the specific language governing rights and limitations
// under the License.
//
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
//
// The Initial Developer of the Original Code is the SETI@home project.
// Portions created by the SETI@home project are Copyright (C) 2002
// University of California at Berkeley. All Rights Reserved.
//
// Contributor(s):
//
// Locality scheduling: see doc/sched_locality.php
#include <stdio.h>
#include <unistd.h>
#include <glob.h>
#include "boinc_db.h"
#include "error_numbers.h"
#include "main.h"
#include "server_types.h"
#include "sched_shmem.h"
#include "sched_send.h"
#include "sched_msgs.h"
#include "sched_locality.h"
// Debug switch.
// NOTE(review): nothing in the visible part of this file tests
// VERBOSE_DEBUG; presumably it is meant for #ifdef'd diagnostics here
// or in code outside this view -- confirm before removing.
#define VERBOSE_DEBUG
// Derive the file name from a result name.
// The result name is copied to `out` and cut at the first "__".
// Returns 0 on success.
// Returns -1 if the name contains no "__";
// `out` then holds an unmodified copy of `in`.
// `out` must be big enough for all of `in`; no length is checked.
//
static int extract_filename(char* in, char* out) {
    char* sep;

    strcpy(out, in);
    sep = strstr(out, "__");
    if (sep != NULL) {
        *sep = '\0';
        return 0;
    }
    return -1;
}
// Find the app and app_version for the client's platform.
//
// On success returns 0 with `app` set.
// `avp` is set to the app version found for the platform,
// or to NULL if the platform is anonymous.
//
// Returns ERR_NO_APP_VERSION if
// - (anonymous platform) the app can't be looked up,
//   or the request doesn't have a version of it
// - no app version is found for the platform
// - the app version is not compatible with the client's core client
//
// `reply` is not used here.
//
static int get_app_version(
    WORKUNIT& wu, APP* &app, APP_VERSION* &avp,
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    if (anonymous(platform)) {
        app = ss.lookup_app(wu.appid);

        // lookup_app() hands back a pointer that is dereferenced below.
        // NOTE(review): presumably it is NULL when the app ID is unknown;
        // treat that like "no version" instead of crashing.
        //
        if (!app) {
            return ERR_NO_APP_VERSION;
        }
        if (!sreq.has_version(*app)) {
            return ERR_NO_APP_VERSION;
        }
        avp = NULL;
        return 0;
    }

    if (!find_app_version(wreq, wu, platform, ss, app, avp)) {
        return ERR_NO_APP_VERSION;
    }

    // see if the core client is too old.
    //
    if (!app_core_compatible(wreq, *avp)) {
        return ERR_NO_APP_VERSION;
    }
    return 0;
}
// Try to send the client this result
// This can fail because:
// - the result's workunit can't be looked up
// - already sent a result for this WU
//   (only checked if config.one_result_per_user_per_wu;
//   returns ERR_WU_USER_RULE)
// - no app_version available (returns ERR_NO_APP_VERSION)
// - add_result_to_reply() fails
// Returns 0 if the result was added to the reply.
//
static int possibly_send_result(
    DB_RESULT& result,
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    DB_WORKUNIT wu;
    DB_RESULT result2;
    int retval, count;
    char buf[256];
    APP* app = NULL;
    APP_VERSION* avp = NULL;

    // The workunit is needed in every case:
    // get_app_version() reads wu.appid and add_result_to_reply() takes
    // the workunit.  So look it up unconditionally,
    // not only when the one-result-per-user rule is on.
    //
    retval = wu.lookup_id(result.workunitid);
    if (retval) return retval;

    if (config.one_result_per_user_per_wu) {
        // has this user already been sent a result of this workunit?
        //
        sprintf(buf, "where userid=%d and workunitid=%d", reply.user.id, wu.id);
        retval = result2.count(count, buf);
        if (retval) return retval;
        if (count > 0) return ERR_WU_USER_RULE;
    }

    retval = get_app_version(
        wu, app, avp,
        sreq, reply, platform, wreq, ss
    );
    if (retval) return retval;

    return add_result_to_reply(result, wu, reply, platform, wreq, app, avp);
}
// Ask the WU generator to make more WUs for this file.
// Returns nonzero if can't make more work.
// Returns zero if it *might* have made more work
// (no way to be sure if it succeeded).
//
// The request is made by touching need_work/<filename>;
// we then sleep for config.locality_scheduling_wait_period
// (presumably to give the generator time to act) before returning.
// If no_work_available/<filename> exists, the generator isn't asked.
//
// The file name can originate from the client's request
// (send_work_locality() passes the names the client reported),
// so a name that contains '/' or doesn't fit in the path buffer
// is refused rather than used to build a path.
//
int make_more_work_for_file(char* filename) {
    char fullpath[512];
    int n;

    // don't let the name escape the locality_scheduling directories
    //
    if (strchr(filename, '/')) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "make_more_work_for_file(): bad file name %s\n", filename
        );
        return -1;
    }

    n = snprintf(fullpath, sizeof(fullpath),
        "../locality_scheduling/no_work_available/%s", filename
    );
    if (n < 0 || n >= (int)sizeof(fullpath)) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "make_more_work_for_file(): file name too long: %s\n", filename
        );
        return -1;
    }

    FILE *fp=fopen(fullpath, "r");
    if (fp) {
        // since we found this file, it means that no work remains for this WU.
        // So give up trying to interact with the WU generator.
        fclose(fp);
        log_messages.printf(
            SCHED_MSG_LOG::DEBUG,
            "found %s indicating no work remaining for file %s\n", fullpath, filename
        );
        return -1;
    }

    // open and touch a file in the need_work/
    // directory as a way of indicating that we need work for this file.
    // If this operation fails, don't worry or tarry!
    //
    n = snprintf(fullpath, sizeof(fullpath),
        "../locality_scheduling/need_work/%s", filename
    );
    if (n < 0 || n >= (int)sizeof(fullpath)) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "make_more_work_for_file(): file name too long: %s\n", filename
        );
        return -1;
    }

    FILE *fp2=fopen(fullpath, "w");
    if (!fp2) {
        log_messages.printf(
            SCHED_MSG_LOG::CRITICAL,
            "unable to touch %s\n", fullpath
        );
        return -1;
    }
    fclose(fp2);
    log_messages.printf(
        SCHED_MSG_LOG::DEBUG,
        "touching %s: need work for file %s\n", fullpath, filename
    );
    sleep(config.locality_scheduling_wait_period);
    return 0;
}
// Get a randomly-chosen filename in the working set.
//
// The working set is whatever matches
// ../locality_scheduling/work_available/*
// One match is picked with rand() and the part after its last '/'
// is copied to `filename`.
// Returns 0 on success.
// Returns 1, after logging a CRITICAL message, if glob() fails,
// nothing matches, or the chosen path has nothing after its last '/'.
// `filename` must be big enough for the name; no length is checked.
//
static int get_working_set_filename(char *filename) {
    const char *pattern = "../locality_scheduling/work_available/*";
    glob_t matches;
    char *leaf = NULL;
    bool ok;

    // directory must exist and be readable
    //
    ok = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_NOCHECK, NULL, &matches) == 0
        && matches.gl_pathc != 0;

    // with GLOB_NOCHECK an unmatched pattern comes back as the only entry:
    // that means the directory was empty
    //
    if (ok && matches.gl_pathc==1 && strcmp(pattern, matches.gl_pathv[0]) == 0) {
        ok = false;
    }

    if (ok) {
        // Choose a file at random, and skip over its directory part.
        //
        int pick = rand() % matches.gl_pathc;
        leaf = rindex(matches.gl_pathv[pick], '/');
        if (leaf != NULL && leaf[1] != '\0') {
            leaf++;
        } else {
            // no slash found, or nothing follows it
            ok = false;
        }
    }

    if (!ok) {
        log_messages.printf(SCHED_MSG_LOG::CRITICAL,
            "get_working_set_filename(): pattern %s not found\n", pattern
        );
        globfree(&matches);
        return 1;
    }

    strcpy(filename, leaf);
    globfree(&matches);
    log_messages.printf(SCHED_MSG_LOG::DEBUG,
        "get_working_set_filename(): returning %s\n", filename
    );
    return 0;
}
// Touch ../locality_scheduling/working_set_removal/<filename>
// to flag the file for possible removal from the working set.
// Best effort: nothing is reported if the marker can't be created.
// A name that doesn't fit in the path buffer is skipped
// rather than written past the end of the buffer.
//
void flag_for_possible_removal(char* filename) {
    // callers hold file names in 256-byte buffers,
    // so leave room for such a name plus the directory prefix
    //
    char path[512];
    int n = snprintf(path, sizeof(path),
        "../locality_scheduling/working_set_removal/%s", filename
    );
    if (n < 0 || n >= (int)sizeof(path)) return;

    FILE *f = fopen(path, "w");
    if (f) fclose(f);
}
// The client has (or will soon have) the given file.
// Try to send it results that use that file.
// If don't get any the first time,
// trigger the work generator, then try again.
//
// nsent is set to the number of results added to the reply;
// it is valid on every return path, including errors.
// in_working_set: if true and no result is found for the file,
// the file is flagged for possible removal from the working set.
//
// Returns 0 normally (even if nothing was sent).
// Returns ERR_NO_APP_VERSION if no app version is available,
// the lookup error if the user's latest result for this file can't be read,
// or -1 if the file name is too long to build a query.
//
// NOTE(review): filename is put into the SQL clauses unescaped,
// and it can come from the client's request (see send_work_locality()).
// Also '_' is a wildcard in LIKE, so "__" matches any two characters.
// Both deserve a follow-up; they are not changed here.
//
static int send_results_for_file(
    char* filename,
    int& nsent,
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss,
    bool in_working_set
) {
    DB_RESULT result, prev_result;
    int retval, i, maxid, n;
    char query[1024];
    bool work_generator_invoked = false;

    // Callers look at nsent whether or not we succeed,
    // so give it a value before anything can fail.
    //
    nsent = 0;

    // find largest ID of results already sent to this user
    // for this file, if any
    //
    n = snprintf(query, sizeof(query),
        "where userid=%d and name like '%s__%%'",
        reply.user.id, filename
    );
    if (n < 0 || n >= (int)sizeof(query)) return -1;
    retval = result.max_id(maxid, query);
    if (retval) {
        prev_result.id = 0;
    } else {
        retval = prev_result.lookup_id(maxid);
        if (retval) return retval;
    }

    for (i=0; i<100; i++) {     // avoid infinite loop
        if (!wreq.work_needed(reply)) break;

        // find unsent result with next larger ID than previous largest ID.
        // The clause is built before the transaction starts,
        // so a too-long name can't leave a transaction open.
        //
        if (config.one_result_per_user_per_wu && prev_result.id) {
            // if one result per user per WU, insist on different WUID too
            //
            n = snprintf(query, sizeof(query),
                "where name like '%s__%%' and id>%d and workunitid<>%d and server_state=%d order by id limit 1 ",
                filename, prev_result.id, prev_result.workunitid, RESULT_SERVER_STATE_UNSENT
            );
        } else {
            n = snprintf(query, sizeof(query),
                "where name like '%s__%%' and id>%d and server_state=%d order by id limit 1 ",
                filename, prev_result.id, RESULT_SERVER_STATE_UNSENT
            );
        }
        if (n < 0 || n >= (int)sizeof(query)) return -1;

        // Use a transaction so that if we get a result,
        // someone else doesn't send it before we do
        //
        boinc_db.start_transaction();
        retval = result.lookup(query);
        if (retval) {
            // if didn't get a result, trigger the work generator if relevant
            //
            boinc_db.commit_transaction();
            if (!work_generator_invoked && config.locality_scheduling_wait_period) {
                retval = make_more_work_for_file(filename);
                if (retval) break;
                work_generator_invoked = true;
            } else {
                if (in_working_set) {
                    flag_for_possible_removal(filename);
                }
                break;
            }
        } else {
            retval = possibly_send_result(
                result, sreq, reply, platform, wreq, ss
            );
            boinc_db.commit_transaction();

            // if no app version, give up completely
            //
            if (retval == ERR_NO_APP_VERSION) return retval;

            // if we couldn't send it for other reason, something's wacky;
            // print a message, but keep on looking
            //
            if (retval) {
                log_messages.printf(SCHED_MSG_LOG::NORMAL,
                    "possibly_send_result(): %d\n", retval
                );
            } else {
                nsent++;
            }

            // continue from this result, sent or not
            //
            prev_result = result;
        }
    }
    return 0;
}
// Find a file with work, and send.
// This walks ALL unsent results in name order,
// so it finds work for this user if any can be sent.
// However, it ignores the working set,
// and should be done only if we fail to send work from the working set.
//
// logic:
// min_resultname = ""
// loop
//     R = first unsent result where name>min_resultname order by name
//     min_resultname = R.name
//     send_results_for_file(R.filename)
//         // this skips disqualified results
//     stop if something was sent, or if there's no app version
//
// The lower bound is the full result name, not the file name:
// "<file>__..." compares greater than "<file>",
// so bounding by file name would select the same result again
// whenever nothing could be sent for that file, and never terminate.
//
// Returns 0 when something was sent or the unsent results are exhausted,
// ERR_NO_APP_VERSION if send_results_for_file() reports that,
// and -1 if a result name is too long or contains no "__".
//
static int send_new_file_work_deterministic(
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    DB_RESULT result;
    char filename[256], min_resultname[256], query[1024];
    int retval, nsent, n;

    strcpy(min_resultname, "");
    while (1) {
        n = snprintf(query, sizeof(query),
            "where server_state=%d and name>'%s' order by name limit 1",
            RESULT_SERVER_STATE_UNSENT, min_resultname
        );
        if (n < 0 || n >= (int)sizeof(query)) return -1;
        retval = result.lookup(query);
        if (retval) break;

        // remember this result's name so that the next query moves past it;
        // a truncated name would not be a valid bound, so give up instead
        //
        n = snprintf(min_resultname, sizeof(min_resultname), "%s", result.name);
        if (n < 0 || n >= (int)sizeof(min_resultname)) return -1;

        retval = extract_filename(min_resultname, filename);
        if (retval) return retval;

        // nsent is read below whatever happens in the call
        //
        nsent = 0;
        retval = send_results_for_file(
            filename, nsent, sreq, reply, platform, wreq, ss, false
        );

        // no app version: trying further files won't help
        //
        if (retval == ERR_NO_APP_VERSION) return retval;

        if (nsent>0) break;
    }
    return 0;
}
// Try to send work for one randomly-chosen file of the working set.
// Returns the error from get_working_set_filename() if no file
// could be chosen, else whatever send_results_for_file() returns.
//
static int send_new_file_work_working_set(
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    char chosen[256];
    int nsent;
    int err = get_working_set_filename(chosen);

    if (!err) {
        // the file comes from the working set, hence the final "true"
        //
        err = send_results_for_file(
            chosen, nsent, sreq, reply, platform, wreq, ss, true
        );
    }
    return err;
}
// The host doesn't have any files for which work is available.
// Pick new file to send:
// first try a file from the working set;
// if work is still needed after that, scan all unsent results.
// The status of both attempts is ignored; always returns 0.
//
static int send_new_file_work(
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    send_new_file_work_working_set(sreq, reply, platform, wreq, ss);

    if (!wreq.work_needed(reply)) {
        return 0;
    }

    send_new_file_work_deterministic(sreq, reply, platform, wreq, ss);
    return 0;
}
// Send "old" work: look for one unsent result created more than
// config.locality_scheduling_send_timeout before now (as given by time(0))
// and try to send it, whatever file it uses.
// If that works, also try to send more results for the same file.
//
// Returns 0 if no such result is found (or the lookup fails),
// or if the follow-up send_results_for_file() succeeds.
// Otherwise returns the error from possibly_send_result(),
// extract_filename() or send_results_for_file().
//
static int send_old_work(
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    char buf[1024], filename[256];
    int retval, nsent = 0;
    DB_RESULT result;
    int cutoff = time(0) - config.locality_scheduling_send_timeout;

    // as elsewhere in this file: use a transaction around finding
    // the result and sending it
    //
    boinc_db.start_transaction();
    sprintf(buf, "where server_state=%d and create_time<%d limit 1",
        RESULT_SERVER_STATE_UNSENT, cutoff
    );
    retval = result.lookup(buf);
    if (retval) {
        // nothing old to send
        //
        boinc_db.commit_transaction();
        return 0;
    }

    retval = possibly_send_result(
        result, sreq, reply, platform, wreq, ss
    );
    boinc_db.commit_transaction();
    if (retval) return retval;

    // the client is getting this result's file:
    // try to send it more results that use the same file.
    // Without a usable file name there's nothing to look for.
    //
    retval = extract_filename(result.name, filename);
    if (retval) return retval;

    return send_results_for_file(
        filename, nsent,
        sreq, reply, platform, wreq, ss, false
    );
}
// Entry point for locality scheduling.  In order:
// 1) if config.locality_scheduling_send_timeout is set, send old work
// 2) for each file the client reported (starting at a random one),
//    send results that use the file; if none could be sent for a file,
//    tell the client to delete it
// 3) if work is still needed, pick new files to send
//
void send_work_locality(
    SCHEDULER_REQUEST& sreq, SCHEDULER_REPLY& reply, PLATFORM& platform,
    WORK_REQ& wreq, SCHED_SHMEM& ss
) {
    int i, nsent, nfiles, j, k;

    nfiles = (int) sreq.file_infos.size();

    // random starting point in the client's file list.
    // A client with no files must not reach the modulo (division by zero).
    //
    j = (nfiles > 0) ? rand()%nfiles : 0;

    // send old work if there is any
    //
    if (config.locality_scheduling_send_timeout) {
        send_old_work(sreq, reply, platform, wreq, ss);
    }

    // send work for existing files
    //
    for (i=0; i<nfiles; i++) {
        k = (i+j)%nfiles;
        if (!wreq.work_needed(reply)) break;
        FILE_INFO& fi = sreq.file_infos[k];

        // nsent is read below whatever happens in the call
        //
        nsent = 0;
        send_results_for_file(
            fi.name, nsent, sreq, reply, platform, wreq, ss, false
        );

        // if we couldn't send any work for this file, tell client to delete it
        //
        if (nsent == 0) {
            reply.file_deletes.push_back(fi);
            log_messages.printf(
                SCHED_MSG_LOG::DEBUG,
                "[HOST#%d]: delete file %s\n", reply.host.id, fi.name
            );
        }
    }

    // send new files if needed
    //
    if (wreq.work_needed(reply)) {
        send_new_file_work(sreq, reply, platform, wreq, ss);
    }
}
// Revision identification string for this file.
// NOTE(review): "$Id$" looks like an RCS/CVS keyword placeholder,
// presumably expanded by the version control system -- confirm.
// Nothing in this file reads the variable.
const char *BOINC_RCSID_238cc1aec4 = "$Id$";