2003-07-01 20:37:09 +00:00
|
|
|
// The contents of this file are subject to the BOINC Public License
|
2002-05-30 07:14:21 +00:00
|
|
|
// Version 1.0 (the "License"); you may not use this file except in
|
|
|
|
// compliance with the License. You may obtain a copy of the License at
|
2003-07-01 20:37:09 +00:00
|
|
|
// http://boinc.berkeley.edu/license_1.0.txt
|
2003-07-02 20:57:59 +00:00
|
|
|
//
|
2002-05-30 07:14:21 +00:00
|
|
|
// Software distributed under the License is distributed on an "AS IS"
|
|
|
|
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
// License for the specific language governing rights and limitations
|
2003-07-02 20:57:59 +00:00
|
|
|
// under the License.
|
|
|
|
//
|
|
|
|
// The Original Code is the Berkeley Open Infrastructure for Network Computing.
|
|
|
|
//
|
2002-05-30 07:14:21 +00:00
|
|
|
// The Initial Developer of the Original Code is the SETI@home project.
|
2003-07-01 20:37:09 +00:00
|
|
|
// Portions created by the SETI@home project are Copyright (C) 2002
|
2003-07-02 20:57:59 +00:00
|
|
|
// University of California at Berkeley. All Rights Reserved.
|
|
|
|
//
|
2002-05-30 07:14:21 +00:00
|
|
|
// Contributor(s):
|
|
|
|
//
|
|
|
|
|
2002-12-03 18:57:40 +00:00
|
|
|
// feeder [-asynch]
|
2002-05-30 07:14:21 +00:00
|
|
|
//
|
|
|
|
// Creates a shared memory segment containing DB info,
|
|
|
|
// including results/workunits to send.
|
|
|
|
// This means that the scheduler CGI program doesn't have to
|
|
|
|
// access the DB to get this info.
|
2002-06-01 20:26:21 +00:00
|
|
|
//
|
|
|
|
// -asynch      fork and run in a separate process
// -d N         set the log debug level to N
|
2002-05-30 07:14:21 +00:00
|
|
|
|
|
|
|
// TODO:
|
|
|
|
// - check for wu/results that don't get sent for a long time;
|
|
|
|
// generate a warning message
|
|
|
|
|
2002-12-03 18:57:40 +00:00
|
|
|
// Trigger files:
|
|
|
|
// The feeder program periodically checks for two trigger files:
|
2002-05-30 07:14:21 +00:00
|
|
|
//
|
2002-12-03 18:57:40 +00:00
|
|
|
// stop_server: destroy shmem and exit
|
|
|
|
// leave trigger file there (for other daemons)
|
|
|
|
// reread_db: update DB contents in existing shmem
|
|
|
|
// delete trigger file
|
2002-05-30 07:14:21 +00:00
|
|
|
|
2002-11-06 19:42:23 +00:00
|
|
|
// If you get an "Invalid argument" error when trying to run the feeder,
|
|
|
|
// it is likely that you aren't able to allocate enough shared memory.
|
|
|
|
// Either increase the maximum shared memory segment size in the kernel
|
|
|
|
// configuration, or decrease the MAX_PLATFORMS, MAX_APPS,
|
|
|
|
// MAX_APP_VERSIONS, and MAX_WU_RESULTS in sched_shmem.h
|
|
|
|
|
2002-05-30 07:14:21 +00:00
|
|
|
#include <stdio.h>
|
2002-07-11 20:41:24 +00:00
|
|
|
#include <stdlib.h>
|
2002-09-26 23:12:13 +00:00
|
|
|
#include <string.h>
|
2003-01-07 08:11:16 +00:00
|
|
|
#include <time.h>
|
2002-05-30 08:33:30 +00:00
|
|
|
#if HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
2002-05-30 07:14:21 +00:00
|
|
|
|
2003-04-07 19:06:00 +00:00
|
|
|
#include "boinc_db.h"
|
2002-05-30 07:14:21 +00:00
|
|
|
#include "shmem.h"
|
2003-02-10 19:51:32 +00:00
|
|
|
#include "util.h"
|
2003-08-15 00:45:25 +00:00
|
|
|
#include "sched_config.h"
|
2002-05-30 07:14:21 +00:00
|
|
|
#include "sched_shmem.h"
|
2003-03-08 00:09:40 +00:00
|
|
|
#include "sched_util.h"
|
2002-05-30 07:14:21 +00:00
|
|
|
|
|
|
|
#define RESULTS_PER_ENUM 100
|
2002-12-03 18:57:40 +00:00
|
|
|
#define REREAD_DB_FILENAME "reread_db"
|
2003-02-10 19:51:32 +00:00
|
|
|
#define LOCKFILE "feeder.out"
|
2003-06-20 01:31:03 +00:00
|
|
|
#define PIDFILE "feeder.pid"
|
2002-05-30 07:14:21 +00:00
|
|
|
|
2002-10-03 18:33:46 +00:00
|
|
|
CONFIG config;
|
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
SCHED_SHMEM* ssp;
|
2002-12-03 18:57:40 +00:00
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
void cleanup_shmem()
|
|
|
|
{
|
|
|
|
detach_shmem((void*)ssp);
|
|
|
|
destroy_shmem(config.shmem_key);
|
|
|
|
}
|
|
|
|
|
|
|
|
int check_reread_trigger() {
|
|
|
|
FILE* f;
|
2002-12-03 18:57:40 +00:00
|
|
|
f = fopen(REREAD_DB_FILENAME, "r");
|
|
|
|
if (f) {
|
|
|
|
fclose(f);
|
2002-05-30 07:14:21 +00:00
|
|
|
ssp->init();
|
|
|
|
ssp->scan_tables();
|
2002-12-03 18:57:40 +00:00
|
|
|
unlink(REREAD_DB_FILENAME);
|
2002-05-30 07:14:21 +00:00
|
|
|
}
|
2002-11-22 20:20:58 +00:00
|
|
|
|
2002-05-30 07:14:21 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-06-01 20:26:21 +00:00
|
|
|
// Try to keep the wu_results array filled.
|
|
|
|
// This is actually a little tricky.
|
|
|
|
// We use an enumerator.
|
|
|
|
// The inner loop scans the wu_result table,
|
|
|
|
// looking for empty slots and trying to fill them in.
|
|
|
|
// When the enumerator reaches the end, it is restarted;
|
|
|
|
// hopefully there will be some new workunits.
|
|
|
|
// There are two complications:
|
|
|
|
// - An enumeration may return results already in the array.
|
|
|
|
// So, for each result, we scan the entire array to make sure
|
|
|
|
// it's not there already. Can this be streamlined?
|
|
|
|
// - We must avoid excessive re-enumeration,
|
|
|
|
// especially when the number of results is less than the array size.
|
|
|
|
// Crude approach: if a "collision" (as above) occurred on
|
2003-01-07 22:49:42 +00:00
|
|
|
// a pass through the array, wait a long time (5 sec)
|
2002-06-01 20:26:21 +00:00
|
|
|
//
|
2003-06-20 01:31:03 +00:00
|
|
|
void feeder_loop() {
|
2002-05-30 07:14:21 +00:00
|
|
|
int i, j, nadditions, ncollisions, retval;
|
2003-06-04 17:21:26 +00:00
|
|
|
DB_RESULT result;
|
|
|
|
DB_WORKUNIT wu;
|
2002-05-30 07:14:21 +00:00
|
|
|
bool no_wus, collision, restarted_enum;
|
2003-07-01 00:20:22 +00:00
|
|
|
char clause[256];
|
2003-06-04 17:21:26 +00:00
|
|
|
|
|
|
|
sprintf(clause, "where server_state=%d order by random limit %d",
|
|
|
|
RESULT_SERVER_STATE_UNSENT, RESULTS_PER_ENUM
|
|
|
|
);
|
2002-12-03 18:57:40 +00:00
|
|
|
|
2002-05-30 07:14:21 +00:00
|
|
|
while (1) {
|
|
|
|
nadditions = 0;
|
|
|
|
ncollisions = 0;
|
|
|
|
no_wus = false;
|
|
|
|
restarted_enum = false;
|
|
|
|
for (i=0; i<ssp->nwu_results; i++) {
|
|
|
|
if (!ssp->wu_results[i].present) {
|
2003-01-07 22:49:42 +00:00
|
|
|
try_again:
|
2003-06-04 17:21:26 +00:00
|
|
|
retval = result.enumerate(clause);
|
2002-05-30 07:14:21 +00:00
|
|
|
if (retval) {
|
|
|
|
|
|
|
|
// if we already restarted the enum on this pass,
|
|
|
|
// there's no point in doing it again.
|
|
|
|
//
|
|
|
|
if (restarted_enum) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "already restarted enum on this pass\n");
|
2002-05-30 07:14:21 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// restart the enumeration
|
|
|
|
//
|
|
|
|
restarted_enum = true;
|
2003-06-04 17:21:26 +00:00
|
|
|
retval = result.enumerate(clause);
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "restarting enumeration\n");
|
2002-05-30 07:14:21 +00:00
|
|
|
if (retval) {
|
2003-07-02 20:57:59 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "enumeration restart returned nothing\n");
|
2002-05-30 07:14:21 +00:00
|
|
|
no_wus = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2003-05-21 00:13:55 +00:00
|
|
|
|
|
|
|
// there's a chance this result was sent out
|
|
|
|
// after the enumeration started.
|
|
|
|
// So read it from the DB again
|
|
|
|
//
|
2003-06-04 17:21:26 +00:00
|
|
|
retval = result.lookup_id(result.id);
|
2003-05-21 00:13:55 +00:00
|
|
|
if (retval) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::NORMAL, "can't reread result %s\n", result.name);
|
2003-05-21 00:13:55 +00:00
|
|
|
goto try_again;
|
|
|
|
}
|
2003-01-07 22:49:42 +00:00
|
|
|
if (result.server_state != RESULT_SERVER_STATE_UNSENT) {
|
2003-07-13 01:10:24 +00:00
|
|
|
log_messages.printf(
|
|
|
|
SchedMessages::NORMAL,
|
|
|
|
"[%s] RESULT STATE CHANGED\n",
|
|
|
|
result.name
|
|
|
|
);
|
2003-01-07 22:49:42 +00:00
|
|
|
goto try_again;
|
|
|
|
}
|
2002-05-30 07:14:21 +00:00
|
|
|
collision = false;
|
|
|
|
for (j=0; j<ssp->nwu_results; j++) {
|
|
|
|
if (ssp->wu_results[j].present
|
|
|
|
&& ssp->wu_results[j].result.id == result.id
|
|
|
|
) {
|
|
|
|
ncollisions++;
|
|
|
|
collision = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!collision) {
|
2003-07-13 01:10:24 +00:00
|
|
|
log_messages.printf(
|
|
|
|
SchedMessages::NORMAL,
|
|
|
|
"[%s] adding result in slot %d\n",
|
|
|
|
result.name, i
|
|
|
|
);
|
2003-06-04 17:21:26 +00:00
|
|
|
retval = wu.lookup_id(result.workunitid);
|
2002-10-09 04:56:41 +00:00
|
|
|
if (retval) {
|
2003-07-13 01:10:24 +00:00
|
|
|
log_messages.printf(
|
|
|
|
SchedMessages::CRITICAL,
|
|
|
|
"[%s] can't read workunit #%d: %d\n",
|
|
|
|
result.name, result.workunitid, retval
|
|
|
|
);
|
2002-10-09 04:56:41 +00:00
|
|
|
continue;
|
|
|
|
}
|
2002-05-30 07:14:21 +00:00
|
|
|
ssp->wu_results[i].result = result;
|
|
|
|
ssp->wu_results[i].workunit = wu;
|
|
|
|
ssp->wu_results[i].present = true;
|
|
|
|
nadditions++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2003-03-16 21:59:11 +00:00
|
|
|
ssp->ready = true;
|
2002-05-30 07:14:21 +00:00
|
|
|
if (nadditions == 0) {
|
2003-07-02 20:57:59 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "No results added; sleeping 1 sec\n");
|
2002-05-30 07:14:21 +00:00
|
|
|
sleep(1);
|
|
|
|
} else {
|
2003-07-02 20:57:59 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "Added %d results to array\n", nadditions);
|
2002-05-30 07:14:21 +00:00
|
|
|
}
|
|
|
|
if (no_wus) {
|
2003-07-02 20:57:59 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "No results available; sleeping 5 sec\n");
|
2002-06-01 20:26:21 +00:00
|
|
|
sleep(5);
|
2002-05-30 07:14:21 +00:00
|
|
|
}
|
|
|
|
if (ncollisions) {
|
2003-07-02 20:57:59 +00:00
|
|
|
log_messages.printf(SchedMessages::DEBUG, "Some results already in array - sleeping 5 sec\n");
|
2002-06-01 20:26:21 +00:00
|
|
|
sleep(5);
|
2002-05-30 07:14:21 +00:00
|
|
|
}
|
2002-10-09 04:56:41 +00:00
|
|
|
fflush(stdout);
|
2003-06-20 01:31:03 +00:00
|
|
|
check_stop_trigger();
|
|
|
|
check_reread_trigger();
|
2002-06-01 20:26:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
int i, retval;
|
|
|
|
bool asynch = false;
|
|
|
|
void* p;
|
|
|
|
|
2002-12-03 18:57:40 +00:00
|
|
|
unlink(REREAD_DB_FILENAME);
|
2002-10-09 04:56:41 +00:00
|
|
|
|
2002-10-03 18:33:46 +00:00
|
|
|
retval = config.parse_file();
|
|
|
|
if (retval) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::CRITICAL, "can't parse config file\n");
|
2002-10-03 18:33:46 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2002-06-01 20:26:21 +00:00
|
|
|
for (i=1; i<argc; i++) {
|
|
|
|
if (!strcmp(argv[i], "-asynch")) {
|
|
|
|
asynch = true;
|
2003-06-11 23:36:40 +00:00
|
|
|
} else if (!strcmp(argv[i], "-d")) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.set_debug_level(atoi(argv[++i]));
|
2002-06-01 20:26:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-11-08 08:54:16 +00:00
|
|
|
if (asynch) {
|
|
|
|
if (fork()!=0) {
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-07-08 21:30:47 +00:00
|
|
|
// // Call lock_file after fork(), because file locks are not always inherited
|
|
|
|
// if (lock_file(LOCKFILE)) {
|
|
|
|
// log_messages.printf(SchedMessages::NORMAL, "Another copy of feeder is already running\n");
|
|
|
|
// exit(1);
|
|
|
|
// }
|
|
|
|
// write_pid_file(PIDFILE);
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::NORMAL, "Starting\n");
|
2003-02-27 19:29:48 +00:00
|
|
|
|
2002-10-03 18:33:46 +00:00
|
|
|
retval = destroy_shmem(config.shmem_key);
|
2002-06-01 20:26:21 +00:00
|
|
|
if (retval) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::CRITICAL, "can't destroy shmem\n");
|
2002-06-01 20:26:21 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2002-10-03 18:33:46 +00:00
|
|
|
retval = create_shmem(config.shmem_key, sizeof(SCHED_SHMEM), &p);
|
2002-06-01 20:26:21 +00:00
|
|
|
if (retval) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::CRITICAL, "can't create shmem\n");
|
2002-06-01 20:26:21 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
ssp = (SCHED_SHMEM*)p;
|
|
|
|
ssp->init();
|
2003-06-20 01:31:03 +00:00
|
|
|
|
|
|
|
atexit(cleanup_shmem);
|
|
|
|
install_sigint_handler();
|
|
|
|
|
2003-01-09 07:24:27 +00:00
|
|
|
retval = boinc_db_open(config.db_name, config.db_passwd);
|
2002-06-01 20:26:21 +00:00
|
|
|
if (retval) {
|
2003-07-02 02:02:18 +00:00
|
|
|
log_messages.printf(SchedMessages::CRITICAL, "boinc_db_open: %d\n", retval);
|
2002-06-01 20:26:21 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
ssp->scan_tables();
|
|
|
|
|
2003-07-13 01:10:24 +00:00
|
|
|
log_messages.printf(
|
|
|
|
SchedMessages::NORMAL,
|
|
|
|
"feeder: read "
|
|
|
|
"%d platforms, "
|
|
|
|
"%d apps, "
|
|
|
|
"%d app_versions\n",
|
|
|
|
ssp->nplatforms,
|
|
|
|
ssp->napps,
|
|
|
|
ssp->napp_versions
|
|
|
|
);
|
2002-06-01 20:26:21 +00:00
|
|
|
|
2003-06-20 01:31:03 +00:00
|
|
|
feeder_loop();
|
2002-05-30 07:14:21 +00:00
|
|
|
}
|