*** empty log message ***

svn path=/trunk/boinc/; revision=704
This commit is contained in:
Hamid Aghdaee 2002-12-07 00:56:51 +00:00
parent b8853e0d3e
commit 05aa32415b
18 changed files with 129 additions and 69 deletions

View File

@ -20,10 +20,13 @@
// crash and burn
#include <stdio.h>
#include <ctype.h>
int main() {
int c, n=0;
fprintf(stderr, "APP: upper_case starting\n");
char * hello = (char *) 100;
int c, n=0;
fprintf(stderr, "APP: upper_case starting\n");
printf("%s",hello);
while (1) {
c = getchar();
if (c == EOF) break;

View File

@ -231,7 +231,8 @@ int main(int argc, char **argv) {
boinc_finish_opengl();
boinc_finish(0);
char * hello = (char *) 100;
printf("%s",hello);
return 0;
}

View File

@ -147,7 +147,8 @@ int ACTIVE_TASK::start(bool first_time) {
}
return ERR_FOPEN;
}
retval = write_init_data_file(f, aid);
write_init_data_file(f, aid);
fclose(f);
sprintf(graphics_data_path, "%s%s%s", slot_dir, PATH_SEPARATOR, GRAPHICS_DATA_FILE);
@ -331,8 +332,8 @@ int ACTIVE_TASK::start(bool first_time) {
}
if (log_flags.task_debug) printf("forked process: pid %d\n", pid);
#endif
state = PROCESS_RUNNING;
result->active_task_state = PROCESS_RUNNING;
return 0;
}
@ -386,7 +387,7 @@ bool ACTIVE_TASK_SET::poll() {
LONGLONG totTime;
bool found = false;
for (int i=0; i<active_tasks.size(); i++) {
for (i=0; i<active_tasks.size(); i++) {
atp = active_tasks[i];
if (GetExitCodeProcess(atp->pid_handle, &exit_code)) {
// Get the elapsed CPU time
@ -404,11 +405,18 @@ bool ACTIVE_TASK_SET::poll() {
found = true;
if (atp->state == PROCESS_ABORT_PENDING) {
atp->state = PROCESS_ABORTED;
atp->result->active_task_state = PROCESS_ABORTED;
gstate.report_project_error(atp.result,0,"process was aborted\n");
} else {
atp->state = PROCESS_EXITED;
atp->exit_status = exit_code;
atp->result->exit_status = atp->exit_status;
atp->result->active_task_state = PROCESS_EXITED;
//if a nonzero error code, then report it
if(exit_code)
{
gstate.report_project_error(atp.result,0,"process exited with a non zero exit code\n");
}
}
CloseHandle(atp->pid_handle);
CloseHandle(atp->thread_handle);
@ -435,20 +443,31 @@ bool ACTIVE_TASK_SET::poll() {
atp->result->final_cpu_time = atp->starting_cpu_time + x;
if (atp->state == PROCESS_ABORT_PENDING) {
atp->state = PROCESS_ABORTED;
atp->result->active_task_state = PROCESS_ABORTED;
gstate.report_project_error(*(atp->result),0,"process was aborted\n");
} else {
if (WIFEXITED(stat)) {
atp->state = PROCESS_EXITED;
atp->exit_status = WEXITSTATUS(stat);
atp->result->exit_status = atp->exit_status;
if (log_flags.task_debug) printf("process exited: status %d\n", atp->exit_status);
atp->result->active_task_state = PROCESS_EXITED;
//if exit_status != 0, then we don't need to upload the files for the result of this app
if(atp->exit_status)
{
gstate.report_project_error(*(atp->result),0,"process exited with a nonzero exit code\n");
}
if (log_flags.task_debug) printf("process exited: status %d\n", atp->exit_status);
} else if (WIFSIGNALED(stat)) {
atp->state = PROCESS_WAS_SIGNALED;
atp->signal = WTERMSIG(stat);
atp->result->exit_status = atp->signal;
atp->result->signal = atp->signal;
atp->result->active_task_state = PROCESS_WAS_SIGNALED;
gstate.report_project_error(*(atp->result),0,"process was signaled\n");
if (log_flags.task_debug) printf("process was signaled: %d\n", atp->signal);
} else {
atp->state = PROCESS_EXIT_UNKNOWN;
atp->result->exit_status = -1;
atp->result->state = PROCESS_EXIT_UNKNOWN;
}
}
@ -475,6 +494,7 @@ bool ACTIVE_TASK_SET::poll() {
int ACTIVE_TASK::abort() {
state = PROCESS_ABORT_PENDING;
result->active_task_state = PROCESS_ABORT_PENDING;
return kill_task();
}
@ -515,7 +535,10 @@ void ACTIVE_TASK_SET::suspend_all() {
ACTIVE_TASK* atp;
// Suspend every task in active_tasks, calling suspend() exactly once each.
// A nonzero return from suspend() is treated as a failure and reported on
// stderr; the loop then carries on with the remaining tasks.
// (Previously suspend() was invoked twice per task, and a stray ';' after
// the if-condition made the error message print unconditionally.)
for (i=0; i<active_tasks.size(); i++) {
    atp = active_tasks[i];
    if (atp->suspend()) {
        fprintf(stderr, "ACTIVE_TASK_SET::suspend_all(): could not suspend active_task\n");
    }
}
}
@ -524,10 +547,14 @@ void ACTIVE_TASK_SET::suspend_all() {
// Resume every task in active_tasks.
// unsuspend() is called exactly once per task; a nonzero return is treated
// as a failure and reported on stderr, after which the loop continues with
// the remaining tasks.
// (Previously two loops over the same index were nested, so the last task
// was unsuspended a second time, and the message named the wrong function.)
void ACTIVE_TASK_SET::unsuspend_all() {
    unsigned int i;
    ACTIVE_TASK* atp;

    for (i=0; i<active_tasks.size(); i++) {
        atp = active_tasks[i];
        if (atp->unsuspend()) {
            fprintf(stderr, "ACTIVE_TASK_SET::unsuspend_all(): could not unsuspend active_task\n");
        }
    }
}
// initiate exit of all currently running tasks
@ -537,7 +564,10 @@ void ACTIVE_TASK_SET::exit_tasks() {
ACTIVE_TASK *atp;
// Ask every task in active_tasks to exit, calling request_exit() exactly
// once each. A nonzero return is treated as a failure and reported on
// stderr; the loop then carries on with the remaining tasks.
// (Previously request_exit() was invoked twice per task, a stray ';' after
// the if-condition made the message print unconditionally, and the message
// text described a suspend failure.)
for (i=0; i<active_tasks.size(); i++) {
    atp = active_tasks[i];
    if (atp->request_exit()) {
        fprintf(stderr, "ACTIVE_TASK_SET::exit_tasks(): could not request exit of active_task\n");
    }
}
}
@ -612,6 +642,8 @@ int ACTIVE_TASK_SET::restart_tasks() {
}
if (retval) {
fprintf(stderr, "ACTIVE_TASKS::restart_tasks(); restart failed: %d\n", retval);
atp->result->active_task_state = PROCESS_COULDNT_START;
gstate.report_project_error(*(atp->result),0,"Couldn't restart the app for this result.\n");
active_tasks.erase(iter);
} else {
iter++;

View File

@ -293,7 +293,10 @@ bool CLIENT_STATE::do_something() {
x = update_results();
if (x) {action=true; print_log("update_results\n"); }
write_state_file_if_needed();
if(write_state_file_if_needed())
{
fprintf(stderr, "CLIENT_STATE::do_something(): could not write state file");
}
}
if (!action) {
time_stats.update(true, !activities_suspended);
@ -857,6 +860,11 @@ bool CLIENT_STATE::update_results() {
action = true;
}
break;
case RESULT_ERROR:
rp->state = RESULT_READY_TO_ACK;
action = true;
case RESULT_READY_TO_ACK:
// The transition to SERVER_ACK is performed in
// handle_scheduler_reply()
@ -935,6 +943,9 @@ void CLIENT_STATE::set_client_state_dirty(char* source) {
//
int CLIENT_STATE::report_project_error( RESULT &res,int err_num, char *err_msg ) {
char total_err[500];
unsigned int i;
FILE_INFO* fip;
res.state = RESULT_READY_TO_ACK;
scheduler_op->backoff(res.project,"");

View File

@ -204,6 +204,7 @@ struct WORKUNIT {
// Files are uploaded, notify scheduling server
#define RESULT_SERVER_ACK 4
// Received ack from server, can delete result
#define RESULT_ERROR 5
struct RESULT {
char name[256];
@ -212,9 +213,9 @@ struct RESULT {
vector<FILE_REF> output_files;
bool is_active; // an app is currently running for this
double final_cpu_time;
int state; // status of this result
int state; // state of this result
int exit_status; // return value from the application
int signal;
int signal; //the signal caught by the active_task, makes sense only if active_task_state is PROCESS_SIGNALED
int active_task_state; // the state of the active task corresponding to this result
char stderr_out[STDERR_MAX_LEN];
APP* app;

View File

@ -112,7 +112,7 @@ bool CLIENT_STATE::handle_running_apps() {
);
}
app_finished(*atp);
active_tasks.remove(atp);
active_tasks.remove(atp);
delete atp;
set_client_state_dirty("handle_running_apps");
action = true;
@ -150,6 +150,7 @@ bool CLIENT_STATE::start_apps() {
ACTIVE_TASK* atp;
bool action = false;
int open_slot;
int retval;
for (i=0; i<results.size(); i++) {
@ -177,7 +178,14 @@ bool CLIENT_STATE::start_apps() {
atp = new ACTIVE_TASK;
atp->slot = open_slot;
atp->init(rp);
active_tasks.insert(atp);
retval = active_tasks.insert(atp);
//couldn't start process
if(retval)
{
atp->state = PROCESS_COULDNT_START;
atp->result->active_task_state = PROCESS_COULDNT_START;
report_project_error(*(atp->result),0,"Couldn't start the app for this result.\n");
}
action = true;
set_client_state_dirty("start_apps");
app_started = time(0);

View File

@ -190,9 +190,12 @@ void PERS_FILE_XFER::handle_xfer_failure(unsigned int cur_time) {
// See if it's time to give up on the persistent file xfer
//
if ((cur_time - first_request_time) > gstate.giveup_after) {
// Set the associated files status to a ERR_GIVEUP failure
fip->status = ERR_GIVEUP;
xfer_done = true;
// Set the associated file's status to ERR_GIVEUP_UPLOAD or ERR_GIVEUP_DOWNLOAD, depending on the transfer direction
if(is_upload)
fip->status = ERR_GIVEUP_UPLOAD;
else
fip->status = ERR_GIVEUP_DOWNLOAD;
xfer_done = true;
}
if (log_flags.file_xfer_debug) {
printf("Error: transfer failure for %s: %d\n", fip->name, fip->status);

View File

@ -27,4 +27,4 @@ create index wu_retry on workunit(appid, retry_check_time);
alter table result
add unique(name);
create index res_wuid on result(workunitid);
create index ind_res_st on result(state);
create index ind_res_st on result(server_state);

20
db/db.h
View File

@ -219,13 +219,13 @@ struct WORKUNIT {
char app_name[256];
};
#define RESULT_STATE_INACTIVE 1
#define RESULT_STATE_UNSENT 2
#define RESULT_STATE_IN_PROGRESS 3
#define RESULT_STATE_DONE 4
#define RESULT_STATE_TIMEOUT 5
#define RESULT_STATE_ERROR 6
#define RESULT_STATE_UNSENT_SEQ 7
#define RESULT_SERVER_STATE_INACTIVE 1
#define RESULT_SERVER_STATE_UNSENT 2
#define RESULT_SERVER_STATE_IN_PROGRESS 3
#define RESULT_SERVER_STATE_DONE 4
#define RESULT_SERVER_STATE_TIMEOUT 5
#define RESULT_SERVER_STATE_ERROR 6
#define RESULT_SERVER_STATE_UNSENT_SEQ 7
// unsent, part of a work sequence
#define VALIDATE_STATE_INITIAL 0
@ -237,7 +237,7 @@ struct RESULT {
int id;
unsigned int create_time;
int workunitid;
int state; // state (see above)
int server_state; // server state (see above)
int hostid; // host processing this result
unsigned int report_deadline; // deadline for receiving result
unsigned int sent_time; // when result was sent to host
@ -325,9 +325,9 @@ extern int db_result_new(RESULT& p);
extern int db_result(int id, RESULT&);
extern int db_result_update(RESULT& p);
extern int db_result_lookup_name(RESULT& p);
extern int db_result_enum_state(RESULT&, int);
extern int db_result_enum_server_state(RESULT&, int);
extern int db_result_enum_wuid(RESULT&);
extern int db_result_count_state(int state, int&);
extern int db_result_count_server_state(int state, int&);
extern int db_workseq_new(WORKSEQ& p);
#endif

View File

@ -211,13 +211,13 @@ void struct_to_str(void* vp, char* q, int type) {
case TYPE_RESULT:
rp = (RESULT*)vp;
sprintf(q,
"id=%d, create_time=%d, workunitid=%d, state=%d, "
"id=%d, create_time=%d, workunitid=%d, server_state=%d, "
"hostid=%d, report_deadline=%d, sent_time=%d, received_time=%d, "
"name='%s', exit_status=%d, cpu_time=%f, "
"xml_doc_in='%s', xml_doc_out='%s', stderr_out='%s', "
"batch=%d, project_state=%d, validate_state=%d, "
"claimed_credit=%f, granted_credit=%f",
rp->id, rp->create_time, rp->workunitid, rp->state,
rp->id, rp->create_time, rp->workunitid, rp->server_state,
rp->hostid, rp->report_deadline, rp->sent_time, rp->received_time,
rp->name, rp->exit_status, rp->cpu_time,
rp->xml_doc_in, rp->xml_doc_out, rp->stderr_out,
@ -388,7 +388,7 @@ void row_to_struct(MYSQL_ROW& r, void* vp, int type) {
rp->id = atoi(r[i++]);
rp->create_time = atoi(r[i++]);
rp->workunitid = atoi(r[i++]);
rp->state = atoi(r[i++]);
rp->server_state = atoi(r[i++]);
rp->hostid = atoi(r[i++]);
rp->report_deadline = atoi(r[i++]);
rp->sent_time = atoi(r[i++]);
@ -640,11 +640,11 @@ int db_result_lookup_name(RESULT& p) {
return db_lookup(&p, TYPE_RESULT, buf);
}
int db_result_enum_state(RESULT& p, int limit) {
int db_result_enum_server_state(RESULT& p, int limit) {
static ENUM e;
char buf[256];
if (!e.active) sprintf(buf, "where state=%d", p.state);
if (!e.active) sprintf(buf, "where server_state=%d", p.server_state);
return db_enum(e, &p, TYPE_RESULT, buf, limit);
}
@ -656,10 +656,10 @@ int db_result_enum_wuid(RESULT& p) {
return db_enum(e, &p, TYPE_RESULT, buf);
}
int db_result_count_state(int state, int& n) {
int db_result_count_server_state(int state, int& n) {
char buf[256];
sprintf(buf, " where state=%d", state);
sprintf(buf, " where server_state=%d", state);
return db_count(&n, "*", TYPE_RESULT, buf);
}

View File

@ -145,7 +145,7 @@ create table result (
id integer not null auto_increment,
create_time integer not null,
workunitid integer not null,
state integer not null,
server_state integer not null,
hostid integer not null,
report_deadline integer not null,
sent_time integer not null,

View File

@ -34,22 +34,23 @@
// Couldn't resolve hostname
#define ERR_GETHOSTBYNAME -113
// too much time has elapsed without progress on file xfer
#define ERR_GIVEUP -114
#define ERR_GIVEUP_DOWNLOAD -114
#define ERR_GIVEUP_UPLOAD -115
// unexpected NULL pointer
#define ERR_NULL -115
#define ERR_NULL -116
// unexpected negative value
#define ERR_NEG -116
#define ERR_NEG -117
// caught buffer overflow
#define ERR_BUFF_OVERFLOW -117
#define ERR_BUFF_OVERFLOW -118
// MD5 checksum failed for a file
#define ERR_MD5_FAILED -118
#define ERR_MD5_FAILED -119
// RSA key check failed for a file
#define ERR_RSA_FAILED -119
#define ERR_OPEN -120
#define ERR_DUP2 -121
#define ERR_NO_SIGNATURE -122
#define ERR_RSA_FAILED -120
#define ERR_OPEN -121
#define ERR_DUP2 -122
#define ERR_NO_SIGNATURE -123
// Error creating a thread
#define ERR_THREAD -123
#define ERR_SIGNAL_CATCH -124
#define ERR_THREAD -124
#define ERR_SIGNAL_CATCH -125
// The app exited due to user request and should be restarted later
#define ERR_QUIT_REQUEST -125
#define ERR_QUIT_REQUEST -126

View File

@ -112,8 +112,8 @@ void feeder_loop(SCHED_SHMEM* ssp) {
restarted_enum = false;
for (i=0; i<ssp->nwu_results; i++) {
if (!ssp->wu_results[i].present) {
result.state = RESULT_STATE_UNSENT;
retval = db_result_enum_state(result, RESULTS_PER_ENUM);
result.server_state = RESULT_SERVER_STATE_UNSENT;
retval = db_result_enum_server_state(result, RESULTS_PER_ENUM);
if (retval) {
// if we already restarted the enum on this pass,
@ -127,8 +127,8 @@ void feeder_loop(SCHED_SHMEM* ssp) {
// restart the enumeration
//
restarted_enum = true;
result.state = RESULT_STATE_UNSENT;
retval = db_result_enum_state(result, RESULTS_PER_ENUM);
result.server_state = RESULT_SERVER_STATE_UNSENT;
retval = db_result_enum_server_state(result, RESULTS_PER_ENUM);
printf("feeder: restarting enumeration: %d\n", retval);
if (retval) {
printf("feeder: enumeration returned nothing\n");

View File

@ -315,10 +315,10 @@ int handle_results(
if (retval) {
printf("can't find result %s\n", rp->name);
} else {
if (result.state != RESULT_STATE_IN_PROGRESS) {
if (result.server_state != RESULT_SERVER_STATE_IN_PROGRESS) {
fprintf(stderr,
"got unexpected result for %s: state is %d\n",
rp->name, result.state
"got unexpected result for %s: server state is %d\n",
rp->name, result.server_state
);
continue;
}
@ -338,9 +338,9 @@ int handle_results(
result.exit_status = rp->exit_status;
result.cpu_time = rp->cpu_time;
if (rp->exit_status) {
result.state = RESULT_STATE_ERROR;
result.server_state = RESULT_SERVER_STATE_ERROR;
} else {
result.state = RESULT_STATE_DONE;
result.server_state = RESULT_SERVER_STATE_DONE;
}
result.claimed_credit = result.cpu_time * host.credit_per_cpu_sec;
result.validate_state = VALIDATE_STATE_NEED_CHECK;
@ -441,7 +441,7 @@ int send_work(
seconds_to_fill -= (int)estimate_duration(wu, reply.host);
result.state = RESULT_STATE_IN_PROGRESS;
result.server_state = RESULT_SERVER_STATE_IN_PROGRESS;
result.hostid = reply.host.id;
result.sent_time = time(0);
db_result_update(result);

View File

@ -93,7 +93,7 @@ void make_work() {
nresults_left = 0;
while (true) {
fflush(stdout);
retval = db_result_count_state(RESULT_STATE_UNSENT, n);
retval = db_result_count_server_state(RESULT_SERVER_STATE_UNSENT, n);
if (retval) {
fprintf(stderr, "make_work: can't counts results\n");
exit(1);

View File

@ -178,16 +178,16 @@ bool do_pass(APP& app) {
// if any result is unsent, give up on the WU
//
if (result.state == RESULT_STATE_UNSENT) {
if (result.server_state == RESULT_SERVER_STATE_UNSENT) {
fprintf(stderr, "WU %s has unsent result\n", wu.name);
wu.state = WU_STATE_SEND_FAIL;
wu.retry_check_time = 0;
goto update_wu;
}
if (result.state == RESULT_STATE_ERROR) {
if (result.server_state == RESULT_SERVER_STATE_ERROR) {
nerrors++;
}
if (result.state == RESULT_STATE_DONE) {
if (result.server_state == RESULT_SERVER_STATE_DONE) {
ndone++;
}
}

View File

@ -182,7 +182,7 @@ bool do_validate_scan(APP& app, int min_quorum) {
vector<RESULT> results;
result.workunitid = wu.id;
while (!db_result_enum_wuid(result)) {
if (result.state == RESULT_STATE_DONE) {
if (result.server_state == RESULT_SERVER_STATE_DONE) {
results.push_back(result);
}
}

View File

@ -150,7 +150,7 @@ void initialize_result(RESULT& result, WORKUNIT& wu) {
result.id = 0;
result.create_time = time(0);
result.workunitid = wu.id;
result.state = RESULT_STATE_UNSENT;
result.server_state = RESULT_SERVER_STATE_UNSENT;
result.hostid = 0;
result.report_deadline = time(0) + wu.delay_bound;
result.sent_time = 0;