mirror of https://github.com/BOINC/boinc.git
- VBOX: Per tester feedback, at an hourly status report to track
network usage. - VBOX: For errors where an error code is specified in the output, use that value when returning from vbm_open(). Make it easier to track failure rates by problem type in the ops pages. (Windows Only) - VBOX: Try making the process of extracting the hypervisor system log more robust, make a temp copy of it in the slot directory. Avoid exclusive file lock issues since VboxSvc will still be logging to it. - VBOX: Change the session lock text to make it clearer that the use of other VirtualBox management tools can cause a problem. samples/vboxwrapper/ vbox.cpp, .h vboxwrapper.cpp svn path=/trunk/boinc/; revision=24990
This commit is contained in:
parent
21aed83249
commit
d2784ffc4b
|
@ -124,4 +124,23 @@ Rom 3 Jan 2012
|
|||
They didn't wait for the next OpenCL spec change.
|
||||
|
||||
client/
|
||||
coproc_detect.cpp
|
||||
coproc_detect.cpp
|
||||
|
||||
Rom 3 Jan 2012
|
||||
- VBOX: Per tester feedback, at an hourly status report to track
|
||||
network usage.
|
||||
- VBOX: For errors where an error code is specified in the output,
|
||||
use that value when returning from vbm_open(). Make it easier
|
||||
to track failure rates by problem type in the ops pages.
|
||||
(Windows Only)
|
||||
- VBOX: Try making the process of extracting the hypervisor system
|
||||
log more robust, make a temp copy of it in the slot directory.
|
||||
Avoid exclusive file lock issues since VboxSvc will still be
|
||||
logging to it.
|
||||
- VBOX: Change the session lock text to make it clearer that the
|
||||
use of other VirtualBox management tools can cause a problem.
|
||||
|
||||
|
||||
samples/vboxwrapper/
|
||||
vbox.cpp, .h
|
||||
vboxwrapper.cpp
|
||||
|
|
|
@ -63,7 +63,6 @@ VBOX_VM::VBOX_VM() {
|
|||
job_duration = 0.0;
|
||||
suspended = false;
|
||||
network_suspended = false;
|
||||
startup_completed = false;
|
||||
online = false;
|
||||
crashed = false;
|
||||
enable_cern_dataformat = false;
|
||||
|
@ -119,10 +118,7 @@ int VBOX_VM::run() {
|
|||
boinc_sleep(1.0);
|
||||
} while (timeout <= dtime());
|
||||
|
||||
// From here on out we don't need to report the system log when a vboxmanage
|
||||
// command fails.
|
||||
startup_completed = true;
|
||||
|
||||
if (!online) return ERR_EXEC;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -163,17 +159,19 @@ int VBOX_VM::vbm_popen(string& arguments, string& output, const char* item, bool
|
|||
if ((output.find("VBOX_E_INVALID_OBJECT_STATE") != string::npos) &&
|
||||
(output.find("already locked") != string::npos))
|
||||
{
|
||||
if (retry_notes.find("already locked") == string::npos) {
|
||||
retry_notes += "Virtual Machine session already locked.\n";
|
||||
if (retry_notes.find("Another VirtualBox management") == string::npos) {
|
||||
retry_notes += "Another VirtualBox management application has locked the session for\n";
|
||||
retry_notes += "this virtual machine. BOINC cannot properly monitor this virtual machine\n";
|
||||
retry_notes += "and so this job will be aborted.\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Timeout?
|
||||
if (retry_count >= 6) break;
|
||||
|
||||
// Retry?
|
||||
if (!retry_failures) break;
|
||||
|
||||
// Timeout?
|
||||
if (retry_count >= 6) break;
|
||||
|
||||
retry_count++;
|
||||
boinc_sleep(5.0);
|
||||
}
|
||||
|
@ -196,12 +194,6 @@ int VBOX_VM::vbm_popen(string& arguments, string& output, const char* item, bool
|
|||
output += "\nNotes:\n\n" + retry_notes;
|
||||
}
|
||||
|
||||
if (!startup_completed) {
|
||||
string system_log;
|
||||
get_system_log(system_log);
|
||||
output += "\nHypervisor System Log:\n\n" + system_log;
|
||||
}
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Error in %s for VM: %d\nArguments:\n%s\nOutput:\n%s\n",
|
||||
|
@ -240,6 +232,9 @@ int VBOX_VM::vbm_popen_raw(string& arguments, string& output) {
|
|||
DWORD dwCount = 0;
|
||||
unsigned long ulExitCode = 0;
|
||||
unsigned long ulExitTimeout = 0;
|
||||
size_t errcode_start;
|
||||
size_t errcode_end;
|
||||
string errcode;
|
||||
|
||||
memset(&si, 0, sizeof(si));
|
||||
memset(&pi, 0, sizeof(pi));
|
||||
|
@ -335,7 +330,17 @@ CLEANUP:
|
|||
if (hWritePipe) CloseHandle(hWritePipe);
|
||||
|
||||
if ((ulExitCode != 0) || (!pi.hProcess)) {
|
||||
retval = ERR_FOPEN;
|
||||
|
||||
// Determine the real error code by parsing the output
|
||||
errcode_start = output.find("(0x");
|
||||
errcode_start += 1;
|
||||
errcode_end = output.find(")", errcode_start);
|
||||
errcode = output.substr(errcode_start, errcode_end - errcode_start);
|
||||
|
||||
retval = atol(errcode.c_str());
|
||||
|
||||
// If something couldn't be found, just return ERR_FOPEN
|
||||
if (!retval) retval = ERR_FOPEN;
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -1292,7 +1297,10 @@ int VBOX_VM::get_network_bytes_sent(double& sent) {
|
|||
|
||||
int VBOX_VM::get_system_log(string& log) {
|
||||
string virtualbox_user_home;
|
||||
string virtualbox_system_log;
|
||||
string slot_directory;
|
||||
string virtualbox_system_log_src;
|
||||
string virtualbox_system_log_dst;
|
||||
char buf[256];
|
||||
int retval = 0;
|
||||
|
||||
// Where is VirtualBox storing its configuration files?
|
||||
|
@ -1309,12 +1317,47 @@ int VBOX_VM::get_system_log(string& log) {
|
|||
virtualbox_user_home += "/.VirtualBox";
|
||||
}
|
||||
|
||||
// Where should we copy temp files to?
|
||||
get_slot_directory(slot_directory);
|
||||
|
||||
// Locate and read log file
|
||||
virtualbox_system_log = virtualbox_user_home + "/VBoxSVC.log";
|
||||
if (boinc_file_exists(virtualbox_system_log.c_str())) {
|
||||
virtualbox_system_log_src = virtualbox_user_home + "/VBoxSVC.log";
|
||||
virtualbox_system_log_dst = slot_directory + "/VBoxSVC.log";
|
||||
|
||||
if (boinc_file_exists(virtualbox_system_log_src.c_str())) {
|
||||
// Skip having to deal with various forms of file locks by just making a temp
|
||||
// copy of the log file.
|
||||
boinc_copy(virtualbox_system_log_src.c_str(), virtualbox_system_log_dst.c_str());
|
||||
|
||||
// Keep only the last 16k if it is larger than that.
|
||||
retval = read_file_string(virtualbox_system_log.c_str(), log, 16384, true);
|
||||
read_file_string(virtualbox_system_log_dst.c_str(), log, 16384, true);
|
||||
|
||||
#ifdef _WIN32
|
||||
// Remove \r from the log spew
|
||||
for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
|
||||
if (*iter == '\r') {
|
||||
log.erase(iter);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (log.size() >= 16384) {
|
||||
// Look for the next whole line of text.
|
||||
for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
|
||||
if (*iter == '\n') {
|
||||
log.erase(iter);
|
||||
break;
|
||||
}
|
||||
log.erase(iter);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Could not find the system log at '%s'.\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)),
|
||||
virtualbox_system_log_src.c_str()
|
||||
);
|
||||
retval = ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
|
@ -1336,6 +1379,26 @@ int VBOX_VM::get_vm_log(string& log) {
|
|||
size_t size = output.size();
|
||||
if (size > 16384) {
|
||||
log = output.substr(size - 16384, size);
|
||||
|
||||
#ifdef _WIN32
|
||||
// Remove \r from the log spew
|
||||
for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
|
||||
if (*iter == '\r') {
|
||||
log.erase(iter);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (log.size() >= 16384) {
|
||||
// Look for the next whole line of text.
|
||||
for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
|
||||
if (*iter == '\n') {
|
||||
log.erase(iter);
|
||||
break;
|
||||
}
|
||||
log.erase(iter);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log = output;
|
||||
}
|
||||
|
|
|
@ -51,8 +51,6 @@ struct VBOX_VM {
|
|||
bool suspended;
|
||||
// is network access temporarily suspended?
|
||||
bool network_suspended;
|
||||
// has the VM successfully completed the startup code
|
||||
bool startup_completed;
|
||||
// is VM even online?
|
||||
bool online;
|
||||
// Has the VM crashed?
|
||||
|
|
|
@ -119,17 +119,15 @@ int parse_job_file(VBOX_VM& vm) {
|
|||
}
|
||||
|
||||
void write_checkpoint(double cpu) {
|
||||
boinc_begin_critical_section();
|
||||
FILE* f = fopen(CHECKPOINT_FILENAME, "w");
|
||||
if (!f) return;
|
||||
fprintf(f, "%f\n", cpu);
|
||||
fclose(f);
|
||||
boinc_checkpoint_completed();
|
||||
}
|
||||
|
||||
void read_checkpoint(double& cpu) {
|
||||
double c;
|
||||
cpu = 0;
|
||||
cpu = 0.0;
|
||||
FILE* f = fopen(CHECKPOINT_FILENAME, "r");
|
||||
if (!f) return;
|
||||
int n = fscanf(f, "%lf", &c);
|
||||
|
@ -204,18 +202,55 @@ void set_floppy_image(APP_INIT_DATA& aid, VBOX_VM& vm) {
|
|||
}
|
||||
}
|
||||
|
||||
// set port forwarding information if needed
|
||||
//
|
||||
void set_port_forwarding_info(APP_INIT_DATA& /* aid */, VBOX_VM& vm) {
|
||||
char buf[256];
|
||||
|
||||
if (vm.pf_guest_port && vm.pf_host_port) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s port forwarding enabled on port '%d'.\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)), vm.pf_host_port
|
||||
);
|
||||
|
||||
// Write info to disk
|
||||
//
|
||||
MIOFILE mf;
|
||||
FILE* f = boinc_fopen(PORTFORWARD_FILENAME, "w");
|
||||
mf.init_file(f);
|
||||
|
||||
mf.printf(
|
||||
"<port_forwarding>\n"
|
||||
" <rule>\n"
|
||||
" <host_port>%d</host_port>\n"
|
||||
" <guest_port>%d</guest_port>\n"
|
||||
" </rule>\n"
|
||||
"</port_forwarding>\n",
|
||||
vm.pf_host_port,
|
||||
vm.pf_guest_port
|
||||
);
|
||||
|
||||
fclose(f);
|
||||
sprintf(buf, "http://localhost:%d", vm.pf_host_port);
|
||||
boinc_web_graphics_url(buf);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int retval;
|
||||
BOINC_OPTIONS boinc_options;
|
||||
VBOX_VM vm;
|
||||
APP_INIT_DATA aid;
|
||||
double elapsed_time = 0.0;
|
||||
double checkpoint_cpu_time = 0.0;
|
||||
double trickle_period = 0.0;
|
||||
double trickle_cpu_time = 0.0;
|
||||
double fraction_done = 0.0;
|
||||
double bytes_sent = 0.0;
|
||||
double bytes_received = 0.0;
|
||||
double checkpoint_cpu_time = 0.0;
|
||||
double last_status_report_time = 0.0;
|
||||
bool report_vm_pid = false;
|
||||
double bytes_sent=0, bytes_received=0;
|
||||
bool report_net_usage = false;
|
||||
int vm_pid=0;
|
||||
int vm_max_cpus=0;
|
||||
|
@ -328,47 +363,32 @@ int main(int argc, char** argv) {
|
|||
elapsed_time = aid.starting_elapsed_time;
|
||||
read_checkpoint(checkpoint_cpu_time);
|
||||
|
||||
// Should we even try to start things up?
|
||||
if (vm.job_duration && (elapsed_time > vm.job_duration)) {
|
||||
return ERR_RSC_LIMIT_EXCEEDED;
|
||||
}
|
||||
|
||||
retval = vm.run();
|
||||
if (retval) {
|
||||
vm.cleanup();
|
||||
write_checkpoint(checkpoint_cpu_time);
|
||||
write_checkpoint(elapsed_time);
|
||||
|
||||
vm.get_system_log(system_log);
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Hypervisor System Log:\n\n"
|
||||
"%s\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)),
|
||||
system_log.c_str()
|
||||
);
|
||||
|
||||
boinc_finish(retval);
|
||||
}
|
||||
|
||||
set_floppy_image(aid, vm);
|
||||
set_port_forwarding_info(aid, vm);
|
||||
set_throttles(aid, vm);
|
||||
|
||||
// write port-forwarding info if relevant
|
||||
//
|
||||
if (vm.pf_guest_port && vm.pf_host_port) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s port forwarding enabled on port '%d'.\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)), vm.pf_host_port
|
||||
);
|
||||
|
||||
// Write info to disk
|
||||
//
|
||||
MIOFILE mf;
|
||||
FILE* f = boinc_fopen(PORTFORWARD_FILENAME, "w");
|
||||
mf.init_file(f);
|
||||
|
||||
mf.printf(
|
||||
"<port_forwarding>\n"
|
||||
" <rule>\n"
|
||||
" <host_port>%d</host_port>\n"
|
||||
" <guest_port>%d</guest_port>\n"
|
||||
" </rule>\n"
|
||||
"</port_forwarding>\n",
|
||||
vm.pf_host_port,
|
||||
vm.pf_guest_port
|
||||
);
|
||||
|
||||
fclose(f);
|
||||
sprintf(buf, "http://localhost:%d", vm.pf_host_port);
|
||||
boinc_web_graphics_url(buf);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
// Discover the VM's current state
|
||||
vm.poll();
|
||||
|
@ -380,7 +400,7 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
if (boinc_status.abort_request) {
|
||||
vm.cleanup();
|
||||
write_checkpoint(checkpoint_cpu_time);
|
||||
write_checkpoint(elapsed_time);
|
||||
boinc_finish(EXIT_ABORTED_BY_CLIENT);
|
||||
}
|
||||
if (!vm.online) {
|
||||
|
@ -389,7 +409,7 @@ int main(int argc, char** argv) {
|
|||
vm.get_vm_log(vm_log);
|
||||
}
|
||||
vm.cleanup();
|
||||
write_checkpoint(checkpoint_cpu_time);
|
||||
write_checkpoint(elapsed_time);
|
||||
|
||||
if (vm.crashed || (elapsed_time < vm.job_duration)) {
|
||||
fprintf(
|
||||
|
@ -478,11 +498,14 @@ int main(int argc, char** argv) {
|
|||
if (vm.suspended) {
|
||||
vm.resume();
|
||||
}
|
||||
|
||||
elapsed_time += POLL_PERIOD;
|
||||
|
||||
if (!vm_pid) {
|
||||
vm.get_process_id(vm_pid);
|
||||
report_vm_pid = true;
|
||||
}
|
||||
|
||||
if (boinc_time_to_checkpoint()) {
|
||||
checkpoint_cpu_time = elapsed_time;
|
||||
write_checkpoint(checkpoint_cpu_time);
|
||||
|
@ -492,19 +515,40 @@ int main(int argc, char** argv) {
|
|||
fraction_done = 1.0;
|
||||
}
|
||||
}
|
||||
if (report_vm_pid || report_net_usage) {
|
||||
retval = boinc_report_app_status_aux(
|
||||
elapsed_time,
|
||||
checkpoint_cpu_time,
|
||||
fraction_done,
|
||||
vm_pid,
|
||||
bytes_sent,
|
||||
bytes_received
|
||||
);
|
||||
if (!retval) {
|
||||
report_vm_pid = false;
|
||||
report_net_usage = false;
|
||||
}
|
||||
} else {
|
||||
boinc_report_app_status(
|
||||
elapsed_time,
|
||||
checkpoint_cpu_time,
|
||||
fraction_done
|
||||
);
|
||||
}
|
||||
if ((elapsed_time - last_status_report_time) >= 6000.0) {
|
||||
last_status_report_time = elapsed_time;
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Status Report: Elapsed Time: '%f', Network Bytes Sent (Total): '%f', Network Bytes Received (Total): '%f'\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf)),
|
||||
elapsed_time,
|
||||
bytes_sent,
|
||||
bytes_received
|
||||
);
|
||||
}
|
||||
boinc_checkpoint_completed();
|
||||
}
|
||||
if (report_vm_pid || report_net_usage) {
|
||||
retval = boinc_report_app_status_aux(
|
||||
elapsed_time, checkpoint_cpu_time, fraction_done, vm_pid, bytes_sent, bytes_received
|
||||
);
|
||||
if (!retval) {
|
||||
report_vm_pid = false;
|
||||
report_net_usage = false;
|
||||
}
|
||||
} else {
|
||||
boinc_report_app_status(elapsed_time, checkpoint_cpu_time, fraction_done);
|
||||
}
|
||||
|
||||
if (trickle_period) {
|
||||
trickle_cpu_time += POLL_PERIOD;
|
||||
if (trickle_cpu_time >= trickle_period) {
|
||||
|
@ -517,12 +561,10 @@ int main(int argc, char** argv) {
|
|||
// if the VM has a maximum amount of time it is allowed to run,
|
||||
// shut it down gacefully and exit.
|
||||
//
|
||||
if (vm.job_duration > 0.0) {
|
||||
if (elapsed_time > vm.job_duration) {
|
||||
vm.cleanup();
|
||||
write_checkpoint(checkpoint_cpu_time);
|
||||
boinc_finish(0);
|
||||
}
|
||||
if (vm.job_duration && (elapsed_time > vm.job_duration)) {
|
||||
vm.cleanup();
|
||||
write_checkpoint(elapsed_time);
|
||||
boinc_finish(0);
|
||||
}
|
||||
}
|
||||
if (vm.enable_network) {
|
||||
|
@ -538,6 +580,13 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
if (boinc_status.reread_init_data_file) {
|
||||
boinc_status.reread_init_data_file = false;
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Preference change detected\n",
|
||||
boinc_msg_prefix(buf, sizeof(buf))
|
||||
);
|
||||
|
||||
boinc_parse_init_data_file();
|
||||
boinc_get_init_data_p(&aid);
|
||||
set_throttles(aid, vm);
|
||||
|
|
Loading…
Reference in New Issue