diff --git a/checkin_notes b/checkin_notes
index 070fbf675e..b15fdb0113 100644
--- a/checkin_notes
+++ b/checkin_notes
@@ -124,4 +124,23 @@ Rom 3 Jan 2012
They didn't wait for the next OpenCL spec change.
client/
- coproc_detect.cpp
\ No newline at end of file
+ coproc_detect.cpp
+
+Rom 3 Jan 2012
+ - VBOX: Per tester feedback, at an hourly status report to track
+ network usage.
+ - VBOX: For errors where an error code is specified in the output,
+ use that value when returning from vbm_open(). Make it easier
+ to track failure rates by problem type in the ops pages.
+ (Windows Only)
+ - VBOX: Try making the process of extracting the hypervisor system
+ log more robust, make a temp copy of it in the slot directory.
+ Avoid exclusive file lock issues since VboxSvc will still be
+ logging to it.
+ - VBOX: Change the session lock text to make it clearer that the
+ use of other VirtualBox management tools can cause a problem.
+
+
+ samples/vboxwrapper/
+ vbox.cpp, .h
+ vboxwrapper.cpp
diff --git a/samples/vboxwrapper/vbox.cpp b/samples/vboxwrapper/vbox.cpp
index a1e17dd4d9..9985bfea93 100644
--- a/samples/vboxwrapper/vbox.cpp
+++ b/samples/vboxwrapper/vbox.cpp
@@ -63,7 +63,6 @@ VBOX_VM::VBOX_VM() {
job_duration = 0.0;
suspended = false;
network_suspended = false;
- startup_completed = false;
online = false;
crashed = false;
enable_cern_dataformat = false;
@@ -119,10 +118,7 @@ int VBOX_VM::run() {
boinc_sleep(1.0);
} while (timeout <= dtime());
- // From here on out we don't need to report the system log when a vboxmanage
- // command fails.
- startup_completed = true;
-
+ if (!online) return ERR_EXEC;
return 0;
}
@@ -163,17 +159,19 @@ int VBOX_VM::vbm_popen(string& arguments, string& output, const char* item, bool
if ((output.find("VBOX_E_INVALID_OBJECT_STATE") != string::npos) &&
(output.find("already locked") != string::npos))
{
- if (retry_notes.find("already locked") == string::npos) {
- retry_notes += "Virtual Machine session already locked.\n";
+ if (retry_notes.find("Another VirtualBox management") == string::npos) {
+ retry_notes += "Another VirtualBox management application has locked the session for\n";
+ retry_notes += "this virtual machine. BOINC cannot properly monitor this virtual machine\n";
+ retry_notes += "and so this job will be aborted.\n\n";
}
}
- // Timeout?
- if (retry_count >= 6) break;
-
// Retry?
if (!retry_failures) break;
+ // Timeout?
+ if (retry_count >= 6) break;
+
retry_count++;
boinc_sleep(5.0);
}
@@ -196,12 +194,6 @@ int VBOX_VM::vbm_popen(string& arguments, string& output, const char* item, bool
output += "\nNotes:\n\n" + retry_notes;
}
- if (!startup_completed) {
- string system_log;
- get_system_log(system_log);
- output += "\nHypervisor System Log:\n\n" + system_log;
- }
-
fprintf(
stderr,
"%s Error in %s for VM: %d\nArguments:\n%s\nOutput:\n%s\n",
@@ -240,6 +232,9 @@ int VBOX_VM::vbm_popen_raw(string& arguments, string& output) {
DWORD dwCount = 0;
unsigned long ulExitCode = 0;
unsigned long ulExitTimeout = 0;
+ size_t errcode_start;
+ size_t errcode_end;
+ string errcode;
memset(&si, 0, sizeof(si));
memset(&pi, 0, sizeof(pi));
@@ -335,7 +330,17 @@ CLEANUP:
if (hWritePipe) CloseHandle(hWritePipe);
if ((ulExitCode != 0) || (!pi.hProcess)) {
- retval = ERR_FOPEN;
+
+ // Determine the real error code by parsing the output
+ errcode_start = output.find("(0x");
+ errcode_start += 1;
+ errcode_end = output.find(")", errcode_start);
+ errcode = output.substr(errcode_start, errcode_end - errcode_start);
+
+ retval = atol(errcode.c_str());
+
+ // If something couldn't be found, just return ERR_FOPEN
+ if (!retval) retval = ERR_FOPEN;
}
#else
@@ -1292,7 +1297,10 @@ int VBOX_VM::get_network_bytes_sent(double& sent) {
int VBOX_VM::get_system_log(string& log) {
string virtualbox_user_home;
- string virtualbox_system_log;
+ string slot_directory;
+ string virtualbox_system_log_src;
+ string virtualbox_system_log_dst;
+ char buf[256];
int retval = 0;
// Where is VirtualBox storing its configuration files?
@@ -1309,12 +1317,47 @@ int VBOX_VM::get_system_log(string& log) {
virtualbox_user_home += "/.VirtualBox";
}
+ // Where should we copy temp files to?
+ get_slot_directory(slot_directory);
+
// Locate and read log file
- virtualbox_system_log = virtualbox_user_home + "/VBoxSVC.log";
- if (boinc_file_exists(virtualbox_system_log.c_str())) {
+ virtualbox_system_log_src = virtualbox_user_home + "/VBoxSVC.log";
+ virtualbox_system_log_dst = slot_directory + "/VBoxSVC.log";
+
+ if (boinc_file_exists(virtualbox_system_log_src.c_str())) {
+ // Skip having to deal with various forms of file locks by just making a temp
+ // copy of the log file.
+ boinc_copy(virtualbox_system_log_src.c_str(), virtualbox_system_log_dst.c_str());
+
// Keep only the last 16k if it is larger than that.
- retval = read_file_string(virtualbox_system_log.c_str(), log, 16384, true);
+ read_file_string(virtualbox_system_log_dst.c_str(), log, 16384, true);
+
+#ifdef _WIN32
+ // Remove \r from the log spew
+ for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
+ if (*iter == '\r') {
+ log.erase(iter);
+ }
+ }
+#endif
+
+ if (log.size() >= 16384) {
+ // Look for the next whole line of text.
+ for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
+ if (*iter == '\n') {
+ log.erase(iter);
+ break;
+ }
+ log.erase(iter);
+ }
+ }
} else {
+ fprintf(
+ stderr,
+ "%s Could not find the system log at '%s'.\n",
+ boinc_msg_prefix(buf, sizeof(buf)),
+ virtualbox_system_log_src.c_str()
+ );
retval = ERR_NOT_FOUND;
}
@@ -1336,6 +1379,26 @@ int VBOX_VM::get_vm_log(string& log) {
size_t size = output.size();
if (size > 16384) {
log = output.substr(size - 16384, size);
+
+#ifdef _WIN32
+ // Remove \r from the log spew
+ for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
+ if (*iter == '\r') {
+ log.erase(iter);
+ }
+ }
+#endif
+
+ if (log.size() >= 16384) {
+ // Look for the next whole line of text.
+ for (string::iterator iter = log.begin(); iter != log.end(); ++iter) {
+ if (*iter == '\n') {
+ log.erase(iter);
+ break;
+ }
+ log.erase(iter);
+ }
+ }
} else {
log = output;
}
diff --git a/samples/vboxwrapper/vbox.h b/samples/vboxwrapper/vbox.h
index 26df081a8d..6078ab57f8 100644
--- a/samples/vboxwrapper/vbox.h
+++ b/samples/vboxwrapper/vbox.h
@@ -51,8 +51,6 @@ struct VBOX_VM {
bool suspended;
// is network access temporarily suspended?
bool network_suspended;
- // has the VM successfully completed the startup code
- bool startup_completed;
// is VM even online?
bool online;
// Has the VM crashed?
diff --git a/samples/vboxwrapper/vboxwrapper.cpp b/samples/vboxwrapper/vboxwrapper.cpp
index ec90d0c745..5d417853d7 100644
--- a/samples/vboxwrapper/vboxwrapper.cpp
+++ b/samples/vboxwrapper/vboxwrapper.cpp
@@ -119,17 +119,15 @@ int parse_job_file(VBOX_VM& vm) {
}
void write_checkpoint(double cpu) {
- boinc_begin_critical_section();
FILE* f = fopen(CHECKPOINT_FILENAME, "w");
if (!f) return;
fprintf(f, "%f\n", cpu);
fclose(f);
- boinc_checkpoint_completed();
}
void read_checkpoint(double& cpu) {
double c;
- cpu = 0;
+ cpu = 0.0;
FILE* f = fopen(CHECKPOINT_FILENAME, "r");
if (!f) return;
int n = fscanf(f, "%lf", &c);
@@ -204,18 +202,55 @@ void set_floppy_image(APP_INIT_DATA& aid, VBOX_VM& vm) {
}
}
+// set port forwarding information if needed
+//
+void set_port_forwarding_info(APP_INIT_DATA& /* aid */, VBOX_VM& vm) {
+ char buf[256];
+
+ if (vm.pf_guest_port && vm.pf_host_port) {
+ fprintf(
+ stderr,
+ "%s port forwarding enabled on port '%d'.\n",
+ boinc_msg_prefix(buf, sizeof(buf)), vm.pf_host_port
+ );
+
+ // Write info to disk
+ //
+ MIOFILE mf;
+ FILE* f = boinc_fopen(PORTFORWARD_FILENAME, "w");
+ mf.init_file(f);
+
+ mf.printf(
+ "\n"
+ " \n"
+ " %d\n"
+ " %d\n"
+ " \n"
+ "\n",
+ vm.pf_host_port,
+ vm.pf_guest_port
+ );
+
+ fclose(f);
+ sprintf(buf, "http://localhost:%d", vm.pf_host_port);
+ boinc_web_graphics_url(buf);
+ }
+}
+
int main(int argc, char** argv) {
int retval;
BOINC_OPTIONS boinc_options;
VBOX_VM vm;
APP_INIT_DATA aid;
double elapsed_time = 0.0;
- double checkpoint_cpu_time = 0.0;
double trickle_period = 0.0;
double trickle_cpu_time = 0.0;
double fraction_done = 0.0;
+ double bytes_sent = 0.0;
+ double bytes_received = 0.0;
+ double checkpoint_cpu_time = 0.0;
+ double last_status_report_time = 0.0;
bool report_vm_pid = false;
- double bytes_sent=0, bytes_received=0;
bool report_net_usage = false;
int vm_pid=0;
int vm_max_cpus=0;
@@ -328,47 +363,32 @@ int main(int argc, char** argv) {
elapsed_time = aid.starting_elapsed_time;
read_checkpoint(checkpoint_cpu_time);
+ // Should we even try to start things up?
+ if (vm.job_duration && (elapsed_time > vm.job_duration)) {
+ return ERR_RSC_LIMIT_EXCEEDED;
+ }
+
retval = vm.run();
if (retval) {
vm.cleanup();
- write_checkpoint(checkpoint_cpu_time);
+ write_checkpoint(elapsed_time);
+
+ vm.get_system_log(system_log);
+ fprintf(
+ stderr,
+ "%s Hypervisor System Log:\n\n"
+ "%s\n",
+ boinc_msg_prefix(buf, sizeof(buf)),
+ system_log.c_str()
+ );
+
boinc_finish(retval);
}
set_floppy_image(aid, vm);
+ set_port_forwarding_info(aid, vm);
set_throttles(aid, vm);
- // write port-forwarding info if relevant
- //
- if (vm.pf_guest_port && vm.pf_host_port) {
- fprintf(
- stderr,
- "%s port forwarding enabled on port '%d'.\n",
- boinc_msg_prefix(buf, sizeof(buf)), vm.pf_host_port
- );
-
- // Write info to disk
- //
- MIOFILE mf;
- FILE* f = boinc_fopen(PORTFORWARD_FILENAME, "w");
- mf.init_file(f);
-
- mf.printf(
- "\n"
- " \n"
- " %d\n"
- " %d\n"
- " \n"
- "\n",
- vm.pf_host_port,
- vm.pf_guest_port
- );
-
- fclose(f);
- sprintf(buf, "http://localhost:%d", vm.pf_host_port);
- boinc_web_graphics_url(buf);
- }
-
while (1) {
// Discover the VM's current state
vm.poll();
@@ -380,7 +400,7 @@ int main(int argc, char** argv) {
}
if (boinc_status.abort_request) {
vm.cleanup();
- write_checkpoint(checkpoint_cpu_time);
+ write_checkpoint(elapsed_time);
boinc_finish(EXIT_ABORTED_BY_CLIENT);
}
if (!vm.online) {
@@ -389,7 +409,7 @@ int main(int argc, char** argv) {
vm.get_vm_log(vm_log);
}
vm.cleanup();
- write_checkpoint(checkpoint_cpu_time);
+ write_checkpoint(elapsed_time);
if (vm.crashed || (elapsed_time < vm.job_duration)) {
fprintf(
@@ -478,11 +498,14 @@ int main(int argc, char** argv) {
if (vm.suspended) {
vm.resume();
}
+
elapsed_time += POLL_PERIOD;
+
if (!vm_pid) {
vm.get_process_id(vm_pid);
report_vm_pid = true;
}
+
if (boinc_time_to_checkpoint()) {
checkpoint_cpu_time = elapsed_time;
write_checkpoint(checkpoint_cpu_time);
@@ -492,19 +515,40 @@ int main(int argc, char** argv) {
fraction_done = 1.0;
}
}
+ if (report_vm_pid || report_net_usage) {
+ retval = boinc_report_app_status_aux(
+ elapsed_time,
+ checkpoint_cpu_time,
+ fraction_done,
+ vm_pid,
+ bytes_sent,
+ bytes_received
+ );
+ if (!retval) {
+ report_vm_pid = false;
+ report_net_usage = false;
+ }
+ } else {
+ boinc_report_app_status(
+ elapsed_time,
+ checkpoint_cpu_time,
+ fraction_done
+ );
+ }
+ if ((elapsed_time - last_status_report_time) >= 6000.0) {
+ last_status_report_time = elapsed_time;
+ fprintf(
+ stderr,
+ "%s Status Report: Elapsed Time: '%f', Network Bytes Sent (Total): '%f', Network Bytes Received (Total): '%f'\n",
+ boinc_msg_prefix(buf, sizeof(buf)),
+ elapsed_time,
+ bytes_sent,
+ bytes_received
+ );
+ }
boinc_checkpoint_completed();
}
- if (report_vm_pid || report_net_usage) {
- retval = boinc_report_app_status_aux(
- elapsed_time, checkpoint_cpu_time, fraction_done, vm_pid, bytes_sent, bytes_received
- );
- if (!retval) {
- report_vm_pid = false;
- report_net_usage = false;
- }
- } else {
- boinc_report_app_status(elapsed_time, checkpoint_cpu_time, fraction_done);
- }
+
if (trickle_period) {
trickle_cpu_time += POLL_PERIOD;
if (trickle_cpu_time >= trickle_period) {
@@ -517,12 +561,10 @@ int main(int argc, char** argv) {
// if the VM has a maximum amount of time it is allowed to run,
// shut it down gacefully and exit.
//
- if (vm.job_duration > 0.0) {
- if (elapsed_time > vm.job_duration) {
- vm.cleanup();
- write_checkpoint(checkpoint_cpu_time);
- boinc_finish(0);
- }
+ if (vm.job_duration && (elapsed_time > vm.job_duration)) {
+ vm.cleanup();
+ write_checkpoint(elapsed_time);
+ boinc_finish(0);
}
}
if (vm.enable_network) {
@@ -538,6 +580,13 @@ int main(int argc, char** argv) {
}
if (boinc_status.reread_init_data_file) {
boinc_status.reread_init_data_file = false;
+
+ fprintf(
+ stderr,
+ "%s Preference change detected\n",
+ boinc_msg_prefix(buf, sizeof(buf))
+ );
+
boinc_parse_init_data_file();
boinc_get_init_data_p(&aid);
set_throttles(aid, vm);