From fc818a027fa8f69d817ae2c5632cf4e86e7d4029 Mon Sep 17 00:00:00 2001 From: Rom Walton Date: Thu, 22 Mar 2012 18:10:42 +0000 Subject: [PATCH] - VBOX: Detect another form of out of memory error from VirtualBox. Reschedule the job for some time in the future when there might be enough memory. - VBOX: Resume the VM's execution after the creation of the snapshot but before we attempt to delete the previous, now stale, snapshot. - VBOX: Treat the 'livesnapshotting', 'deletingsnapshotlive', and 'deletingsnapshotlivepaused' states as online states. samples/vboxwrapper/ vbox.cpp vboxwrapper.cpp svn path=/trunk/boinc/; revision=25473 --- checkin_notes | 13 ++++++ samples/vboxwrapper/vbox.cpp | 20 +++++++-- samples/vboxwrapper/vboxwrapper.cpp | 63 +++++++++++++++++------------ 3 files changed, 67 insertions(+), 29 deletions(-) diff --git a/checkin_notes b/checkin_notes index 1ca7aac9e8..e845440d4d 100644 --- a/checkin_notes +++ b/checkin_notes @@ -2941,3 +2941,16 @@ Rom 21 Mar 2012 samples/vboxwrapper/ vbox.cpp, .h + +Rom 22 Mar 2012 + - VBOX: Detect another form of out of memory error from VirtualBox. + Reschedule the job for some time in the future when there might + be enough memory. + - VBOX: Resume the VM's execution after the creation of the snapshot + but before we attempt to delete the previous, now stale, snapshot. + - VBOX: Treat the 'livesnapshotting', 'deletingsnapshotlive', and + 'deletingsnapshotlivepaused' states as online states. + + samples/vboxwrapper/ + vbox.cpp + vboxwrapper.cpp diff --git a/samples/vboxwrapper/vbox.cpp b/samples/vboxwrapper/vbox.cpp index 8619a1d6ae..3b09991933 100644 --- a/samples/vboxwrapper/vbox.cpp +++ b/samples/vboxwrapper/vbox.cpp @@ -407,6 +407,13 @@ int VBOX_VM::createsnapshot(double elapsed_time, double checkpoint_cpu_time) { retval = vbm_popen(command, output, "create new snapshot"); if (retval) return retval; + // Resume VM + resume(); + + // Set the suspended flag back to false before deleting the stale + // snapshot + poll(false); + // Delete stale snapshot, if one exists if (checkpoint_cpu_time) { sprintf(buf, "%d", (int)checkpoint_cpu_time); @@ -417,9 +424,6 @@ int VBOX_VM::createsnapshot(double elapsed_time, double checkpoint_cpu_time) { if (retval) return retval; } - // Resume VM - resume(); - fprintf( stderr, "%s Checkpoint completed.\n", @@ -441,7 +445,6 @@ int VBOX_VM::restoresnapshot() { vboxwrapper_msg_prefix(buf, sizeof(buf)) ); - // Create from snapshot command = "snapshot \"" + vm_name + "\" "; command += "restorecurrent "; retval = vbm_popen(command, output, "restore current snapshot"); @@ -505,6 +508,15 @@ void VBOX_VM::poll(bool log_state) { online = true; } else if (vmstate == "restoring") { online = true; + } else if (vmstate == "livesnapshotting") { + online = true; + } else if (vmstate == "deletingsnapshotlive") { + online = true; + } else if (vmstate == "deletingsnapshotlivepaused") { + online = true; + } else if (vmstate == "aborted") { + online = false; + crashed = true; } else if (vmstate == "gurumeditation") { online = false; crashed = true; diff --git a/samples/vboxwrapper/vboxwrapper.cpp b/samples/vboxwrapper/vboxwrapper.cpp index bf81e0da96..a0130b4e53 100644 --- a/samples/vboxwrapper/vboxwrapper.cpp +++ b/samples/vboxwrapper/vboxwrapper.cpp @@ -496,7 +496,7 @@ int main(int argc, char** argv) { " Please report this issue to the project so that it can be addresssed.\n", vboxwrapper_msg_prefix(buf, sizeof(buf)) ); - } else if (vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) { + } else if ((vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) || (vm_log.find("VERR_NO_MEMORY") != std::string::npos)) { fprintf( stderr, "%s NOTE: VirtualBox has failed to allocate enough memory to start the configured virtual machine.\n" @@ -555,35 +555,48 @@ int main(int argc, char** argv) { vm.get_vm_log(vm_log); vm.get_vm_exit_code(vm_exit_code); } - vm.cleanup(); - if (vm.crashed || (elapsed_time < vm.job_duration)) { + // Is this a type of event we can recover from? + if ((vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) || (vm_log.find("VERR_NO_MEMORY") != std::string::npos)) { fprintf( stderr, - "%s VM Premature Shutdown Detected.\n" - " Hypervisor System Log:\n\n" - "%s\n" - " VM Execution Log:\n\n" - "%s\n" - " VM Exit Code: %d (0x%x)\n\n", - vboxwrapper_msg_prefix(buf, sizeof(buf)), - system_log.c_str(), - vm_log.c_str(), - (unsigned int)vm_exit_code, - (unsigned int)vm_exit_code - ); - if (vm_exit_code) { - boinc_finish(vm_exit_code); - } else { - boinc_finish(EXIT_ABORTED_BY_CLIENT); - } - } else { - fprintf( - stderr, - "%s Virtual machine exited.\n", + "%s NOTE: VirtualBox has failed to allocate enough memory to continue.\n" + " This might be a temporary problem and so this job will be rescheduled for another time.\n", vboxwrapper_msg_prefix(buf, sizeof(buf)) ); - boinc_finish(0); + vm.reset_vm_process_priority(); + vm.poweroff(); + boinc_temporary_exit(300, "VM Hypervisor was unable to allocate enough memory."); + } else { + vm.cleanup(); + if (vm.crashed || (elapsed_time < vm.job_duration)) { + fprintf( + stderr, + "%s VM Premature Shutdown Detected.\n" + " Hypervisor System Log:\n\n" + "%s\n" + " VM Execution Log:\n\n" + "%s\n" + " VM Exit Code: %d (0x%x)\n\n", + vboxwrapper_msg_prefix(buf, sizeof(buf)), + system_log.c_str(), + vm_log.c_str(), + (unsigned int)vm_exit_code, + (unsigned int)vm_exit_code + ); + if (vm_exit_code) { + boinc_finish(vm_exit_code); + } else { + boinc_finish(EXIT_ABORTED_BY_CLIENT); + } + } else { + fprintf( + stderr, + "%s Virtual machine exited.\n", + vboxwrapper_msg_prefix(buf, sizeof(buf)) + ); + boinc_finish(0); + } } } if (boinc_status.suspended) {