- VBOX: Detect another form of out of memory error from VirtualBox.

        Reschedule the job for a later time, when enough memory might
        be available.
    - VBOX: Resume the VM's execution after the creation of the snapshot
        but before we attempt to delete the previous, now stale, snapshot.
    - VBOX: Treat the 'livesnapshotting', 'deletingsnapshotlive', and
        'deletingsnapshotlivepaused' states as online states.
        
    samples/vboxwrapper/
        vbox.cpp
        vboxwrapper.cpp

svn path=/trunk/boinc/; revision=25473
This commit is contained in:
Rom Walton 2012-03-22 18:10:42 +00:00
parent 97fe7c2667
commit fc818a027f
3 changed files with 67 additions and 29 deletions

View File

@ -2941,3 +2941,16 @@ Rom 21 Mar 2012
samples/vboxwrapper/
vbox.cpp, .h
Rom 22 Mar 2012
- VBOX: Detect another form of out of memory error from VirtualBox.
Reschedule the job for some time in the future when there might
be enough memory.
- VBOX: Resume the VM's execution after the creation of the snapshot
but before we attempt to delete the previous, now stale, snapshot.
- VBOX: Treat the 'livesnapshotting', 'deletingsnapshotlive', and
'deletingsnapshotlivepaused' states as online states.
samples/vboxwrapper/
vbox.cpp
vboxwrapper.cpp

View File

@ -407,6 +407,13 @@ int VBOX_VM::createsnapshot(double elapsed_time, double checkpoint_cpu_time) {
retval = vbm_popen(command, output, "create new snapshot");
if (retval) return retval;
// Resume VM
resume();
// Set the suspended flag back to false before deleting the stale
// snapshot
poll(false);
// Delete stale snapshot, if one exists
if (checkpoint_cpu_time) {
sprintf(buf, "%d", (int)checkpoint_cpu_time);
@ -417,9 +424,6 @@ int VBOX_VM::createsnapshot(double elapsed_time, double checkpoint_cpu_time) {
if (retval) return retval;
}
// Resume VM
resume();
fprintf(
stderr,
"%s Checkpoint completed.\n",
@ -441,7 +445,6 @@ int VBOX_VM::restoresnapshot() {
vboxwrapper_msg_prefix(buf, sizeof(buf))
);
// Create from snapshot
command = "snapshot \"" + vm_name + "\" ";
command += "restorecurrent ";
retval = vbm_popen(command, output, "restore current snapshot");
@ -505,6 +508,15 @@ void VBOX_VM::poll(bool log_state) {
online = true;
} else if (vmstate == "restoring") {
online = true;
} else if (vmstate == "livesnapshotting") {
online = true;
} else if (vmstate == "deletingsnapshotlive") {
online = true;
} else if (vmstate == "deletingsnapshotlivepaused") {
online = true;
} else if (vmstate == "aborted") {
online = false;
crashed = true;
} else if (vmstate == "gurumeditation") {
online = false;
crashed = true;

View File

@ -496,7 +496,7 @@ int main(int argc, char** argv) {
" Please report this issue to the project so that it can be addresssed.\n",
vboxwrapper_msg_prefix(buf, sizeof(buf))
);
} else if (vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) {
} else if ((vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) || (vm_log.find("VERR_NO_MEMORY") != std::string::npos)) {
fprintf(
stderr,
"%s NOTE: VirtualBox has failed to allocate enough memory to start the configured virtual machine.\n"
@ -555,35 +555,48 @@ int main(int argc, char** argv) {
vm.get_vm_log(vm_log);
vm.get_vm_exit_code(vm_exit_code);
}
vm.cleanup();
if (vm.crashed || (elapsed_time < vm.job_duration)) {
// Is this a type of event we can recover from?
if ((vm_log.find("VERR_EM_NO_MEMORY") != std::string::npos) || (vm_log.find("VERR_NO_MEMORY") != std::string::npos)) {
fprintf(
stderr,
"%s VM Premature Shutdown Detected.\n"
" Hypervisor System Log:\n\n"
"%s\n"
" VM Execution Log:\n\n"
"%s\n"
" VM Exit Code: %d (0x%x)\n\n",
vboxwrapper_msg_prefix(buf, sizeof(buf)),
system_log.c_str(),
vm_log.c_str(),
(unsigned int)vm_exit_code,
(unsigned int)vm_exit_code
);
if (vm_exit_code) {
boinc_finish(vm_exit_code);
} else {
boinc_finish(EXIT_ABORTED_BY_CLIENT);
}
} else {
fprintf(
stderr,
"%s Virtual machine exited.\n",
"%s NOTE: VirtualBox has failed to allocate enough memory to continue.\n"
" This might be a temporary problem and so this job will be rescheduled for another time.\n",
vboxwrapper_msg_prefix(buf, sizeof(buf))
);
boinc_finish(0);
vm.reset_vm_process_priority();
vm.poweroff();
boinc_temporary_exit(300, "VM Hypervisor was unable to allocate enough memory.");
} else {
vm.cleanup();
if (vm.crashed || (elapsed_time < vm.job_duration)) {
fprintf(
stderr,
"%s VM Premature Shutdown Detected.\n"
" Hypervisor System Log:\n\n"
"%s\n"
" VM Execution Log:\n\n"
"%s\n"
" VM Exit Code: %d (0x%x)\n\n",
vboxwrapper_msg_prefix(buf, sizeof(buf)),
system_log.c_str(),
vm_log.c_str(),
(unsigned int)vm_exit_code,
(unsigned int)vm_exit_code
);
if (vm_exit_code) {
boinc_finish(vm_exit_code);
} else {
boinc_finish(EXIT_ABORTED_BY_CLIENT);
}
} else {
fprintf(
stderr,
"%s Virtual machine exited.\n",
vboxwrapper_msg_prefix(buf, sizeof(buf))
);
boinc_finish(0);
}
}
}
if (boinc_status.suspended) {