VBOX: Fortify the launch_vboxvm() function and print out any error information if virtualbox/vboxheadless fails wthin the first second of execution.

VBOX: Rework the delete stale snapshot mechanism to avoid the 'multiple child hard drive' errors.
VBOX: Treat the RPC_S_SERVER_UNAVAILABLE error when restoring the current snapshot as unrecoverable (vboxsvc has crashed) and exit without attempting to cleaning up. Other VMs running and being monitored by BOINC will crash as well.
This commit is contained in:
Rom Walton 2014-01-22 10:44:38 -05:00
parent 8cebb0d6ac
commit 5f2fbe9dfc
4 changed files with 150 additions and 34 deletions

View File

@ -1303,6 +1303,7 @@ int VBOX_VM::createsnapshot(double elapsed_time) {
int VBOX_VM::cleanupsnapshots(bool delete_active) {
string command;
string output;
string snapshotlist;
string line;
string uuid;
size_t eol_pos;
@ -1319,7 +1320,7 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
// Only log the error if we are not attempting to deregister the VM.
// delete_active is only set to true when we are deregistering the VM.
retval = vbm_popen(command, output, "enumerate snapshot(s)", !delete_active, false, 0);
retval = vbm_popen(command, snapshotlist, "enumerate snapshot(s)", !delete_active, false, 0);
if (retval) return retval;
// Output should look a little like this:
@ -1327,16 +1328,29 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
// Name: Snapshot 3 (UUID: 92fa8b35-873a-4197-9d54-7b6b746b2c58)
// Name: Snapshot 4 (UUID: c049023a-5132-45d5-987d-a9cfadb09664) *
//
eol_prev_pos = 0;
eol_pos = output.find("\n");
// Traverse the list from newest to oldest. Otherwise we end up with an error:
// VBoxManage.exe: error: Snapshot operation failed
// VBoxManage.exe: error: Hard disk 'C:\ProgramData\BOINC\slots\23\vm_image.vdi' has
// more than one child hard disk (2)
//
// Prepend a space and line feed to the output since we are going to traverse it backwards
snapshotlist = " \n" + snapshotlist;
eol_prev_pos = snapshotlist.rfind("\n");
eol_pos = snapshotlist.rfind("\n", eol_prev_pos - 1);
while (eol_pos != string::npos) {
line = output.substr(eol_prev_pos, eol_pos - eol_prev_pos);
line = snapshotlist.substr(eol_pos, eol_prev_pos - eol_pos);
// Find the previous line to use in the next iteration
eol_prev_pos = eol_pos;
eol_pos = snapshotlist.rfind("\n", eol_prev_pos - 1);
// This VM does not yet have any snapshots
if (line.find("does not have any snapshots") != string::npos) break;
// The * signifies that it is the active snapshot and one we do not want to delete
if (!delete_active && (line.find("*") != string::npos)) break;
if (!delete_active && (line.rfind("*") != string::npos)) continue;
uuid_start = line.find("(UUID: ");
if (uuid_start != string::npos) {
@ -1362,9 +1376,6 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
retval = vbm_popen(command, output, "delete stale snapshot", !delete_active, false, 0);
if (retval) return retval;
}
eol_prev_pos = eol_pos + 1;
eol_pos = output.find("\n", eol_prev_pos);
}
return 0;
@ -2321,6 +2332,7 @@ int VBOX_VM::launch_vboxsvc() {
// Launch the VM.
int VBOX_VM::launch_vboxvm() {
char buf[256];
char error_msg[256];
int retval = ERR_EXEC;
char* argv[5];
int argc;
@ -2346,13 +2358,44 @@ int VBOX_VM::launch_vboxvm() {
char cmdline[1024];
STARTUPINFO si;
PROCESS_INFORMATION pi;
SECURITY_ATTRIBUTES sa;
SECURITY_DESCRIPTOR sd;
HANDLE hReadPipe = NULL, hWritePipe = NULL;
void* pBuf = NULL;
DWORD dwCount = 0;
unsigned long ulExitCode = 0;
unsigned long ulExitTimeout = 0;
std::string output;
memset(&si, 0, sizeof(si));
memset(&pi, 0, sizeof(pi));
memset(&sa, 0, sizeof(sa));
memset(&sd, 0, sizeof(sd));
InitializeSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION);
SetSecurityDescriptorDacl(&sd, true, NULL, false);
sa.nLength = sizeof(SECURITY_ATTRIBUTES);
sa.bInheritHandle = TRUE;
sa.lpSecurityDescriptor = &sd;
if (!CreatePipe(&hReadPipe, &hWritePipe, &sa, NULL)) {
fprintf(
stderr,
"%s CreatePipe failed! (%d).\n",
vboxwrapper_msg_prefix(buf, sizeof(buf)),
GetLastError()
);
goto CLEANUP;
}
SetHandleInformation(hReadPipe, HANDLE_FLAG_INHERIT, 0);
si.cb = sizeof(STARTUPINFO);
si.dwFlags |= STARTF_FORCEOFFFEEDBACK | STARTF_USESHOWWINDOW;
si.dwFlags |= STARTF_FORCEOFFFEEDBACK | STARTF_USESHOWWINDOW | STARTF_USESTDHANDLES;
si.wShowWindow = SW_HIDE;
si.hStdOutput = hWritePipe;
si.hStdError = hWritePipe;
si.hStdInput = NULL;
strcpy(cmdline, "");
for (int i=0; i<argc; i++) {
@ -2362,25 +2405,83 @@ int VBOX_VM::launch_vboxvm() {
}
}
CreateProcess(
NULL, cmdline, NULL, NULL, TRUE, CREATE_NO_WINDOW, NULL, NULL, &si, &pi
);
// Execute command
if (!CreateProcess(
NULL,
cmdline,
NULL,
NULL,
TRUE,
CREATE_NO_WINDOW,
NULL,
NULL,
&si,
&pi
)) {
vboxwrapper_msg_prefix(buf, sizeof(buf)),
fprintf(
stderr,
"%s Status Report: Launching virtualbox.exe/vboxheadless.exe failed!.\n"
"%s Error: %s (%d)\n",
buf,
buf,
windows_format_error_string(GetLastError(), error_msg, sizeof(error_msg)),
GetLastError()
);
goto CLEANUP;
}
while(1) {
GetExitCodeProcess(pi.hProcess, &ulExitCode);
// Copy stdout/stderr to output buffer, handle in the loop so that we can
// copy the pipe as it is populated and prevent the child process from blocking
// in case the output is bigger than pipe buffer.
PeekNamedPipe(hReadPipe, NULL, NULL, NULL, &dwCount, NULL);
if (dwCount) {
pBuf = malloc(dwCount+1);
memset(pBuf, 0, dwCount+1);
if (ReadFile(hReadPipe, pBuf, dwCount, &dwCount, NULL)) {
output += (char*)pBuf;
}
free(pBuf);
}
if ((ulExitCode != STILL_ACTIVE) || (ulExitTimeout >= 1000)) break;
Sleep(250);
ulExitTimeout += 250;
}
if (ulExitCode != STILL_ACTIVE) {
vboxwrapper_msg_prefix(buf, sizeof(buf)),
fprintf(
stderr,
"%s Status Report: Virtualbox.exe/Vboxheadless.exe exited prematurely!.\n"
"%s Exit Code: %d\n"
"%s Output:\n"
"%s\n",
buf,
buf,
ulExitCode,
buf,
output.c_str()
);
}
if (pi.hThread) CloseHandle(pi.hThread);
if (pi.hProcess) {
vm_pid = pi.dwProcessId;
vm_pid_handle = pi.hProcess;
retval = BOINC_SUCCESS;
} else {
fprintf(
stderr,
"%s Status Report: Launching virtualbox.exe/vboxheadless.exe failed!.\n"
" Error: %s",
vboxwrapper_msg_prefix(buf, sizeof(buf)),
windows_format_error_string(GetLastError(), buf, sizeof(buf))
);
}
CLEANUP:
if (pi.hThread) CloseHandle(pi.hThread);
if (hReadPipe) CloseHandle(hReadPipe);
if (hWritePipe) CloseHandle(hWritePipe);
#else
int pid = fork();
if (-1 == pid) {
@ -2402,8 +2503,6 @@ int VBOX_VM::launch_vboxvm() {
}
#endif
boinc_sleep(1);
return retval;
}

View File

@ -188,7 +188,7 @@ struct VBOX_VM {
int launch_vboxvm();
int vbm_popen(
std::string& command, std::string& output, const char* item, bool log_error = true, bool retry_failures = true, unsigned int timeout = 60
std::string& command, std::string& output, const char* item, bool log_error = true, bool retry_failures = true, unsigned int timeout = 45
);
int vbm_popen_raw(
std::string& command, std::string& output, unsigned int timeout

View File

@ -678,6 +678,7 @@ int main(int argc, char** argv) {
if (retval) {
// All 'failure to start' errors are unrecoverable by default
bool unrecoverable_error = true;
bool skip_cleanup = false;
bool dump_hypervisor_logs = false;
string error_reason;
char* temp_reason = (char*)"";
@ -700,6 +701,12 @@ int main(int argc, char** argv) {
unrecoverable_error = false;
temp_reason = (char*)"Please upgrade BOINC to the latest version.";
temp_delay = 86400;
} else if (RPC_S_SERVER_UNAVAILABLE == retval) {
error_reason =
" VboxSvc crashed while attempting to restore the current snapshot. This is a critical\n"
" operation and this job cannot be recovered.\n";
skip_cleanup = true;
retval = ERR_EXEC;
} else if (vm.is_logged_failure_vm_extensions_disabled()) {
error_reason =
" NOTE: BOINC has detected that your computer's processor supports hardware acceleration for\n"
@ -742,7 +749,9 @@ int main(int argc, char** argv) {
if (unrecoverable_error) {
// Attempt to cleanup the VM and exit.
vm.cleanup();
if (!skip_cleanup) {
vm.cleanup();
}
write_checkpoint(elapsed_time, vm);
if (error_reason.size()) {
@ -795,7 +804,6 @@ int main(int argc, char** argv) {
// Report the VM pid to BOINC so BOINC can deal with it when needed.
//
vm.lower_vm_process_priority();
vboxwrapper_msg_prefix(buf, sizeof(buf));
fprintf(
stderr,
@ -821,8 +829,15 @@ int main(int argc, char** argv) {
if (vm.online && !vm.restoring) break;
boinc_sleep(1.0);
} while (timeout >= dtime());
// Lower the VM process priority after it has successfully brought itself online.
vm.lower_vm_process_priority();
// Log our current state
vm.poll(true);
// Did we timeout?
if (timeout <= dtime()) {
vm.poll(true);
vboxwrapper_msg_prefix(buf, sizeof(buf));
fprintf(
stderr,
@ -839,9 +854,11 @@ int main(int argc, char** argv) {
set_floppy_image(aid, vm);
set_port_forwarding_info(aid, vm);
set_remote_desktop_info(aid, vm);
set_throttles(aid, vm);
write_checkpoint(elapsed_time, vm);
// Force throttling on our first pass through the loop
boinc_status.reread_init_data_file = true;
while (1) {
// Begin stopwatch timer
stopwatch_time = dtime();

View File

@ -128,12 +128,12 @@
</ResourceCompile>
<Link>
<AdditionalDependencies>libcmt.lib;libcpmt.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_intelx86.exe</OutputFile>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_intelx86.exe</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_intelx86.pdb</ProgramDatabaseFile>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_intelx86.pdb</ProgramDatabaseFile>
<SubSystem>Windows</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
</Link>
@ -177,12 +177,12 @@
</ResourceCompile>
<Link>
<AdditionalDependencies>libcmt.lib;libcpmt.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_x86_64.exe</OutputFile>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.exe</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_x86_64.pdb</ProgramDatabaseFile>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.pdb</ProgramDatabaseFile>
<SubSystem>Windows</SubSystem>
<TargetMachine>MachineX64</TargetMachine>
</Link>
@ -267,12 +267,12 @@
</ResourceCompile>
<Link>
<AdditionalDependencies>libcmtd.lib;libcpmtd.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_6.19_windows_x86_64.exe</OutputFile>
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.exe</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_6.19_windows_x86_64.pdb</ProgramDatabaseFile>
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.pdb</ProgramDatabaseFile>
<SubSystem>Windows</SubSystem>
<TargetMachine>MachineX64</TargetMachine>
</Link>