mirror of https://github.com/BOINC/boinc.git
VBOX: Fortify the launch_vboxvm() function and print out any error information if virtualbox/vboxheadless fails wthin the first second of execution.
VBOX: Rework the delete stale snapshot mechanism to avoid the 'multiple child hard drive' errors. VBOX: Treat the RPC_S_SERVER_UNAVAILABLE error when restoring the current snapshot as unrecoverable (vboxsvc has crashed) and exit without attempting to cleaning up. Other VMs running and being monitored by BOINC will crash as well.
This commit is contained in:
parent
8cebb0d6ac
commit
5f2fbe9dfc
|
@ -1303,6 +1303,7 @@ int VBOX_VM::createsnapshot(double elapsed_time) {
|
|||
int VBOX_VM::cleanupsnapshots(bool delete_active) {
|
||||
string command;
|
||||
string output;
|
||||
string snapshotlist;
|
||||
string line;
|
||||
string uuid;
|
||||
size_t eol_pos;
|
||||
|
@ -1319,7 +1320,7 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
|
|||
|
||||
// Only log the error if we are not attempting to deregister the VM.
|
||||
// delete_active is only set to true when we are deregistering the VM.
|
||||
retval = vbm_popen(command, output, "enumerate snapshot(s)", !delete_active, false, 0);
|
||||
retval = vbm_popen(command, snapshotlist, "enumerate snapshot(s)", !delete_active, false, 0);
|
||||
if (retval) return retval;
|
||||
|
||||
// Output should look a little like this:
|
||||
|
@ -1327,16 +1328,29 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
|
|||
// Name: Snapshot 3 (UUID: 92fa8b35-873a-4197-9d54-7b6b746b2c58)
|
||||
// Name: Snapshot 4 (UUID: c049023a-5132-45d5-987d-a9cfadb09664) *
|
||||
//
|
||||
eol_prev_pos = 0;
|
||||
eol_pos = output.find("\n");
|
||||
// Traverse the list from newest to oldest. Otherwise we end up with an error:
|
||||
// VBoxManage.exe: error: Snapshot operation failed
|
||||
// VBoxManage.exe: error: Hard disk 'C:\ProgramData\BOINC\slots\23\vm_image.vdi' has
|
||||
// more than one child hard disk (2)
|
||||
//
|
||||
|
||||
// Prepend a space and line feed to the output since we are going to traverse it backwards
|
||||
snapshotlist = " \n" + snapshotlist;
|
||||
|
||||
eol_prev_pos = snapshotlist.rfind("\n");
|
||||
eol_pos = snapshotlist.rfind("\n", eol_prev_pos - 1);
|
||||
while (eol_pos != string::npos) {
|
||||
line = output.substr(eol_prev_pos, eol_pos - eol_prev_pos);
|
||||
line = snapshotlist.substr(eol_pos, eol_prev_pos - eol_pos);
|
||||
|
||||
// Find the previous line to use in the next iteration
|
||||
eol_prev_pos = eol_pos;
|
||||
eol_pos = snapshotlist.rfind("\n", eol_prev_pos - 1);
|
||||
|
||||
// This VM does not yet have any snapshots
|
||||
if (line.find("does not have any snapshots") != string::npos) break;
|
||||
|
||||
// The * signifies that it is the active snapshot and one we do not want to delete
|
||||
if (!delete_active && (line.find("*") != string::npos)) break;
|
||||
if (!delete_active && (line.rfind("*") != string::npos)) continue;
|
||||
|
||||
uuid_start = line.find("(UUID: ");
|
||||
if (uuid_start != string::npos) {
|
||||
|
@ -1362,9 +1376,6 @@ int VBOX_VM::cleanupsnapshots(bool delete_active) {
|
|||
retval = vbm_popen(command, output, "delete stale snapshot", !delete_active, false, 0);
|
||||
if (retval) return retval;
|
||||
}
|
||||
|
||||
eol_prev_pos = eol_pos + 1;
|
||||
eol_pos = output.find("\n", eol_prev_pos);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2321,6 +2332,7 @@ int VBOX_VM::launch_vboxsvc() {
|
|||
// Launch the VM.
|
||||
int VBOX_VM::launch_vboxvm() {
|
||||
char buf[256];
|
||||
char error_msg[256];
|
||||
int retval = ERR_EXEC;
|
||||
char* argv[5];
|
||||
int argc;
|
||||
|
@ -2346,13 +2358,44 @@ int VBOX_VM::launch_vboxvm() {
|
|||
char cmdline[1024];
|
||||
STARTUPINFO si;
|
||||
PROCESS_INFORMATION pi;
|
||||
SECURITY_ATTRIBUTES sa;
|
||||
SECURITY_DESCRIPTOR sd;
|
||||
HANDLE hReadPipe = NULL, hWritePipe = NULL;
|
||||
void* pBuf = NULL;
|
||||
DWORD dwCount = 0;
|
||||
unsigned long ulExitCode = 0;
|
||||
unsigned long ulExitTimeout = 0;
|
||||
std::string output;
|
||||
|
||||
memset(&si, 0, sizeof(si));
|
||||
memset(&pi, 0, sizeof(pi));
|
||||
memset(&sa, 0, sizeof(sa));
|
||||
memset(&sd, 0, sizeof(sd));
|
||||
|
||||
InitializeSecurityDescriptor(&sd, SECURITY_DESCRIPTOR_REVISION);
|
||||
SetSecurityDescriptorDacl(&sd, true, NULL, false);
|
||||
|
||||
sa.nLength = sizeof(SECURITY_ATTRIBUTES);
|
||||
sa.bInheritHandle = TRUE;
|
||||
sa.lpSecurityDescriptor = &sd;
|
||||
|
||||
if (!CreatePipe(&hReadPipe, &hWritePipe, &sa, NULL)) {
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s CreatePipe failed! (%d).\n",
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf)),
|
||||
GetLastError()
|
||||
);
|
||||
goto CLEANUP;
|
||||
}
|
||||
SetHandleInformation(hReadPipe, HANDLE_FLAG_INHERIT, 0);
|
||||
|
||||
si.cb = sizeof(STARTUPINFO);
|
||||
si.dwFlags |= STARTF_FORCEOFFFEEDBACK | STARTF_USESHOWWINDOW;
|
||||
si.dwFlags |= STARTF_FORCEOFFFEEDBACK | STARTF_USESHOWWINDOW | STARTF_USESTDHANDLES;
|
||||
si.wShowWindow = SW_HIDE;
|
||||
si.hStdOutput = hWritePipe;
|
||||
si.hStdError = hWritePipe;
|
||||
si.hStdInput = NULL;
|
||||
|
||||
strcpy(cmdline, "");
|
||||
for (int i=0; i<argc; i++) {
|
||||
|
@ -2362,25 +2405,83 @@ int VBOX_VM::launch_vboxvm() {
|
|||
}
|
||||
}
|
||||
|
||||
CreateProcess(
|
||||
NULL, cmdline, NULL, NULL, TRUE, CREATE_NO_WINDOW, NULL, NULL, &si, &pi
|
||||
);
|
||||
// Execute command
|
||||
if (!CreateProcess(
|
||||
NULL,
|
||||
cmdline,
|
||||
NULL,
|
||||
NULL,
|
||||
TRUE,
|
||||
CREATE_NO_WINDOW,
|
||||
NULL,
|
||||
NULL,
|
||||
&si,
|
||||
&pi
|
||||
)) {
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf)),
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Status Report: Launching virtualbox.exe/vboxheadless.exe failed!.\n"
|
||||
"%s Error: %s (%d)\n",
|
||||
buf,
|
||||
buf,
|
||||
windows_format_error_string(GetLastError(), error_msg, sizeof(error_msg)),
|
||||
GetLastError()
|
||||
);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
while(1) {
|
||||
GetExitCodeProcess(pi.hProcess, &ulExitCode);
|
||||
|
||||
// Copy stdout/stderr to output buffer, handle in the loop so that we can
|
||||
// copy the pipe as it is populated and prevent the child process from blocking
|
||||
// in case the output is bigger than pipe buffer.
|
||||
PeekNamedPipe(hReadPipe, NULL, NULL, NULL, &dwCount, NULL);
|
||||
if (dwCount) {
|
||||
pBuf = malloc(dwCount+1);
|
||||
memset(pBuf, 0, dwCount+1);
|
||||
|
||||
if (ReadFile(hReadPipe, pBuf, dwCount, &dwCount, NULL)) {
|
||||
output += (char*)pBuf;
|
||||
}
|
||||
|
||||
free(pBuf);
|
||||
}
|
||||
|
||||
if ((ulExitCode != STILL_ACTIVE) || (ulExitTimeout >= 1000)) break;
|
||||
|
||||
Sleep(250);
|
||||
ulExitTimeout += 250;
|
||||
}
|
||||
|
||||
if (ulExitCode != STILL_ACTIVE) {
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf)),
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Status Report: Virtualbox.exe/Vboxheadless.exe exited prematurely!.\n"
|
||||
"%s Exit Code: %d\n"
|
||||
"%s Output:\n"
|
||||
"%s\n",
|
||||
buf,
|
||||
buf,
|
||||
ulExitCode,
|
||||
buf,
|
||||
output.c_str()
|
||||
);
|
||||
}
|
||||
|
||||
if (pi.hThread) CloseHandle(pi.hThread);
|
||||
if (pi.hProcess) {
|
||||
vm_pid = pi.dwProcessId;
|
||||
vm_pid_handle = pi.hProcess;
|
||||
retval = BOINC_SUCCESS;
|
||||
} else {
|
||||
fprintf(
|
||||
stderr,
|
||||
"%s Status Report: Launching virtualbox.exe/vboxheadless.exe failed!.\n"
|
||||
" Error: %s",
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf)),
|
||||
windows_format_error_string(GetLastError(), buf, sizeof(buf))
|
||||
);
|
||||
}
|
||||
|
||||
CLEANUP:
|
||||
if (pi.hThread) CloseHandle(pi.hThread);
|
||||
if (hReadPipe) CloseHandle(hReadPipe);
|
||||
if (hWritePipe) CloseHandle(hWritePipe);
|
||||
|
||||
#else
|
||||
int pid = fork();
|
||||
if (-1 == pid) {
|
||||
|
@ -2402,8 +2503,6 @@ int VBOX_VM::launch_vboxvm() {
|
|||
}
|
||||
#endif
|
||||
|
||||
boinc_sleep(1);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
|
|
@ -188,7 +188,7 @@ struct VBOX_VM {
|
|||
int launch_vboxvm();
|
||||
|
||||
int vbm_popen(
|
||||
std::string& command, std::string& output, const char* item, bool log_error = true, bool retry_failures = true, unsigned int timeout = 60
|
||||
std::string& command, std::string& output, const char* item, bool log_error = true, bool retry_failures = true, unsigned int timeout = 45
|
||||
);
|
||||
int vbm_popen_raw(
|
||||
std::string& command, std::string& output, unsigned int timeout
|
||||
|
|
|
@ -678,6 +678,7 @@ int main(int argc, char** argv) {
|
|||
if (retval) {
|
||||
// All 'failure to start' errors are unrecoverable by default
|
||||
bool unrecoverable_error = true;
|
||||
bool skip_cleanup = false;
|
||||
bool dump_hypervisor_logs = false;
|
||||
string error_reason;
|
||||
char* temp_reason = (char*)"";
|
||||
|
@ -700,6 +701,12 @@ int main(int argc, char** argv) {
|
|||
unrecoverable_error = false;
|
||||
temp_reason = (char*)"Please upgrade BOINC to the latest version.";
|
||||
temp_delay = 86400;
|
||||
} else if (RPC_S_SERVER_UNAVAILABLE == retval) {
|
||||
error_reason =
|
||||
" VboxSvc crashed while attempting to restore the current snapshot. This is a critical\n"
|
||||
" operation and this job cannot be recovered.\n";
|
||||
skip_cleanup = true;
|
||||
retval = ERR_EXEC;
|
||||
} else if (vm.is_logged_failure_vm_extensions_disabled()) {
|
||||
error_reason =
|
||||
" NOTE: BOINC has detected that your computer's processor supports hardware acceleration for\n"
|
||||
|
@ -742,7 +749,9 @@ int main(int argc, char** argv) {
|
|||
|
||||
if (unrecoverable_error) {
|
||||
// Attempt to cleanup the VM and exit.
|
||||
vm.cleanup();
|
||||
if (!skip_cleanup) {
|
||||
vm.cleanup();
|
||||
}
|
||||
write_checkpoint(elapsed_time, vm);
|
||||
|
||||
if (error_reason.size()) {
|
||||
|
@ -795,7 +804,6 @@ int main(int argc, char** argv) {
|
|||
|
||||
// Report the VM pid to BOINC so BOINC can deal with it when needed.
|
||||
//
|
||||
vm.lower_vm_process_priority();
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf));
|
||||
fprintf(
|
||||
stderr,
|
||||
|
@ -821,8 +829,15 @@ int main(int argc, char** argv) {
|
|||
if (vm.online && !vm.restoring) break;
|
||||
boinc_sleep(1.0);
|
||||
} while (timeout >= dtime());
|
||||
|
||||
// Lower the VM process priority after it has successfully brought itself online.
|
||||
vm.lower_vm_process_priority();
|
||||
|
||||
// Log our current state
|
||||
vm.poll(true);
|
||||
|
||||
// Did we timeout?
|
||||
if (timeout <= dtime()) {
|
||||
vm.poll(true);
|
||||
vboxwrapper_msg_prefix(buf, sizeof(buf));
|
||||
fprintf(
|
||||
stderr,
|
||||
|
@ -839,9 +854,11 @@ int main(int argc, char** argv) {
|
|||
set_floppy_image(aid, vm);
|
||||
set_port_forwarding_info(aid, vm);
|
||||
set_remote_desktop_info(aid, vm);
|
||||
set_throttles(aid, vm);
|
||||
write_checkpoint(elapsed_time, vm);
|
||||
|
||||
// Force throttling on our first pass through the loop
|
||||
boinc_status.reread_init_data_file = true;
|
||||
|
||||
while (1) {
|
||||
// Begin stopwatch timer
|
||||
stopwatch_time = dtime();
|
||||
|
|
|
@ -128,12 +128,12 @@
|
|||
</ResourceCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>libcmt.lib;libcpmt.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_intelx86.exe</OutputFile>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_intelx86.exe</OutputFile>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
|
||||
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_intelx86.pdb</ProgramDatabaseFile>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_intelx86.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
|
@ -177,12 +177,12 @@
|
|||
</ResourceCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>libcmt.lib;libcpmt.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_x86_64.exe</OutputFile>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.exe</OutputFile>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
|
||||
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26062_windows_x86_64.pdb</ProgramDatabaseFile>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
|
@ -267,12 +267,12 @@
|
|||
</ResourceCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>libcmtd.lib;libcpmtd.lib;kernel32.lib;user32.lib;gdi32.lib;ole32.lib;wsock32.lib;psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_6.19_windows_x86_64.exe</OutputFile>
|
||||
<OutputFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.exe</OutputFile>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<IgnoreAllDefaultLibraries>true</IgnoreAllDefaultLibraries>
|
||||
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_6.19_windows_x86_64.pdb</ProgramDatabaseFile>
|
||||
<ProgramDatabaseFile>.\Build\$(Platform)\$(Configuration)\vboxwrapper_26063_windows_x86_64.pdb</ProgramDatabaseFile>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
|
|
Loading…
Reference in New Issue