mirror of https://github.com/BOINC/boinc.git
1364 lines
49 KiB
C++
1364 lines
49 KiB
C++
// This file is part of BOINC.
|
|
// http://boinc.berkeley.edu
|
|
// Copyright (C) 2010-2012 University of California
|
|
//
|
|
// BOINC is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU Lesser General Public License
|
|
// as published by the Free Software Foundation,
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
//
|
|
// BOINC is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
// See the GNU Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public License
|
|
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
// BOINC VirtualBox wrapper; lets you run apps in VMs
|
|
// see: http://boinc.berkeley.edu/trac/wiki/VboxApps
|
|
//
|
|
// usage: vboxwrapper [options]
|
|
//
|
|
// --trickle X send a trickle-up message reporting elapsed time every X sec
|
|
// (use this for credit granting if your app does its
|
|
// own job management, like CernVM).
|
|
// --nthreads N create a VM with N threads.
|
|
// --vmimage N Use "vm_image_N" as the VM image.
|
|
// This lets you create an app version with several images,
|
|
// and the app_plan function can decide which one to use
|
|
// for the particular host.
|
|
// --register_only Register the VM but don't run it.
|
|
// Useful for debugging; see the wiki page
|
|
//
|
|
// Handles:
|
|
// - suspend/resume/quit/abort
|
|
// - reporting CPU time
|
|
// - loss of heartbeat from client
|
|
// - checkpoint (using snapshots)
|
|
// - a bunch of other stuff; see the wiki page
|
|
//
|
|
// Contributors:
|
|
// Rom Walton
|
|
// David Anderson
|
|
// Andrew J. Younge (ajy4490 AT umiacs DOT umd DOT edu)
|
|
// Jie Wu <jiewu AT cern DOT ch>
|
|
// Daniel Lombraña González <teleyinex AT gmail DOT com>
|
|
|
|
#ifdef _WIN32
|
|
#include "boinc_win.h"
|
|
#include "win_util.h"
|
|
#else
|
|
#include <vector>
|
|
#include <sys/wait.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <stdio.h>
|
|
#include <cmath>
|
|
#include <string>
|
|
#include <sstream>
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include "version.h"
|
|
#include "boinc_api.h"
|
|
#include "graphics2.h"
|
|
#include "diagnostics.h"
|
|
#include "filesys.h"
|
|
#include "md5_file.h"
|
|
#include "parse.h"
|
|
#include "str_util.h"
|
|
#include "str_replace.h"
|
|
#include "util.h"
|
|
#include "error_numbers.h"
|
|
#include "procinfo.h"
|
|
#include "floppyio.h"
|
|
#include "vboxlogging.h"
|
|
#include "vboxcheckpoint.h"
|
|
#include "vboxwrapper.h"
|
|
#include "vbox_common.h"
|
|
#ifdef _WIN32
|
|
#include "vbox_mscom42.h"
|
|
#include "vbox_mscom43.h"
|
|
#include "vbox_mscom50.h"
|
|
#include "vbox_mscom51.h"
|
|
#endif
|
|
#include "vbox_vboxmanage.h"
|
|
|
|
|
|
using std::vector;
|
|
using std::string;
|
|
|
|
bool shared_file_exists(std::string& filename) {
|
|
char path[MAXPATHLEN];
|
|
sprintf(path, "shared/%s", filename.c_str());
|
|
if (filename.size() && boinc_file_exists(path)) return true;
|
|
return false;
|
|
}
|
|
|
|
// Delete 'filename' from the "shared" directory via boinc_delete_file().
// The result of the delete is not reported to the caller.
//
// The path is assembled in a std::string rather than a fixed-size buffer,
// so an overly long name can no longer overrun the stack.
//
void shared_delete_file(std::string& filename) {
    const std::string path = std::string("shared/") + filename;
    boinc_delete_file(path.c_str());
}
|
|
|
|
// stat() the file 'filename' inside the "shared" directory.
// Fills in *stat_file and returns whatever stat() returns
// (0 on success, non-zero with errno set on failure).
//
// The path is assembled in a std::string rather than a fixed-size buffer,
// so an overly long name makes stat() fail instead of overrunning the stack.
//
int shared_stat(std::string& filename, struct stat* stat_file) {
    const std::string path = std::string("shared/") + filename;
    return stat(path.c_str(), stat_file);
}
|
|
|
|
// Read the most recent fraction-done value written to
// shared/<vm.fraction_done_filename>.
//
// Returns false (leaving frac_done untouched) if the file cannot be opened.
// Otherwise stores the last parseable number found near the end of the
// file, clamped to [0, 1], in frac_done and returns true.
// If nothing could be parsed, frac_done is set to 0.
//
// The path is assembled in a std::string rather than a fixed-size buffer,
// so an overly long file name can no longer overrun the stack.
//
bool read_fraction_done(double& frac_done, VBOX_VM& vm) {
    char buf[256];
    double temp, frac = 0;

    const std::string path = std::string("shared/") + vm.fraction_done_filename.c_str();
    FILE* f = fopen(path.c_str(), "r");
    if (!f) return false;

    // read the last line of the file
    //
    // Only the final 32 bytes are examined. If the seek fails (for example
    // because the file is shorter than that) its result is ignored and
    // reading simply continues from the current position.
    //
    fseek(f, -32, SEEK_END);
    while (!feof(f)) {
        char* p = fgets(buf, sizeof(buf), f);
        if (p == NULL) break;
        int n = sscanf(buf, "%lf", &temp);
        // later lines override earlier ones, so the last value wins
        if (n == 1) frac = temp;
    }
    fclose(f);

    if (frac < 0) {
        frac = 0;
    }
    if (frac > 1) {
        frac = 1;
    }

    frac_done = frac;
    return true;
}
|
|
|
|
void read_completion_file_info(unsigned long& exit_code, bool& is_notice, string& message, VBOX_VM& vm) {
|
|
char path[MAXPATHLEN];
|
|
char buf[1024];
|
|
|
|
exit_code = 0;
|
|
message = "";
|
|
|
|
sprintf(path, "shared/%s", vm.completion_trigger_file.c_str());
|
|
FILE* f = fopen(path, "r");
|
|
if (f) {
|
|
if (fgets(buf, 1024, f) != NULL) {
|
|
exit_code = atoi(buf);
|
|
}
|
|
if (fgets(buf, 1024, f) != NULL) {
|
|
is_notice = atoi(buf) != 0;
|
|
}
|
|
while (fgets(buf, 1024, f) != NULL) {
|
|
message += buf;
|
|
}
|
|
fclose(f);
|
|
}
|
|
}
|
|
|
|
void read_temporary_exit_file_info(int& temp_delay, bool& is_notice, string& message, VBOX_VM& vm) {
|
|
char path[MAXPATHLEN];
|
|
char buf[1024];
|
|
|
|
temp_delay = 0;
|
|
message = "";
|
|
|
|
sprintf(path, "shared/%s", vm.temporary_exit_trigger_file.c_str());
|
|
FILE* f = fopen(path, "r");
|
|
if (f) {
|
|
if (fgets(buf, 1024, f) != NULL) {
|
|
temp_delay = atoi(buf);
|
|
}
|
|
if (fgets(buf, 1024, f) != NULL) {
|
|
is_notice = atoi(buf) != 0;
|
|
}
|
|
while (fgets(buf, 1024, f) != NULL) {
|
|
message += buf;
|
|
}
|
|
fclose(f);
|
|
}
|
|
}
|
|
|
|
// set CPU and network throttling if needed
|
|
//
|
|
void set_throttles(APP_INIT_DATA& aid, VBOX_VM& vm) {
|
|
double x = 0, y = 0;
|
|
|
|
// VirtualBox freaks out if the CPU Usage value is too low to actually
|
|
// do any processing. It probably wouldn't be so bad if the RDP interface
|
|
// didn't also get hosed by it.
|
|
//
|
|
x = aid.global_prefs.cpu_usage_limit;
|
|
// 0 means "no limit"
|
|
//
|
|
if (x == 0.0) x = 100;
|
|
// For now set the minimum CPU Usage value to 1.
|
|
//
|
|
if (x < 1) x = 1;
|
|
vm.set_cpu_usage((int)x);
|
|
|
|
// vbox doesn't distinguish up and down bandwidth; use the min of the prefs
|
|
//
|
|
x = aid.global_prefs.max_bytes_sec_up;
|
|
y = aid.global_prefs.max_bytes_sec_down;
|
|
if (y) {
|
|
if (!x || y < x) {
|
|
x = y;
|
|
}
|
|
}
|
|
if (x) {
|
|
vm.set_network_usage((int)(x/1024));
|
|
}
|
|
|
|
}
|
|
|
|
// If the Floppy device has been specified, initialize its state so that
|
|
// it contains the contents of the init_data.xml file.
|
|
// In theory this would allow network enabled VMs to know about
|
|
// proxy server configurations either specified by the volunteer
|
|
// or automatically detected by the client.
|
|
//
|
|
// CERN decided they only needed a small subset of the data and changed the
|
|
// data format to 'name=value\n' pairs. So if we are running under their
|
|
// environment set things up accordingly.
|
|
//
|
|
void set_floppy_image(APP_INIT_DATA& aid, VBOX_VM& vm) {
|
|
int retval;
|
|
char buf[256];
|
|
std::string scratch;
|
|
|
|
if (vm.enable_floppyio) {
|
|
scratch = "";
|
|
if (!vm.enable_cern_dataformat) {
|
|
retval = read_file_string(INIT_DATA_FILE, scratch);
|
|
if (retval) {
|
|
vboxlog_msg("WARNING: Cannot write init_data.xml to floppy abstration device");
|
|
}
|
|
} else {
|
|
// Per: https://github.com/stig/json-framework/issues/48
|
|
//
|
|
// Use %.17g to represent doubles
|
|
//
|
|
scratch = "BOINC_USERNAME=" + string(aid.user_name) + "\n";
|
|
scratch += "BOINC_AUTHENTICATOR=" + string(aid.authenticator) + "\n";
|
|
|
|
sprintf(buf, "%d", aid.userid);
|
|
scratch += "BOINC_USERID=" + string(buf) + "\n";
|
|
|
|
sprintf(buf, "%d", aid.hostid);
|
|
scratch += "BOINC_HOSTID=" + string(buf) + "\n";
|
|
|
|
sprintf(buf, "%.17g", aid.user_total_credit);
|
|
scratch += "BOINC_USER_TOTAL_CREDIT=" + string(buf) + "\n";
|
|
|
|
sprintf(buf, "%.17g", aid.host_total_credit);
|
|
scratch += "BOINC_HOST_TOTAL_CREDIT=" + string(buf) + "\n";
|
|
}
|
|
vm.write_floppy(scratch);
|
|
}
|
|
}
|
|
|
|
// if there's a port for web graphics, tell the client about it
|
|
//
|
|
void report_web_graphics_url(VBOX_VM& vm) {
|
|
char buf[256];
|
|
if (vm.pf_host_port && !boinc_file_exists("graphics_app")) {
|
|
sprintf(buf, "http://localhost:%d", vm.pf_host_port);
|
|
vboxlog_msg("Detected: Web Application Enabled (%s)", buf);
|
|
boinc_web_graphics_url(buf);
|
|
}
|
|
}
|
|
|
|
// set remote desktop information if needed
|
|
//
|
|
void report_remote_desktop_info(VBOX_VM& vm) {
|
|
char buf[256];
|
|
if (vm.rd_host_port) {
|
|
sprintf(buf, "localhost:%d", vm.rd_host_port);
|
|
vboxlog_msg("Detected: Remote Desktop Enabled (%s)", buf);
|
|
boinc_remote_desktop_addr(buf);
|
|
}
|
|
}
|
|
|
|
// check for trickle trigger files, and send trickles if find them.
|
|
//
|
|
void check_trickle_triggers(VBOX_VM& vm) {
|
|
int retval;
|
|
char filename[256], path[MAXPATHLEN];
|
|
std::string text;
|
|
for (unsigned int i=0; i<vm.trickle_trigger_files.size(); i++) {
|
|
strcpy(filename, vm.trickle_trigger_files[i].c_str());
|
|
sprintf(path, "shared/%s", filename);
|
|
if (!boinc_file_exists(path)) continue;
|
|
vboxlog_msg("Reporting a trickle. (%s)", filename);
|
|
retval = read_file_string(path, text);
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: can't read trickle trigger file %s", filename);
|
|
} else {
|
|
retval = boinc_send_trickle_up(
|
|
filename, const_cast<char*>(text.c_str())
|
|
);
|
|
if (retval) {
|
|
vboxlog_msg("boinc_send_trickle_up() failed: %s (%d)", boincerror(retval), retval);
|
|
}
|
|
}
|
|
boinc_delete_file(path);
|
|
}
|
|
}
|
|
|
|
// check for intermediate upload files, and send them if found.
|
|
//
|
|
void check_intermediate_uploads(VBOX_VM& vm) {
|
|
int retval;
|
|
char filename[256], path[MAXPATHLEN];
|
|
for (unsigned int i=0; i<vm.intermediate_upload_files.size(); i++) {
|
|
strcpy(filename, vm.intermediate_upload_files[i].file.c_str());
|
|
sprintf(path, "shared/%s", filename);
|
|
if (!boinc_file_exists(path)) continue;
|
|
if (!vm.intermediate_upload_files[i].reported && !vm.intermediate_upload_files[i].ignore) {
|
|
vboxlog_msg("Reporting an intermediate file. (%s)", vm.intermediate_upload_files[i].file.c_str());
|
|
retval = boinc_upload_file(vm.intermediate_upload_files[i].file);
|
|
if (retval) {
|
|
vboxlog_msg("boinc_upload_file() failed: %s", boincerror(retval));
|
|
vm.intermediate_upload_files[i].ignore = true;
|
|
} else {
|
|
vm.intermediate_upload_files[i].reported = true;
|
|
}
|
|
} else if (vm.intermediate_upload_files[i].reported && !vm.intermediate_upload_files[i].ignore) {
|
|
retval = boinc_upload_status(vm.intermediate_upload_files[i].file);
|
|
if (!retval) {
|
|
vboxlog_msg("Intermediate file uploaded. (%s)", vm.intermediate_upload_files[i].file.c_str());
|
|
vm.intermediate_upload_files[i].ignore = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// see if it's time to send trickle-up reporting elapsed time
|
|
//
|
|
void check_trickle_period(double& elapsed_time, double& trickle_period) {
|
|
char buf[256];
|
|
static double last_trickle_report_time = 0;
|
|
|
|
if ((elapsed_time - last_trickle_report_time) < trickle_period) {
|
|
return;
|
|
}
|
|
last_trickle_report_time = elapsed_time;
|
|
vboxlog_msg("Status Report: Trickle-Up Event.");
|
|
sprintf(buf,
|
|
"<cpu_time>%f</cpu_time>", last_trickle_report_time
|
|
);
|
|
int retval = boinc_send_trickle_up(
|
|
const_cast<char*>("cpu_time"), buf
|
|
);
|
|
if (retval) {
|
|
vboxlog_msg("Sending Trickle-Up Event failed (%d).", retval);
|
|
}
|
|
}
|
|
|
|
int main(int argc, char** argv) {
|
|
int retval = 0;
|
|
int loop_iteration = 0;
|
|
BOINC_OPTIONS boinc_options;
|
|
APP_INIT_DATA aid;
|
|
VBOX_CHECKPOINT checkpoint;
|
|
VBOX_VM* pVM = NULL;
|
|
double desired_checkpoint_interval = 0;
|
|
double random_checkpoint_factor = 0;
|
|
double elapsed_time = 0;
|
|
double fraction_done = 0;
|
|
double trickle_period = 0;
|
|
double current_cpu_time = 0;
|
|
double starting_cpu_time = 0;
|
|
double last_heartbeat_elapsed_time = 0;
|
|
double last_checkpoint_cpu_time = 0;
|
|
double last_checkpoint_elapsed_time = 0;
|
|
double last_status_report_time = 0;
|
|
double stopwatch_starttime = 0;
|
|
double stopwatch_endtime = 0;
|
|
double stopwatch_elapsedtime = 0;
|
|
double sleep_time = 0;
|
|
double bytes_sent = 0;
|
|
double bytes_received = 0;
|
|
double ncpus = 0;
|
|
double memory_size_mb = 0;
|
|
double timeout = 0.0;
|
|
bool report_net_usage = false;
|
|
bool initial_heartbeat_check = true;
|
|
double net_usage_timer = 600;
|
|
int vm_image = 0;
|
|
unsigned long vm_exit_code = 0;
|
|
bool is_notice = false;
|
|
int temp_delay = 86400;
|
|
time_t last_heartbeat_mod_time = 0;
|
|
string message;
|
|
string scratch_dir;
|
|
char buf[256];
|
|
|
|
// Initialize diagnostics system
|
|
//
|
|
boinc_init_diagnostics(BOINC_DIAG_DEFAULTS);
|
|
|
|
// Configure BOINC Runtime System environment
|
|
//
|
|
memset(&boinc_options, 0, sizeof(boinc_options));
|
|
boinc_options.main_program = true;
|
|
boinc_options.check_heartbeat = true;
|
|
boinc_options.handle_process_control = true;
|
|
boinc_init_options(&boinc_options);
|
|
|
|
// Log banner
|
|
//
|
|
vboxlog_msg("vboxwrapper (%d.%d.%d): starting", BOINC_MAJOR_VERSION, BOINC_MINOR_VERSION, VBOXWRAPPER_RELEASE);
|
|
|
|
// Initialize system services
|
|
//
|
|
#ifdef _WIN32
|
|
CoInitialize(NULL);
|
|
#ifdef USE_WINSOCK
|
|
WSADATA wsdata;
|
|
retval = WSAStartup( MAKEWORD( 1, 1 ), &wsdata);
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: Cannot initialize winsock: %d", retval);
|
|
boinc_finish(retval);
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
// Prepare environment for detecting system conditions
|
|
//
|
|
boinc_parse_init_data_file();
|
|
boinc_get_init_data(aid);
|
|
|
|
#ifdef _WIN32
|
|
// Determine what version of VirtualBox we are using via the registry. Use a
|
|
// namespace specific version of the function because VirtualBox has been known
|
|
// to change the registry location from time to time.
|
|
//
|
|
// NOTE: We cannot use COM to automatically detect which interfaces are installed
|
|
// on the machine because it will attempt to launch the 'vboxsvc' process
|
|
// without our environment variable changes and muck everything up.
|
|
//
|
|
string vbox_version_raw;
|
|
string vbox_version_display;
|
|
int vbox_major = 0, vbox_minor = 0;
|
|
|
|
if (BOINC_SUCCESS != vbox42::VBOX_VM::get_version_information(vbox_version_raw, vbox_version_display)) {
|
|
if (BOINC_SUCCESS != vbox43::VBOX_VM::get_version_information(vbox_version_raw, vbox_version_display)) {
|
|
if (BOINC_SUCCESS != vbox50::VBOX_VM::get_version_information(vbox_version_raw, vbox_version_display)) {
|
|
vbox51::VBOX_VM::get_version_information(vbox_version_raw, vbox_version_display);
|
|
}
|
|
}
|
|
}
|
|
if (!vbox_version_raw.empty()) {
|
|
sscanf(vbox_version_raw.c_str(), "%d.%d", &vbox_major, &vbox_minor);
|
|
if ((4 == vbox_major) && (2 == vbox_minor)) {
|
|
pVM = (VBOX_VM*) new vbox42::VBOX_VM();
|
|
}
|
|
if ((4 == vbox_major) && (3 == vbox_minor)) {
|
|
pVM = (VBOX_VM*) new vbox43::VBOX_VM();
|
|
}
|
|
if ((5 == vbox_major) && (0 == vbox_minor)) {
|
|
pVM = (VBOX_VM*) new vbox50::VBOX_VM();
|
|
}
|
|
if ((5 == vbox_major) && (1 <= vbox_minor)) {
|
|
pVM = (VBOX_VM*) new vbox51::VBOX_VM();
|
|
}
|
|
if (pVM) {
|
|
retval = pVM->initialize();
|
|
if (retval) {
|
|
delete pVM;
|
|
pVM = NULL;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
// Initialize VM Hypervisor
|
|
//
|
|
if (!pVM) {
|
|
pVM = (VBOX_VM*) new vboxmanage::VBOX_VM();
|
|
retval = pVM->initialize();
|
|
if (retval) {
|
|
vboxlog_msg("Could not detect VM Hypervisor. Rescheduling execution for a later date.");
|
|
pVM->dump_hypervisor_logs(true);
|
|
boinc_temporary_exit(86400, "Detection of VM Hypervisor failed.");
|
|
}
|
|
}
|
|
|
|
// Parse command line parameters
|
|
//
|
|
for (int i=1; i<argc; i++) {
|
|
if (!strcmp(argv[i], "--trickle")) {
|
|
trickle_period = atof(argv[++i]);
|
|
}
|
|
if (!strcmp(argv[i], "--nthreads")) {
|
|
ncpus = atof(argv[++i]);
|
|
}
|
|
if (!strcmp(argv[i], "--memory_size_mb")) {
|
|
memory_size_mb = atof(argv[++i]);
|
|
}
|
|
if (!strcmp(argv[i], "--vmimage")) {
|
|
vm_image = atoi(argv[++i]);
|
|
}
|
|
if (!strcmp(argv[i], "--register_only")) {
|
|
pVM->register_only = true;
|
|
}
|
|
}
|
|
|
|
// Choose a random interleave value for checkpoint intervals to stagger disk I/O.
|
|
//
|
|
struct stat vm_image_stat;
|
|
if (stat(IMAGE_FILENAME_COMPLETE, &vm_image_stat)) {
|
|
// Error
|
|
srand((int)time(NULL));
|
|
} else {
|
|
srand((int)(vm_image_stat.st_mtime * time(NULL)));
|
|
}
|
|
random_checkpoint_factor = (double)(((int)(drand() * 100000.0)) % 600);
|
|
|
|
vboxlog_msg("Feature: Checkpoint interval offset (%d seconds)", (int)random_checkpoint_factor);
|
|
|
|
// Display trickle value if specified
|
|
//
|
|
if (trickle_period > 0.0) {
|
|
vboxlog_msg("Feature: Enabling trickle-ups (Interval: %f)", trickle_period);
|
|
}
|
|
|
|
// Check for architecture incompatibilities
|
|
//
|
|
#if defined(_WIN32) && defined(_M_IX86)
|
|
if (strstr(aid.host_info.os_version, "x64")) {
|
|
vboxlog_msg("64-bit version of BOINC is required, please upgrade. Rescheduling execution for a later date.");
|
|
boinc_temporary_exit(86400, "Architecture incompatibility detected.");
|
|
}
|
|
#endif
|
|
|
|
// Record what version of VirtualBox was used.
|
|
//
|
|
if (!pVM->virtualbox_version_display.empty()) {
|
|
vboxlog_msg("Detected: %s", pVM->virtualbox_version_display.c_str());
|
|
}
|
|
|
|
// Record if anonymous platform was used.
|
|
//
|
|
if (boinc_file_exists((std::string(aid.project_dir) + std::string("/app_info.xml")).c_str())) {
|
|
vboxlog_msg("Detected: Anonymous Platform Enabled");
|
|
}
|
|
|
|
// Record if the sandboxed configuration is going to be used.
|
|
//
|
|
if (aid.using_sandbox) {
|
|
vboxlog_msg("Detected: Sandbox Configuration Enabled");
|
|
}
|
|
|
|
// Record which mode VirtualBox should be started in.
|
|
//
|
|
if (aid.vbox_window || boinc_is_standalone()) {
|
|
vboxlog_msg("Detected: Headless Mode Disabled");
|
|
pVM->headless = false;
|
|
}
|
|
|
|
// Check for invalid configurations.
|
|
//
|
|
if (aid.using_sandbox && aid.vbox_window) {
|
|
vboxlog_msg("Invalid configuration detected.");
|
|
vboxlog_msg("NOTE: BOINC cannot be installed as a service and run VirtualBox in headfull mode at the same time.");
|
|
boinc_temporary_exit(86400, "Incompatible configuration detected.");
|
|
}
|
|
|
|
// Check against known incompatible versions of VirtualBox.
|
|
// VirtualBox 4.2.6 crashes during snapshot operations
|
|
// and 4.2.18 fails to restore from snapshots properly.
|
|
//
|
|
if ((pVM->virtualbox_version_raw.find("4.2.6") != std::string::npos) ||
|
|
(pVM->virtualbox_version_raw.find("4.2.18") != std::string::npos) ||
|
|
(pVM->virtualbox_version_raw.find("4.3.0") != std::string::npos) ) {
|
|
vboxlog_msg("Incompatible version of VirtualBox detected. Please upgrade to a later version.");
|
|
boinc_temporary_exit(86400,
|
|
"Incompatible version of VirtualBox detected; please upgrade.",
|
|
true
|
|
);
|
|
}
|
|
|
|
// Check to see if the system is in a state in which we expect to be able to run
|
|
// VirtualBox successfully. Sometimes the system is in a weird state after a
|
|
// reboot and the system needs a little bit of time.
|
|
//
|
|
if (!pVM->is_system_ready(message)) {
|
|
vboxlog_msg("Could not communicate with VM Hypervisor. Rescheduling execution for a later date.");
|
|
boinc_temporary_exit(86400, message.c_str());
|
|
}
|
|
|
|
// Parse Job File
|
|
//
|
|
retval = pVM->parse();
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: Cannot parse job file: %d", retval);
|
|
boinc_finish(retval);
|
|
}
|
|
|
|
// Record what the minimum checkpoint interval is.
|
|
//
|
|
vboxlog_msg("Detected: Minimum checkpoint interval (%f seconds)", pVM->minimum_checkpoint_interval);
|
|
|
|
// Record what the minimum heartbeat interval is.
|
|
//
|
|
if (pVM->heartbeat_filename.size()) {
|
|
vboxlog_msg("Detected: Heartbeat check (file: '%s' every %f seconds)", pVM->heartbeat_filename.c_str(), pVM->minimum_heartbeat_interval);
|
|
}
|
|
|
|
// Validate whatever configuration options we can
|
|
//
|
|
if (pVM->enable_shared_directory) {
|
|
pVM->get_scratch_directory(scratch_dir);
|
|
if (boinc_file_exists("shared")) {
|
|
if (!is_dir("shared")) {
|
|
vboxlog_msg("ERROR: 'shared' exists but is not a directory.");
|
|
}
|
|
} else {
|
|
retval = boinc_mkdir("shared");
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: couldn't create shared directory: %s.", boincerror(retval));
|
|
}
|
|
}
|
|
if (boinc_file_exists(scratch_dir.c_str())) {
|
|
if (!is_dir(scratch_dir.c_str())) {
|
|
vboxlog_msg("ERROR: 'scratch' exists but is not a directory.");
|
|
}
|
|
} else {
|
|
retval = boinc_mkdir(scratch_dir.c_str());
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: couldn't create scratch directory: %s.", boincerror(retval));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Copy files to the shared directory
|
|
//
|
|
if (pVM->enable_shared_directory && pVM->copy_to_shared.size()) {
|
|
for (vector<string>::iterator iter = pVM->copy_to_shared.begin(); iter != pVM->copy_to_shared.end(); ++iter) {
|
|
string source = *iter;
|
|
string destination = string("shared/") + *iter;
|
|
if (!boinc_file_exists(destination.c_str())) {
|
|
if (!boinc_copy(source.c_str(), destination.c_str())) {
|
|
vboxlog_msg("Successfully copied '%s' to the shared directory.", source.c_str());
|
|
} else {
|
|
vboxlog_msg("Failed to copy '%s' to the shared directory.", source.c_str());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Configure Instance specific VM Parameters
|
|
//
|
|
pVM->vm_master_name = "boinc_";
|
|
pVM->image_filename = IMAGE_FILENAME_COMPLETE;
|
|
if (boinc_is_standalone()) {
|
|
pVM->vm_master_name += "standalone";
|
|
pVM->vm_master_description = "standalone";
|
|
if (pVM->enable_floppyio) {
|
|
sprintf(buf, "%s.%s",
|
|
FLOPPY_IMAGE_FILENAME, FLOPPY_IMAGE_FILENAME_EXTENSION
|
|
);
|
|
pVM->floppy_image_filename = buf;
|
|
}
|
|
} else {
|
|
pVM->vm_master_name += md5_string(std::string(aid.result_name)).substr(0, 16);
|
|
pVM->vm_master_description = aid.result_name;
|
|
if (vm_image) {
|
|
sprintf(buf, "%s_%d.%s",
|
|
IMAGE_FILENAME, vm_image, IMAGE_FILENAME_EXTENSION
|
|
);
|
|
pVM->image_filename = buf;
|
|
}
|
|
if (pVM->enable_floppyio) {
|
|
sprintf(buf, "%s_%d.%s",
|
|
FLOPPY_IMAGE_FILENAME, aid.slot,
|
|
FLOPPY_IMAGE_FILENAME_EXTENSION
|
|
);
|
|
pVM->floppy_image_filename = buf;
|
|
}
|
|
}
|
|
if (pVM->enable_cache_disk) {
|
|
pVM->cache_disk_filename = CACHE_DISK_FILENAME;
|
|
}
|
|
if (pVM->enable_isocontextualization) {
|
|
pVM->iso_image_filename = ISO_IMAGE_FILENAME;
|
|
}
|
|
if (aid.ncpus > 1.0 || ncpus > 1.0) {
|
|
if (ncpus > 32.0) {
|
|
vboxlog_msg("WARNING: Virtualbox only allows up to 32 processors to be allocated to a VM, resetting to 32. (%f allocated)", ncpus);
|
|
ncpus = 32.0;
|
|
}
|
|
if (ncpus) {
|
|
sprintf(buf, "%d", (int)ceil(ncpus));
|
|
} else {
|
|
sprintf(buf, "%d", (int)ceil(aid.ncpus));
|
|
}
|
|
pVM->vm_cpu_count = buf;
|
|
} else {
|
|
pVM->vm_cpu_count = "1";
|
|
}
|
|
if (pVM->memory_size_mb > 1.0 || memory_size_mb > 1.0) {
|
|
if (memory_size_mb) {
|
|
sprintf(buf, "%d", (int)ceil(memory_size_mb));
|
|
} else {
|
|
sprintf(buf, "%d", (int)ceil(pVM->memory_size_mb));
|
|
}
|
|
}
|
|
if (aid.vbox_window && !aid.using_sandbox) {
|
|
pVM->headless = false;
|
|
}
|
|
|
|
// Restore from checkpoint
|
|
//
|
|
checkpoint.parse();
|
|
pVM->pf_host_port = checkpoint.webapi_port;
|
|
pVM->rd_host_port = checkpoint.remote_desktop_port;
|
|
elapsed_time = checkpoint.elapsed_time;
|
|
starting_cpu_time = checkpoint.cpu_time;
|
|
current_cpu_time = starting_cpu_time;
|
|
last_checkpoint_elapsed_time = elapsed_time;
|
|
last_heartbeat_elapsed_time = elapsed_time;
|
|
last_checkpoint_cpu_time = starting_cpu_time;
|
|
|
|
// Should we even try to start things up?
|
|
//
|
|
if (pVM->job_duration && (elapsed_time > pVM->job_duration)) {
|
|
return EXIT_TIME_LIMIT_EXCEEDED;
|
|
}
|
|
|
|
retval = pVM->run(current_cpu_time > 0);
|
|
if (retval) {
|
|
// All 'failure to start' errors are unrecoverable by default
|
|
bool unrecoverable_error = true;
|
|
bool skip_cleanup = false;
|
|
bool do_dump_hypervisor_logs = false;
|
|
string error_reason;
|
|
const char* temp_reason = "";
|
|
|
|
if (VBOXWRAPPER_ERR_RECOVERABLE == retval) {
|
|
error_reason =
|
|
" BOINC will be notified that it needs to clean up the environment.\n"
|
|
" This is a temporary problem and so this job will be rescheduled for another time.\n";
|
|
unrecoverable_error = false;
|
|
temp_reason = "VM environment needed to be cleaned up.";
|
|
} else if (ERR_NOT_EXITED == retval) {
|
|
error_reason =
|
|
" NOTE: VM was already running.\n"
|
|
" BOINC will be notified that it needs to clean up the environment.\n"
|
|
" This might be a temporary problem and so this job will be rescheduled for another time.\n";
|
|
unrecoverable_error = false;
|
|
temp_reason = "VM environment needed to be cleaned up.";
|
|
} else if (ERR_INVALID_PARAM == retval) {
|
|
unrecoverable_error = false;
|
|
temp_reason = "Please upgrade BOINC to the latest version.";
|
|
temp_delay = 86400;
|
|
} else if (retval == (int)RPC_S_SERVER_UNAVAILABLE) {
|
|
error_reason =
|
|
" VboxSvc crashed while attempting to restore the current snapshot. This is a critical\n"
|
|
" operation and this job cannot be recovered.\n";
|
|
skip_cleanup = true;
|
|
retval = ERR_EXEC;
|
|
} else if (retval == (int)VBOX_E_INVALID_OBJECT_STATE) {
|
|
error_reason =
|
|
" NOTE: VM session lock error encountered.\n"
|
|
" BOINC will be notified that it needs to clean up the environment.\n"
|
|
" This might be a temporary problem and so this job will be rescheduled for another time.\n";
|
|
unrecoverable_error = false;
|
|
temp_reason = "VM environment needed to be cleaned up.";
|
|
} else {
|
|
do_dump_hypervisor_logs = true;
|
|
}
|
|
|
|
if (unrecoverable_error) {
|
|
// Attempt to cleanup the VM and exit.
|
|
if (!skip_cleanup) {
|
|
pVM->cleanup();
|
|
}
|
|
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
|
|
if (error_reason.size()) {
|
|
vboxlog_msg("\n%s", error_reason.c_str());
|
|
}
|
|
|
|
if (do_dump_hypervisor_logs) {
|
|
pVM->dump_hypervisor_logs(true);
|
|
}
|
|
|
|
boinc_finish(retval);
|
|
} else {
|
|
// if the VM is already running notify BOINC about the process ID so it can
|
|
// clean up the environment. We should be safe to run after that.
|
|
//
|
|
if (pVM->vm_pid) {
|
|
retval = boinc_report_app_status_aux(
|
|
current_cpu_time,
|
|
last_checkpoint_cpu_time,
|
|
fraction_done,
|
|
pVM->vm_pid,
|
|
bytes_sent,
|
|
bytes_received
|
|
);
|
|
}
|
|
|
|
// Give the BOINC API time to report the pid to BOINC.
|
|
//
|
|
boinc_sleep(5.0);
|
|
|
|
if (error_reason.size()) {
|
|
vboxlog_msg("\n%s", error_reason.c_str());
|
|
}
|
|
|
|
if (do_dump_hypervisor_logs) {
|
|
pVM->dump_hypervisor_logs(true);
|
|
}
|
|
|
|
// Exit and let BOINC clean up the rest.
|
|
//
|
|
boinc_temporary_exit(temp_delay, temp_reason);
|
|
}
|
|
}
|
|
|
|
// Report the VM pid to BOINC so BOINC can deal with it when needed.
|
|
//
|
|
vboxlog_msg("Reporting VM Process ID to BOINC.");
|
|
retval = boinc_report_app_status_aux(
|
|
current_cpu_time,
|
|
last_checkpoint_cpu_time,
|
|
fraction_done,
|
|
pVM->vm_pid,
|
|
bytes_sent,
|
|
bytes_received
|
|
);
|
|
|
|
// Wait for up to 5 minutes for the VM to switch states.
|
|
// A system under load can take a while.
|
|
// Since the poll function can wait for up to 60 seconds
|
|
// to execute a command we need to make this time based instead
|
|
// of iteration based.
|
|
//
|
|
timeout = dtime() + 300;
|
|
do {
|
|
pVM->poll(false);
|
|
if (pVM->online && !pVM->restoring) break;
|
|
boinc_sleep(1.0);
|
|
} while (timeout >= dtime());
|
|
|
|
// Lower the VM process priority after it has successfully brought itself online.
|
|
//
|
|
pVM->lower_vm_process_priority();
|
|
|
|
// Log our current state
|
|
pVM->poll(true);
|
|
|
|
// Is the VM still running? If not, why not?
|
|
//
|
|
if (!pVM->online) {
|
|
// All 'failure to start' errors are unrecoverable by default
|
|
bool unrecoverable_error = true;
|
|
bool skip_cleanup = false;
|
|
bool do_dump_hypervisor_logs = false;
|
|
string error_reason;
|
|
const char* temp_reason = "";
|
|
|
|
if (pVM->is_logged_failure_vm_extensions_disabled()) {
|
|
error_reason =
|
|
" NOTE: BOINC has detected that your computer's processor supports hardware acceleration for\n"
|
|
" virtual machines but the hypervisor failed to successfully launch with this feature enabled.\n"
|
|
" This means that the hardware acceleration feature has been disabled in the computer's BIOS.\n"
|
|
" Please enable this feature in your computer's BIOS.\n"
|
|
" Intel calls it 'VT-x'\n"
|
|
" AMD calls it 'AMD-V'\n"
|
|
" More information can be found here: https://en.wikipedia.org/wiki/X86_virtualization\n"
|
|
" Error Code: ERR_CPU_VM_EXTENSIONS_DISABLED\n";
|
|
retval = ERR_EXEC;
|
|
} else if (pVM->is_logged_failure_vm_extensions_not_supported()) {
|
|
error_reason =
|
|
" NOTE: VirtualBox has reported an improperly configured virtual machine. It was configured to require\n"
|
|
" hardware acceleration for virtual machines, but your processor does not support the required feature.\n"
|
|
" Please report this issue to the project so that it can be addresssed.\n";
|
|
} else if (pVM->is_logged_failure_vm_extensions_in_use()) {
|
|
error_reason =
|
|
" NOTE: VirtualBox hypervisor reports that another hypervisor has locked the hardware acceleration\n"
|
|
" for virtual machines feature in exclusive mode.\n";
|
|
unrecoverable_error = false;
|
|
temp_reason = "Forign VM Hypervisor locked hardware acceleration features.";
|
|
temp_delay = 86400;
|
|
} else if (pVM->is_logged_failure_host_out_of_memory()) {
|
|
error_reason =
|
|
" NOTE: VirtualBox has failed to allocate enough memory to start the configured virtual machine.\n"
|
|
" This might be a temporary problem and so this job will be rescheduled for another time.\n";
|
|
unrecoverable_error = false;
|
|
temp_reason = "VM Hypervisor was unable to allocate enough memory to start VM.";
|
|
} else if (timeout <= dtime()) {
|
|
error_reason =
|
|
" NOTE: VM failed to enter an online state within the timeout period.\n"
|
|
" This might be a temporary problem and so this job will be rescheduled for another time.\n";
|
|
unrecoverable_error = false;
|
|
do_dump_hypervisor_logs = true;
|
|
temp_reason = "VM Hypervisor failed to enter an online state in a timely fashion.";
|
|
temp_delay = 86400;
|
|
}
|
|
|
|
if (unrecoverable_error) {
|
|
// Attempt to cleanup the VM and exit.
|
|
if (!skip_cleanup) {
|
|
pVM->cleanup();
|
|
}
|
|
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
|
|
if (error_reason.size()) {
|
|
vboxlog_msg("\n%s", error_reason.c_str());
|
|
}
|
|
|
|
if (do_dump_hypervisor_logs) {
|
|
pVM->dump_hypervisor_logs(true);
|
|
}
|
|
|
|
boinc_finish(retval);
|
|
} else {
|
|
// if the VM is already running notify BOINC about the process ID so it can
|
|
// clean up the environment. We should be safe to run after that.
|
|
//
|
|
if (pVM->vm_pid) {
|
|
retval = boinc_report_app_status_aux(
|
|
current_cpu_time,
|
|
last_checkpoint_cpu_time,
|
|
fraction_done,
|
|
pVM->vm_pid,
|
|
bytes_sent,
|
|
bytes_received
|
|
);
|
|
}
|
|
|
|
// Give the BOINC API time to report the pid to BOINC.
|
|
//
|
|
boinc_sleep(5.0);
|
|
|
|
if (error_reason.size()) {
|
|
vboxlog_msg("\n%s", error_reason.c_str());
|
|
}
|
|
|
|
if (do_dump_hypervisor_logs) {
|
|
pVM->dump_hypervisor_logs(true);
|
|
}
|
|
|
|
// Exit and let BOINC clean up the rest.
|
|
//
|
|
boinc_temporary_exit(temp_delay, temp_reason);
|
|
}
|
|
}
|
|
|
|
set_floppy_image(aid, *pVM);
|
|
report_web_graphics_url(*pVM);
|
|
report_remote_desktop_info(*pVM);
|
|
checkpoint.webapi_port = pVM->pf_host_port;
|
|
checkpoint.remote_desktop_port = pVM->rd_host_port;
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
|
|
// Force throttling on our first pass through the loop
|
|
boinc_status.reread_init_data_file = true;
|
|
|
|
while (1) {
|
|
// Begin stopwatch timer
|
|
stopwatch_starttime = dtime();
|
|
loop_iteration += 1;
|
|
|
|
// Discover the VM's current state
|
|
retval = pVM->poll();
|
|
if (retval) {
|
|
vboxlog_msg("ERROR: Vboxwrapper lost communication with VirtualBox, rescheduling task for a later time.");
|
|
pVM->reset_vm_process_priority();
|
|
pVM->poweroff();
|
|
boinc_temporary_exit(86400, "VM job unmanageable, restarting later.");
|
|
}
|
|
|
|
// Write updates for the graphics application's use
|
|
if (pVM->enable_graphics_support) {
|
|
boinc_write_graphics_status(current_cpu_time, elapsed_time, fraction_done);
|
|
}
|
|
|
|
if (boinc_status.no_heartbeat || boinc_status.quit_request) {
|
|
pVM->reset_vm_process_priority();
|
|
if (pVM->enable_vm_savestate_usage) {
|
|
retval = pVM->create_snapshot(elapsed_time);
|
|
if (!retval) {
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
boinc_checkpoint_completed();
|
|
}
|
|
pVM->stop();
|
|
} else {
|
|
pVM->poweroff();
|
|
}
|
|
boinc_temporary_exit(86400);
|
|
}
|
|
if (boinc_status.abort_request) {
|
|
pVM->reset_vm_process_priority();
|
|
pVM->cleanup();
|
|
pVM->dump_hypervisor_logs(true);
|
|
boinc_finish(EXIT_ABORTED_BY_CLIENT);
|
|
}
|
|
if (pVM->heartbeat_filename.size()) {
|
|
if (
|
|
(initial_heartbeat_check && (elapsed_time >= (last_heartbeat_elapsed_time + 600.0))) ||
|
|
(!initial_heartbeat_check && (elapsed_time >= (last_heartbeat_elapsed_time + pVM->minimum_heartbeat_interval)))
|
|
){
|
|
bool should_exit = false;
|
|
struct stat heartbeat_stat;
|
|
|
|
if (!shared_file_exists(pVM->heartbeat_filename)) {
|
|
vboxlog_msg("VM Heartbeat file specified, but missing.");
|
|
should_exit = true;
|
|
}
|
|
|
|
if (shared_stat(pVM->heartbeat_filename, &heartbeat_stat)) {
|
|
// Error
|
|
vboxlog_msg("VM Heartbeat file specified, but missing file system status. (errno = '%d')", errno);
|
|
should_exit = true;
|
|
}
|
|
|
|
if (initial_heartbeat_check) {
|
|
// Force the next check to be successful
|
|
last_heartbeat_mod_time = heartbeat_stat.st_mtime - 1;
|
|
}
|
|
|
|
if (heartbeat_stat.st_mtime > last_heartbeat_mod_time) {
|
|
// Heartbeat successful
|
|
last_heartbeat_mod_time = heartbeat_stat.st_mtime;
|
|
last_heartbeat_elapsed_time = elapsed_time;
|
|
} else {
|
|
vboxlog_msg("VM Heartbeat file specified, but missing heartbeat.");
|
|
should_exit = true;
|
|
}
|
|
|
|
if (should_exit) {
|
|
pVM->reset_vm_process_priority();
|
|
pVM->cleanup();
|
|
pVM->dump_hypervisor_logs(true);
|
|
boinc_finish(EXIT_ABORTED_BY_CLIENT);
|
|
}
|
|
|
|
initial_heartbeat_check = false;
|
|
}
|
|
}
|
|
if (shared_file_exists(pVM->completion_trigger_file)) {
|
|
vboxlog_msg("VM Completion File Detected.");
|
|
read_completion_file_info(vm_exit_code, is_notice, message, *pVM);
|
|
if (message.size()) {
|
|
vboxlog_msg("VM Completion Message: %s.", message.c_str());
|
|
}
|
|
pVM->reset_vm_process_priority();
|
|
pVM->cleanup();
|
|
if (is_notice) {
|
|
boinc_finish_message(vm_exit_code, message.c_str(), is_notice);
|
|
} else {
|
|
boinc_finish(vm_exit_code);
|
|
}
|
|
}
|
|
if (shared_file_exists(pVM->temporary_exit_trigger_file)) {
|
|
vboxlog_msg("VM Temporary Exit File Detected.");
|
|
read_temporary_exit_file_info(temp_delay, is_notice, message, *pVM);
|
|
if (message.size()) {
|
|
vboxlog_msg("VM Temporary Exit Message: %s.", message.c_str());
|
|
}
|
|
shared_delete_file(pVM->temporary_exit_trigger_file);
|
|
pVM->reset_vm_process_priority();
|
|
retval = pVM->create_snapshot(elapsed_time);
|
|
if (!retval) {
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
boinc_checkpoint_completed();
|
|
}
|
|
pVM->poweroff();
|
|
if (is_notice) {
|
|
boinc_temporary_exit(temp_delay, message.c_str(), is_notice);
|
|
} else {
|
|
boinc_temporary_exit(temp_delay);
|
|
}
|
|
}
|
|
if (!pVM->online) {
|
|
// Is this a type of event we can recover from?
|
|
if (pVM->is_logged_failure_host_out_of_memory()) {
|
|
vboxlog_msg("NOTE: VirtualBox has failed to allocate enough memory to continue.");
|
|
vboxlog_msg(" This might be a temporary problem and so this job will be rescheduled for another time.");
|
|
pVM->reset_vm_process_priority();
|
|
pVM->poweroff();
|
|
boinc_temporary_exit(86400, "VM Hypervisor was unable to allocate enough memory.");
|
|
} else {
|
|
pVM->cleanup();
|
|
if (pVM->crashed || (elapsed_time < pVM->job_duration)) {
|
|
vboxlog_msg("VM Premature Shutdown Detected.");
|
|
pVM->dump_hypervisor_logs(true);
|
|
pVM->get_vm_exit_code(vm_exit_code);
|
|
if (vm_exit_code) {
|
|
boinc_finish(vm_exit_code);
|
|
} else {
|
|
boinc_finish(EXIT_ABORTED_BY_CLIENT);
|
|
}
|
|
} else {
|
|
vboxlog_msg("Virtual machine exited.");
|
|
pVM->dump_hypervisor_logs(false);
|
|
boinc_finish(0);
|
|
}
|
|
}
|
|
} else {
|
|
// Check to see if the guest VM has any log messages that indicate that we need
|
|
// to take action.
|
|
if (pVM->is_logged_failure_guest_job_out_of_memory()) {
|
|
vboxlog_msg("ERROR: VM reports there is not enough memory to finish the task.");
|
|
pVM->reset_vm_process_priority();
|
|
pVM->dump_hypervisor_logs(true);
|
|
pVM->poweroff();
|
|
boinc_finish(EXIT_OUT_OF_MEMORY);
|
|
}
|
|
}
|
|
if (boinc_status.suspended) {
|
|
if (!pVM->suspended) {
|
|
retval = pVM->pause();
|
|
if (retval && (VBOX_E_INVALID_OBJECT_STATE == retval)) {
|
|
vboxlog_msg("ERROR: VM task failed to pause, rescheduling task for a later time.");
|
|
pVM->poweroff();
|
|
boinc_temporary_exit(86400, "VM job unmanageable, restarting later.");
|
|
}
|
|
}
|
|
} else {
|
|
if (pVM->suspended) {
|
|
retval = pVM->resume();
|
|
if (retval && (VBOX_E_INVALID_OBJECT_STATE == retval)) {
|
|
vboxlog_msg("ERROR: VM task failed to resume, rescheduling task for a later time.");
|
|
pVM->poweroff();
|
|
boinc_temporary_exit(86400, "VM job unmanageable, restarting later.");
|
|
}
|
|
}
|
|
|
|
// stuff to do every 10 secs (everything else is 1/sec)
|
|
//
|
|
if ((loop_iteration % 10) == 0) {
|
|
current_cpu_time = starting_cpu_time + pVM->get_vm_cpu_time();
|
|
check_trickle_triggers(*pVM);
|
|
check_intermediate_uploads(*pVM);
|
|
}
|
|
|
|
if (pVM->job_duration) {
|
|
fraction_done = elapsed_time / pVM->job_duration;
|
|
} else if (pVM->fraction_done_filename.size() > 0) {
|
|
if (!read_fraction_done(fraction_done, *pVM)) {
|
|
// Report a non-zero fraction done so that BOINC will not attempt to use CPU Time and
|
|
// deadline as a means to calculate fraction done when a fraction done file is
|
|
// specified.
|
|
//
|
|
fraction_done = 0.001;
|
|
}
|
|
}
|
|
if (fraction_done > 1.0) {
|
|
fraction_done = 1.0;
|
|
}
|
|
boinc_report_app_status(
|
|
current_cpu_time,
|
|
last_checkpoint_cpu_time,
|
|
fraction_done
|
|
);
|
|
|
|
// write status report to stderr at regular intervals
|
|
//
|
|
if ((elapsed_time - last_status_report_time) >= 6000.0) {
|
|
last_status_report_time = elapsed_time;
|
|
if (pVM->job_duration) {
|
|
vboxlog_msg("Status Report: Job Duration: '%f'", pVM->job_duration);
|
|
}
|
|
if (elapsed_time) {
|
|
vboxlog_msg("Status Report: Elapsed Time: '%f'", elapsed_time);
|
|
}
|
|
vboxlog_msg("Status Report: CPU Time: '%f'", current_cpu_time);
|
|
if (aid.global_prefs.daily_xfer_limit_mb) {
|
|
vboxlog_msg("Status Report: Network Bytes Sent (Total): '%f'", bytes_sent);
|
|
vboxlog_msg("Status Report: Network Bytes Received (Total): '%f'", bytes_received);
|
|
}
|
|
|
|
pVM->dump_hypervisor_status_reports();
|
|
}
|
|
|
|
// Real VM checkpoints (snapshots) are expensive, don't do them very often.
|
|
//
|
|
// If the project has disabled automatic checkpoints, just report that we have
|
|
// successfully completed the checkpoint as soon as the API reports that we should
|
|
// checkpoint.
|
|
//
|
|
if (boinc_time_to_checkpoint()) {
|
|
if (
|
|
(elapsed_time >= last_checkpoint_elapsed_time + desired_checkpoint_interval + random_checkpoint_factor) ||
|
|
pVM->disable_automatic_checkpoints
|
|
) {
|
|
// Basic interleave factor is only needed once.
|
|
if (random_checkpoint_factor > 0) {
|
|
random_checkpoint_factor = 0.0;
|
|
}
|
|
|
|
// Checkpoint
|
|
retval = pVM->create_snapshot(elapsed_time);
|
|
if (retval) {
|
|
// Let BOINC clean-up the environment which should release any file/mutex locks and then attempt
|
|
// to resume from a previous snapshot.
|
|
//
|
|
vboxlog_msg("ERROR: Checkpoint maintenance failed, rescheduling task for a later time. (%d)", retval);
|
|
pVM->poweroff();
|
|
boinc_temporary_exit(86400, "VM job unmanageable, restarting later.");
|
|
} else {
|
|
// tell BOINC we've successfully created a checkpoint.
|
|
//
|
|
checkpoint.update(elapsed_time, current_cpu_time);
|
|
last_checkpoint_elapsed_time = elapsed_time;
|
|
last_checkpoint_cpu_time = current_cpu_time;
|
|
boinc_checkpoint_completed();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Send elapsed-time trickle message if needed
|
|
//
|
|
if (trickle_period) {
|
|
check_trickle_period(elapsed_time, trickle_period);
|
|
}
|
|
|
|
// Changes detected, re-read preferences
|
|
//
|
|
if (boinc_status.reread_init_data_file) {
|
|
boinc_status.reread_init_data_file = false;
|
|
|
|
vboxlog_msg("Preference change detected");
|
|
|
|
boinc_parse_init_data_file();
|
|
boinc_get_init_data_p(&aid);
|
|
set_throttles(aid, *pVM);
|
|
|
|
desired_checkpoint_interval = aid.checkpoint_period;
|
|
if (pVM->minimum_checkpoint_interval > aid.checkpoint_period) {
|
|
desired_checkpoint_interval = pVM->minimum_checkpoint_interval;
|
|
}
|
|
|
|
vboxlog_msg(
|
|
"Setting checkpoint interval to %d seconds. (Higher value of (Preference: %d seconds) or (Vbox_job.xml: %d seconds))",
|
|
(int)desired_checkpoint_interval,
|
|
(int)aid.checkpoint_period,
|
|
(int)pVM->minimum_checkpoint_interval
|
|
);
|
|
}
|
|
|
|
// if the VM has a maximum amount of time it is allowed to run,
|
|
// shut it down gracefully and exit.
|
|
//
|
|
if (pVM->job_duration && (elapsed_time > pVM->job_duration)) {
|
|
pVM->cleanup();
|
|
|
|
if (pVM->enable_cern_dataformat) {
|
|
FILE* output = fopen("output", "w");
|
|
if (output) {
|
|
fprintf(
|
|
output,
|
|
"Work Unit completed!\n"
|
|
);
|
|
fclose(output);
|
|
}
|
|
}
|
|
|
|
boinc_finish(0);
|
|
}
|
|
}
|
|
|
|
if (pVM->enable_network) {
|
|
if (boinc_status.network_suspended) {
|
|
if (!pVM->network_suspended) {
|
|
pVM->set_network_access(false);
|
|
}
|
|
} else {
|
|
if (pVM->network_suspended) {
|
|
pVM->set_network_access(true);
|
|
}
|
|
}
|
|
}
|
|
|
|
// report network usage every 10 min so the client can enforce quota
|
|
//
|
|
if (aid.global_prefs.daily_xfer_limit_mb
|
|
&& pVM->enable_network
|
|
&& !pVM->suspended
|
|
) {
|
|
net_usage_timer -= POLL_PERIOD;
|
|
if (net_usage_timer <= 0) {
|
|
net_usage_timer = 600;
|
|
double sent, received;
|
|
retval = pVM->get_vm_network_bytes_sent(sent);
|
|
if (!retval && (sent != bytes_sent)) {
|
|
bytes_sent = sent;
|
|
report_net_usage = true;
|
|
}
|
|
retval = pVM->get_vm_network_bytes_received(received);
|
|
if (!retval && (received != bytes_received)) {
|
|
bytes_received = received;
|
|
report_net_usage = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (report_net_usage) {
|
|
retval = boinc_report_app_status_aux(
|
|
elapsed_time,
|
|
last_checkpoint_cpu_time,
|
|
fraction_done,
|
|
pVM->vm_pid,
|
|
bytes_sent,
|
|
bytes_received
|
|
);
|
|
if (!retval) {
|
|
report_net_usage = false;
|
|
}
|
|
}
|
|
|
|
stopwatch_endtime = dtime();
|
|
stopwatch_elapsedtime = stopwatch_endtime - stopwatch_starttime;
|
|
|
|
// user may have changed system clock, so do sanity checks
|
|
//
|
|
if (stopwatch_elapsedtime < 0) {
|
|
stopwatch_elapsedtime = 0;
|
|
}
|
|
if (stopwatch_elapsedtime > 60) {
|
|
stopwatch_elapsedtime = 0;
|
|
}
|
|
|
|
// Sleep for the remainder of the polling period
|
|
//
|
|
sleep_time = POLL_PERIOD - stopwatch_elapsedtime;
|
|
if (sleep_time > 0) {
|
|
boinc_sleep(sleep_time);
|
|
}
|
|
|
|
// if VM is running, increment elapsed time
|
|
//
|
|
if (!boinc_status.suspended && !pVM->suspended) {
|
|
if (sleep_time > 0) {
|
|
elapsed_time += POLL_PERIOD;
|
|
} else {
|
|
elapsed_time += stopwatch_elapsedtime;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
CoUninitialize();
|
|
#ifdef USE_WINSOCK
|
|
WSACleanup();
|
|
#endif
|
|
#endif
|
|
|
|
return 0;
|
|
}
|