boinc/client/cs_benchmark.C

559 lines
16 KiB
C

// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2005 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// Manage a (perhaps multi-processor) benchmark.
// Because of hyperthreaded CPUs we can't just benchmark 1 CPU;
// we must run parallel benchmarks
// and ensure that they run more or less concurrently.
// Here's our scheme:
// - the main program forks N benchmark threads or processes
// - after FP_START seconds it creates a file "do_fp"
// - after FP_END seconds it deletes do_fp
// - after INT_START seconds it creates do_int
// - after INT_END seconds it deletes do_int and starts waiting for processes
// Each thread/process checks for the relevant file before
// starting or stopping each benchmark
#include "cpp.h"
#ifdef _WIN32
#include "boinc_win.h"
#else
#include "config.h"
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
#if HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#include <csignal>
#if HAVE_SYS_SIGNAL_H
#include <sys/signal.h>
#endif
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#endif
#include "error_numbers.h"
#include "file_names.h"
#include "filesys.h"
#include "util.h"
#include "cpu_benchmark.h"
#include "client_msgs.h"
#include "log_flags.h"
#include "client_state.h"
// defaults in case benchmarks fail or time out.
// better to err on the low side so hosts don't get too much work
// (applied by CLIENT_STATE::cpu_benchmarks_set_defaults(),
// and only to values that are still zero)
#define DEFAULT_FPOPS 1e9
#define DEFAULT_IOPS 1e9
#define DEFAULT_MEMBW 1e9
#define DEFAULT_CACHE 1e6
// Schedule of the benchmark phases.
// cpu_benchmarks_poll() compares each of these against
// the number of seconds elapsed since cpu_benchmarks_start.
#define FP_START 2
#define FP_END 12
#define INT_START 17
#define INT_END 27
#define OVERALL_END 30
#define MIN_CPU_TIME 3
// if the CPU time accumulated during one of the 10-sec segments
// is less than this, ignore the benchmark
// (passed as the last argument of whetstone() and dhrystone())
// States of the benchmark schedule, stored in bm_state.
// cpu_benchmarks_poll() steps through them in this order;
// results are collected only once BM_DONE has been reached.
#define BM_FP_INIT 0
#define BM_FP 1
#define BM_INT_INIT 2
#define BM_INT 3
#define BM_SLEEP 4
#define BM_DONE 5
// current state of the benchmark schedule (one of the BM_* values above)
static int bm_state;
#define BENCHMARK_PERIOD (SECONDS_PER_DAY*5)
// rerun CPU benchmarks this often (hardware may have been upgraded)
// represents a benchmark thread/process, in progress or completed.
// One of these exists per CPU being benchmarked;
// the array is allocated (zero-filled) in start_cpu_benchmarks().
//
struct BENCHMARK_DESC {
// index of this descriptor in benchmark_descs.
// On Windows the integer benchmark is run only for ordinal 0.
int ordinal;
// benchmark results.
// Windows: stored directly by the benchmark thread.
// Otherwise: parsed by check_benchmark() from the file
// written by the child process.
HOST_INFO host_info;
// set by check_benchmark() once the thread/process has exited
bool done;
// set if a benchmark ran too briefly (cpu_benchmarks()),
// or if the result file could not be opened or parsed (check_benchmark())
bool error;
// message describing a failed benchmark; written by cpu_benchmarks()
char error_str[256];
// loop count and time reported by dhrystone();
// only stored on Windows (shown in benchmark_debug messages)
double int_loops;
double int_time;
#ifdef _WIN32
// thread handle and thread ID returned by CreateThread()
HANDLE handle;
DWORD pid;
#else
// name of the per-process file used to pass results back to the client
char filename[256];
// ID of the child process created by fork()
PROCESS_ID pid;
#endif
};
// array of bm_ncpus descriptors; (re)allocated by start_cpu_benchmarks()
static BENCHMARK_DESC* benchmark_descs=0;
static bool benchmarks_running=false; // at least 1 benchmark thread running
// time (from dtime()) at which the current benchmark run was started
static double cpu_benchmarks_start;
static int bm_ncpus;
// user might change ncpus during benchmarks.
// store starting value here.
// Names of the flag files used to synchronize the benchmark
// threads/processes; indexed by the "which" argument of the
// functions below (callers pass BM_TYPE_FP and BM_TYPE_INT,
// presumably 0 and 1 -- they are defined elsewhere).
const char *file_names[2] = {"do_fp", "do_int"};
// Delete the flag file for the given benchmark type
// ("which" is an index into file_names).
//
static void remove_benchmark_file(int which) {
    const char* flag_path = file_names[which];
    boinc_delete_file(flag_path);
}
static void make_benchmark_file(int which) {
FILE* f = boinc_fopen(file_names[which], "w");
fclose(f);
}
// Block until the flag file for the given benchmark type exists,
// checking every 0.1 seconds.
//
void benchmark_wait_to_start(int which) {
    while (!boinc_file_exists(file_names[which])) {
        boinc_sleep(0.1);
    }
}
// Return true once the flag file for the given benchmark type
// no longer exists, i.e. the benchmark should stop.
//
bool benchmark_time_to_stop(int which) {
    return !boinc_file_exists(file_names[which]);
}
// benchmark a single CPU
//
// Runs the floating-point benchmark (whetstone) and then the
// integer benchmark (dhrystone).
// If either reports failure, bdp->error is set,
// a message is stored in bdp->error_str, and 0 is returned.
//
// Windows: the results are stored in *bdp.
// Otherwise: the results are written to the file bdp->filename.
// In that case this function is called in a forked child process
// (see start_cpu_benchmarks()), so changes made to *bdp here
// are not visible to the parent.
//
// Returns 0, or ERR_FOPEN if the result file can't be opened.
//
int cpu_benchmarks(BENCHMARK_DESC* bdp) {
    HOST_INFO host_info;
    int retval;
    // Initialize all outputs: they are printed in the error messages below,
    // and a failing benchmark might not have assigned them.
    double vax_mips = 0, int_loops = 0, int_time = 0, fp_time = 0;

    bdp->error_str[0] = '\0';
    host_info.clear_host_info();
    retval = whetstone(host_info.p_fpops, fp_time, MIN_CPU_TIME);
    if (retval) {
        bdp->error = true;
        // bounded: "%f" of a large double can exceed the buffer
        snprintf(bdp->error_str, sizeof(bdp->error_str),
            "FP benchmark ran only %f sec; ignoring", fp_time
        );
        bdp->error_str[sizeof(bdp->error_str) - 1] = '\0';
        return 0;
    }
#ifdef _WIN32
    // Windows: do integer benchmark only on CPU zero.
    // There's a mysterious bug/problem that gives wildly
    // differing benchmarks on multi-CPU and multi-core machines,
    // if you use all the CPUs at once.
    //
    if (bdp->ordinal == 0) {
#endif
    retval = dhrystone(vax_mips, int_loops, int_time, MIN_CPU_TIME);
    if (retval) {
        bdp->error = true;
        snprintf(bdp->error_str, sizeof(bdp->error_str),
            "Integer benchmark ran only %f sec; ignoring", int_time
        );
        bdp->error_str[sizeof(bdp->error_str) - 1] = '\0';
        return 0;
    }
    host_info.p_iops = vax_mips*1e6;
    host_info.p_membw = 1e9;
    host_info.m_cache = 1e6;    // TODO: measure the cache
#ifdef _WIN32
    }
    bdp->host_info = host_info;
    bdp->int_loops = int_loops;
    bdp->int_time = int_time;
#else
    FILE* finfo;
    finfo = boinc_fopen(bdp->filename, "w");
    if (!finfo) return ERR_FOPEN;
    host_info.write_cpu_benchmarks(finfo);
    fclose(finfo);
#endif
    return 0;
}
#ifdef _WIN32
// Thread start routine handed to CreateThread():
// unwraps the BENCHMARK_DESC pointer and runs the benchmarks for it.
//
DWORD WINAPI win_cpu_benchmarks(LPVOID p) {
    BENCHMARK_DESC* desc = (BENCHMARK_DESC*)p;
    int retval = cpu_benchmarks(desc);
    return retval;
}
#endif
void CLIENT_STATE::start_cpu_benchmarks() {
int i;
if (benchmarks_running) {
msg_printf(0, MSG_INFO,
"Can't start benchmarks - they're already running"
);
return;
}
if (skip_cpu_benchmarks) {
if (log_flags.benchmark_debug) {
msg_printf(0, MSG_INFO,
"[benchmark_debug] start_cpu_benchmarks(): Skipping CPU benchmarks"
);
}
cpu_benchmarks_set_defaults();
return;
}
msg_printf(NULL, MSG_INFO, "Running CPU benchmarks");
cpu_benchmarks_pending = false;
bm_state = BM_FP_INIT;
remove_benchmark_file(BM_TYPE_FP);
remove_benchmark_file(BM_TYPE_INT);
cpu_benchmarks_start = dtime();
if (benchmark_descs) {
free(benchmark_descs);
}
bm_ncpus = ncpus;
benchmark_descs = (BENCHMARK_DESC*)calloc(bm_ncpus, sizeof(BENCHMARK_DESC));
benchmarks_running = true;
for (i=0; i<bm_ncpus; i++) {
benchmark_descs[i].ordinal = i;
benchmark_descs[i].done = false;
benchmark_descs[i].error = false;
#ifdef _WIN32
benchmark_descs[i].handle = CreateThread(
NULL, 0, win_cpu_benchmarks, benchmark_descs+i, 0,
&benchmark_descs[i].pid
);
SetThreadAffinityMask(benchmark_descs[i].handle, 1<<i);
SetThreadPriority(benchmark_descs[i].handle, THREAD_PRIORITY_IDLE);
#else
sprintf(benchmark_descs[i].filename, "%s_%d.xml", CPU_BENCHMARKS_FILE_NAME, i);
PROCESS_ID pid = fork();
if (pid == 0) {
#ifdef HAVE_SETPRIORITY
if (setpriority(PRIO_PROCESS, 0, PROCESS_IDLE_PRIORITY)) {
perror("setpriority");
}
#endif
int retval = cpu_benchmarks(benchmark_descs+i);
fflush(NULL);
_exit(retval);
} else {
benchmark_descs[i].pid = pid;
}
#endif
}
}
// Decide whether CPU benchmarks should be (re)run now.
// True if the run_cpu_benchmarks flag is set,
// or more than BENCHMARK_PERIOD has passed since host_info.p_calculated;
// never while tasks are suspended.
//
bool CLIENT_STATE::should_run_cpu_benchmarks() {
    // Note: skip_cpu_benchmarks is deliberately not consulted here;
    // in that case start_cpu_benchmarks() just fills in default values.
    //
    if (tasks_suspended) {
        return false;
    }

    // A last-benchmark time in the future means the user changed it
    // (as part of cheating, presumably): always run benchmarks.
    //
    const double elapsed = now - host_info.p_calculated;
    if (elapsed < 0) {
        return true;
    }

    // an explicit request is honored even if no projects are attached
    //
    if (run_cpu_benchmarks) {
        return true;
    }

    // otherwise, don't run until at least one project is attached
    //
    if (projects.size() == 0) {
        return false;
    }
    return elapsed > BENCHMARK_PERIOD;
}
// abort a running benchmark thread/process.
// Does nothing if the benchmark is already marked done,
// or if it has no valid thread handle / process ID.
//
void abort_benchmark(BENCHMARK_DESC& desc) {
    if (desc.done) return;
#ifdef _WIN32
    if (!desc.handle) return;
    TerminateThread(desc.handle, 0);
    CloseHandle(desc.handle);
    // forget the closed handle so it can't be used or closed again
    desc.handle = NULL;
#else
    // kill() gives pid 0 and negative pids a special meaning
    // (a process group, or every process we're allowed to signal),
    // so only signal a real child.
    //
    if (desc.pid <= 0) return;
    kill(desc.pid, SIGKILL);
#endif
}
// check a running benchmark thread/process.
// If it has exited, mark the descriptor done.
// Non-Windows: also read the results from the file the child wrote,
// delete the file, and set desc.error if the file can't be
// opened or parsed.
//
void check_benchmark(BENCHMARK_DESC& desc) {
#ifdef _WIN32
    DWORD exit_code = 0;
    GetExitCodeThread(desc.handle, &exit_code);
    if (exit_code != STILL_ACTIVE) {
        CloseHandle(desc.handle);
        desc.done = true;
    }
#else
    int retval;
    int exit_code = 0;

    // waitpid() gives pid 0 and negative pids a special meaning
    // (any child in a process group, or any child at all),
    // which would reap some unrelated child.
    // Without a valid pid there is nothing to wait for.
    //
    if (desc.pid <= 0) {
        desc.done = true;
        desc.error = true;
        return;
    }

    // zero means the child is still running;
    // anything else (exit or waitpid error) ends the wait
    //
    retval = waitpid(desc.pid, &exit_code, WNOHANG);
    if (retval) {
        desc.done = true;
        FILE* f = fopen(desc.filename, "r");
        if (!f) {
            desc.error = true;
            return;
        }
        retval = desc.host_info.parse_cpu_benchmarks(f);
        fclose(f);
        boinc_delete_file(desc.filename);
        if (retval) {
            desc.error = true;
        }
    }
#endif
}
// Abort every benchmark thread/process of the current run.
// No-op if no run is in progress.
//
void CLIENT_STATE::abort_cpu_benchmarks() {
    if (benchmarks_running) {
        for (int cpu = 0; cpu < bm_ncpus; cpu++) {
            abort_benchmark(benchmark_descs[cpu]);
        }
    }
}
// Drive a benchmark run; does nothing unless a run is in progress,
// and acts at most once per second.
// - sends heartbeats to the active tasks
// - gives up (leaving benchmarks pending) if tasks are still executing
//   10 seconds after the run started
// - steps through the BM_* states, creating and removing the flag files
//   that tell the benchmark threads/processes when to start and stop
// - once BM_DONE has been reached: checks for a timeout,
//   then checks each benchmark; when all have finished, stores the
//   per-CPU averages in host_info (or defaults if any had an error)
// Always returns false.
//
bool CLIENT_STATE::cpu_benchmarks_poll() {
    int i;
    static double last_time = 0;
    if (!benchmarks_running) return false;

    if (now < last_time + 1) return false;
    last_time = now;

    active_tasks.send_heartbeats();

    // if active tasks don't quit after 10 sec, give up on benchmark
    //
    if (now >= (cpu_benchmarks_start + 10.0) && active_tasks.is_task_executing()) {
        msg_printf(NULL, MSG_INTERNAL_ERROR,
            "Failed to stop applications; aborting CPU benchmarks"
        );
        host_info.p_calculated = now;
        abort_cpu_benchmarks();
        benchmarks_running = false;
        set_client_state_dirty("CPU benchmarks");
        cpu_benchmarks_pending = true;
        return false;
    }

    // do transitions through benchmark states.
    // Every state before BM_DONE returns here;
    // only BM_DONE falls through to the code below.
    //
    switch (bm_state) {
    case BM_FP_INIT:
        if (now - cpu_benchmarks_start > FP_START) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] Starting floating-point benchmark"
                );
            }
            make_benchmark_file(BM_TYPE_FP);
            bm_state = BM_FP;
        }
        return false;
    case BM_FP:
        if (now - cpu_benchmarks_start > FP_END) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] Ended floating-point benchmark"
                );
            }
            remove_benchmark_file(BM_TYPE_FP);
            bm_state = BM_INT_INIT;
        }
        return false;
    case BM_INT_INIT:
        if (now - cpu_benchmarks_start > INT_START) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] Starting integer benchmark"
                );
            }
            make_benchmark_file(BM_TYPE_INT);
            bm_state = BM_INT;
        }
        return false;
    case BM_INT:
        if (now - cpu_benchmarks_start > INT_END) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] Ended integer benchmark"
                );
            }
            remove_benchmark_file(BM_TYPE_INT);
            bm_state = BM_SLEEP;
        }
        return false;
    case BM_SLEEP:
        if (now - cpu_benchmarks_start > OVERALL_END) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] Ended benchmark"
                );
            }
            bm_state = BM_DONE;
        }
        return false;
    }

    // check for timeout
    //
    if (now > cpu_benchmarks_start + MAX_CPU_BENCHMARKS_SECONDS) {
        msg_printf(NULL, MSG_INTERNAL_ERROR,
            "CPU benchmarks timed out, using default values"
        );
        abort_cpu_benchmarks();
        cpu_benchmarks_set_defaults();
        benchmarks_running = false;
        set_client_state_dirty("CPU benchmarks");
    }

    // see which benchmarks have finished, and whether any failed
    //
    int ndone = 0;
    bool had_error = false;

    for (i=0; i<bm_ncpus; i++) {
        if (!benchmark_descs[i].done) {
            check_benchmark(benchmark_descs[i]);
        }
        if (benchmark_descs[i].done) {
            if (log_flags.benchmark_debug) {
                msg_printf(0, MSG_INFO,
                    "[benchmark_debug] CPU %d has finished", i
                );
            }
            ndone++;
            if (benchmark_descs[i].error) {
                // error_str is data, not a format: print it via "%s"
                msg_printf(0, MSG_INTERNAL_ERROR, "%s", benchmark_descs[i].error_str);
                had_error = true;
            }
        }
    }
    if (log_flags.benchmark_debug) {
        msg_printf(0, MSG_INFO,
            "[benchmark_debug] %d out of %d CPUs done", ndone, bm_ncpus
        );
    }
    if (ndone == bm_ncpus) {
        double old_p_fpops = host_info.p_fpops;
        if (had_error) {
            msg_printf(NULL, MSG_INTERNAL_ERROR, "CPU benchmarks error");
            cpu_benchmarks_set_defaults();
        } else {
            // average the results over the CPUs
            //
            double p_fpops = 0;
            double p_iops = 0;
            double p_membw = 0;
            double m_cache = 0;
            for (i=0; i<bm_ncpus; i++) {
                if (log_flags.benchmark_debug) {
                    msg_printf(0, MSG_INFO,
                        "[benchmark_debug] CPU %d: fp %f int %f intloops %f inttime %f",
                        i, benchmark_descs[i].host_info.p_fpops,
                        benchmark_descs[i].host_info.p_iops,
                        benchmark_descs[i].int_loops,
                        benchmark_descs[i].int_time
                    );
                }
                p_fpops += benchmark_descs[i].host_info.p_fpops;
#ifdef _WIN32
                // the integer benchmark is run only on CPU 0 on Windows,
                // so use its result for every CPU
                p_iops += benchmark_descs[0].host_info.p_iops;
#else
                p_iops += benchmark_descs[i].host_info.p_iops;
#endif
                p_membw += benchmark_descs[i].host_info.p_membw;
                m_cache += benchmark_descs[i].host_info.m_cache;
            }
            p_fpops /= bm_ncpus;
            p_iops /= bm_ncpus;
            p_membw /= bm_ncpus;
            m_cache /= bm_ncpus;

            // keep the previous value if a benchmark came out as zero
            //
            if (p_fpops > 0) {
                host_info.p_fpops = p_fpops;
            } else {
                msg_printf(NULL, MSG_INTERNAL_ERROR, "Benchmark: FP unexpectedly zero; ignoring");
            }
            if (p_iops > 0) {
                host_info.p_iops = p_iops;
            } else {
                msg_printf(NULL, MSG_INTERNAL_ERROR, "Benchmark: int unexpectedly zero; ignoring");
            }
            host_info.p_membw = p_membw;
            host_info.m_cache = m_cache;
            print_benchmark_results();
        }

        // scale duration correction factor according to change in benchmarks.
        //
        if (host_info.p_calculated && old_p_fpops) {
            scale_duration_correction_factors(host_info.p_fpops/old_p_fpops);
        }
        host_info.p_calculated = now;
        benchmarks_running = false;
        set_client_state_dirty("CPU benchmarks");
    }
    return false;
}
// Write the current benchmark figures from host_info to the message log:
// the number of CPUs benchmarked, and the floating-point and integer
// speeds per CPU (the stored values divided by 1e6).
//
void CLIENT_STATE::print_benchmark_results() {
    msg_printf(NULL, MSG_INFO, "Benchmark results:");
    msg_printf(NULL, MSG_INFO, " Number of CPUs: %d", bm_ncpus);

    const double whetstone_mips = host_info.p_fpops/1e6;
    msg_printf(
        NULL, MSG_INFO, " %.0f floating point MIPS (Whetstone) per CPU",
        whetstone_mips
    );

    const double dhrystone_mips = host_info.p_iops/1e6;
    msg_printf(
        NULL, MSG_INFO, " %.0f integer MIPS (Dhrystone) per CPU",
        dhrystone_mips
    );
#if 0
    msg_printf(
        NULL, MSG_INFO, "Benchmark results: %.0f million bytes/sec memory bandwidth%s",
        host_info.p_membw/1e6, (host_info.p_membw_err?" [ERROR]":"")
    );
#endif
}
// True if host_info.p_calculated (the time benchmark values
// were last set) is nonzero.
//
bool CLIENT_STATE::cpu_benchmarks_done() {
    if (host_info.p_calculated != 0) {
        return true;
    }
    return false;
}
// Fill in a default for each benchmark value that is still zero;
// nonzero values are left alone.
//
void CLIENT_STATE::cpu_benchmarks_set_defaults() {
    if (host_info.p_fpops == 0) {
        host_info.p_fpops = DEFAULT_FPOPS;
    }
    if (host_info.p_iops == 0) {
        host_info.p_iops = DEFAULT_IOPS;
    }
    if (host_info.p_membw == 0) {
        host_info.p_membw = DEFAULT_MEMBW;
    }
    if (host_info.m_cache == 0) {
        host_info.m_cache = DEFAULT_CACHE;
    }
}
// Report whether a benchmark run is in progress
// (the file-scope benchmarks_running flag).
//
bool CLIENT_STATE::are_cpu_benchmarks_running() {
    const bool in_progress = benchmarks_running;
    return in_progress;
}
// Identification string for this source file.
// NOTE(review): the value looks like a version-control keyword that is
// presumably expanded on checkout -- confirm before editing it.
const char *BOINC_RCSID_97ee090db0 = "$Id$";