- scheduler: add preliminary support for "job size matching"

(attempt to send big jobs to fast hosts, small jobs to slow hosts).
    - have "census" compute mean/stdev of host speeds,
        write it to a file perf_info.txt
    - have feeder compute mean/stdev of sizes of jobs in shmem
    - have feeder read perf_info.txt into shmem
- scheduler: add some debugging messages for app version selection
- Add LGPL license to a few files
- upgrade/setup scripts: copy census to bin/


svn path=/trunk/boinc/; revision=15136
This commit is contained in:
David Anderson 2008-05-06 19:53:49 +00:00
parent 32536e5194
commit 05f703559f
20 changed files with 303 additions and 12 deletions

View File

@ -3621,4 +3621,31 @@ Eric K May 6 2008
api/
x_opengl.C
David May 6 2008
- scheduler: add preliminary support for "job size matching"
(attempt to send big jobs to fast hosts, small jobs to slow hosts).
- have "census" compute mean/stdev of host speeds,
write it to a file perf_info.txt
- have feeder compute mean/stdev of sizes of jobs in shmem
- have feeder read perf_info.txt into shmem
- scheduler: add some debugging messages for app version selection
- Add LGPL license to a few files
- upgrade/setup scripts: copy census to bin/
py/Boinc/
setup_project.py
sched/
assimilate_handler.C
census.C
feeder.C
hr_info.C,h
sched_array.h
sched_config.C,h
sched_plan.C
sched_resend.h
sched_shmem.h
sched_timezone.h
server_types.C
single_job_assimilator.C
validate_util2.h
validator.h

View File

@ -466,14 +466,14 @@ array('boinc_server_status.zip',
);
$web = array(
array('http://download.conmunix.net/pub/boinc_lcs/boinc_lcs_3.0_beta.tar.gz',
array('http://download.conmunix.net/pub/boinc_lcs/boinc_lcs_3.0.tar.gz',
'Boinc LCS',
'3.0 beta',
'3.0 final',
'Shows the current state and other information from an unlimited number of BOINC clients connected to the Internet.',
'http://www.conmunix.net/boinc-lcs',
'Apache, PHP',
'Boinc LCS is a free PHP based script, that allows you to monitor the current state (and other information) from each of your connected BOINC clients. It runs on a simple webserver and since version 2.1 it is also platform independent. Boinc LCS is released under the GNU/GPL license. You can modify and redistribute or just using it!',
'1193023938'
'Boinc LCS is a free PHP based script, that allows you to monitor the current state (and other information) from each of your connected BOINC clients. It runs on a simple webserver and is platform independent. Boinc LCS is released under the GNU/GPL license. You can modify and redistribute or just use it!',
'1210049520'
),
array('b-sig.zip',
'Forum signature showing jobs in progress',

View File

@ -1,6 +1,9 @@
<?
$project_news = array(
array("May 5, 2008",
"Version 3.0 of <a href=http://www.conmunix.net/offene-projekte/boinc-lcs/>Boinc LCS</a> (a free PHP script for monitoring remote BOINC clients) has been released."
),
array("Apr 20, 2008",
"The BOINC user documentation has been moved from
<a href=trac/wiki/RunningBoinc>Trac</a>

View File

@ -367,7 +367,7 @@ def install_boinc_files(dest_dir, web_only):
'file_deleter', 'sample_dummy_assimilator',
'sample_assimilator', 'sample_work_generator',
'single_job_assimilator',
'update_stats', 'db_dump', 'db_purge', 'show_shmem' ])
'update_stats', 'db_dump', 'db_purge', 'show_shmem', 'census' ])
map(lambda (s): install(srcdir('tools',s), dir('bin',s)),
[ 'create_work', 'xadd', 'dbcheck_files_exist', 'run_in_ops',
'update_versions', 'parse_config', 'grep_logs', 'db_query',

View File

@ -1,3 +1,22 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <vector>
#include "boinc_db.h"
#define DEFER_ASSIMILATION 123321

View File

@ -70,4 +70,5 @@ int main(int argc, char** argv) {
hri.init();
hri.scan_db();
hri.write_file();
hri.perf_info.write_file();
}

View File

@ -98,6 +98,7 @@
#include <ctime>
#include <csignal>
#include <unistd.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <vector>
@ -352,8 +353,33 @@ void weighted_interleave(double* weights, int n, int k, int* v, int* count) {
free(x);
}
// Recompute the job-size statistics of the jobs currently in shared memory.
//
// Pass 1 accumulates the sum and sum of squares of rsc_fpops_est
// over every slot whose state is WR_STATE_PRESENT.
// Pass 2 stores in each such slot's fpops_size the job's distance
// from the mean, measured in standard deviations.
//
// Degenerate cases:
// - no job is present: nothing to update (avoids dividing by n == 0)
// - the standard deviation is zero (e.g. all jobs have the same size)
//   or the variance is not a positive number:
//   every present job gets fpops_size = 0, i.e. "average size",
//   rather than NaN or infinity
//
// NOTE(review): the mean and stdev computed here are not saved anywhere
// (e.g. in ssp->perf_info); confirm whether the scheduler needs them.
//
static void update_stats() {
    int i, n=0;
    double sum=0, sum_sqr=0;

    for (i=0; i<ssp->max_wu_results; i++) {
        WU_RESULT& wu_result = ssp->wu_results[i];
        if (wu_result.state != WR_STATE_PRESENT) continue;
        n++;
        double e = wu_result.workunit.rsc_fpops_est;
        sum += e;
        sum_sqr += e*e;
    }
    if (n == 0) return;

    double mean = sum/n;

    // sum_sqr - sum*mean can come out slightly negative through rounding
    // when the sizes are (nearly) equal; don't pass that to sqrt()
    //
    double variance = (sum_sqr - sum*mean)/n;
    double stdev = (variance > 0) ? sqrt(variance) : 0;

    for (i=0; i<ssp->max_wu_results; i++) {
        WU_RESULT& wu_result = ssp->wu_results[i];
        if (wu_result.state != WR_STATE_PRESENT) continue;
        double e = wu_result.workunit.rsc_fpops_est;
        double diff = e - mean;
        wu_result.fpops_size = (stdev > 0) ? diff/stdev : 0;
    }
}
// Make one pass through the work array, filling in empty slots.
// Return true if should call this again immediately.
// Return true if we filled in any.
//
static bool scan_work_array(vector<DB_WORK_ITEM> &work_items) {
int i;
@ -471,6 +497,10 @@ void feeder_loop() {
"No action; sleeping %.2f sec\n", sleep_interval
);
boinc_sleep(sleep_interval);
} else {
if (config.job_size_matching) {
update_stats();
}
}
fflush(stdout);
@ -704,6 +734,15 @@ int main(int argc, char** argv) {
}
hr_init();
if (config.job_size_matching) {
retval = ssp->perf_info.read_file();
if (retval) {
log_messages.printf(MSG_CRITICAL,
"can't read perf_info file; run census\n"
);
exit(1);
}
}
signal(SIGUSR1, show_state);

View File

@ -1,7 +1,25 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <stdio.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <math.h>
#include "error_numbers.h"
#include "sched_msgs.h"
@ -66,11 +84,17 @@ void HR_INFO::init() {
void HR_INFO::scan_db() {
DB_HOST host;
int retval;
int i;
int i, n=0;
double sum=0, sum_sqr=0;
while (1) {
retval = host.enumerate("where expavg_credit>1");
if (retval) break;
if (host.p_fpops > 1e7 && host.p_fpops < 1e13) {
n++;
sum += host.p_fpops;
sum_sqr += host.p_fpops*host.p_fpops;
}
//printf("host %d: %s | %s | %s\n", host.id, host.os_name, host.p_vendor, host.p_model);
for (i=1; i<HR_NTYPES; i++) {
if (hr_unknown_platform_type(host, i)) {
@ -87,6 +111,15 @@ void HR_INFO::scan_db() {
fprintf(stderr, "host enum: %d", retval);
exit(1);
}
// if no hosts, use reasonable defaults
//
if (n) {
perf_info.host_fpops_mean = sum/n;
perf_info.host_fpops_stdev = sqrt((sum_sqr - sum*perf_info.host_fpops_mean)/n);
} else {
perf_info.host_fpops_mean = 3e9;
perf_info.host_fpops_stdev = 1e9;
}
}
void HR_INFO::allocate(int total_slots) {
@ -168,3 +201,25 @@ void HR_INFO::show(FILE* f) {
}
}
}
// Read the host speed statistics (host_fpops_mean and host_fpops_stdev)
// from PERF_INFO_FILENAME, in the format produced by write_file().
// The fields are changed only if both numbers are read successfully.
//
// Returns 0 on success.
// Returns ERR_FOPEN if the file can't be opened,
// or if it doesn't start with two numbers.
//
int PERF_INFO::read_file() {
    FILE* f = fopen(PERF_INFO_FILENAME, "r");
    if (!f) return ERR_FOPEN;

    // The targets are doubles, so the conversion must be %lf;
    // with %f, fscanf would store floats through double pointers.
    //
    double mean, stdev;
    int nfields = fscanf(f, "%lf %lf", &mean, &stdev);
    fclose(f);

    // No dedicated "bad contents" error code is visible in this file,
    // so a malformed file is reported the same way as an unreadable one.
    //
    if (nfields != 2) return ERR_FOPEN;

    host_fpops_mean = mean;
    host_fpops_stdev = stdev;
    return 0;
}
// Save the host speed statistics to PERF_INFO_FILENAME as a single line:
// host_fpops_mean, a space, host_fpops_stdev.
// Returns ERR_FOPEN if the file can't be opened for writing, else 0.
//
int PERF_INFO::write_file() {
    FILE* out = fopen(PERF_INFO_FILENAME, "w");
    if (out == NULL) {
        return ERR_FOPEN;
    }
    fprintf(out, "%f %f\n", host_fpops_mean, host_fpops_stdev);
    fclose(out);
    return 0;
}

View File

@ -1,5 +1,40 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef _HR_INFO_
#define _HR_INFO_
#include "hr.h"
// statistics about the host population,
// and about the set of jobs in shared mem.
//
// An instance is a member of HR_INFO (filled in by HR_INFO::scan_db())
// and of SCHED_SHMEM, so changing the fields changes the shared-memory layout.
//
struct PERF_INFO {
double host_fpops_mean;
// mean of p_fpops over the hosts enumerated by HR_INFO::scan_db()
// whose p_fpops is between 1e7 and 1e13; 3e9 if there are no such hosts
double host_fpops_stdev;
// standard deviation of the same values; 1e9 if there are no such hosts
double wu_fpops_mean;
double wu_fpops_stdev;
// presumably the mean and standard deviation of the sizes of the jobs
// in shared mem; NOTE(review): no code visible here sets or reads these
int write_file();
// write host_fpops_mean and host_fpops_stdev to PERF_INFO_FILENAME;
// returns 0, or ERR_FOPEN if the file can't be opened
int read_file();
// read host_fpops_mean and host_fpops_stdev from PERF_INFO_FILENAME;
// returns 0, or ERR_FOPEN if the file can't be opened
};
struct HR_INFO {
double *rac_per_class[HR_NTYPES];
// how much RAC per class
@ -13,6 +48,8 @@ struct HR_INFO {
// # of slots per type (fixed at start)
bool type_being_used[HR_NTYPES];
// whether any app is actually using this HR type
PERF_INFO perf_info;
int write_file();
int read_file();
void scan_db();
@ -24,3 +61,6 @@ struct HR_INFO {
};
#define HR_INFO_FILENAME "../hr_info.txt"
#define PERF_INFO_FILENAME "../perf_info.txt"
#endif

View File

@ -1 +1,20 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
extern void scan_work_array(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);

View File

@ -183,6 +183,8 @@ int SCHED_CONFIG::parse(FILE* f) {
}
continue;
}
if (xp.parse_bool(tag, "job_size_matching", job_size_matching)) continue;
if (xp.parse_bool(tag, "debug_version_select", debug_version_select)) continue;
if (xp.parse_bool(tag, "debug_assignment", debug_assignment)) continue;
if (xp.parse_bool(tag, "debug_prefs", debug_prefs)) continue;

View File

@ -120,6 +120,7 @@ public:
int max_ncpus;
vector<regex_t> ban_os;
vector<regex_t> ban_cpu;
bool job_size_matching;
// log flags
//
bool debug_version_select;

View File

@ -27,6 +27,7 @@
// (you need to prevent that from being overwritten too)
// In either case, put your version under source-code control, e.g. SVN
#include "sched_config.h"
#include "sched_msgs.h"
#include "sched_plan.h"
@ -80,6 +81,11 @@ bool app_plan(SCHEDULER_REQUEST& sreq, char* plan_class, HOST_USAGE& hu) {
return true;
}
}
if (config.debug_version_select) {
log_messages.printf(MSG_DEBUG,
"Host lacks CUDA coprocessor for plan class %s\n", plan_class
);
}
return false;
}
log_messages.printf(MSG_CRITICAL,

View File

@ -1,2 +1,20 @@
extern bool resend_lost_work(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
extern bool resend_lost_work(SCHEDULER_REQUEST&, SCHEDULER_REPLY&);

View File

@ -27,6 +27,7 @@
#define _SCHED_SHMEM_H_
#include "boinc_db.h"
#include "hr_info.h"
// the following must be at least as large as DB tables
// (counting only non-deprecated entries for the current major version)
@ -60,6 +61,7 @@ struct WU_RESULT {
int resultid;
int time_added_to_shared_memory;
int result_priority;
double fpops_size; // measured in stdevs
};
// this struct is followed in memory by an array of WU_RESULTS
@ -68,7 +70,7 @@ struct SCHED_SHMEM {
bool ready; // feeder sets to true when init done
// the following fields let the scheduler make sure
// that the shared mem has the right format
int ss_size; // size of this struct, including array
int ss_size; // size of this struct, including WU_RESULT array
int platform_size; // sizeof(PLATFORM)
int app_size; // sizeof(APP)
int app_version_size; // sizeof(APP_VERSION)
@ -84,6 +86,7 @@ struct SCHED_SHMEM {
int max_app_versions;
int max_assignments;
int max_wu_results;
PERF_INFO perf_info;
PLATFORM platforms[MAX_PLATFORMS];
APP apps[MAX_APPS];
APP_VERSION app_versions[MAX_APP_VERSIONS];

View File

@ -1,3 +1,22 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
extern void process_av_timezone(
SCHEDULER_REPLY& reply, APP_VERSION* avp, APP_VERSION& av2
);

View File

@ -16,6 +16,7 @@
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "config.h"
#include <cassert>
#include <vector>

View File

@ -16,7 +16,7 @@
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// assimilator for single jobs.
// - if success, move the output file(s) to job directory
// - delete job description file

View File

@ -1,3 +1,22 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef _VALIDATE_UTIL2_
#define _VALIDATE_UTIL2_

View File

@ -1,3 +1,22 @@
// Berkeley Open Infrastructure for Network Computing
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// This is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation;
// either version 2.1 of the License, or (at your option) any later version.
//
// This software is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// To view the GNU Lesser General Public License visit
// http://www.gnu.org/copyleft/lesser.html
// or write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include "sched_config.h"
extern SCHED_CONFIG config;