2008-08-06 18:36:30 +00:00
// This file is part of BOINC.
2005-01-20 23:22:22 +00:00
// http://boinc.berkeley.edu
2008-08-06 18:36:30 +00:00
// Copyright (C) 2008 University of California
2003-08-14 00:02:15 +00:00
//
2008-08-06 18:36:30 +00:00
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
2003-08-14 00:02:15 +00:00
//
2008-08-06 18:36:30 +00:00
// BOINC is distributed in the hope that it will be useful,
2005-01-20 23:22:22 +00:00
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
2003-08-14 00:02:15 +00:00
//
2008-08-06 18:36:30 +00:00
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.
2003-08-14 00:02:15 +00:00
2009-08-10 04:22:02 +00:00
// Support functions for validators:
// 1) functions for locating the output files
// 2) various ways of deciding how much credit to grant
// a group of replicated results
2003-09-20 17:38:13 +00:00
2008-02-27 23:26:38 +00:00
# include <cstring>
2005-11-21 18:34:44 +00:00
# include "config.h"
2003-09-20 17:38:13 +00:00
2003-10-21 04:06:55 +00:00
# include "error_numbers.h"
2003-08-14 00:02:15 +00:00
# include "parse.h"
2004-08-06 11:42:41 +00:00
# include "util.h"
2005-01-02 08:01:27 +00:00
# include "filesys.h"
2004-04-08 08:15:23 +00:00
# include "sched_util.h"
# include "sched_config.h"
# include "sched_msgs.h"
2006-06-10 21:29:51 +00:00
# include "validator.h"
2003-10-21 04:06:55 +00:00
# include "validate_util.h"
2003-08-14 00:02:15 +00:00
2004-06-30 18:17:21 +00:00
using std : : vector ;
using std : : string ;
2009-08-10 04:22:02 +00:00
////////// functions for locating output files ///////////////
2008-05-29 20:11:43 +00:00
int FILE_INFO : : parse ( XML_PARSER & xp ) {
2006-10-28 18:25:35 +00:00
char tag [ 256 ] ;
bool is_tag , found = false ;
2008-05-29 20:11:43 +00:00
optional = false ;
2008-06-30 16:46:05 +00:00
no_validate = false ;
2006-10-28 18:25:35 +00:00
while ( ! xp . get ( tag , sizeof ( tag ) , is_tag ) ) {
if ( ! is_tag ) continue ;
2008-05-30 03:29:31 +00:00
if ( ! strcmp ( tag , " /file_ref " ) ) {
2006-10-28 18:25:35 +00:00
return found ? 0 : ERR_XML_PARSE ;
}
2008-05-30 03:29:31 +00:00
if ( xp . parse_string ( tag , " file_name " , name ) ) {
2006-10-28 18:25:35 +00:00
found = true ;
2008-05-29 20:11:43 +00:00
continue ;
2006-10-28 18:25:35 +00:00
}
2008-05-29 20:11:43 +00:00
if ( xp . parse_bool ( tag , " optional " , optional ) ) continue ;
2008-06-30 16:46:05 +00:00
if ( xp . parse_bool ( tag , " no_validate " , no_validate ) ) continue ;
2006-10-28 18:25:35 +00:00
}
return ERR_XML_PARSE ;
}
2008-05-29 21:54:18 +00:00
// Get the info for the first output file of a result.
// Scans result.xml_doc_in for the first <file_ref> element,
// parses it into fi, and sets fi.path from dir_hier_path()
// using config.upload_dir and config.uldl_dir_fanout.
//
// Returns 0 on success, the error from FILE_INFO::parse() if the
// element is malformed, or ERR_XML_PARSE if no <file_ref> is found.
//
int get_output_file_info(RESULT& result, FILE_INFO& fi) {
    char tag[256], path[1024];
    bool is_tag;
    MIOFILE mf;

    mf.init_buf_read(result.xml_doc_in);
    XML_PARSER xp(&mf);
    while (!xp.get(tag, sizeof(tag), is_tag)) {
        if (!is_tag) continue;
        if (!strcmp(tag, "file_ref")) {
            int retval = fi.parse(xp);
            if (retval) return retval;
            dir_hier_path(
                fi.name.c_str(), config.upload_dir, config.uldl_dir_fanout, path
            );
            fi.path = path;
            return 0;
        }
    }
    return ERR_XML_PARSE;
}
2008-05-29 21:54:18 +00:00
// Get the info for all output files of a result.
// fis is cleared, then one FILE_INFO is appended for every <file_ref>
// element found in result.xml_doc_in, with its path filled in from
// dir_hier_path() using config.upload_dir and config.uldl_dir_fanout.
//
// Returns 0 when the whole document has been scanned (fis may be empty),
// or the error from FILE_INFO::parse() for the first malformed element;
// in that case fis holds the entries parsed so far.
//
int get_output_file_infos(RESULT& result, vector<FILE_INFO>& fis) {
    char tag[256], path[1024];
    bool is_tag;
    MIOFILE mf;

    mf.init_buf_read(result.xml_doc_in);
    XML_PARSER xp(&mf);
    fis.clear();
    while (!xp.get(tag, sizeof(tag), is_tag)) {
        if (!is_tag) continue;
        if (!strcmp(tag, "file_ref")) {
            FILE_INFO fi;
            int retval = fi.parse(xp);
            if (retval) return retval;
            dir_hier_path(
                fi.name.c_str(), config.upload_dir, config.uldl_dir_fanout, path
            );
            fi.path = path;
            fis.push_back(fi);
        }
    }
    return 0;
}
2003-08-14 00:02:15 +00:00
2008-05-29 21:54:18 +00:00
// Convenience wrapper around get_output_file_info():
// on success, copy the path of the result's first output file into "path".
// On failure "path" is left untouched and the error code is returned.
//
int get_output_file_path(RESULT& result, string& path) {
    FILE_INFO first_file;
    const int status = get_output_file_info(result, first_file);
    if (status == 0) {
        path = first_file.path;
    }
    return status;
}
// Convenience wrapper around get_output_file_infos():
// on success, replace the contents of "paths" with the path of every
// output file of the result, in the order the infos were returned.
// On failure "paths" is left untouched and the error code is returned.
//
int get_output_file_paths(RESULT& result, vector<string>& paths) {
    vector<FILE_INFO> infos;
    const int status = get_output_file_infos(result, infos);
    if (status != 0) {
        return status;
    }
    paths.clear();
    for (vector<FILE_INFO>::const_iterator it = infos.begin(); it != infos.end(); ++it) {
        paths.push_back(it->path);
    }
    return 0;
}
2008-03-05 20:16:10 +00:00
// The two names carried by a <file_ref> element:
// file_name (compared against a physical file name by get_logical_name())
// and open_name (returned as the logical name).
//
struct FILE_REF {
    char file_name[256];
    char open_name[256];

    // Parse the body of a <file_ref> element, up to </file_ref>.
    // Both names start out empty, so an element lacking one of them
    // leaves that member as "".
    // Returns 0 when the closing tag is reached, ERR_XML_PARSE if
    // xp.get() reports failure before that.
    //
    int parse(XML_PARSER& xp) {
        char tag[256];
        bool is_tag;

        strcpy(file_name, "");
        strcpy(open_name, "");
        while (!xp.get(tag, sizeof(tag), is_tag)) {
            if (!is_tag) continue;
            if (!strcmp(tag, "/file_ref")) {
                return 0;
            }
            if (xp.parse_str(tag, "file_name", file_name, sizeof(file_name))) continue;
            if (xp.parse_str(tag, "open_name", open_name, sizeof(open_name))) continue;
        }
        return ERR_XML_PARSE;
    }
};
// given a path returned by the above, get the corresponding logical name
//
2008-05-29 20:11:43 +00:00
// Map the physical path of an output file to its logical (open) name.
//
// result: the result whose xml_doc_in lists the <file_ref> elements
// path:   physical path; only the part after the last '/' is compared
// name:   set to the matching element's open_name on success
//
// Returns 0 on a match, ERR_NOT_FOUND if path contains no '/',
// and ERR_XML_PARSE if the document is exhausted without a match.
//
int get_logical_name(RESULT& result, string& path, string& name) {
    char tag[256];
    bool is_tag;
    MIOFILE mf;
    int retval;

    mf.init_buf_read(result.xml_doc_in);
    XML_PARSER xp(&mf);

    // Point at the file-name part inside the caller's string instead of
    // copying it: a fixed-size copy overflows on long paths, and shifting
    // the tail down with strcpy() on overlapping buffers is undefined.
    //
    const char* slash = strrchr(path.c_str(), '/');
    if (!slash) return ERR_NOT_FOUND;
    const char* phys_name = slash + 1;

    while (!xp.get(tag, sizeof(tag), is_tag)) {
        if (!is_tag) continue;
        if (!strcmp(tag, "result")) continue;
        if (!strcmp(tag, "file_ref")) {
            FILE_REF fr;
            retval = fr.parse(xp);
            // a malformed <file_ref> is skipped rather than treated as fatal
            if (retval) continue;
            if (!strcmp(phys_name, fr.file_name)) {
                name = fr.open_name;
                return 0;
            }
            continue;
        }
        xp.skip_unexpected(tag, false, 0);
    }
    return ERR_XML_PARSE;
}
2009-08-10 04:22:02 +00:00
//////////////// credit computation functions ///////////////
2005-03-07 23:15:53 +00:00
# define CREDIT_EPSILON .001
// If we have N correct results with nonzero claimed credit,
// compute a canonical credit as follows:
// - if N==0 (all claimed credits are infinitesimal), return CREDIT_EPSILON
// - if N==1, return that credit
// - if N==2, return min
// - if N>2, toss out min and max, return average of rest
2003-08-14 00:02:15 +00:00
//
2007-01-30 18:19:30 +00:00
// Compute a canonical credit from the claimed credits of the valid results.
// Only results with validate_state == VALIDATE_STATE_VALID and
// claimed_credit >= CREDIT_EPSILON are counted (N of them):
//   N == 0: return CREDIT_EPSILON
//   N == 1 or 2: return the lowest counted claim
//   N  > 2: drop one lowest and one highest claim, return the mean of the rest
// The workunit argument is unused.
//
double median_mean_credit(WORKUNIT& /*wu*/, vector<RESULT>& results) {
    int ilow = -1, ihigh = -1;
    double credit_low = 0, credit_high = 0;
    int nvalid = 0;
    unsigned int i;

    for (i=0; i<results.size(); i++) {
        RESULT& result = results[i];
        if (result.validate_state != VALIDATE_STATE_VALID) continue;
        if (result.claimed_credit < CREDIT_EPSILON) continue;
        if (ilow < 0) {
            ilow = ihigh = i;
            credit_low = credit_high = result.claimed_credit;
        } else {
            if (result.claimed_credit < credit_low) {
                ilow = i;
                credit_low = result.claimed_credit;
            }
            // ">=" (not ">") so that on ties ihigh moves to a later result.
            // ilow is then the first minimum and ihigh the last maximum,
            // which are distinct whenever two or more results are counted.
            // With ">" a set of equal claims left ilow == ihigh, so only
            // one result was dropped while the sum was divided by N-2.
            //
            if (result.claimed_credit >= credit_high) {
                ihigh = i;
                credit_high = result.claimed_credit;
            }
        }
        nvalid++;
    }

    if (nvalid == 0) return CREDIT_EPSILON;
    if (nvalid <= 2) return credit_low;

    double sum = 0;
    for (i=0; i<results.size(); i++) {
        if (i == (unsigned int) ilow) continue;
        if (i == (unsigned int) ihigh) continue;
        RESULT& result = results[i];
        if (result.validate_state != VALIDATE_STATE_VALID) continue;
        // apply the same filter as the counting pass,
        // so that the sum and the divisor cover the same results
        //
        if (result.claimed_credit < CREDIT_EPSILON) continue;
        sum += result.claimed_credit;
    }
    return sum/(nvalid-2);
}
2008-08-07 22:50:05 +00:00
// Get the credit to grant from the workunit's XML document.
// The xml_doc column is read from the database for wu.id and searched
// for a <credit> element; the results argument is unused.
//
// Returns 0 and sets "credit" if the element was found.
// Returns ERR_XML_PARSE otherwise; this includes the case where the
// database read itself failed, and "credit" is then left untouched.
//
int get_credit_from_wu(WORKUNIT& wu, vector<RESULT>&, double& credit) {
    double x;
    int retval;
    DB_WORKUNIT dbwu;

    dbwu.id = wu.id;

    // the column name and the search key must not carry surrounding blanks
    //
    retval = dbwu.get_field_str("xml_doc", dbwu.xml_doc, sizeof(dbwu.xml_doc));
    if (!retval) {
        if (parse_double(dbwu.xml_doc, "<credit>", x)) {
            credit = x;
            return 0;
        }
    }
    return ERR_XML_PARSE;
}
2008-03-07 21:13:01 +00:00
double stddev_credit ( WORKUNIT & wu , std : : vector < RESULT > & results ) {
double credit_low_bound = 0 , credit_high_bound = 0 ;
double penalize_credit_high_bound = 0 ;
double credit_avg = 0 ;
double credit = 0 ;
double old = 0 ;
double std_dev = 0 ;
int nvalid = 0 ;
unsigned int i ;
2009-08-10 04:22:02 +00:00
// calculate average
//
2008-03-07 21:13:01 +00:00
for ( i = 0 ; i < results . size ( ) ; i + + ) {
RESULT & result = results [ i ] ;
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
credit = credit + result . claimed_credit ;
nvalid + + ;
}
2008-06-26 21:20:07 +00:00
if ( nvalid = = 0 ) {
return CREDIT_EPSILON ;
2008-03-07 21:13:01 +00:00
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
credit_avg = credit / nvalid ;
nvalid = 0 ;
2009-08-10 04:22:02 +00:00
// calculate stddev difference
//
2008-03-07 21:13:01 +00:00
for ( i = 0 ; i < results . size ( ) ; i + + ) {
RESULT & result = results [ i ] ;
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
std_dev = pow ( credit_avg - result . claimed_credit , 2 ) + std_dev ;
nvalid + + ;
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
std_dev = std_dev / ( double ) nvalid ;
std_dev = sqrt ( std_dev ) ;
2009-08-10 04:22:02 +00:00
credit_low_bound = credit_avg - std_dev ;
2008-06-26 21:20:07 +00:00
if ( credit_low_bound > credit_avg * .85 ) {
credit_low_bound = credit_avg * .85 ;
2008-03-07 21:13:01 +00:00
}
credit_low_bound = credit_low_bound - 2.5 ;
2008-06-26 21:20:07 +00:00
if ( credit_low_bound < 1 ) credit_low_bound = 1 ;
2009-08-10 04:22:02 +00:00
credit_high_bound = credit_avg + std_dev ;
2008-06-26 21:20:07 +00:00
if ( credit_high_bound < credit_avg * 1.15 ) {
credit_high_bound = credit_avg * 1.15 ;
2008-03-07 21:13:01 +00:00
}
credit_high_bound = credit_high_bound + 5 ;
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
nvalid = 0 ;
credit = 0 ;
for ( i = 0 ; i < results . size ( ) ; i + + ) {
RESULT & result = results [ i ] ;
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
2008-06-26 21:20:07 +00:00
if ( result . claimed_credit < credit_high_bound & & result . claimed_credit > credit_low_bound ) {
credit = credit + result . claimed_credit ;
nvalid + + ;
2008-03-07 21:13:01 +00:00
} else {
2008-06-26 21:20:07 +00:00
log_messages . printf ( MSG_NORMAL ,
" [RESULT#%d %s] CREDIT_CALC_SD Discarding invalid credit %.1lf, avg %.1lf, low %.1lf, high %.1lf \n " ,
result . id , result . name , result . claimed_credit ,
credit_avg , credit_low_bound , credit_high_bound
) ;
2008-03-07 21:13:01 +00:00
}
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
double grant_credit ;
switch ( nvalid ) {
case 0 :
grant_credit = median_mean_credit ( wu , results ) ;
old = grant_credit ;
break ;
default :
2008-06-26 21:20:07 +00:00
grant_credit = credit / nvalid ;
old = median_mean_credit ( wu , results ) ;
2008-03-07 21:13:01 +00:00
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
// Log what happened
2008-06-26 21:20:07 +00:00
if ( old > grant_credit ) {
log_messages . printf ( MSG_DEBUG ,
" CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf Less awarded \n " ,
grant_credit , old
) ;
} else if ( old = = grant_credit ) {
log_messages . printf ( MSG_DEBUG ,
" CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf Same awarded \n " ,
grant_credit , old
) ;
2008-03-07 21:13:01 +00:00
} else {
2008-06-26 21:20:07 +00:00
log_messages . printf ( MSG_DEBUG ,
" CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf More awarded \n " ,
grant_credit , old
) ;
2008-03-07 21:13:01 +00:00
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
// penalize hosts that are claiming too much
2009-08-10 04:22:02 +00:00
//
penalize_credit_high_bound = grant_credit + 1.5 * std_dev ;
2008-06-26 21:20:07 +00:00
if ( penalize_credit_high_bound < grant_credit * 1.65 ) {
penalize_credit_high_bound = grant_credit * 1.65 ;
2008-03-07 21:13:01 +00:00
}
penalize_credit_high_bound = penalize_credit_high_bound + 20 ;
for ( i = 0 ; i < results . size ( ) ; i + + ) {
2008-06-26 21:20:07 +00:00
RESULT & result = results [ i ] ;
2008-03-07 21:13:01 +00:00
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
2008-06-26 21:20:07 +00:00
if ( result . claimed_credit > penalize_credit_high_bound ) {
result . granted_credit = grant_credit * 0.5 ;
log_messages . printf ( MSG_NORMAL ,
" [RESULT#%d %s] CREDIT_CALC_PENALTY Penalizing host for too high credit %.1lf, grant %.1lf, penalize %.1lf, stddev %.1lf, avg %.1lf, low %.1lf, high %.1lf \n " ,
result . id , result . name , result . claimed_credit , grant_credit ,
penalize_credit_high_bound , std_dev , credit_avg ,
credit_low_bound , credit_high_bound
) ;
2008-03-07 21:13:01 +00:00
}
}
return grant_credit ;
}
double two_credit ( WORKUNIT & wu , std : : vector < RESULT > & results ) {
2009-03-16 23:12:20 +00:00
int i ;
2008-06-26 21:20:07 +00:00
double credit = 0 ;
double credit_avg = 0 ;
double last_credit = 0 ;
int nvalid = 0 ;
2009-03-16 23:12:20 +00:00
// calculate average
//
2008-03-07 21:13:01 +00:00
for ( i = 0 ; i < results . size ( ) ; i + + ) {
RESULT & result = results [ i ] ;
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
credit = credit + result . claimed_credit ;
last_credit = result . claimed_credit ;
nvalid + + ;
}
2008-06-26 21:20:07 +00:00
2008-03-07 21:13:01 +00:00
// If more then 2 valid results, compute via stddev method
2009-03-16 23:12:20 +00:00
//
if ( nvalid > 2 ) return stddev_credit ( wu , results ) ;
2008-06-26 21:20:07 +00:00
2009-03-16 23:12:20 +00:00
// This case should never occur
//
if ( nvalid = = 0 ) {
log_messages . printf ( MSG_CRITICAL ,
" [WORKUNIT#%d %s] No valid results \n " , wu . id , wu . name
) ;
exit ( - 1 ) ;
}
2009-08-10 04:22:02 +00:00
2009-03-16 23:12:20 +00:00
credit_avg = credit / nvalid ;
2009-08-10 04:22:02 +00:00
2009-03-16 23:12:20 +00:00
// Next check to see if there is reasonably close agreement between the
// two results. A study performed at World Community Grid found that in
// 85% of cases the credit claimed were within 15% of the average claimed
// credit for the workunit. Return the average of the claimed credit
// in these cases.
//
if ( fabs ( last_credit - credit_avg ) < 0.15 * credit_avg ) return credit_avg ;
2008-03-07 21:13:01 +00:00
2009-03-16 23:12:20 +00:00
// If we get here, then there was not agreement between the claimed credits
// So attempt to use the average of the historical granted credit instead
//
2008-03-07 21:13:01 +00:00
DB_HOST host ;
2009-03-16 23:12:20 +00:00
double credit_hist_avg = 0 ;
double credit_min_dev = credit_avg ;
// default award in case nobody matches the cases
nvalid = 0 ;
2008-03-07 21:13:01 +00:00
double deviation = - 1 ;
for ( i = 0 ; i < results . size ( ) ; i + + ) {
RESULT & result = results [ i ] ;
if ( result . validate_state ! = VALIDATE_STATE_VALID ) continue ;
host . lookup_id ( result . hostid ) ;
2009-03-16 23:12:20 +00:00
// skip if host is new or the cpu time is very low
if ( host . total_credit < config . granted_credit_ramp_up
| | result . cpu_time < 30 ) continue ;
// This is for computing the average based on the computers history
credit_hist_avg = credit_hist_avg + result . cpu_time * host . credit_per_cpu_sec ;
nvalid + + ;
last_credit = result . cpu_time * host . credit_per_cpu_sec ;
// This if is for finding the result whose claimed credit is the least
// different from the computers historical average
//
if ( ( deviation < 0 | | deviation > fabs ( result . claimed_credit - result . cpu_time * host . credit_per_cpu_sec ) )
) {
2009-08-10 04:22:02 +00:00
deviation = fabs ( result . claimed_credit - result . cpu_time * host . credit_per_cpu_sec ) ;
credit_min_dev = result . claimed_credit ;
2008-03-07 21:13:01 +00:00
}
}
2009-08-10 04:22:02 +00:00
2009-03-16 23:12:20 +00:00
// If this case occurs, then this is becuase neither host has
// been participating long. As a result, returned the claimed
// credit average
if ( nvalid = = 0 ) {
2009-08-10 04:22:02 +00:00
log_messages . printf ( MSG_DEBUG ,
" [WORKUNIT#%d %s] No qualifying results " ,
wu . id , wu . name
) ;
2009-03-16 23:12:20 +00:00
return credit_avg ;
}
credit_hist_avg = credit_hist_avg / nvalid ;
2009-08-10 04:22:02 +00:00
2009-03-16 23:12:20 +00:00
// Check to see if the result.cpu_time*host.credit_per_cpu_sec are close.
// If so use the average of the historical credit
//
if ( fabs ( last_credit - credit_hist_avg ) < 0.1 * credit_hist_avg ) {
log_messages . printf ( MSG_DEBUG , " [WORKUNIT#%d %s] Method1: "
" Credit Average = %.2lf Actual Credit Granted = %.2lf \n " ,
2009-08-10 04:22:02 +00:00
wu . id , wu . name , credit_avg , credit_hist_avg
) ;
2009-03-16 23:12:20 +00:00
return credit_hist_avg ;
}
log_messages . printf ( MSG_DEBUG , " [WORKUNIT#%d %s] Method2: "
" Credit Average = %.2lf Actual Credit Granted = %.2lf \n " ,
2009-08-10 04:22:02 +00:00
wu . id , wu . name , credit_avg , credit_min_dev
) ;
2009-03-16 23:12:20 +00:00
return credit_min_dev ;
2008-03-07 21:13:01 +00:00
}
2009-08-10 04:22:02 +00:00
//////////// END CREDIT CALCULATION FUNCTIONS ///////////////
2005-01-02 18:29:53 +00:00
// Revision-control identification string ("$Id$" is a keyword placeholder).
// Not referenced by any code visible in this file; presumably kept so the
// file revision can be identified in a built binary - TODO confirm.
const char * BOINC_RCSID_07049e8a0e = " $Id$ " ;