From fab00f1100268e60cb55f6db2cf4fcf00bedead1 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 1 May 2014 19:51:58 -0700 Subject: [PATCH 1/6] Use new (non-deprecated) NvAPI call to get driver version --- client/gpu_nvidia.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp index 895dc91c99..b7b3f7f2b9 100644 --- a/client/gpu_nvidia.cpp +++ b/client/gpu_nvidia.cpp @@ -207,13 +207,24 @@ void COPROC_NVIDIA::get( __cuMemGetInfo = (CUDA_MGI)GetProcAddress( cudalib, "cuMemGetInfo" ); #ifndef SIM - NvAPI_Status nvapiStatus; - NV_DISPLAY_DRIVER_VERSION Version; - memset(&Version, 0, sizeof(Version)); - Version.version = NV_DISPLAY_DRIVER_VERSION_VER; - NvAPI_Initialize(); - nvapiStatus = NvAPI_GetDisplayDriverVersion(NULL, &Version); + NvAPI_ShortString ss; + NvU32 Version = 0; + NvAPI_SYS_GetDriverAndBranchVersion(&Version, ss); + +#if 0 + // NvAPI now provides an API for getting #cores :-) + // But not FLOPs per clock cycle :-( + // Anyway, don't use this for now because server code estimates FLOPS + // based on compute capability, so we may as well do the same + // + NvPhysicalGpuHandle GPUHandle[NVAPI_MAX_PHYSICAL_GPUS]; + NvU32 GpuCount, nc; + NvAPI_EnumPhysicalGPUs(GPUHandle, &GpuCount); + for (unsigned int i=0; i 100) continue; // e.g. 
9999 is an error #if defined(_WIN32) && !defined(SIM) - cc.display_driver_version = Version.drvVersion; + cc.display_driver_version = Version; #elif defined(__APPLE__) cc.display_driver_version = NSVersionOfRunTimeLibrary("cuda"); #else From 6a8eab73cd6d75c83785f86638f3573cd6bff50b Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 1 May 2014 21:03:49 -0700 Subject: [PATCH 2/6] replace tab characters with spaces --- client/acct_mgr.cpp | 2 +- client/app.cpp | 22 +++++++------- client/app_control.cpp | 54 +++++++++++++++++------------------ client/app_start.cpp | 16 +++++------ client/client_state.cpp | 2 +- client/client_state.h | 2 +- client/client_types.cpp | 2 +- client/cpu_sched.cpp | 48 +++++++++++++++---------------- client/cs_account.cpp | 20 ++++++------- client/cs_prefs.cpp | 4 +-- client/cs_scheduler.cpp | 2 +- client/cs_statefile.cpp | 12 ++++---- client/gpu_nvidia.cpp | 12 ++++---- client/gui_rpc_server_ops.cpp | 14 ++++----- client/http_curl.cpp | 8 +++--- client/mac_address.cpp | 54 +++++++++++++++++------------------ client/mac_address.h | 2 +- client/project.cpp | 6 ++-- client/project.h | 10 +++---- client/work_fetch.cpp | 2 +- 20 files changed, 147 insertions(+), 147 deletions(-) diff --git a/client/acct_mgr.cpp b/client/acct_mgr.cpp index efc1851d7b..e627bb9d84 100644 --- a/client/acct_mgr.cpp +++ b/client/acct_mgr.cpp @@ -278,7 +278,7 @@ int AM_ACCOUNT::parse(XML_PARSER& xp) { if (xp.parse_str("no_rsc", buf, sizeof(buf))) { handle_no_rsc(buf, true); - continue; + continue; } if (xp.parse_bool("dont_request_more_work", btemp)) { dont_request_more_work.set(btemp); diff --git a/client/app.cpp b/client/app.cpp index 9d902dacd7..fc3ff08274 100644 --- a/client/app.cpp +++ b/client/app.cpp @@ -190,7 +190,7 @@ int ACTIVE_TASK::preempt(int preempt_type, int reason) { result->name ); } - if (task_state() != PROCESS_EXECUTING) return 0; + if (task_state() != PROCESS_EXECUTING) return 0; return suspend(); } return 0; @@ -634,7 +634,7 @@ int 
ACTIVE_TASK::write_gui(MIOFILE& fout) { if (fd == 0 && elapsed_time > 0) { double est_time = wup->rsc_fpops_est/app_version->flops; double x = elapsed_time/est_time; - fd = 1 - exp(-x); + fd = 1 - exp(-x); } fout.printf( "\n" @@ -1082,7 +1082,7 @@ DWORD WINAPI throttler(LPVOID) { #else void* throttler(void*) { #endif - + // Initialize diagnostics framework for this thread // diagnostics_thread_init(); @@ -1094,7 +1094,7 @@ void* throttler(void*) { boinc_sleep(10); continue; } - double on, off, on_frac = gstate.global_prefs.cpu_usage_limit / 100; + double on, off, on_frac = gstate.global_prefs.cpu_usage_limit / 100; #if 0 // sub-second CPU throttling #define THROTTLE_PERIOD 1. @@ -1102,13 +1102,13 @@ void* throttler(void*) { off = THROTTLE_PERIOD - on; #else // throttling w/ at least 1 sec between suspend/resume - if (on_frac > .5) { - off = 1; - on = on_frac/(1.-on_frac); - } else { - on = 1; - off = (1.-on_frac)/on_frac; - } + if (on_frac > .5) { + off = 1; + on = on_frac/(1.-on_frac); + } else { + on = 1; + off = (1.-on_frac)/on_frac; + } #endif gstate.tasks_throttled = true; diff --git a/client/app_control.cpp b/client/app_control.cpp index e5dbab7a5d..964c4a6f99 100644 --- a/client/app_control.cpp +++ b/client/app_control.cpp @@ -794,10 +794,10 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { static double last_disk_check_time = 0; bool do_disk_check = false; bool did_anything = false; - char buf[256]; + char buf[256]; double ram_left = gstate.available_ram(); - double max_ram = gstate.max_available_ram(); + double max_ram = gstate.max_available_ram(); // Some slot dirs have lots of files, // so only check every min(disk_interval, 300) secs @@ -811,11 +811,11 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { atp = active_tasks[i]; if (atp->task_state() != PROCESS_EXECUTING) continue; if (!atp->result->non_cpu_intensive() && (atp->elapsed_time > atp->max_elapsed_time)) { - sprintf(buf, "exceeded elapsed time limit %.2f (%.2fG/%.2fG)", + sprintf(buf, 
"exceeded elapsed time limit %.2f (%.2fG/%.2fG)", atp->max_elapsed_time, atp->result->wup->rsc_fpops_bound/1e9, atp->result->avp->flops/1e9 - ); + ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf ); @@ -830,10 +830,10 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { // and I don't think we can expect projects to provide // accurate bounds. // - if (atp->procinfo.working_set_size_smoothed > atp->max_mem_usage) { - sprintf(buf, "working set size > workunit.rsc_memory_bound: %.2fMB > %.2fMB", - atp->procinfo.working_set_size_smoothed/MEGA, atp->max_mem_usage/MEGA - ); + if (atp->procinfo.working_set_size_smoothed > atp->max_mem_usage) { + sprintf(buf, "working set size > workunit.rsc_memory_bound: %.2fMB > %.2fMB", + atp->procinfo.working_set_size_smoothed/MEGA, atp->max_mem_usage/MEGA + ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf @@ -843,10 +843,10 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { continue; } #endif - if (atp->procinfo.working_set_size_smoothed > max_ram) { - sprintf(buf, "working set size > client RAM limit: %.2fMB > %.2fMB", - atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA - ); + if (atp->procinfo.working_set_size_smoothed > max_ram) { + sprintf(buf, "working set size > client RAM limit: %.2fMB > %.2fMB", + atp->procinfo.working_set_size_smoothed/MEGA, max_ram/MEGA + ); msg_printf(atp->result->project, MSG_INFO, "Aborting task %s: %s", atp->result->name, buf @@ -856,10 +856,10 @@ bool ACTIVE_TASK_SET::check_rsc_limits_exceeded() { continue; } if (do_disk_check || atp->peak_disk_usage == 0) { - if (atp->check_max_disk_exceeded()) { - did_anything = true; - continue; - } + if (atp->check_max_disk_exceeded()) { + did_anything = true; + continue; + } } // don't count RAM usage of non-CPU-intensive jobs @@ -1441,17 +1441,17 @@ void ACTIVE_TASK_SET::get_msgs() { } last_time = gstate.now; - double et_diff, et_diff_throttle; - switch 
(gstate.suspend_reason) { - case 0: - case SUSPEND_REASON_CPU_THROTTLE: - et_diff = delta_t; - et_diff_throttle = delta_t * gstate.global_prefs.cpu_usage_limit/100; - break; - default: - et_diff = et_diff_throttle = 0; - break; - } + double et_diff, et_diff_throttle; + switch (gstate.suspend_reason) { + case 0: + case SUSPEND_REASON_CPU_THROTTLE: + et_diff = delta_t; + et_diff_throttle = delta_t * gstate.global_prefs.cpu_usage_limit/100; + break; + default: + et_diff = et_diff_throttle = 0; + break; + } for (i=0; iproject, MSG_INFO, - "writing app_init.xml for %s; slot %d rt %s gpu_device_num %d", result->name, slot, aid.gpu_type, aid.gpu_device_num - ); + msg_printf(wup->project, MSG_INFO, + "writing app_init.xml for %s; slot %d rt %s gpu_device_num %d", result->name, slot, aid.gpu_type, aid.gpu_device_num + ); #endif sprintf(init_data_path, "%s/%s", slot_dir, INIT_DATA_FILE); @@ -1152,11 +1152,11 @@ int ACTIVE_TASK::resume_or_start(bool first_time) { return 0; } if (log_flags.task && first_time) { - msg_printf(result->project, MSG_INFO, - "Starting task %s", result->name - ); - } - if (log_flags.cpu_sched) { + msg_printf(result->project, MSG_INFO, + "Starting task %s", result->name + ); + } + if (log_flags.cpu_sched) { char buf[256]; strcpy(buf, ""); if (strlen(app_version->plan_class)) { diff --git a/client/client_state.cpp b/client/client_state.cpp index 7db3fd29cb..38582b50bb 100644 --- a/client/client_state.cpp +++ b/client/client_state.cpp @@ -113,7 +113,7 @@ CLIENT_STATE::CLIENT_STATE() #else core_client_version.prerelease = false; #endif - strcpy(language, ""); + strcpy(language, ""); exit_after_app_start_secs = 0; app_started = 0; exit_before_upload = false; diff --git a/client/client_state.h b/client/client_state.h index 9d68ea7355..2bcb395eec 100644 --- a/client/client_state.h +++ b/client/client_state.h @@ -109,7 +109,7 @@ struct CLIENT_STATE { DEVICE_STATUS device_status; double device_status_time; - char language[16]; // ISO language code reported by 
GUI + char language[16]; // ISO language code reported by GUI VERSION_INFO core_client_version; string statefile_platform_name; int file_xfer_giveup_period; diff --git a/client/client_types.cpp b/client/client_types.cpp index b1dbbc63f8..6bf31a21d0 100644 --- a/client/client_types.cpp +++ b/client/client_types.cpp @@ -759,7 +759,7 @@ int APP_VERSION::parse(XML_PARSER& xp) { if (xp.match_tag("/app_version")) { rt = gpu_usage.rsc_type; if (rt) { - dont_throttle = true; // don't throttle GPU apps + dont_throttle = true; // don't throttle GPU apps if (strstr(plan_class, "opencl")) { if (!coprocs.coprocs[rt].have_opencl) { msg_printf(0, MSG_INFO, diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp index 853a6f34b9..da46e9abd1 100644 --- a/client/cpu_sched.cpp +++ b/client/cpu_sched.cpp @@ -135,13 +135,13 @@ struct PROC_RESOURCES { bool can_schedule(RESULT* rp, ACTIVE_TASK* atp) { if (max_concurrent_exceeded(rp)) return false; if (atp) { - // don't schedule if something's pending - // - switch (atp->task_state()) { - case PROCESS_ABORT_PENDING: - case PROCESS_QUIT_PENDING: - return false; - } + // don't schedule if something's pending + // + switch (atp->task_state()) { + case PROCESS_ABORT_PENDING: + case PROCESS_QUIT_PENDING: + return false; + } if (gstate.retry_shmem_time > gstate.now) { if (atp->app_client_shm.shm == NULL) { if (log_flags.cpu_sched_debug) { @@ -435,9 +435,9 @@ RESULT* first_coproc_result(int rsc_type) { RESULT* rp = gstate.results[i]; if (rp->resource_type() != rsc_type) continue; if (!rp->runnable()) { - //msg_printf(rp->project, MSG_INFO, "not runnable: %s", rp->name); - continue; - } + //msg_printf(rp->project, MSG_INFO, "not runnable: %s", rp->name); + continue; + } if (rp->non_cpu_intensive()) continue; if (rp->already_selected) continue; prio = rp->project->sched_priority; @@ -1119,10 +1119,10 @@ static inline void increment_pending_usage( int j = rp->coproc_indices[i]; cp->pending_usage[j] += x; if (log_flags.coproc_debug) { - 
msg_printf(rp->project, MSG_INFO, - "[coproc] %s instance %d; %f pending for %s", cp->type, i, x, rp->name - ); - if (cp->pending_usage[j] > 1) { + msg_printf(rp->project, MSG_INFO, + "[coproc] %s instance %d; %f pending for %s", cp->type, i, x, rp->name + ); + if (cp->pending_usage[j] > 1) { msg_printf(rp->project, MSG_INFO, "[coproc] huh? %s %d %s pending usage > 1", cp->type, i, rp->name @@ -1406,8 +1406,8 @@ static inline void assign_coprocs(vector& jobs) { ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); if (!atp) continue; if (is_gpu_task_running(atp)) { - increment_pending_usage(rp, usage, cp); - } + increment_pending_usage(rp, usage, cp); + } } vector::iterator job_iter; @@ -1783,14 +1783,14 @@ bool CLIENT_STATE::enforce_run_list(vector& run_list) { atp->preempt(preempt_type); break; case PROCESS_SUSPENDED: - // remove from memory GPU jobs that were suspended by CPU throttling - // and are now unscheduled. - // - if (atp->result->uses_coprocs()) { - atp->preempt(REMOVE_ALWAYS); - request_schedule_cpus("removed suspended GPU task"); - break; - } + // remove from memory GPU jobs that were suspended by CPU throttling + // and are now unscheduled. 
+ // + if (atp->result->uses_coprocs()) { + atp->preempt(REMOVE_ALWAYS); + request_schedule_cpus("removed suspended GPU task"); + break; + } // Handle the case where user changes prefs from // "leave in memory" to "remove from memory"; diff --git a/client/cs_account.cpp b/client/cs_account.cpp index 9ee72aed36..74a2c028d6 100644 --- a/client/cs_account.cpp +++ b/client/cs_account.cpp @@ -100,7 +100,7 @@ int PROJECT::parse_account(FILE* in) { char buf2[256]; int retval; bool in_project_prefs = false, btemp; - double dtemp; + double dtemp; for (int i=0; ishow_no_work_notice(); diff --git a/client/cs_statefile.cpp b/client/cs_statefile.cpp index 083b95683d..a353d194dd 100644 --- a/client/cs_statefile.cpp +++ b/client/cs_statefile.cpp @@ -474,9 +474,9 @@ int CLIENT_STATE::parse_state_file_aux(const char* fname) { if (xp.parse_int("core_client_release", old_release)) { continue; } - if (xp.parse_str("language", language, sizeof(language))) { - continue; - } + if (xp.parse_str("language", language, sizeof(language))) { + continue; + } if (xp.match_tag("proxy_info")) { retval = gui_proxy_info.parse(xp); if (retval) { @@ -764,9 +764,9 @@ int CLIENT_STATE::write_state(MIOFILE& f) { new_version_check_time, all_projects_list_check_time ); - if (strlen(language)) { - f.printf("%s\n", language); - } + if (strlen(language)) { + f.printf("%s\n", language); + } if (newer_version.size()) { f.printf("%s\n", newer_version.c_str()); } diff --git a/client/gpu_nvidia.cpp b/client/gpu_nvidia.cpp index ce705a464f..215ea6562c 100644 --- a/client/gpu_nvidia.cpp +++ b/client/gpu_nvidia.cpp @@ -213,14 +213,14 @@ void COPROC_NVIDIA::get( NvAPI_SYS_GetDriverAndBranchVersion(&Version, ss); #if 0 - // NvAPI now provides an API for getting #cores :-) - // But not FLOPs per clock cycle :-( - // Anyway, don't use this for now because server code estimates FLOPS - // based on compute capability, so we may as well do the same - // + // NvAPI now provides an API for getting #cores :-) + // But not 
FLOPs per clock cycle :-( + // Anyway, don't use this for now because server code estimates FLOPS + // based on compute capability, so we may as well do the same + // NvPhysicalGpuHandle GPUHandle[NVAPI_MAX_PHYSICAL_GPUS]; NvU32 GpuCount, nc; - NvAPI_EnumPhysicalGPUs(GPUHandle, &GpuCount); + NvAPI_EnumPhysicalGPUs(GPUHandle, &GpuCount); for (unsigned int i=0; i\n"); - return; - } - } - grc.mfout.printf("no language found\n"); + gstate.set_client_state_dirty("set_language"); + grc.mfout.printf("\n"); + return; + } + } + grc.mfout.printf("no language found\n"); } static void handle_report_device_status(GUI_RPC_CONN& grc) { diff --git a/client/http_curl.cpp b/client/http_curl.cpp index 56c0aee198..c0c113bd35 100644 --- a/client/http_curl.cpp +++ b/client/http_curl.cpp @@ -590,10 +590,10 @@ int HTTP_OP::libcurl_exec( // pcurlList = curl_slist_append(pcurlList, g_content_type); - if (strlen(gstate.language)) { - sprintf(buf, "Accept-Language: %s", gstate.language); - pcurlList = curl_slist_append(pcurlList, buf); - } + if (strlen(gstate.language)) { + sprintf(buf, "Accept-Language: %s", gstate.language); + pcurlList = curl_slist_append(pcurlList, buf); + } // set the file offset for resumable downloads // diff --git a/client/mac_address.cpp b/client/mac_address.cpp index 7b5a8b6f7a..0c2cc32198 100644 --- a/client/mac_address.cpp +++ b/client/mac_address.cpp @@ -149,27 +149,27 @@ GetMACAddress(io_iterator_t intfIterator, char* buffer) int get_mac_address(char* address) { #if defined(_WIN32) IP_ADAPTER_INFO AdapterInfo[16]; // Allocate information for up to 16 NICs - DWORD dwBufLen = sizeof(AdapterInfo); // Save memory size of buffer - // Call GetAdapterInfo - DWORD dwStatus = GetAdaptersInfo(AdapterInfo, &dwBufLen); + DWORD dwBufLen = sizeof(AdapterInfo); // Save memory size of buffer + // Call GetAdapterInfo + DWORD dwStatus = GetAdaptersInfo(AdapterInfo, &dwBufLen); - if(dwStatus != ERROR_SUCCESS) { - return -1; - } - strcpy(address, ""); - PIP_ADAPTER_INFO 
pAdapterInfo = AdapterInfo; // Contains pointer to current adapter info - while (pAdapterInfo) { - sprintf(address, "%02x:%02x:%02x:%02x:%02x:%02x", - pAdapterInfo->Address[0], pAdapterInfo->Address[1], pAdapterInfo->Address[2], - pAdapterInfo->Address[3], pAdapterInfo->Address[4], pAdapterInfo->Address[5] - ); - if (pAdapterInfo->Type == MIB_IF_TYPE_ETHERNET) break; - pAdapterInfo = pAdapterInfo->Next; - } - return 0; + if(dwStatus != ERROR_SUCCESS) { + return -1; + } + strcpy(address, ""); + PIP_ADAPTER_INFO pAdapterInfo = AdapterInfo; // Contains pointer to current adapter info + while (pAdapterInfo) { + sprintf(address, "%02x:%02x:%02x:%02x:%02x:%02x", + pAdapterInfo->Address[0], pAdapterInfo->Address[1], pAdapterInfo->Address[2], + pAdapterInfo->Address[3], pAdapterInfo->Address[4], pAdapterInfo->Address[5] + ); + if (pAdapterInfo->Type == MIB_IF_TYPE_ETHERNET) break; + pAdapterInfo = pAdapterInfo->Next; + } + return 0; #elif defined(__APPLE__) - kern_return_t kernResult = KERN_SUCCESS; // on PowerPC this is an int (4 bytes) + kern_return_t kernResult = KERN_SUCCESS; // on PowerPC this is an int (4 bytes) /* * error number layout as follows (see mach/error.h and IOKitLib/IOReturn.h): * @@ -177,21 +177,21 @@ int get_mac_address(char* address) { * | system(6) | subsystem(12) | code(14) | */ io_iterator_t intfIterator; - int retval = 0; + int retval = 0; kernResult = FindEthernetInterfaces(&intfIterator); if (KERN_SUCCESS != kernResult) { - fprintf(stderr, "FindEthernetInterfaces returned 0x%08x\n", kernResult); - retval = -1; - } else { + fprintf(stderr, "FindEthernetInterfaces returned 0x%08x\n", kernResult); + retval = -1; + } else { kernResult = GetMACAddress(intfIterator, address); if (KERN_SUCCESS != kernResult) { - fprintf(stderr, "GetMACAddress returned 0x%08x\n", kernResult); - retval = -1; - } + fprintf(stderr, "GetMACAddress returned 0x%08x\n", kernResult); + retval = -1; + } } IOObjectRelease(intfIterator); - return retval; + return retval; #elif 
defined(SIOCGIFCONF) || defined(SIOCGLIFCONF) char buf[1024]; @@ -235,7 +235,7 @@ int get_mac_address(char* address) { ifr = ifc.lifc_req; nInterfaces = ifc.lifc_len / sizeof(struct lifreq); #else - ifr = ifc.ifc_req; + ifr = ifc.ifc_req; nInterfaces = ifc.ifc_len / sizeof(struct ifreq); #endif strcpy(address, ""); diff --git a/client/mac_address.h b/client/mac_address.h index ae9833d38e..d4a1727052 100644 --- a/client/mac_address.h +++ b/client/mac_address.h @@ -21,7 +21,7 @@ // Get the MAC address of a network interface. // If there's more than one, prefer eth0 // Note: the code on Mac OS X requires the following linkage flags -// -framework CoreFoundation -lIOKit +// -framework CoreFoundation -lIOKit // int get_mac_address(char* address); diff --git a/client/project.cpp b/client/project.cpp index 33a6867627..ea3968f5f0 100644 --- a/client/project.cpp +++ b/client/project.cpp @@ -917,7 +917,7 @@ void PROJECT::show_no_work_notice() { bool banned_by_user = no_rsc_pref[i] || no_rsc_config[i] || no_rsc_ams[i]; if (!banned_by_user) { // work for this resource is possible; return - notices.remove_notices(this, REMOVE_NO_WORK_MSG); + notices.remove_notices(this, REMOVE_NO_WORK_MSG); return; } if (no_rsc_pref[i]) show_prefs = true; @@ -928,14 +928,14 @@ void PROJECT::show_no_work_notice() { if (!user_action_possible) { // no work is possible because project has no apps for any resource // - notices.remove_notices(this, REMOVE_NO_WORK_MSG); + notices.remove_notices(this, REMOVE_NO_WORK_MSG); return; } bool first = true; string x; x = NO_WORK_MSG; - x += " "; + x += " "; x += _("To fix this, you can "); if (show_prefs) { first = false; diff --git a/client/project.h b/client/project.h index 014d5e14be..43258f4945 100644 --- a/client/project.h +++ b/client/project.h @@ -327,12 +327,12 @@ struct PROJECT : PROJ_AM { no_rsc_ams[i] = false; } - ams_resource_share = -1; + ams_resource_share = -1; - // parse the account file to get right resource share - // in case AMS had set it 
- // - parse_account_file(); + // parse the account file to get right resource share + // in case AMS had set it + // + parse_account_file(); } #ifdef SIM diff --git a/client/work_fetch.cpp b/client/work_fetch.cpp index cc1e430287..18e7eba0d5 100644 --- a/client/work_fetch.cpp +++ b/client/work_fetch.cpp @@ -528,7 +528,7 @@ void WORK_FETCH::piggyback_work_request(PROJECT* p) { for (unsigned int j=0; jsched_priority == p->sched_priority) continue; + if (p2->sched_priority == p->sched_priority) continue; if (p2->pwf.cant_fetch_work_reason) { DEBUG(msg_printf(p, MSG_INFO, "piggyback: %s can't fetch work", p2->project_name);) continue; From 72d1369342856df6a35d709c4fe4bac9964be5cc Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 1 May 2014 23:53:55 -0700 Subject: [PATCH 3/6] client: code shuffle; move GPU scheduling code to new file --- client/Makefile.am | 1 + client/app.h | 6 + client/coproc_sched.cpp | 448 ++++++++++++++++++++++++++++++++++++++++ client/coproc_sched.h | 22 ++ client/cpu_sched.cpp | 430 +------------------------------------- 5 files changed, 479 insertions(+), 428 deletions(-) create mode 100644 client/coproc_sched.cpp create mode 100644 client/coproc_sched.h diff --git a/client/Makefile.am b/client/Makefile.am index 9144c6a901..693432ab72 100644 --- a/client/Makefile.am +++ b/client/Makefile.am @@ -42,6 +42,7 @@ boinc_client_SOURCES = \ client_msgs.cpp \ client_state.cpp \ client_types.cpp \ + coproc_sched.cpp \ cpu_sched.cpp \ cs_account.cpp \ cs_apps.cpp \ diff --git a/client/app.h b/client/app.h index c18f037655..8221f8c7fe 100644 --- a/client/app.h +++ b/client/app.h @@ -232,6 +232,12 @@ struct ACTIVE_TASK { int abort_task(int exit_status, const char*); // can be called whether or not process exists + // is the GPU task running or suspended (due to CPU throttling) + // + inline bool is_gpu_task_running() { + int s = task_state(); + return s == PROCESS_EXECUTING || s == PROCESS_SUSPENDED; + } // Implementation stuff related to termination 
// diff --git a/client/coproc_sched.cpp b/client/coproc_sched.cpp new file mode 100644 index 0000000000..4d04562318 --- /dev/null +++ b/client/coproc_sched.cpp @@ -0,0 +1,448 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2014 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see <http://www.gnu.org/licenses/>. + +#include + +#include "client_msgs.h" +#include "client_state.h" +#include "client_types.h" +#include "coproc.h" +#include "result.h" + +#include "coproc_sched.h" + +using std::vector; + +////////// Coprocessor scheduling //////////////// +// +// theory of operation: +// +// Jobs can use one or more integral instances, or a fractional instance +// +// RESULT::coproc_indices +// for a running job, the coprocessor instances it's using +// COPROC::pending_usage[]: for each instance, its usage by running jobs +// Note: "running" includes jobs suspended due to CPU throttling. +// That's the only kind of suspended GPU job. 
+// COPROC::usage[]: for each instance, its usage +// +// enforce_run_list() calls assign_coprocs(), +// which assigns coproc instances to scheduled jobs, +// and prunes jobs for which we can't make an assignment +// (the job list is in order of decreasing priority) +// +// assign_coprocs(): +// clear usage and pending_usage of all instances +// for each running/suspended job J +// increment pending_usage for the instances assigned to J +// for each scheduled job J +// if J is running +// if J's assignment fits +// confirm assignment: dec pending_usage, inc usage +// else +// prune J +// else +// if J.usage is fractional +// look for an instance that's already fractionally assigned +// if that fails, look for a free instance +// if that fails, prune J +// else +// if there are enough instances with usage=0 +// assign instances with pending_usage = usage = 0 +// (avoid preempting running jobs) +// if need more, assign instances with usage = 0 +// else +// prune J + +static inline void increment_pending_usage( + RESULT* rp, double usage, COPROC* cp +) { + double x = (usage<1)?usage:1; + for (int i=0; icoproc_indices[i]; + cp->pending_usage[j] += x; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] %s instance %d; %f pending for %s", cp->type, i, x, rp->name + ); + if (cp->pending_usage[j] > 1) { + msg_printf(rp->project, MSG_INFO, + "[coproc] huh? %s %d %s pending usage > 1", + cp->type, i, rp->name + ); + } + } + } +} + +// check the GPU assignment for a currently-running app. +// Note: don't check available RAM. +// It may not be known (e.g. 
NVIDIA) and in any case, +// if the app is still running, it has enough RAM +// +static inline bool current_assignment_ok( + RESULT* rp, double usage, COPROC* cp, bool& defer_sched +) { + defer_sched = false; + double x = (usage<1)?usage:1; + for (int i=0; icoproc_indices[i]; + if (cp->usage[j] + x > 1) { + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] %s %f instance of device %d already assigned to task %s", + cp->type, x, j, rp->name + ); + } + return false; + } + } + return true; +} + +static inline void confirm_current_assignment( + RESULT* rp, double usage, COPROC* cp +) { + double x = (usage<1)?usage:1; + for (int i=0; icoproc_indices[i]; + cp->usage[j] +=x; + cp->pending_usage[j] -=x; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] %s instance %d: confirming %f instance for %s", + cp->type, j, x, rp->name + ); + } +#if DEFER_ON_GPU_AVAIL_RAM + cp->available_ram_temp[j] -= rp->avp->gpu_ram; +#endif + } +} + +static inline bool get_fractional_assignment( + RESULT* rp, double usage, COPROC* cp, bool& defer_sched +) { + int i; + defer_sched = false; + + // try to assign an instance that's already fractionally assigned + // + for (i=0; icount; i++) { + if (gpu_excluded(rp->app, *cp, i)) { + continue; + } + if ((cp->usage[i] || cp->pending_usage[i]) + && (cp->usage[i] + cp->pending_usage[i] + usage <= 1) + ) { +#if DEFER_ON_GPU_AVAIL_RAM + if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { + defer_sched = true; + continue; + } + cp->available_ram_temp[i] -= rp->avp->gpu_ram; +#endif + rp->coproc_indices[0] = i; + cp->usage[i] += usage; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Assigning %f of %s instance %d to %s", + usage, cp->type, i, rp->name + ); + } + return true; + } + } + + // failing that, assign an unreserved instance + // + for (i=0; icount; i++) { + if (gpu_excluded(rp->app, *cp, i)) { + continue; + } + if (!cp->usage[i]) { +#if DEFER_ON_GPU_AVAIL_RAM 
+ if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { + defer_sched = true; + continue; + } + cp->available_ram_temp[i] -= rp->avp->gpu_ram; +#endif + rp->coproc_indices[0] = i; + cp->usage[i] += usage; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Assigning %f of %s free instance %d to %s", + usage, cp->type, i, rp->name + ); + } + return true; + } + } + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Insufficient %s for %s: need %f", + cp->type, rp->name, usage + ); + } + + return false; +} + +static inline bool get_integer_assignment( + RESULT* rp, double usage, COPROC* cp, bool& defer_sched +) { + int i; + defer_sched = false; + + // make sure we have enough free instances + // + int nfree = 0; + for (i=0; icount; i++) { + if (gpu_excluded(rp->app, *cp, i)) { + continue; + } + if (!cp->usage[i]) { +#if DEFER_ON_GPU_AVAIL_RAM + if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { + defer_sched = true; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] task %s needs %.0fMB RAM, %s GPU %d has %.0fMB available", + rp->name, rp->avp->gpu_ram/MEGA, cp->type, i, cp->available_ram_temp[i]/MEGA + ); + } + continue; + }; +#endif + nfree++; + } + } + if (nfree < usage) { + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Insufficient %s for %s; need %d, available %d", + cp->type, rp->name, (int)usage, nfree + ); + if (defer_sched) { + msg_printf(rp->project, MSG_INFO, + "[coproc] some instances lack available memory" + ); + } + } + return false; + } + + int n = 0; + + // assign non-pending instances first + + for (i=0; icount; i++) { + if (gpu_excluded(rp->app, *cp, i)) { + continue; + } + if (!cp->usage[i] + && !cp->pending_usage[i] +#if DEFER_ON_GPU_AVAIL_RAM + && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) +#endif + ) { + cp->usage[i] = 1; +#if DEFER_ON_GPU_AVAIL_RAM + cp->available_ram_temp[i] -= rp->avp->gpu_ram; +#endif + 
rp->coproc_indices[n++] = i; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Assigning %s instance %d to %s", + cp->type, i, rp->name + ); + } + if (n == usage) return true; + } + } + + // if needed, assign pending instances + + for (i=0; icount; i++) { + if (gpu_excluded(rp->app, *cp, i)) { + continue; + } + if (!cp->usage[i] +#if DEFER_ON_GPU_AVAIL_RAM + && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) +#endif + ) { + cp->usage[i] = 1; +#if DEFER_ON_GPU_AVAIL_RAM + cp->available_ram_temp[i] -= rp->avp->gpu_ram; +#endif + rp->coproc_indices[n++] = i; + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] Assigning %s pending instance %d to %s", + cp->type, i, rp->name + ); + } + if (n == usage) return true; + } + } + if (log_flags.coproc_debug) { + msg_printf(rp->project, MSG_INFO, + "[coproc] huh??? ran out of %s instances for %s", + cp->type, rp->name + ); + } + return false; +} + +static inline void mark_as_defer_sched(RESULT* rp) { + int i = rp->avp->gpu_usage.rsc_type; + if (i) { + rp->project->rsc_defer_sched[i] = true; + } + rp->schedule_backoff = gstate.now + 300; // try again in 5 minutes + gstate.request_schedule_cpus("insufficient GPU RAM"); +} + +#if DEFER_ON_GPU_AVAIL_RAM +static void copy_available_ram(COPROC& cp, const char* name) { + int rt = rsc_index(name); + if (rt > 0) { + for (int i=0; i& jobs) { + unsigned int i; + COPROC* cp; + double usage; + + coprocs.clear_usage(); +#if DEFER_ON_GPU_AVAIL_RAM + if (coprocs.have_nvidia()) { + copy_available_ram(coprocs.nvidia, GPU_TYPE_NVIDIA); + } + if (coprocs.have_ati()) { + copy_available_ram(coprocs.ati, GPU_TYPE_ATI); + } + if (coprocs.have_intel()) { + copy_available_ram(coprocs.intel_gpu, GPU_TYPE_INTEL); + } +#endif + + // fill in pending usage + // + for (i=0; iavp; + int rt = avp->gpu_usage.rsc_type; + if (rt) { + usage = avp->gpu_usage.usage; + cp = &coprocs.coprocs[rt]; + } else { + continue; + } + ACTIVE_TASK* atp = 
gstate.lookup_active_task_by_result(rp); + if (!atp) continue; + if (atp->is_gpu_task_running()) { + increment_pending_usage(rp, usage, cp); + } + } + + vector::iterator job_iter; + job_iter = jobs.begin(); + while (job_iter != jobs.end()) { + RESULT* rp = *job_iter; + APP_VERSION* avp = rp->avp; + int rt = avp->gpu_usage.rsc_type; + if (rt) { + usage = avp->gpu_usage.usage; + cp = &coprocs.coprocs[rt]; + } else { + job_iter++; + continue; + } + + ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); + bool defer_sched; + if (atp && atp->is_gpu_task_running()) { + if (current_assignment_ok(rp, usage, cp, defer_sched)) { + confirm_current_assignment(rp, usage, cp); + job_iter++; + } else { + if (defer_sched) { + mark_as_defer_sched(rp); + } + job_iter = jobs.erase(job_iter); + } + } else { + if (usage < 1) { + if (get_fractional_assignment(rp, usage, cp, defer_sched)) { + job_iter++; + } else { + if (defer_sched) { + mark_as_defer_sched(rp); + } + job_iter = jobs.erase(job_iter); + } + } else { + if (get_integer_assignment(rp, usage, cp, defer_sched)) { + job_iter++; + } else { + if (defer_sched) { + mark_as_defer_sched(rp); + } + job_iter = jobs.erase(job_iter); + } + } + } + } + +#if 0 + // enforce "don't use GPUs while active" pref in NVIDIA case; + // it applies only to GPUs running a graphics app + // + if (gstate.host_info.coprocs.nvidia.count && gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) { + job_iter = jobs.begin(); + while (job_iter != jobs.end()) { + RESULT* rp = *job_iter; + if (!rp->avp->ncudas) { + job_iter++; + continue; + } + ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); + bool some_gpu_busy = false; + for (i=0; iavp->ncudas; i++) { + int dev = atp->coproc_indices[i]; + if (gstate.host_info.coprocs.cuda.running_graphics_app[dev]) { + some_gpu_busy = true; + break; + } + } + if (some_gpu_busy) { + job_iter = jobs.erase(job_iter); + } else { + job_iter++; + } + } + } +#endif +} diff --git 
a/client/coproc_sched.h b/client/coproc_sched.h new file mode 100644 index 0000000000..4525558cb9 --- /dev/null +++ b/client/coproc_sched.h @@ -0,0 +1,22 @@ +// This file is part of BOINC. +// http://boinc.berkeley.edu +// Copyright (C) 2014 University of California +// +// BOINC is free software; you can redistribute it and/or modify it +// under the terms of the GNU Lesser General Public License +// as published by the Free Software Foundation, +// either version 3 of the License, or (at your option) any later version. +// +// BOINC is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// See the GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with BOINC. If not, see . + +#include + +struct RESULT; + +extern void assign_coprocs(std::vector& jobs); diff --git a/client/cpu_sched.cpp b/client/cpu_sched.cpp index da46e9abd1..5634f944a1 100644 --- a/client/cpu_sched.cpp +++ b/client/cpu_sched.cpp @@ -74,6 +74,7 @@ #include "app_config.h" #include "client_msgs.h" #include "client_state.h" +#include "coproc_sched.h" #include "log_flags.h" #include "project.h" #include "result.h" @@ -84,13 +85,6 @@ using std::list; static double rec_sum; -// is the GPU task running or suspended (due to CPU throttling) -// -static inline bool is_gpu_task_running(ACTIVE_TASK* atp) { - int s = atp->task_state(); - return s == PROCESS_EXECUTING || s == PROCESS_SUSPENDED; -} - // used in make_run_list() to keep track of resources used // by jobs tentatively scheduled so far // @@ -192,7 +186,7 @@ struct PROC_RESOURCES { bool dont_reserve = rsc_work_fetch[rt].has_exclusions && atp != NULL - && is_gpu_task_running(atp); + && atp->is_gpu_task_running(); if (!dont_reserve) { reserve_coprocs(*rp); } @@ -1070,426 +1064,6 @@ void CLIENT_STATE::append_unfinished_time_slice(vector 
&run_list) { } } -////////// Coprocessor scheduling //////////////// -// -// theory of operation: -// -// Jobs can use one or more integral instances, or a fractional instance -// -// RESULT::coproc_indices -// for a running job, the coprocessor instances it's using -// COPROC::pending_usage[]: for each instance, its usage by running jobs -// Note: "running" includes jobs suspended due to CPU throttling. -// That's the only kind of suspended GPU job. -// CORPOC::usage[]: for each instance, its usage -// -// enforce_run_list() calls assign_coprocs(), -// which assigns coproc instances to scheduled jobs, -// and prunes jobs for which we can't make an assignment -// (the job list is in order of decreasing priority) -// -// assign_coprocs(): -// clear usage and pending_usage of all instances -// for each running/suspended job J -// increment pending_usage for the instances assigned to J -// for each scheduled job J -// if J is running -// if J's assignment fits -// confirm assignment: dec pending_usage, inc usage -// else -// prune J -// else -// if J.usage is fractional -// look for an instance that's already fractionally assigned -// if that fails, look for a free instance -// if that fails, prune J -// else -// if there are enough instances with usage=0 -// assign instances with pending_usage = usage = 0 -// (avoid preempting running jobs) -// if need more, assign instances with usage = 0 -// else -// prune J - -static inline void increment_pending_usage( - RESULT* rp, double usage, COPROC* cp -) { - double x = (usage<1)?usage:1; - for (int i=0; icoproc_indices[i]; - cp->pending_usage[j] += x; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] %s instance %d; %f pending for %s", cp->type, i, x, rp->name - ); - if (cp->pending_usage[j] > 1) { - msg_printf(rp->project, MSG_INFO, - "[coproc] huh? %s %d %s pending usage > 1", - cp->type, i, rp->name - ); - } - } - } -} - -// check the GPU assignment for a currently-running app. 
-// Note: don't check available RAM. -// It may not be known (e.g. NVIDIA) and in any case, -// if the app is still running, it has enough RAM -// -static inline bool current_assignment_ok( - RESULT* rp, double usage, COPROC* cp, bool& defer_sched -) { - defer_sched = false; - double x = (usage<1)?usage:1; - for (int i=0; icoproc_indices[i]; - if (cp->usage[j] + x > 1) { - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] %s %f instance of device %d already assigned to task %s", - cp->type, x, j, rp->name - ); - } - return false; - } - } - return true; -} - -static inline void confirm_current_assignment( - RESULT* rp, double usage, COPROC* cp -) { - double x = (usage<1)?usage:1; - for (int i=0; icoproc_indices[i]; - cp->usage[j] +=x; - cp->pending_usage[j] -=x; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] %s instance %d: confirming %f instance for %s", - cp->type, j, x, rp->name - ); - } -#if DEFER_ON_GPU_AVAIL_RAM - cp->available_ram_temp[j] -= rp->avp->gpu_ram; -#endif - } -} - -static inline bool get_fractional_assignment( - RESULT* rp, double usage, COPROC* cp, bool& defer_sched -) { - int i; - defer_sched = false; - - // try to assign an instance that's already fractionally assigned - // - for (i=0; icount; i++) { - if (gpu_excluded(rp->app, *cp, i)) { - continue; - } - if ((cp->usage[i] || cp->pending_usage[i]) - && (cp->usage[i] + cp->pending_usage[i] + usage <= 1) - ) { -#if DEFER_ON_GPU_AVAIL_RAM - if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { - defer_sched = true; - continue; - } - cp->available_ram_temp[i] -= rp->avp->gpu_ram; -#endif - rp->coproc_indices[0] = i; - cp->usage[i] += usage; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Assigning %f of %s instance %d to %s", - usage, cp->type, i, rp->name - ); - } - return true; - } - } - - // failing that, assign an unreserved instance - // - for (i=0; icount; i++) { - if (gpu_excluded(rp->app, *cp, i)) { 
- continue; - } - if (!cp->usage[i]) { -#if DEFER_ON_GPU_AVAIL_RAM - if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { - defer_sched = true; - continue; - } - cp->available_ram_temp[i] -= rp->avp->gpu_ram; -#endif - rp->coproc_indices[0] = i; - cp->usage[i] += usage; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Assigning %f of %s free instance %d to %s", - usage, cp->type, i, rp->name - ); - } - return true; - } - } - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Insufficient %s for %s: need %f", - cp->type, rp->name, usage - ); - } - - return false; -} - -static inline bool get_integer_assignment( - RESULT* rp, double usage, COPROC* cp, bool& defer_sched -) { - int i; - defer_sched = false; - - // make sure we have enough free instances - // - int nfree = 0; - for (i=0; icount; i++) { - if (gpu_excluded(rp->app, *cp, i)) { - continue; - } - if (!cp->usage[i]) { -#if DEFER_ON_GPU_AVAIL_RAM - if (rp->avp->gpu_ram > cp->available_ram_temp[i]) { - defer_sched = true; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] task %s needs %.0fMB RAM, %s GPU %d has %.0fMB available", - rp->name, rp->avp->gpu_ram/MEGA, cp->type, i, cp->available_ram_temp[i]/MEGA - ); - } - continue; - }; -#endif - nfree++; - } - } - if (nfree < usage) { - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Insufficient %s for %s; need %d, available %d", - cp->type, rp->name, (int)usage, nfree - ); - if (defer_sched) { - msg_printf(rp->project, MSG_INFO, - "[coproc] some instances lack available memory" - ); - } - } - return false; - } - - int n = 0; - - // assign non-pending instances first - - for (i=0; icount; i++) { - if (gpu_excluded(rp->app, *cp, i)) { - continue; - } - if (!cp->usage[i] - && !cp->pending_usage[i] -#if DEFER_ON_GPU_AVAIL_RAM - && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) -#endif - ) { - cp->usage[i] = 1; -#if DEFER_ON_GPU_AVAIL_RAM - 
cp->available_ram_temp[i] -= rp->avp->gpu_ram; -#endif - rp->coproc_indices[n++] = i; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Assigning %s instance %d to %s", - cp->type, i, rp->name - ); - } - if (n == usage) return true; - } - } - - // if needed, assign pending instances - - for (i=0; icount; i++) { - if (gpu_excluded(rp->app, *cp, i)) { - continue; - } - if (!cp->usage[i] -#if DEFER_ON_GPU_AVAIL_RAM - && (rp->avp->gpu_ram <= cp->available_ram_temp[i]) -#endif - ) { - cp->usage[i] = 1; -#if DEFER_ON_GPU_AVAIL_RAM - cp->available_ram_temp[i] -= rp->avp->gpu_ram; -#endif - rp->coproc_indices[n++] = i; - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] Assigning %s pending instance %d to %s", - cp->type, i, rp->name - ); - } - if (n == usage) return true; - } - } - if (log_flags.coproc_debug) { - msg_printf(rp->project, MSG_INFO, - "[coproc] huh??? ran out of %s instances for %s", - cp->type, rp->name - ); - } - return false; -} - -static inline void mark_as_defer_sched(RESULT* rp) { - int i = rp->avp->gpu_usage.rsc_type; - if (i) { - rp->project->rsc_defer_sched[i] = true; - } - rp->schedule_backoff = gstate.now + 300; // try again in 5 minutes - gstate.request_schedule_cpus("insufficient GPU RAM"); -} - -#if DEFER_ON_GPU_AVAIL_RAM -static void copy_available_ram(COPROC& cp, const char* name) { - int rt = rsc_index(name); - if (rt > 0) { - for (int i=0; i& jobs) { - unsigned int i; - COPROC* cp; - double usage; - - coprocs.clear_usage(); -#if DEFER_ON_GPU_AVAIL_RAM - if (coprocs.have_nvidia()) { - copy_available_ram(coprocs.nvidia, GPU_TYPE_NVIDIA); - } - if (coprocs.have_ati()) { - copy_available_ram(coprocs.ati, GPU_TYPE_ATI); - } - if (coprocs.have_intel()) { - copy_available_ram(coprocs.intel_gpu, GPU_TYPE_INTEL); - } -#endif - - // fill in pending usage - // - for (i=0; iavp; - int rt = avp->gpu_usage.rsc_type; - if (rt) { - usage = avp->gpu_usage.usage; - cp = &coprocs.coprocs[rt]; - } else 
{ - continue; - } - ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); - if (!atp) continue; - if (is_gpu_task_running(atp)) { - increment_pending_usage(rp, usage, cp); - } - } - - vector::iterator job_iter; - job_iter = jobs.begin(); - while (job_iter != jobs.end()) { - RESULT* rp = *job_iter; - APP_VERSION* avp = rp->avp; - int rt = avp->gpu_usage.rsc_type; - if (rt) { - usage = avp->gpu_usage.usage; - cp = &coprocs.coprocs[rt]; - } else { - job_iter++; - continue; - } - - ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); - bool defer_sched; - if (atp && is_gpu_task_running(atp)) { - if (current_assignment_ok(rp, usage, cp, defer_sched)) { - confirm_current_assignment(rp, usage, cp); - job_iter++; - } else { - if (defer_sched) { - mark_as_defer_sched(rp); - } - job_iter = jobs.erase(job_iter); - } - } else { - if (usage < 1) { - if (get_fractional_assignment(rp, usage, cp, defer_sched)) { - job_iter++; - } else { - if (defer_sched) { - mark_as_defer_sched(rp); - } - job_iter = jobs.erase(job_iter); - } - } else { - if (get_integer_assignment(rp, usage, cp, defer_sched)) { - job_iter++; - } else { - if (defer_sched) { - mark_as_defer_sched(rp); - } - job_iter = jobs.erase(job_iter); - } - } - } - } - -#if 0 - // enforce "don't use GPUs while active" pref in NVIDIA case; - // it applies only to GPUs running a graphics app - // - if (gstate.host_info.coprocs.nvidia.count && gstate.user_active && !gstate.global_prefs.run_gpu_if_user_active) { - job_iter = jobs.begin(); - while (job_iter != jobs.end()) { - RESULT* rp = *job_iter; - if (!rp->avp->ncudas) { - job_iter++; - continue; - } - ACTIVE_TASK* atp = gstate.lookup_active_task_by_result(rp); - bool some_gpu_busy = false; - for (i=0; iavp->ncudas; i++) { - int dev = atp->coproc_indices[i]; - if (gstate.host_info.coprocs.cuda.running_graphics_app[dev]) { - some_gpu_busy = true; - break; - } - } - if (some_gpu_busy) { - job_iter = jobs.erase(job_iter); - } else { - job_iter++; - } - } - } -#endif 
-} - // Enforce the CPU schedule. // Inputs: // ordered_scheduled_results From bedeeabf7e81d6f3a4873a3a253083e1ddb5deb6 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 1 May 2014 23:56:06 -0700 Subject: [PATCH 4/6] add coproc_sched.cpp to project file --- win_build/boinc_cli.vcxproj | 2 ++ 1 file changed, 2 insertions(+) diff --git a/win_build/boinc_cli.vcxproj b/win_build/boinc_cli.vcxproj index fed5f278a9..fab5322a5d 100644 --- a/win_build/boinc_cli.vcxproj +++ b/win_build/boinc_cli.vcxproj @@ -349,6 +349,7 @@ + @@ -413,6 +414,7 @@ + From 6179a2493b10c0ab2e9673cd3698e524145ad34d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rytis=20Slatkevi=C4=8Dius?= Date: Fri, 2 May 2014 09:57:02 +0300 Subject: [PATCH 5/6] Enable assigned work for remote job submission (RPC only) --- html/inc/submit.inc | 10 ++++++++++ html/user/submit_rpc_handler.php | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/html/inc/submit.inc b/html/inc/submit.inc index 32482d40ab..b159109a66 100644 --- a/html/inc/submit.inc +++ b/html/inc/submit.inc @@ -44,6 +44,16 @@ function req_to_xml($req, $op) { $job->rsc_fpops_est $job->command_line "; + if ((isset($job->target_team)) && ($job->target_team)) { + $x .= " $job->target_team +"; + } elseif ((isset($job->target_user)) && ($job->target_user)) { + $x .= " $job->target_user +"; + } elseif ((isset($job->target_team)) && ($job->target_team)) { + $x .= " $job->target_team +"; + } foreach ($job->input_files as $file) { $x .= " \n"; $x .= " $file->mode\n"; diff --git a/html/user/submit_rpc_handler.php b/html/user/submit_rpc_handler.php index afcbf0c4df..042f2ce281 100644 --- a/html/user/submit_rpc_handler.php +++ b/html/user/submit_rpc_handler.php @@ -173,11 +173,18 @@ function submit_jobs($jobs, $template, $app, $batch_id, $priority) { $x = ""; foreach($jobs as $job) { if ($job->name) { - $x .= "--wu_name $job->name"; + $x .= " --wu_name $job->name"; } if ($job->command_line) { $x .= " --command_line 
\"$job->command_line\""; } + if ($job->target_team) { + $x .= " --target_team $job->target_team"; + } elseif ($job->target_user) { + $x .= " --target_user $job->target_user"; + } elseif ($job->target_host) { + $x .= " --target_host $job->target_host"; + } foreach ($job->input_files as $file) { if ($file->mode == "remote") { $x .= " --remote_file $file->url $file->nbytes $file->md5"; @@ -206,6 +213,9 @@ function xml_get_jobs($r) { $job = new StdClass; $job->input_files = array(); $job->command_line = (string)$j->command_line; + $job->target_team = (int)$j->target_team; + $job->target_user = (int)$j->target_user; + $job->target_host = (int)$j->target_host; $job->name = (string)$j->name; $job->rsc_fpops_est = (double)$j->rsc_fpops_est; foreach ($j->input_file as $f) { From cecee4bc9efb09748437508fb2715a4ebb05e92a Mon Sep 17 00:00:00 2001 From: David Anderson Date: Fri, 2 May 2014 00:24:59 -0700 Subject: [PATCH 6/6] create_work: make targeting work with stdin-based job creation --- tools/create_work.cpp | 112 +++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 46 deletions(-) diff --git a/tools/create_work.cpp b/tools/create_work.cpp index 7d191b7fb1..fc3a127883 100644 --- a/tools/create_work.cpp +++ b/tools/create_work.cpp @@ -332,45 +332,75 @@ int main(int argc, char** argv) { strcpy(jd.result_template_path, "./"); strcat(jd.result_template_path, jd.result_template_file); if (use_stdin) { - string values; - DB_WORKUNIT wu; - int _argc; - char* _argv[100], value_buf[MAX_QUERY_LEN]; - for (int j=0; ; j++) { - char* p = fgets(buf, sizeof(buf), stdin); - if (p == NULL) break; - JOB_DESC jd2 = jd; - strcpy(jd2.wu.name, ""); - _argc = parse_command_line(buf, _argv); - jd2.parse_cmdline(_argc, _argv); - if (!strlen(jd2.wu.name)) { - sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); + if (jd.assign_flag) { + // if we're doing assignment we can't use the bulk-query method; + // create the jobs one at a time. 
+ // + int _argc; + char* _argv[100]; + for (int j=0; ; j++) { + char* p = fgets(buf, sizeof(buf), stdin); + if (p == NULL) break; + JOB_DESC jd2 = jd; + strcpy(jd2.wu.name, ""); + _argc = parse_command_line(buf, _argv); + jd2.parse_cmdline(_argc, _argv); + if (!strlen(jd2.wu.name)) { + sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); + } + jd2.create(); } - retval = create_work2( - jd2.wu, - jd2.wu_template, - jd2.result_template_file, - jd2.result_template_path, - jd2.infiles, - config, - jd2.command_line, - jd2.additional_xml, - value_buf - ); - if (retval) { - fprintf(stderr, "create_work() failed: %d\n", retval); - exit(1); + } else { + string values; + DB_WORKUNIT wu; + int _argc; + char* _argv[100], value_buf[MAX_QUERY_LEN]; + for (int j=0; ; j++) { + char* p = fgets(buf, sizeof(buf), stdin); + if (p == NULL) break; + JOB_DESC jd2 = jd; + strcpy(jd2.wu.name, ""); + _argc = parse_command_line(buf, _argv); + jd2.parse_cmdline(_argc, _argv); + if (!strlen(jd2.wu.name)) { + sprintf(jd2.wu.name, "%s_%d", jd.wu.name, j); + } + retval = create_work2( + jd2.wu, + jd2.wu_template, + jd2.result_template_file, + jd2.result_template_path, + jd2.infiles, + config, + jd2.command_line, + jd2.additional_xml, + value_buf + ); + if (retval) { + fprintf(stderr, "create_work() failed: %d\n", retval); + exit(1); + } + if (values.size()) { + values += ","; + values += value_buf; + } else { + values = value_buf; + } + // MySQL can handles queries at least 1 MB + // + int n = strlen(value_buf); + if (values.size() + 2*n > 1000000) { + retval = wu.insert_batch(values); + if (retval) { + fprintf(stderr, + "wu.insert_batch() failed: %d\n", retval + ); + exit(1); + } + values.clear(); + } } if (values.size()) { - values += ","; - values += value_buf; - } else { - values = value_buf; - } - // MySQL can handles queries at least 1 MB - // - int n = strlen(value_buf); - if (values.size() + 2*n > 1000000) { retval = wu.insert_batch(values); if (retval) { fprintf(stderr, @@ -378,16 +408,6 @@ 
int main(int argc, char** argv) { ); exit(1); } - values.clear(); - } - } - if (values.size()) { - retval = wu.insert_batch(values); - if (retval) { - fprintf(stderr, - "wu.insert_batch() failed: %d\n", retval - ); - exit(1); } } } else {