From a985959da55f83005939b469c75240421c4c1b1c Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 11 May 2020 20:02:47 -0700 Subject: [PATCH 1/4] Client: measure disk usage in terms of allocated disk space, not file size. On filesystems that use compression, the disk space allocated to a file is generally less than the (logical) size of the file. In this case, the client can fail to get work because it thinks there's insufficient disk space. Fix: change things so that the client measures disk usage (per-project and total) in terms of allocated space rather than file size. But still use logical file sizes in two places: - in checking that downloaded files are the right size - in calculating a job's disk usage to see if it exceeds the project-specified limit. New functions: file_size_alloc() and dir_size_alloc(). These are like file_size() and dir_size() except they return allocated rather than logical size. --- client/app.h | 5 +- client/cs_prefs.cpp | 6 +-- client/gui_rpc_server_ops.cpp | 14 +++-- lib/filesys.cpp | 96 +++++++++++++++++++++++++++++++++++ lib/filesys.h | 4 +- 5 files changed, 115 insertions(+), 10 deletions(-) diff --git a/client/app.h b/client/app.h index 9ca8736826..229b1ad87f 100644 --- a/client/app.h +++ b/client/app.h @@ -78,6 +78,7 @@ struct ACTIVE_TASK { double peak_working_set_size; double peak_swap_size; double peak_disk_usage; + // based on real (not allocated/compressed) file sizes // START OF ITEMS ALSO SAVED IN CLIENT STATE FILE @@ -204,7 +205,9 @@ struct ACTIVE_TASK { void cleanup_task(); int current_disk_usage(double&); - // disk used by output files and temp files of this task + // total sizes of output files and temp files of this task + // This is compared with project-specified limits + // to decide whether to abort job; no other use. int get_free_slot(RESULT*); int start(bool test=false); // start a process diff --git a/client/cs_prefs.cpp b/client/cs_prefs.cpp index 8fe7aba300..6af64f30a1 100644 --- a/client/cs_prefs.cpp +++ b/client/cs_prefs.cpp @@ -90,14 +90,14 @@ int CLIENT_STATE::get_disk_usages() { for (i=0; idisk_usage = 0; - retval = dir_size(p->project_dir(), size); + retval = dir_size_alloc(p->project_dir(), size); if (!retval) p->disk_usage = size; } for (i=0; islot, buf, sizeof(buf)); - retval = dir_size(buf, size); + retval = dir_size_alloc(buf, size); if (retval) continue; atp->wup->project->disk_usage += size; } @@ -105,7 +105,7 @@ int CLIENT_STATE::get_disk_usages() { p = projects[i]; total_disk_usage += p->disk_usage; } - retval = dir_size(".", size, false); + retval = dir_size_alloc(".", size, false); if (!retval) { client_disk_usage = size; total_disk_usage += size; diff --git a/client/gui_rpc_server_ops.cpp b/client/gui_rpc_server_ops.cpp index b684c356e7..905bf1fe6d 100644 --- a/client/gui_rpc_server_ops.cpp +++ b/client/gui_rpc_server_ops.cpp @@ -168,8 +168,8 @@ static void handle_get_disk_usage(GUI_RPC_CONN& grc) { ); } - dir_size(".", boinc_non_project, false); - dir_size("locale", size, false); + dir_size_alloc(".", boinc_non_project, false); + dir_size_alloc("locale", size, false); boinc_non_project += size; #ifdef __APPLE__ if (gstate.launched_by_manager) { @@ -179,9 +179,13 @@ static void handle_get_disk_usage(GUI_RPC_CONN& grc) { OSStatus err = noErr; retval = proc_pidpath(getppid(), path, sizeof(path)); - if (retval <= 0) err = fnfErr; - if (! err) dir_size(path, manager_size, true); - if (! err) boinc_non_project += manager_size; + if (retval <= 0) { + err = fnfErr; + } + if (!err) { + dir_size_alloc(path, manager_size, true); + boinc_non_project += manager_size; + } } #endif boinc_total = boinc_non_project; diff --git a/lib/filesys.cpp b/lib/filesys.cpp index e2522addc4..c75a74f789 100644 --- a/lib/filesys.cpp +++ b/lib/filesys.cpp @@ -370,6 +370,32 @@ int file_size(const char* path, double& size) { #endif } +// get file allocation size, i.e. how much disk space does it use. +// This can be less than the file size on compressed filesystems, +// or if the file has holes. +// It can also be slightly more. +// +int file_size_alloc(const char* path, double& size) { +#if defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__MINGW32__) + HANDLE h = CreateFileA(path, 0, FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, 0, OPEN_EXISTING, 0, 0); + if (h == INVALID_HANDLE_VALUE) return ERR_STAT; + LARGE_INTEGER lisize; + if (GetCompressedFileSizeEx(h, &lisize)) { + size = (double) lisize.QuadPart; + CloseHandle(h); + return 0; + } + return ERR_STAT; +#else + int retval; + struct stat sbuf; + retval = stat(path, &sbuf); + if (retval) return ERR_NOT_FOUND; + size = ((double)sbuf.st_blocks)*512.; + return 0; +#endif +} + int boinc_truncate(const char* path, double size) { int retval; #if defined(_WIN32) && !defined(__CYGWIN32__) @@ -487,6 +513,76 @@ int dir_size(const char* dirpath, double& size, bool recurse) { return 0; } +// return total allocated size of files in directory and optionally its subdirectories +// Win: use special version because stat() is slow, can be avoided +// Unix: follow symbolic links +// +int dir_size_alloc(const char* dirpath, double& size, bool recurse) { +#ifdef WIN32 + char buf[_MAX_PATH]; + char path2[_MAX_PATH]; + double dsize = 0.0; + WIN32_FIND_DATAA findData; + + size = 0.0; + snprintf(path2, sizeof(path2), "%s/*", dirpath); + path2[sizeof(path2)-1] = 0; + + HANDLE hFind = ::FindFirstFileA(path2, &findData); + if (INVALID_HANDLE_VALUE == hFind) return ERR_OPENDIR; + do { + snprintf(buf, sizeof(buf), "%.*s/%.*s", DIR_LEN, dirpath, FILE_LEN, findData.cFileName); + buf[sizeof(buf)-1] = 0; + if (findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + if (!recurse) continue; + if (!strcmp(findData.cFileName, ".")) continue; + if (!strcmp(findData.cFileName, "..")) continue; + + dsize = 0.0; + dir_size_alloc(buf, dsize, true); + size += dsize; + } else { + double s; + if (file_size_alloc(buf, s) == 0) { + size += s; + } + } + } while (FindNextFileA(hFind, &findData)); + + ::FindClose(hFind); +#else + char filename[MAXPATHLEN], subdir[MAXPATHLEN]; + int retval=0; + DIRREF dirp; + double x; + + size = 0.0; + dirp = dir_open(dirpath); + if (!dirp) return ERR_OPENDIR; + while (1) { + retval = dir_scan(filename, dirp, sizeof(filename)); + if (retval) break; + + snprintf(subdir, sizeof(subdir), "%.*s/%.*s", DIR_LEN, dirpath, FILE_LEN, filename); + subdir[sizeof(subdir)-1] = 0; + + if (is_dir(subdir)) { + if (recurse) { + retval = dir_size_alloc(subdir, x); + if (retval) continue; + size += x; + } + } else if (is_file(subdir)) { + retval = file_size_alloc(subdir, x); + if (retval) continue; + size += x; + } + } + dir_close(dirp); +#endif + return 0; +} + FILE* boinc_fopen(const char* path, const char* mode) { // if opening for read, and file isn't there, // leave now (avoid 5-second delay!!) diff --git a/lib/filesys.h b/lib/filesys.h index 17f309e805..297e3258fb 100644 --- a/lib/filesys.h +++ b/lib/filesys.h @@ -95,8 +95,10 @@ extern "C" { #ifdef __cplusplus extern int file_size(const char*, double&); -extern int clean_out_dir(const char*); +extern int file_size_alloc(const char*, double&); extern int dir_size(const char* dirpath, double&, bool recurse=true); +extern int dir_size_alloc(const char* dirpath, double&, bool recurse=true); +extern int clean_out_dir(const char*); extern int get_filesystem_info(double& total, double& free, char* path=const_cast(".")); extern bool is_path_absolute(const std::string path); From 9cfd45dae9ecac717ab73e63430f5b3771caa6f3 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Mon, 11 May 2020 22:19:58 -0700 Subject: [PATCH 2/4] Client: minor tweak so project disk usage is displayed by boinc_cmd --- client/cs_prefs.cpp | 2 +- client/project.cpp | 9 +++++---- lib/gui_rpc_client_print.cpp | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/client/cs_prefs.cpp b/client/cs_prefs.cpp index 6af64f30a1..e673e0fd69 100644 --- a/client/cs_prefs.cpp +++ b/client/cs_prefs.cpp @@ -119,7 +119,7 @@ int CLIENT_STATE::get_disk_usages() { // - each project has a "disk_resource_share" (DRS) // This is the resource share plus .1*(max resource share). // This ensures that backup projects get some disk. -// - each project as a "desired_disk_usage (DDU)", +// - each project has a "desired_disk_usage (DDU)", // which is either its current usage // or an amount sent from the scheduler. // - each project has a "quota": (available space)*(drs/total_drs). diff --git a/client/project.cpp b/client/project.cpp index 0dd770fbdd..74d1c5fa44 100644 --- a/client/project.cpp +++ b/client/project.cpp @@ -417,6 +417,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) { " %f\n" " %f\n" " %f\n" + " %f\n" + " %f\n" " %f\n" " %f\n" " %d\n" @@ -454,6 +456,7 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) { pwf.rec, pwf.rec_time, resource_share, + disk_usage, disk_share, desired_disk_usage, duration_correction_factor, sched_rpc_pending, @@ -557,10 +560,8 @@ int PROJECT::write_state(MIOFILE& out, bool gui_rpc) { " %f\n" " %f\n" " %f\n" - " %f\n" - " %f\n" - " %f\n", - cpu_ec, cpu_time, gpu_ec, gpu_time, disk_usage, disk_share + " %f\n", + cpu_ec, cpu_time, gpu_ec, gpu_time ); } out.printf( diff --git a/lib/gui_rpc_client_print.cpp b/lib/gui_rpc_client_print.cpp index 02d9c8a4dd..0c25e6e794 100644 --- a/lib/gui_rpc_client_print.cpp +++ b/lib/gui_rpc_client_print.cpp @@ -99,7 +99,7 @@ void PROJECT::print() { printf(" ended: %s\n", ended?"yes":"no"); printf(" suspended via GUI: %s\n", suspended_via_gui?"yes":"no"); printf(" don't request more work: %s\n", dont_request_more_work?"yes":"no"); - printf(" disk usage: %f\n", disk_usage); + printf(" disk usage: %.2fMB\n", disk_usage/MEGA); time_t foo = (time_t)last_rpc_time; printf(" last RPC: %s\n", ctime(&foo)); printf(" project files downloaded: %f\n", project_files_downloaded_time); @@ -428,8 +428,8 @@ void PROJECTS::print_urls() { void DISK_USAGE::print() { unsigned int i; printf("======== Disk usage ========\n"); - printf("total: %f\n", d_total); - printf("free: %f\n", d_free); + printf("total: %.2fMB\n", d_total/MEGA); + printf("free: %.2fMB\n", d_free/MEGA); for (i=0; iprint_disk_usage(); From 1cb7ee4b2424c063f0420f76b4f111ede10296df Mon Sep 17 00:00:00 2001 From: davidpanderson Date: Tue, 12 May 2020 00:40:47 -0700 Subject: [PATCH 3/4] Fix bugs in Win version of file_size_alloc() --- lib/filesys.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/filesys.cpp b/lib/filesys.cpp index c75a74f789..5aa60f2aa3 100644 --- a/lib/filesys.cpp +++ b/lib/filesys.cpp @@ -377,12 +377,11 @@ int file_size(const char* path, double& size) { // int file_size_alloc(const char* path, double& size) { #if defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__MINGW32__) - HANDLE h = CreateFileA(path, 0, FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE, 0, OPEN_EXISTING, 0, 0); - if (h == INVALID_HANDLE_VALUE) return ERR_STAT; - LARGE_INTEGER lisize; - if (GetCompressedFileSizeEx(h, &lisize)) { - size = (double) lisize.QuadPart; - CloseHandle(h); + DWORD hi; + DWORD lo = GetCompressedFileSizeA(path, &hi); + if (lo != INVALID_FILE_SIZE) { + ULONGLONG x = (((ULONGLONG)hi) << 32) + lo; + size = (double) x; return 0; } return ERR_STAT; From 48200160e96081d59a6d8413e786c90742443e09 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Tue, 12 May 2020 23:09:56 -0700 Subject: [PATCH 4/4] Client: fix MinGW build: seems to lack stat.st_blocks --- lib/filesys.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/filesys.cpp b/lib/filesys.cpp index 5aa60f2aa3..a171ccb24c 100644 --- a/lib/filesys.cpp +++ b/lib/filesys.cpp @@ -390,7 +390,11 @@ int file_size_alloc(const char* path, double& size) { struct stat sbuf; retval = stat(path, &sbuf); if (retval) return ERR_NOT_FOUND; +#ifdef _WIN32 // cygwin, mingw + size = (double)sbuf.st_size; +#else size = ((double)sbuf.st_blocks)*512.; +#endif return 0; #endif }