From 80be72e9b5e6067777169c762ec490b250031254 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Sat, 18 Aug 2012 23:38:52 +0000 Subject: [PATCH] - file upload handler: Do first read from socket before opening the disk file (an attempt to fix filesystem lockups on WCG). Increase buffer size from 16KB to 256KB. svn path=/trunk/boinc/; revision=26046 --- checkin_notes | 11 ++- sched/file_upload_handler.cpp | 123 ++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 60 deletions(-) diff --git a/checkin_notes b/checkin_notes index 300eef7162..1748b69f0b 100644 --- a/checkin_notes +++ b/checkin_notes @@ -5593,7 +5593,7 @@ David 17 Aug 2012 vda_lib2.cpp vda_lib.h -David 17 Aug 2012 +David 18 Aug 2012 - client: if a project has excluded GPUs of a given type, allow it to fetch work of that type if the # of runnable jobs it <= the # of non-excluded instances (rather than 0). @@ -5601,3 +5601,12 @@ David 17 Aug 2012 client/ work_fetch.cpp,h rr_sim.cpp + +David 18 Aug 2012 + - file upload handler: + Do first read from socket before opening the disk file + (an attempt to fix filesystem lockups on WCG). + Increase buffer size from 16KB to 256KB. + + sched/ + file_upload_handler.cpp diff --git a/sched/file_upload_handler.cpp b/sched/file_upload_handler.cpp index 0a2b78f085..afac7d05ec 100644 --- a/sched/file_upload_handler.cpp +++ b/sched/file_upload_handler.cpp @@ -105,7 +105,7 @@ int return_success(const char* text) { return 0; } -#define BLOCK_SIZE 16382 +#define BLOCK_SIZE (256*1024) double bytes_left=-1; // read from socket, write to file @@ -114,64 +114,7 @@ double bytes_left=-1; int copy_socket_to_file(FILE* in, char* path, double offset, double nbytes) { unsigned char buf[BLOCK_SIZE]; struct stat sbuf; - int pid; - - // open file. Use raw IO not buffered IO so that we can use reliable - // posix file locking. - // Advisory file locking is not guaranteed reliable when - // used with stream buffered IO. - // - int fd = open(path, - O_WRONLY|O_CREAT, - S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH - ); - if (fd<0) { - return return_error(ERR_TRANSIENT, - "can't open file %s: %s\n", path, strerror(errno) - ); - } - -#ifdef LOCK_FILES - // Put an advisory lock on the file. - // This will prevent OTHER instances of file_upload_handler - // from being able to write to the file. - // - pid = mylockf(fd); - if (pid>0) { - close(fd); - return return_error(ERR_TRANSIENT, - "can't lock file %s: %s locked by PID=%d\n", - path, strerror(errno), pid - ); - } else if (pid < 0) { - close(fd); - return return_error(ERR_TRANSIENT, "can't lock file %s\n", path); - } -#endif - - // check that file length corresponds to offset - // TODO: use a 64-bit variant - // - if (stat(path, &sbuf)) { - close(fd); - return return_error(ERR_TRANSIENT, - "can't stat file %s: %s\n", path, strerror(errno) - ); - } - if (sbuf.st_size < offset) { - close(fd); - return return_error(ERR_TRANSIENT, - "length of file %s %d bytes < offset %.0f bytes", - path, (int)sbuf.st_size, offset - ); - } - if (offset) lseek(fd, offset, SEEK_SET); - if (sbuf.st_size > offset) { - log_messages.printf(MSG_CRITICAL, - "file %s length on disk %d bytes; host upload starting at %.0f bytes.\n", - this_filename, (int)sbuf.st_size, offset - ); - } + int pid, fd=0; // caller guarantees that nbytes > offset // @@ -187,6 +130,68 @@ int copy_socket_to_file(FILE* in, char* path, double offset, double nbytes) { // n = fread(buf, 1, m, in); + // delay opening the file until we've done the first socket read + // to avoid filesystem lockups (WCG, possible paranoia) + // + if (!fd) { + // Use raw IO not buffered IO so that we can use reliable + // posix file locking. + // Advisory file locking is not guaranteed reliable when + // used with stream buffered IO. + // + int fd = open(path, + O_WRONLY|O_CREAT, + S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH + ); + if (fd<0) { + return return_error(ERR_TRANSIENT, + "can't open file %s: %s\n", path, strerror(errno) + ); + } + +#ifdef LOCK_FILES + // Put an advisory lock on the file. + // This will prevent OTHER instances of file_upload_handler + // from being able to write to the file. + // + pid = mylockf(fd); + if (pid>0) { + close(fd); + return return_error(ERR_TRANSIENT, + "can't lock file %s: %s locked by PID=%d\n", + path, strerror(errno), pid + ); + } else if (pid < 0) { + close(fd); + return return_error(ERR_TRANSIENT, "can't lock file %s\n", path); + } +#endif + + // check that file length corresponds to offset + // TODO: use a 64-bit variant + // + if (stat(path, &sbuf)) { + close(fd); + return return_error(ERR_TRANSIENT, + "can't stat file %s: %s\n", path, strerror(errno) + ); + } + if (sbuf.st_size < offset) { + close(fd); + return return_error(ERR_TRANSIENT, + "length of file %s %d bytes < offset %.0f bytes", + path, (int)sbuf.st_size, offset + ); + } + if (offset) lseek(fd, offset, SEEK_SET); + if (sbuf.st_size > offset) { + log_messages.printf(MSG_CRITICAL, + "file %s length on disk %d bytes; host upload starting at %.0f bytes.\n", + this_filename, (int)sbuf.st_size, offset + ); + } + } + // try to write n bytes to file // to_write=n;