From fb3b68f4b8dbc34958650db072fc2462c77024a8 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Fri, 15 Sep 2023 09:19:28 -0500 Subject: [PATCH] fbtl/posix: fix data-sieving calculations as part of introducing atomicity support for ompi v5.0, we also tried to improve the robustness in some file I/O routines. Unfortunately, this also introduced a bug since ret_code returned by a function does not necessarily contain the number of bytes read or written, but could contain the last value (e.g. 0). The value was however used in a subsequent calculation and we ended not copying data out of the temporary buffer used in the data sieving at all. This commit also simplifies some of the logic in the while loop, no need to retry to read past the end of the file multiple times. Fixes issue #11917 Code was tested with the reproducer provided as part of the issue, our internal testsuite, and the hdf5-1.4.2 testsuite, all tests pass. Signed-off-by: Edgar Gabriel --- ompi/mca/fbtl/posix/fbtl_posix_preadv.c | 25 +++++------------------- ompi/mca/fbtl/posix/fbtl_posix_pwritev.c | 11 +---------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c index ea15adaf5fe..7f32c8e227b 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_preadv.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_preadv.c @@ -33,7 +33,6 @@ static ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh, struct flock static ssize_t mca_fbtl_posix_preadv_generic (ompio_file_t *fh, struct flock *lock, int *lock_counter); static ssize_t mca_fbtl_posix_preadv_single (ompio_file_t *fh, struct flock *lock, int *lock_counter); -#define MAX_RETRIES 10 ssize_t mca_fbtl_posix_preadv (ompio_file_t *fh ) { @@ -108,7 +107,6 @@ ssize_t mca_fbtl_posix_preadv_single (ompio_file_t *fh, struct flock *lock, int return OMPI_ERROR; } - int retries = 0; size_t len = fh->f_io_array[0].length; while ( total_bytes < len ) { ret_code = pread(fh->fd, (char*)fh->f_io_array[0].memory_address+total_bytes, @@ -121,13 +119,7 @@ ssize_t mca_fbtl_posix_preadv_single (ompio_file_t *fh, struct flock *lock, int } if ( ret_code == 0 ) { // end of file - retries++; - if ( retries == MAX_RETRIES ) { - break; - } - else { - continue; - } + break; } total_bytes += ret_code; } @@ -206,7 +198,6 @@ ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh, struct flock *lock, return OMPI_ERROR; } size_t total_bytes = 0; - int retries = 0; while ( total_bytes < len ) { ret_code = pread (fh->fd, temp_buf+total_bytes, len-total_bytes, start+total_bytes); @@ -218,13 +209,7 @@ ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh, struct flock *lock, } if ( ret_code == 0 ) { // end of file - retries++; - if ( retries == MAX_RETRIES ) { - break; - } - else { - continue; - } + break; } total_bytes += ret_code; } @@ -236,12 +221,12 @@ ssize_t mca_fbtl_posix_preadv_datasieving (ompio_file_t *fh, struct flock *lock, size_t start_offset = (size_t) fh->f_io_array[startindex].offset; for ( i = startindex ; i < endindex ; i++) { pos = (size_t) fh->f_io_array[i].offset - start_offset; - if ( (ssize_t) pos > ret_code ) { + if ( (ssize_t) pos > total_bytes ) { break; } num_bytes = fh->f_io_array[i].length; - if ( ((ssize_t) pos + (ssize_t)num_bytes) > ret_code ) { - num_bytes = ret_code - (ssize_t)pos; + if ( ((ssize_t) pos + (ssize_t)num_bytes) > total_bytes ) { + num_bytes = total_bytes - (ssize_t)pos; } memcpy (fh->f_io_array[i].memory_address, temp_buf + pos, num_bytes); diff --git a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c index b96bddcb894..9b99f968f7c 100644 --- a/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c +++ b/ompi/mca/fbtl/posix/fbtl_posix_pwritev.c @@ -34,8 +34,6 @@ static ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh, struct floc static ssize_t mca_fbtl_posix_pwritev_generic (ompio_file_t *fh, struct flock *lock, int *lock_counter ); static ssize_t mca_fbtl_posix_pwritev_single (ompio_file_t *fh, struct flock *lock, int *lock_counter ); -#define MAX_RETRIES 10 - ssize_t mca_fbtl_posix_pwritev(ompio_file_t *fh ) { ssize_t bytes_written=0; @@ -192,7 +190,6 @@ ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh, struct flock *lock return OMPI_ERROR; } - int retries=0; while ( total_bytes < len ) { ret_code = pread (fh->fd, temp_buf, len, start); if ( ret_code == -1 ) { @@ -203,13 +200,7 @@ ssize_t mca_fbtl_posix_pwritev_datasieving (ompio_file_t *fh, struct flock *lock } if ( ret_code == 0 ) { // end of file - retries++; - if ( retries == MAX_RETRIES ) { - break; - } - else { - continue; - } + break; } total_bytes += ret_code; }