Skip to content

Commit

Permalink
Add a simple cache to the ros3 VFD (#3753)
Browse files Browse the repository at this point in the history
Adds a small cache of the first N bytes of a file opened with the
read-only S3 (ros3) VFD, where N is 4 KiB or the size of the file,
whichever is smaller. This avoids many small I/O operations
when a file is opened.

Addresses GitHub issue #3381
  • Loading branch information
derobins authored Oct 23, 2023
1 parent 66396ca commit d76d591
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 22 deletions.
10 changes: 10 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,16 @@ New Features

Library:
--------
- Added a simple cache to the read-only S3 VFD

The read-only S3 VFD now caches the first N bytes of a file stored
in S3 to avoid many small I/O operations when opening files.
This cache is per-file and is created when the file is opened.

N is currently 4 KiB or the size of the file, whichever is smaller.

Addresses GitHub issue #3381

- Added new API function H5Pget_actual_selection_io_mode()

This function allows the user to determine if the library performed
Expand Down
73 changes: 51 additions & 22 deletions src/H5FDros3.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
*/
#define ROS3_STATS 0

/* Max size of the cache, in bytes.
 *
 * The cache holds the leading bytes of the file (read starting at offset 0)
 * and is filled when the file is opened. Its actual size is the smaller of
 * this value and the file size.
 */
#define ROS3_MAX_CACHE_SIZE 4096

/* The driver identification number, initialized at runtime
*/
static hid_t H5FD_ROS3_g = 0;
Expand Down Expand Up @@ -189,6 +192,8 @@ typedef struct H5FD_ros3_t {
H5FD_ros3_fapl_t fa;
haddr_t eoa;
s3r_t *s3r_handle;
uint8_t *cache;
size_t cache_size;
#if ROS3_STATS
ros3_statsbin meta[ROS3_STATS_BIN_COUNT + 1];
ros3_statsbin raw[ROS3_STATS_BIN_COUNT + 1];
Expand Down Expand Up @@ -1000,15 +1005,29 @@ H5FD__ros3_open(const char *url, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
HGOTO_ERROR(H5E_INTERNAL, H5E_UNINITIALIZED, NULL, "unable to reset file statistics");
#endif /* ROS3_STATS */

/* Cache the initial bytes of the file */
{
size_t filesize = H5FD_s3comms_s3r_get_filesize(file->s3r_handle);

file->cache_size = (filesize < ROS3_MAX_CACHE_SIZE) ? filesize : ROS3_MAX_CACHE_SIZE;

if (NULL == (file->cache = (uint8_t *)H5MM_calloc(file->cache_size)))
HGOTO_ERROR(H5E_VFL, H5E_NOSPACE, NULL, "unable to allocate cache memory");
if (H5FD_s3comms_s3r_read(file->s3r_handle, 0, file->cache_size, file->cache) == FAIL)
HGOTO_ERROR(H5E_VFL, H5E_READERROR, NULL, "unable to execute read");
}

ret_value = (H5FD_t *)file;

done:
if (ret_value == NULL) {
if (handle != NULL)
if (FAIL == H5FD_s3comms_s3r_close(handle))
HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL, "unable to close s3 file handle");
if (file != NULL)
if (file != NULL) {
H5MM_xfree(file->cache);
file = H5FL_FREE(H5FD_ros3_t, file);
}
curl_global_cleanup(); /* early cleanup because open failed */
} /* end if null return value (error) */

Expand Down Expand Up @@ -1335,6 +1354,7 @@ H5FD__ros3_close(H5FD_t H5_ATTR_UNUSED *_file)
#endif /* ROS3_STATS */

/* Release the file info */
H5MM_xfree(file->cache);
file = H5FL_FREE(H5FD_ros3_t, file);

done:
Expand Down Expand Up @@ -1666,41 +1686,50 @@ H5FD__ros3_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU
fprintf(stdout, "H5FD__ros3_read() called.\n");
#endif

assert(file != NULL);
assert(file->s3r_handle != NULL);
assert(buf != NULL);
assert(file);
assert(file->cache);
assert(file->s3r_handle);
assert(buf);

filesize = H5FD_s3comms_s3r_get_filesize(file->s3r_handle);

if ((addr > filesize) || ((addr + size) > filesize))
HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "range exceeds file address");

if (H5FD_s3comms_s3r_read(file->s3r_handle, addr, size, buf) == FAIL)
HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "unable to execute read");
/* Copy from the cache when accessing the first N bytes of the file.
* Saves network I/O operations when opening files.
*/
if (addr + size < file->cache_size) {
memcpy(buf, file->cache + addr, size);
}
else {
if (H5FD_s3comms_s3r_read(file->s3r_handle, addr, size, buf) == FAIL)
HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "unable to execute read");

#if ROS3_STATS

/* Find which "bin" this read fits in. Can be "overflow" bin. */
for (bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++)
if ((unsigned long long)size < ros3_stats_boundaries[bin_i])
break;
bin = (type == H5FD_MEM_DRAW) ? &file->raw[bin_i] : &file->meta[bin_i];
/* Find which "bin" this read fits in. Can be "overflow" bin. */
for (bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++)
if ((unsigned long long)size < ros3_stats_boundaries[bin_i])
break;
bin = (type == H5FD_MEM_DRAW) ? &file->raw[bin_i] : &file->meta[bin_i];

/* Store collected stats in appropriate bin */
if (bin->count == 0) {
bin->min = size;
bin->max = size;
}
else {
if (size < bin->min)
/* Store collected stats in appropriate bin */
if (bin->count == 0) {
bin->min = size;
if (size > bin->max)
bin->max = size;
}
bin->count++;
bin->bytes += (unsigned long long)size;
}
else {
if (size < bin->min)
bin->min = size;
if (size > bin->max)
bin->max = size;
}
bin->count++;
bin->bytes += (unsigned long long)size;

#endif /* ROS3_STATS */
}

done:
FUNC_LEAVE_NOAPI(ret_value)
Expand Down

0 comments on commit d76d591

Please sign in to comment.