From 96d6c1464e4bbf70f57a65dccfedbed119180835 Mon Sep 17 00:00:00 2001 From: Jordan Henderson Date: Wed, 17 Jan 2024 14:08:30 -0600 Subject: [PATCH 1/2] Remove cached datatype conversion path table entries on file close When performing datatype conversions during I/O, the library checks to see whether it can re-use a cached datatype conversion pathway by performing comparisons between the source and destination datatypes of the current operation and the source and destination datatypes associated with each cached datatype conversion pathway. For variable-length and reference datatypes, a comparison is made between the VOL object for the file associated with these datatypes, which may change as a file is closed and reopened. In workflows involving a loop that opens a file, performs I/O on an object with a variable-length or reference datatype and then closes the file, this can lead to constant memory usage growth as the library compares the file VOL objects between the datatypes as different and adds a new cached conversion pathway entry on each iteration during I/O. This is now fixed by clearing out any cached conversion pathway entries for variable-length or reference datatypes associated with a particular file when that file is closed. --- src/H5Fint.c | 12 +++ src/H5T.c | 52 ++++++++++--- src/H5Tpkg.h | 3 + src/H5Tprivate.h | 2 + test/tmisc.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 249 insertions(+), 10 deletions(-) diff --git a/src/H5Fint.c b/src/H5Fint.c index 8738026d7c9..1feada6274e 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -1615,6 +1615,18 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure) if (vol_wrap_ctx && (NULL == H5VL_object_unwrap(f->vol_obj))) HDONE_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't unwrap VOL object"); + /* + * Clean up any cached type conversion path table entries that + * may have been keeping a reference to the file's VOL object + * in order to prevent the file from being closed out from + * underneath other places that may access the conversion path + * or its src/dst datatypes later on (currently, conversions on + * variable-length and reference datatypes involve this) + */ + if (H5T_unregister(H5T_PERS_SOFT, NULL, NULL, NULL, f->vol_obj, NULL) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, + "unable to free cached type conversion path table entries"); + if (H5VL_free_object(f->vol_obj) < 0) HDONE_ERROR(H5E_FILE, H5E_CANTDEC, FAIL, "unable to free VOL object"); f->vol_obj = NULL; diff --git a/src/H5T.c b/src/H5T.c index 8494680d243..811323e2d58 100644 --- a/src/H5T.c +++ b/src/H5T.c @@ -343,7 +343,6 @@ typedef H5T_t *(*H5T_copy_func_t)(H5T_t *old_dt); static herr_t H5T__register_int(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_lib_conv_t func); static herr_t H5T__register(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_func_t *conv); -static herr_t H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_t func); static htri_t H5T__compiler_conv(H5T_t *src, H5T_t *dst); static herr_t H5T__set_size(H5T_t *dt, size_t size); static herr_t H5T__close_cb(H5T_t *dt, void **request); @@ -2672,7 +2671,7 @@ H5Tregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T_c } /* end H5Tregister() */ /*------------------------------------------------------------------------- - * Function: H5T__unregister + * Function: H5T_unregister * * Purpose: Removes conversion paths that match the specified criteria. * All arguments are optional. Missing arguments are wild cards. @@ -2683,15 +2682,16 @@ H5Tregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T_c * *------------------------------------------------------------------------- */ -static herr_t -H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_conv_t func) +herr_t +H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5VL_object_t *owned_vol_obj, + H5T_conv_t func) { H5T_path_t *path = NULL; /*conversion path */ H5T_soft_t *soft = NULL; /*soft conversion information */ int nprint = 0; /*number of paths shut down */ int i; /*counter */ - FUNC_ENTER_PACKAGE_NOERR + FUNC_ENTER_NOAPI_NOERR /* Remove matching entries from the soft list */ if (H5T_PERS_DONTCARE == pers || H5T_PERS_SOFT == pers) { @@ -2704,6 +2704,8 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c continue; if (dst && dst->shared->type != soft->dst) continue; + if (owned_vol_obj) + continue; if (func && func != soft->conv.u.app_func) continue; @@ -2714,13 +2716,20 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c /* Remove matching conversion paths, except no-op path */ for (i = H5T_g.npaths - 1; i > 0; --i) { + bool nomatch; + path = H5T_g.path[i]; assert(path); + nomatch = ((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) || + (name && *name && strcmp(name, path->name) != 0) || + (src && H5T_cmp(src, path->src, false)) || (dst && H5T_cmp(dst, path->dst, false)) || + (owned_vol_obj && (owned_vol_obj != path->src->shared->owned_vol_obj) && + (owned_vol_obj != path->dst->shared->owned_vol_obj)) || + (func && func != path->conv.u.app_func); + /* Not a match */ - if (((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) || - (name && *name && strcmp(name, path->name) != 0) || (src && H5T_cmp(src, path->src, false)) || - (dst && H5T_cmp(dst, path->dst, false)) || (func && func != path->conv.u.app_func)) { + if (nomatch) { /* * Notify all other functions to recalculate private data since some * functions might cache a list of conversion functions. For @@ -2769,7 +2778,7 @@ H5T__unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5T_c } /* end for */ FUNC_LEAVE_NOAPI(SUCCEED) -} /* end H5T__unregister() */ +} /* end H5T_unregister() */ /*------------------------------------------------------------------------- * Function: H5Tunregister @@ -2799,7 +2808,7 @@ H5Tunregister(H5T_pers_t pers, const char *name, hid_t src_id, hid_t dst_id, H5T if (dst_id > 0 && (NULL == (dst = (H5T_t *)H5I_object_verify(dst_id, H5I_DATATYPE)))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "dst is not a data type"); - if (H5T__unregister(pers, name, src, dst, func) < 0) + if (H5T_unregister(pers, name, src, dst, NULL, func) < 0) HGOTO_ERROR(H5E_DATATYPE, H5E_CANTDELETE, FAIL, "internal unregister function failed"); done: @@ -6096,3 +6105,26 @@ H5T_own_vol_obj(H5T_t *dt, H5VL_object_t *vol_obj) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5T_own_vol_obj() */ + +/*------------------------------------------------------------------------- + * Function: H5T__get_path_table_npaths + * + * Purpose: Testing function to return the number of type conversion + * paths currently stored in the type conversion path table + * cache. + * + * Return: Number of type conversion paths (can't fail) + * + *------------------------------------------------------------------------- + */ +int +H5T__get_path_table_npaths(void) +{ + int ret_value = 0; + + FUNC_ENTER_PACKAGE_NOERR + + ret_value = H5T_g.npaths; + + FUNC_LEAVE_NOAPI(ret_value) +} diff --git a/src/H5Tpkg.h b/src/H5Tpkg.h index b9e24be912b..ef5ba361f83 100644 --- a/src/H5Tpkg.h +++ b/src/H5Tpkg.h @@ -877,4 +877,7 @@ H5_DLL herr_t H5T__sort_name(const H5T_t *dt, int *map); /* Debugging functions */ H5_DLL herr_t H5T__print_stats(H5T_path_t *path, int *nprint /*in,out*/); +/* Testing functions */ +H5_DLL int H5T__get_path_table_npaths(void); + #endif /* H5Tpkg_H */ diff --git a/src/H5Tprivate.h b/src/H5Tprivate.h index 0332679e728..3120053e9a5 100644 --- a/src/H5Tprivate.h +++ b/src/H5Tprivate.h @@ -133,6 +133,8 @@ H5_DLL H5T_path_t *H5T_path_find(const H5T_t *src, const H5T_t *dst); H5_DLL bool H5T_path_noop(const H5T_path_t *p); H5_DLL H5T_bkg_t H5T_path_bkg(const H5T_path_t *p); H5_DLL H5T_subset_info_t *H5T_path_compound_subset(const H5T_path_t *p); +H5_DLL herr_t H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, + H5VL_object_t *owned_vol_obj, H5T_conv_t func); H5_DLL herr_t H5T_convert(H5T_path_t *tpath, hid_t src_id, hid_t dst_id, size_t nelmts, size_t buf_stride, size_t bkg_stride, void *buf, void *bkg); H5_DLL herr_t H5T_reclaim(hid_t type_id, struct H5S_t *space, void *buf); diff --git a/test/tmisc.c b/test/tmisc.c index a8103afa16d..ddebc3d648f 100644 --- a/test/tmisc.c +++ b/test/tmisc.c @@ -21,6 +21,7 @@ *************************************************************/ #define H5D_FRIEND /*suppress error about including H5Dpkg */ +#define H5T_FRIEND /*suppress error about including H5Tpkg */ /* Define this macro to indicate that the testing APIs should be available */ #define H5D_TESTING @@ -28,6 +29,7 @@ #include "testhdf5.h" #include "H5srcdir.h" #include "H5Dpkg.h" /* Datasets */ +#include "H5Tpkg.h" /* Datatypes */ #include "H5MMprivate.h" /* Memory */ /* Definitions for misc. test #1 */ @@ -335,6 +337,8 @@ typedef struct { See https://nvd.nist.gov/vuln/detail/CVE-2020-10812 */ #define CVE_2020_10812_FILENAME "cve_2020_10812.h5" +#define MISC38_FILE "type_conversion_path_table_issue.h5" + /**************************************************************** ** ** test_misc1(): test unlinking a dataset from a group and immediately @@ -6257,6 +6261,190 @@ test_misc37(void) } /* end test_misc37() */ +/**************************************************************** +** +** test_misc38(): +** Test for issue where the type conversion path table cache +** would grow continuously when variable-length datatypes +** are involved due to file VOL object comparisons causing +** the library not to reuse type conversion paths +** +****************************************************************/ +static void +test_misc38(void) +{ + H5VL_object_t *file_vol_obj = NULL; + const char *buf[] = {"attr_value"}; + herr_t ret = SUCCEED; + hid_t file_id = H5I_INVALID_HID; + hid_t attr_id = H5I_INVALID_HID; + hid_t str_type = H5I_INVALID_HID; + hid_t space_id = H5I_INVALID_HID; + int init_npaths = 0; + int *irbuf = NULL; + char **rbuf = NULL; + bool vol_is_native; + + /* Output message about test being performed */ + MESSAGE(5, ("Fix for type conversion path table issue")); + + /* + * Get the initial number of type conversion path table + * entries that are currently defined + */ + init_npaths = H5T__get_path_table_npaths(); + + file_id = H5Fcreate(MISC38_FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + CHECK(file_id, H5I_INVALID_HID, "H5Fcreate"); + + /* Check if native VOL is being used */ + CHECK(h5_using_native_vol(H5P_DEFAULT, file_id, &vol_is_native), FAIL, "h5_using_native_vol"); + if (!vol_is_native) { + CHECK(H5Fclose(file_id), FAIL, "H5Fclose"); + MESSAGE(5, (" -- SKIPPED --\n")); + return; + } + + /* Retrieve file's VOL object field for further use */ + file_vol_obj = H5F_VOL_OBJ((H5F_t *)H5VL_object(file_id)); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 1 since the file + * was just created. + */ + VERIFY(file_vol_obj->rc, 1, "checking reference count"); + + str_type = H5Tcopy(H5T_C_S1); + CHECK(str_type, H5I_INVALID_HID, "H5Tcopy"); + + ret = H5Tset_size(str_type, H5T_VARIABLE); + CHECK(ret, FAIL, "H5Tset_size"); + + space_id = H5Screate(H5S_SCALAR); + CHECK(space_id, H5I_INVALID_HID, "H5Screate"); + + /* + * Check the number of type conversion path table entries currently + * stored in the cache. It shouldn't have changed yet. + */ + VERIFY(H5T__get_path_table_npaths(), init_npaths, + "checking number of type conversion path table entries"); + + attr_id = H5Acreate2(file_id, "attribute", str_type, space_id, H5P_DEFAULT, H5P_DEFAULT); + CHECK(attr_id, H5I_INVALID_HID, "H5Acreate2"); + + /* + * Check the number of type conversion path table entries currently + * stored in the cache. It shouldn't have changed yet. + */ + VERIFY(H5T__get_path_table_npaths(), init_npaths, + "checking number of type conversion path table entries"); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 2. Creating the + * attribute on the dataset will have caused a H5T_set_loc call that + * associates the attribute's datatype with the file's VOL object + * and will have incremented the reference count by 1. + */ + VERIFY(file_vol_obj->rc, 2, "checking reference count"); + + ret = H5Awrite(attr_id, str_type, buf); + CHECK(ret, FAIL, "H5Awrite"); + + /* + * Check the number of type conversion path table entries currently + * stored in the cache. The H5Awrite call should have added a new + * type conversion path. Note that if another test in this file uses + * the same conversion path, this check may fail and need to be + * refactored. + */ + VERIFY(H5T__get_path_table_npaths(), init_npaths + 1, + "checking number of type conversion path table entries"); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 3. Writing to the + * variable-length typed attribute will have caused an H5T_convert + * call that ends up incrementing the reference count of the + * associated file's VOL object. + */ + VERIFY(file_vol_obj->rc, 3, "checking reference count"); + + ret = H5Aclose(attr_id); + CHECK(ret, FAIL, "H5Aclose"); + ret = H5Fclose(file_id); + CHECK(ret, FAIL, "H5Fclose"); + + irbuf = malloc(100 * 100 * sizeof(int)); + CHECK_PTR(irbuf, "int read buf allocation"); + rbuf = malloc(sizeof(char *)); + CHECK_PTR(rbuf, "varstr read buf allocation"); + + for (size_t i = 0; i < 10; i++) { + file_id = H5Fopen(MISC38_FILE, H5F_ACC_RDONLY, H5P_DEFAULT); + CHECK(file_id, H5I_INVALID_HID, "H5Fopen"); + + /* Retrieve file's VOL object field for further use */ + file_vol_obj = H5F_VOL_OBJ((H5F_t *)H5VL_object(file_id)); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 1 since the file + * was just opened. + */ + VERIFY(file_vol_obj->rc, 1, "checking reference count"); + + attr_id = H5Aopen(file_id, "attribute", H5P_DEFAULT); + CHECK(attr_id, H5I_INVALID_HID, "H5Aopen"); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 2 since opening + * the attribute will also have associated its type with the file's + * VOL object. + */ + VERIFY(file_vol_obj->rc, 2, "checking reference count"); + + ret = H5Aread(attr_id, str_type, rbuf); + CHECK(ret, FAIL, "H5Aread"); + + /* + * Check the number of type conversion path table entries currently + * stored in the cache. Each H5Aread call shouldn't cause this number + * to go up, as the library should have removed the cached conversion + * paths on file close. + */ + VERIFY(H5T__get_path_table_npaths(), init_npaths + 1, + "checking number of type conversion path table entries"); + + /* + * Check reference count of file's VOL object field. At this point, + * the object should have a reference count of 3. Writing to the + * variable-length typed attribute will have caused an H5T_convert + * call that ends up incrementing the reference count of the + * associated file's VOL object. + */ + VERIFY(file_vol_obj->rc, 3, "checking reference count"); + + ret = H5Treclaim(str_type, space_id, H5P_DEFAULT, rbuf); + + ret = H5Aclose(attr_id); + CHECK(ret, FAIL, "H5Aclose"); + ret = H5Fclose(file_id); + CHECK(ret, FAIL, "H5Fclose"); + } + + free(irbuf); + free(rbuf); + + ret = H5Tclose(str_type); + CHECK(ret, FAIL, "H5Tclose"); + ret = H5Sclose(space_id); + CHECK(ret, FAIL, "H5Sclose"); +} + /**************************************************************** ** ** test_misc(): Main misc. test routine. @@ -6325,6 +6513,7 @@ test_misc(void) test_misc35(); /* Test behavior of free-list & allocation statistics API calls */ test_misc36(); /* Exercise H5atclose and H5is_library_terminating */ test_misc37(); /* Test for seg fault failure at file close */ + test_misc38(); /* Test for type conversion path table issue */ } /* test_misc() */ @@ -6380,6 +6569,7 @@ cleanup_misc(void) #ifndef H5_NO_DEPRECATED_SYMBOLS H5Fdelete(MISC31_FILE, H5P_DEFAULT); #endif /* H5_NO_DEPRECATED_SYMBOLS */ + H5Fdelete(MISC38_FILE, H5P_DEFAULT); } H5E_END_TRY } /* end cleanup_misc() */ From 0b2bf8af536d5357013322161c2a465a8f07f5d5 Mon Sep 17 00:00:00 2001 From: Jordan Henderson Date: Thu, 18 Jan 2024 14:35:58 -0600 Subject: [PATCH 2/2] Address comments from review --- src/H5T.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/src/H5T.c b/src/H5T.c index 811323e2d58..09eeff8c2f1 100644 --- a/src/H5T.c +++ b/src/H5T.c @@ -348,6 +348,8 @@ static herr_t H5T__set_size(H5T_t *dt, size_t size); static herr_t H5T__close_cb(H5T_t *dt, void **request); static H5T_path_t *H5T__path_find_real(const H5T_t *src, const H5T_t *dst, const char *name, H5T_conv_func_t *conv); +static bool H5T_path_match(H5T_path_t *path, H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, + H5VL_object_t *owned_vol_obj, H5T_conv_t func); static bool H5T__detect_vlen_ref(const H5T_t *dt); static H5T_t *H5T__initiate_copy(const H5T_t *old_dt); static H5T_t *H5T__copy_transient(H5T_t *old_dt); @@ -2693,8 +2695,22 @@ H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5VL_o FUNC_ENTER_NOAPI_NOERR - /* Remove matching entries from the soft list */ - if (H5T_PERS_DONTCARE == pers || H5T_PERS_SOFT == pers) { + /* + * Remove matching entries from the soft list if: + * + * - The caller didn't specify a particular type (soft or hard) + * of conversion path to match against or specified that soft + * conversion paths should be matched against + * + * AND + * + * - The caller didn't provide the `owned_vol_obj` parameter; + * if this parameter is provided, we want to leave the soft + * list untouched and only remove cached conversion paths + * below where the file VOL object associated with the path's + * source or destination types matches the given VOL object. + */ + if ((H5T_PERS_DONTCARE == pers || H5T_PERS_SOFT == pers) && !owned_vol_obj) { for (i = H5T_g.nsoft - 1; i >= 0; --i) { soft = H5T_g.soft + i; assert(soft); @@ -2704,8 +2720,6 @@ H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5VL_o continue; if (dst && dst->shared->type != soft->dst) continue; - if (owned_vol_obj) - continue; if (func && func != soft->conv.u.app_func) continue; @@ -2721,12 +2735,7 @@ H5T_unregister(H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, H5VL_o path = H5T_g.path[i]; assert(path); - nomatch = ((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) || - (name && *name && strcmp(name, path->name) != 0) || - (src && H5T_cmp(src, path->src, false)) || (dst && H5T_cmp(dst, path->dst, false)) || - (owned_vol_obj && (owned_vol_obj != path->src->shared->owned_vol_obj) && - (owned_vol_obj != path->dst->shared->owned_vol_obj)) || - (func && func != path->conv.u.app_func); + nomatch = !H5T_path_match(path, pers, name, src, dst, owned_vol_obj, func); /* Not a match */ if (nomatch) { @@ -5158,6 +5167,53 @@ H5T__path_find_real(const H5T_t *src, const H5T_t *dst, const char *name, H5T_co FUNC_LEAVE_NOAPI(ret_value) } /* end H5T__path_find_real() */ +/*------------------------------------------------------------------------- + * Function: H5T_path_match + * + * Purpose: Helper function to determine whether a datatype conversion + * path object matches against a given set of criteria. + * + * Return: true/false (can't fail) + * + *------------------------------------------------------------------------- + */ +static bool +H5T_path_match(H5T_path_t *path, H5T_pers_t pers, const char *name, H5T_t *src, H5T_t *dst, + H5VL_object_t *owned_vol_obj, H5T_conv_t func) +{ + bool ret_value = true; + + assert(path); + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + if ( + /* Check that the specified conversion function persistence matches */ + ((H5T_PERS_SOFT == pers && path->is_hard) || (H5T_PERS_HARD == pers && !path->is_hard)) || + + /* Check that the specified conversion path name matches */ + (name && *name && strcmp(name, path->name) != 0) || + + /* + * Check that the specified source and destination datatypes match + * the source and destination datatypes in the conversion path + */ + (src && H5T_cmp(src, path->src, false)) || (dst && H5T_cmp(dst, path->dst, false)) || + + /* + * Check that the specified VOL object matches the VOL object + * in the conversion path + */ + (owned_vol_obj && (owned_vol_obj != path->src->shared->owned_vol_obj) && + (owned_vol_obj != path->dst->shared->owned_vol_obj)) || + + /* Check that the specified conversion function matches */ + (func && func != path->conv.u.app_func)) + ret_value = false; + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5T_path_match() */ + /*------------------------------------------------------------------------- * Function: H5T_path_noop *