Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hdf5 1 12 merges #1528

Merged
merged 17 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
a4b68b2
Use internal version of H5Eprint2 to avoid possible stack overflow (#…
jhendersonHDF May 26, 2021
ae08543
Add support for parallel filters to h5repack (#832)
jhendersonHDF Jul 14, 2021
1a7706d
Allow parallel filters feature for comm size of 1 (#840)
jhendersonHDF Jul 20, 2021
04137ba
Avoid popping API context when one wasn't pushed (#848)
jhendersonHDF Jul 22, 2021
a5d2e86
Fix several warnings (#720)
jhendersonHDF Sep 14, 2021
5d71537
Don't allow H5Pset(get)_all_coll_metadata_ops for DXPLs (#1201)
jhendersonHDF Nov 18, 2021
4b36406
Fix free list tracking and cleanup cast alignment warnings (#1288)
jhendersonHDF Dec 13, 2021
e0769a0
Fix usage of several HDfprintf format specifiers after HDfprintf remo…
jhendersonHDF Jan 4, 2022
e36fa3a
Use appropriate printf format specifiers for haddr_t and hsize_t type…
jhendersonHDF Jan 11, 2022
81e07a3
Fix H5ACmpio dirty bytes creation debugging (#1357)
jhendersonHDF Jan 12, 2022
3742a45
Fix documentation for H5D_space_status_t enum values (#1372)
jhendersonHDF Jan 19, 2022
f6efaf8
Parallel rank0 deadlock fixes (#1183)
jhendersonHDF Jan 22, 2022
cb173eb
Fix a few issues noted by LGTM (#1421)
jhendersonHDF Feb 4, 2022
5282b72
Fix cache sanity checking code by moving functions to wider scope (#1…
jhendersonHDF Feb 21, 2022
17034b0
Fix metadata cache bug when resizing a pinned/protected entry (v2) (#…
jhendersonHDF Mar 1, 2022
b83cd9a
Disable memory alloc sanity checks by default for Autotools debug bui…
jhendersonHDF Mar 3, 2022
0af143d
Committing clang-format changes
github-actions[bot] Mar 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2820,9 +2820,11 @@ AC_ARG_ENABLE([memory-alloc-sanity-check],
[Enable this option to turn on internal memory
allocation sanity checking. This could cause
more memory use and somewhat slower allocation.
This option is orthogonal to the
--enable-using-memchecker option.
[default=yes if debug build, otherwise no]
This option may also cause issues with HDF5
filter plugins, so should not be enabled if
filters are to be used. This option is orthogonal
to the --enable-using-memchecker option.
[default=no]
])],
[MEMORYALLOCSANITYCHECK=$enableval])

Expand All @@ -2832,11 +2834,10 @@ AC_SUBST([MEMORYALLOCSANITYCHECK])

## Set default
if test "X-$MEMORYALLOCSANITYCHECK" = X- ; then
if test "X-$BUILD_MODE" = "X-debug" ; then
MEMORYALLOCSANITYCHECK=yes
else
MEMORYALLOCSANITYCHECK=no
fi
# Should consider enabling this option by default for
# 'developer' builds if that build mode is added in
# the future
MEMORYALLOCSANITYCHECK=no
fi

case "X-$MEMORYALLOCSANITYCHECK" in
Expand Down
2 changes: 1 addition & 1 deletion hl/src/H5LT.c
Original file line number Diff line number Diff line change
Expand Up @@ -2180,7 +2180,7 @@ realloc_and_append(hbool_t _no_user_buf, size_t *len, char *buf, const char *str
*/
if (size_str < *len - 1) {
if (size_str + size_str_to_add < *len - 1) {
HDstrncat(buf, str_to_add, size_str_to_add);
HDstrcat(buf, str_to_add);
}
else {
HDstrncat(buf, str_to_add, (*len - 1) - size_str);
Expand Down
19 changes: 3 additions & 16 deletions java/src/jni/h5util.c
Original file line number Diff line number Diff line change
Expand Up @@ -870,11 +870,8 @@ h5str_sprintf(JNIEnv *env, h5str_t *out_str, hid_t container, hid_t tid, void *i
}
else {
if (typeSize > 0) {
if (NULL == (this_str = (char *)HDmalloc(typeSize + 1)))
if (NULL == (this_str = HDstrdup(tmp_str)))
H5_OUT_OF_MEMORY_ERROR(ENVONLY, "h5str_sprintf: failed to allocate string buffer");

HDstrncpy(this_str, tmp_str, typeSize);
this_str[typeSize] = '\0';
}
}

Expand Down Expand Up @@ -3664,19 +3661,14 @@ obj_info_all(hid_t loc_id, const char *name, const H5L_info2_t *info, void *op_d
info_all_t *datainfo = (info_all_t *)op_data;
H5O_info2_t object_info;
htri_t object_exists;
size_t str_len;

datainfo->otype[datainfo->count] = -1;
datainfo->ltype[datainfo->count] = -1;
datainfo->obj_token[datainfo->count] = H5O_TOKEN_UNDEF;

str_len = HDstrlen(name);
if (NULL == (datainfo->objname[datainfo->count] = (char *)HDmalloc(str_len + 1)))
if (NULL == (datainfo->objname[datainfo->count] = HDstrdup(name)))
goto done;

HDstrncpy(datainfo->objname[datainfo->count], name, str_len);
(datainfo->objname[datainfo->count])[str_len] = '\0';

if ((object_exists = H5Oexists_by_name(loc_id, name, H5P_DEFAULT)) < 0)
goto done;

Expand All @@ -3702,21 +3694,16 @@ obj_info_max(hid_t loc_id, const char *name, const H5L_info2_t *info, void *op_d
{
info_all_t *datainfo = (info_all_t *)op_data;
H5O_info2_t object_info;
size_t str_len;

datainfo->otype[datainfo->count] = -1;
datainfo->ltype[datainfo->count] = -1;
datainfo->objname[datainfo->count] = NULL;
datainfo->obj_token[datainfo->count] = H5O_TOKEN_UNDEF;

/* This will be freed by h5str_array_free(oName, n) */
str_len = HDstrlen(name);
if (NULL == (datainfo->objname[datainfo->count] = (char *)HDmalloc(str_len + 1)))
if (NULL == (datainfo->objname[datainfo->count] = HDstrdup(name)))
goto done;

HDstrncpy(datainfo->objname[datainfo->count], name, str_len);
(datainfo->objname[datainfo->count])[str_len] = '\0';

if (H5Oget_info3(loc_id, &object_info, H5O_INFO_ALL) < 0)
goto done;

Expand Down
48 changes: 48 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,25 @@ New Features

(ADB - 2022/03/11)

- HDF5 memory allocation sanity checking is now off by default for
Autotools debug builds

HDF5 can be configured to perform sanity checking on internal memory
allocations by adding heap canaries to these allocations. However,
enabling this option can cause issues with external filter plugins
when working with (reallocating/freeing/allocating and passing back)
buffers.

Previously, this option was off by default for all CMake build types,
but only off by default for non-debug Autotools builds. Since debug
is the default build mode for HDF5 when built from source with
Autotools, this can result in surprising segfaults that don't occur
when an application is built against a release version of HDF5.
Therefore, this option is now off by default for all build types
across both CMake and Autotools.

(JTH - 2022/03/01)

- Refactored the utils folder.

Added subfolder test and moved the 'swmr_check_compat_vfd.c file'
Expand Down Expand Up @@ -224,6 +243,23 @@ Bug Fixes since HDF5-1.12.1 release
===================================
Library
-------
- Fixed a metadata cache bug when resizing a pinned/protected cache entry

When resizing a pinned/protected cache entry, the metadata
cache code previously would wait until after resizing the
entry to attempt to log the newly-dirtied entry. This would
cause H5C_resize_entry to mark the entry as dirty and make
H5AC_resize_entry think that it doesn't need to add the
newly-dirtied entry to the dirty entries skiplist.

Thus, a subsequent H5AC__log_moved_entry would think it
needs to allocate a new entry for insertion into the dirty
entry skip list, since the entry doesn't exist on that list.
This causes an assertion failure, as the code to allocate a
new entry assumes that the entry is not dirty.

(JRM - 2022/02/28)

- Issue #1436 identified a problem with the H5_VERS_RELEASE check in the
H5check_version function.

Expand Down Expand Up @@ -262,6 +298,18 @@ Bug Fixes since HDF5-1.12.1 release

(JTH - 2021/11/16, HDFFV-10501/HDFFV-10562)

- Fixed several potential MPI deadlocks in library failure conditions

In the parallel library, there were several places where MPI rank 0
could end up skipping past collective MPI operations when some failure
occurs in rank 0-specific processing. This would lead to deadlocks
where rank 0 completes an operation while other ranks wait in the
collective operation. These places have been rewritten to have rank 0
push an error and try to cleanup after the failure, then continue to
participate in the collective operation to the best of its ability.

(JTH - 2021/11/09)

- Fixed an issue with collective metadata reads being permanently disabled
after a dataset chunk lookup operation. This would usually cause a
mismatched MPI_Bcast and MPI_ERR_TRUNCATE issue in the library for
Expand Down
84 changes: 75 additions & 9 deletions src/H5AC.c
Original file line number Diff line number Diff line change
Expand Up @@ -1470,21 +1470,82 @@ H5AC_resize_entry(void *thing, size_t new_size)
cache_ptr = entry_ptr->cache_ptr;
HDassert(cache_ptr);

/* Resize the entry */
if (H5C_resize_entry(thing, new_size) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")

#ifdef H5_HAVE_PARALLEL
{
/* Log the generation of dirty bytes of metadata iff:
*
* 1) The entry is clean on entry, and this resize will dirty it
* (i.e. the current and new sizes are different), and
*
* 2) This is a parallel computation -- which it is if the aux_ptr
* is non-null.
*
* A few points to note about this section of the code:
*
* 1) This call must occur before the call to H5C_resize_entry() since
* H5AC__log_dirtied_entry() expects the target entry to be clean
* on entry.
*
* 2) This code has some basic issues in terms of the number of bytes
* added to the dirty bytes count.
*
* First, it adds the initial entry size to aux_ptr->dirty_bytes,
* not the final size. Note that this code used to use the final
* size, but code to support this has been removed from
* H5AC__log_dirtied_entry() for reasons unknown since I wrote this
* code.
*
* As long as all ranks do the same thing here, this probably doesn't
* matter much, although it will delay initiation of sync points.
*
* A more interesting point is that this code will not increment
* aux_ptr->dirty_bytes if a dirty entry is resized. At first glance
* this seems major, as particularly with the older file formats,
* resizes can be quite large. However, this is probably not an
* issue either, since such resizes will be accompanied by large
* amounts of dirty metadata creation in other areas -- which will
* cause aux_ptr->dirty_bytes to be incremented.
*
* The bottom line is that this code is probably OK, but the above
* points should be kept in mind.
*
* One final observation: This comment is occasioned by a bug caused
* by moving the call to H5AC__log_dirtied_entry() after the call to
* H5C_resize_entry(), and then only calling H5AC__log_dirtied_entry()
* if entry_ptr->is_dirty was false.
*
* Since H5C_resize_entry() marks the target entry dirty unless there
* is not change in size, this had the effect of not calling
* H5AC__log_dirtied_entry() when it should be, and corrupting
* the cleaned and dirtied lists used by rank 0 in the parallel
* version of the metadata cache.
*
* The point here is that you should be very careful when working with
* this code, and not modify it unless you fully understand it.
*
* JRM -- 2/28/22
*/

if ((!entry_ptr->is_dirty) && (entry_ptr->size != new_size)) {

/* the entry is clean, and will be marked dirty in the resize
* operation.
*/
H5AC_aux_t *aux_ptr;

aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
if ((!entry_ptr->is_dirty) && (NULL != aux_ptr))

if (NULL != aux_ptr) {

if (H5AC__log_dirtied_entry(entry_ptr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, "can't log dirtied entry")
}
}
#endif /* H5_HAVE_PARALLEL */

/* Resize the entry */
if (H5C_resize_entry(thing, new_size) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")

done:
/* If currently logging, generate a message */
if (cache_ptr != NULL && cache_ptr->log_info != NULL)
Expand Down Expand Up @@ -1666,9 +1727,14 @@ H5AC_unprotect(H5F_t *f, const H5AC_class_t *type, haddr_t addr, void *thing, un
if (H5AC__log_dirtied_entry((H5AC_info_t *)thing) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log dirtied entry")

if (deleted && aux_ptr->mpi_rank == 0)
if (H5AC__log_deleted_entry((H5AC_info_t *)thing) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC__log_deleted_entry() failed")
if (deleted && aux_ptr->mpi_rank == 0) {
if (H5AC__log_deleted_entry((H5AC_info_t *)thing) < 0) {
/* If we fail to log the deleted entry, push an error but still
* participate in a possible sync point ahead
*/
HDONE_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC__log_deleted_entry() failed")
}
}
} /* end if */
#endif /* H5_HAVE_PARALLEL */

Expand Down
Loading