Skip to content

Commit

Permalink
Add H5Dchunk_iter method for iterating over chunks (#6)
Browse files Browse the repository at this point in the history
* Add H5Dchunk_iter method for iterating over chunks

This method iterates over all chunks in dataset, calling a user-supplied
callback with the chunk information and optional user supplied data.

The iteration stops when the user-supplied callback returns H5_ITER_STOP
or when the iterator is exhausted.

Existing methods to get chunk_info perform an iteration each time, so
getting many or all chunks costs SUM(i) for i = 0 -> N operations for N
chunks, as opposed to N operations when using this iterator for this use case.

* H5Dchunk_iter: test iterating all chunks, some chunks and failing iteration.

* H5D: move H5Dchunk_iter private methods to specific

* trace: add H5D_chunk_iter_op_t and trace H5D.c

* chunks-iter: document chunk_iter

* chunk-iter: chunk add FUNC_ENTER/FUNC_LEAVE macros

* Committing clang-format changes

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and lrknox committed May 20, 2021
1 parent c1acf63 commit 53e1139
Show file tree
Hide file tree
Showing 10 changed files with 365 additions and 28 deletions.
1 change: 1 addition & 0 deletions bin/trace
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ $Source = "";
"H5D_vds_view_t" => "Dv",
"H5FD_mpio_xfer_t" => "Dt",
"H5FD_splitter_vfd_config_t" => "Dr",
"H5D_chunk_iter_op_t" => "x",
"herr_t" => "e",
"H5E_direction_t" => "Ed",
"H5E_error_t" => "Ee",
Expand Down
18 changes: 9 additions & 9 deletions fortran/src/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -839,9 +839,9 @@ LT_VERS_REVISION = 0
LT_CXX_VERS_INTERFACE = 201
LT_CXX_VERS_REVISION = 0
LT_CXX_VERS_AGE = 1
LT_F_VERS_INTERFACE = 200
LT_F_VERS_REVISION = 1
LT_F_VERS_AGE = 0
LT_F_VERS_INTERFACE = 201
LT_F_VERS_REVISION = 0
LT_F_VERS_AGE = 1
LT_HL_VERS_INTERFACE = 200
LT_HL_VERS_REVISION = 1
LT_HL_VERS_AGE = 0
Expand All @@ -851,12 +851,12 @@ LT_HL_CXX_VERS_AGE = 0
LT_HL_F_VERS_INTERFACE = 200
LT_HL_F_VERS_REVISION = 1
LT_HL_F_VERS_AGE = 0
LT_JAVA_VERS_INTERFACE = 200
LT_JAVA_VERS_REVISION = 1
LT_JAVA_VERS_AGE = 0
LT_TOOLS_VERS_INTERFACE = 200
LT_TOOLS_VERS_REVISION = 1
LT_TOOLS_VERS_AGE = 0
LT_JAVA_VERS_INTERFACE = 201
LT_JAVA_VERS_REVISION = 0
LT_JAVA_VERS_AGE = 1
LT_TOOLS_VERS_INTERFACE = 201
LT_TOOLS_VERS_REVISION = 0
LT_TOOLS_VERS_AGE = 1
AM_FCLIBS = $(LIBHDF5)

# This is our main target, the fortran library
Expand Down
15 changes: 15 additions & 0 deletions release_docs/RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,21 @@ New Features

Library:
--------
- H5Dchunk_iter() added for iterating over chunks

This method iterates over all chunks in a dataset, calling a user-supplied
callback with the chunk information and optional user supplied data.

The iteration stops when the user-supplied callback returns H5_ITER_STOP
or when the iterator is exhausted.

Existing methods to get chunk_info perform an iteration each time, so
whereas getting many or all chunks with existing methods caused SUM(i)
for i = 0 -> N operations for N chunks, using this iterator for this
use case will get N chunks with N operations.

(2021/05/19)

- H5Gcreate1() now rejects size_hint parameters larger than UINT32_MAX

The size_hint value is ultimately stored in a uint32_t struct field,
Expand Down
65 changes: 65 additions & 0 deletions src/H5D.c
Original file line number Diff line number Diff line change
Expand Up @@ -1126,3 +1126,68 @@ H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, unsigned *filte
done:
FUNC_LEAVE_API(ret_value)
} /* end H5Dget_chunk_info_by_coord() */

/*-------------------------------------------------------------------------
 * Function:    H5Dchunk_iter
 *
 * Purpose:     Iterates over all chunks in dataset with given callback and user data.
 *
 * Parameters:
 *              hid_t dset_id;          IN: Chunked dataset ID
 *              H5D_chunk_iter_op_t cb  IN: User callback function, called for every chunk.
 *                                          Must not be NULL.
 *              void *op_data           IN/OUT: Optional user data passed on to user callback.
 *
 * Callback information:
 *      H5D_chunk_iter_op_t is defined as:
 *
 *      typedef int (*H5D_chunk_iter_op_t)(
 *          const hsize_t *offset,
 *          uint32_t filter_mask,
 *          haddr_t addr,
 *          uint32_t nbytes,
 *          void *op_data);
 *
 *      H5D_chunk_iter_op_t parameters:
 *          const hsize_t *offset;  IN: Array of starting logical coordinates of chunk.
 *          uint32_t filter_mask;   IN: Filter mask of chunk.
 *          haddr_t addr;           IN: Offset in file of chunk data.
 *          uint32_t nbytes;        IN: Size in number of bytes of chunk data in file.
 *          void *op_data;          IN/OUT: Pointer to any user-defined data
 *                                      associated with the operation.
 *
 *      The return values from an operator are:
 *          Zero (H5_ITER_CONT) causes the iterator to continue, returning zero when all
 *              elements have been processed.
 *          Positive (H5_ITER_STOP) causes the iterator to immediately return that positive
 *              value, indicating short-circuit success.
 *          Negative (H5_ITER_ERROR) causes the iterator to immediately return that value,
 *              indicating failure.
 *
 * Return:      Non-negative on success (the value reported by the VOL
 *              dataset-specific call is passed through), negative on failure.
 *              Fails if dset_id is not a dataset identifier or if cb is NULL.
 *
 * Programmer:  Gaute Hope
 *              August 2020
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5Dchunk_iter(hid_t dset_id, H5D_chunk_iter_op_t cb, void *op_data)
{
    H5VL_object_t *vol_obj   = NULL; /* Dataset for this operation */
    herr_t         ret_value = SUCCEED;

    FUNC_ENTER_API(FAIL)
    H5TRACE3("e", "ix*x", dset_id, cb, op_data);

    /* Check arguments */
    if (NULL == (vol_obj = (H5VL_object_t *)H5I_object_verify(dset_id, H5I_DATASET)))
        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "invalid dataset identifier")

    /* The callback is invoked unconditionally for each chunk further down the
     * stack, so a NULL pointer has to be rejected here.
     */
    if (NULL == cb)
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid callback to chunk iteration")

    /* Hand the callback and its user data to the VOL connector, keeping the
     * non-negative result so that it is returned to the caller rather than
     * being replaced by SUCCEED.
     */
    if ((ret_value = H5VL_dataset_specific(vol_obj, H5VL_DATASET_CHUNK_ITER, H5P_DATASET_XFER_DEFAULT,
                                           H5_REQUEST_NULL, cb, op_data)) < 0)
        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "Can't iterate over chunks")

done:
    FUNC_LEAVE_API(ret_value)
} /* end H5Dchunk_iter() */
98 changes: 98 additions & 0 deletions src/H5Dchunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ typedef struct H5D_chunk_coll_info_t {
} H5D_chunk_coll_info_t;
#endif /* H5_HAVE_PARALLEL */

/* Pairs the user's chunk-iteration callback with its user data so that both
 * travel through the single udata pointer handed to H5D__chunk_iter_cb().
 */
typedef struct H5D_chunk_iter_cb_data_t {
H5D_chunk_iter_op_t cb; /* User defined callback */
void * op_data; /* User data for user defined callback */
} H5D_chunk_iter_cb_data_t;

/********************/
/* Local Prototypes */
/********************/
Expand All @@ -269,6 +274,7 @@ static herr_t H5D__chunk_dest(H5D_t *dset);
static int H5D__get_num_chunks_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata);
static int H5D__get_chunk_info_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata);
static int H5D__get_chunk_info_by_coord_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata);
static int H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata);

/* "Nonexistent" layout operation callback */
static ssize_t H5D__nonexistent_readvv(const H5D_io_info_t *io_info, size_t chunk_max_nseq,
Expand Down Expand Up @@ -7451,3 +7457,95 @@ H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned
done:
FUNC_LEAVE_NOAPI_TAG(ret_value)
} /* end H5D__get_chunk_info_by_coord() */

/*-------------------------------------------------------------------------
 * Function:    H5D__chunk_iter
 *
 * Purpose:     Iterate over all the chunks in the dataset with given callback.
 *
 *              Every entry in the raw data chunk cache is flushed first, then
 *              the chunk index's iterate operation is run with
 *              H5D__chunk_iter_cb, which forwards each chunk record to the
 *              user callback `cb` together with `op_data`.
 *
 *              If the chunk index address is undefined, no callback is made
 *              and the call succeeds.
 *
 * Return:      Success:    Non-negative. The non-negative value reported by
 *                          the chunk index iterate operation is passed
 *                          through to the caller.
 *              Failure:    Negative
 *
 * Programmer:  Gaute Hope
 *              August 2020
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5D__chunk_iter(const H5D_t *dset, H5D_chunk_iter_op_t cb, void *op_data)
{
    const H5O_layout_t *layout = NULL;       /* Dataset layout */
    const H5D_rdcc_t *  rdcc   = NULL;       /* Raw data chunk cache */
    H5D_rdcc_ent_t *    ent;                 /* Cache entry index */
    H5D_chk_idx_info_t  idx_info;            /* Chunked index info */
    herr_t              ret_value = SUCCEED; /* Return value */

    FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr)

    /* Check args */
    HDassert(dset);
    HDassert(dset->shared);

    /* Get dataset layout and raw data chunk cache */
    layout = &(dset->shared->layout);
    rdcc   = &(dset->shared->cache.chunk);
    HDassert(layout);
    HDassert(rdcc);
    HDassert(H5D_CHUNKED == layout->type);

    /* Search for cached chunks that haven't been written out */
    for (ent = rdcc->head; ent; ent = ent->next) {
        /* Flush the chunk out to disk, to make certain the size is correct later */
        if (H5D__chunk_flush_entry(dset, ent, FALSE) < 0)
            HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer")
    }

    /* Compose chunked index info struct */
    idx_info.f       = dset->oloc.file;
    idx_info.pline   = &dset->shared->dcpl_cache.pline;
    idx_info.layout  = &dset->shared->layout.u.chunk;
    idx_info.storage = &dset->shared->layout.storage.u.chunk;

    /* If the dataset is not written, return without errors */
    if (H5F_addr_defined(idx_info.storage->idx_addr)) {
        H5D_chunk_iter_cb_data_t data;

        data.cb      = cb;
        data.op_data = op_data;

        /* Iterate over the allocated chunks calling the iterator callback.
         * The result is kept in ret_value so that a non-negative value from
         * the iteration is not flattened to SUCCEED.
         */
        if ((ret_value = (dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__chunk_iter_cb,
                                                                             &data)) < 0)
            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to iterate over chunks.")
    } /* end if H5F_addr_defined */

done:
    FUNC_LEAVE_NOAPI_TAG(ret_value)
} /* end H5D__chunk_iter() */

/*-------------------------------------------------------------------------
* Function: H5D__chunk_iter_cb
*
* Purpose: Call the user-defined function with the chunk data. The iterator continues if
* the user-defined function returns H5_ITER_CONT, and stops if H5_ITER_STOP is
* returned.
*
* Return: Success: H5_ITER_CONT or H5_ITER_STOP
* Failure: Negative (H5_ITER_ERROR)
*
* Programmer: Gaute Hope
* August 2020
*
*-------------------------------------------------------------------------
*/
static int
H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata)
{
int ret_value = 0;

FUNC_ENTER_STATIC_NOERR

const H5D_chunk_iter_cb_data_t *data = (H5D_chunk_iter_cb_data_t *)udata;

ret_value = (data->cb)(chunk_rec->scaled, chunk_rec->filter_mask, chunk_rec->chunk_addr,
chunk_rec->nbytes, data->op_data);

FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__chunk_iter_cb */
1 change: 1 addition & 0 deletions src/H5Dpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ H5_DLL herr_t H5D__get_chunk_info(const H5D_t *dset, const H5S_t *space, hsize_
unsigned *filter_mask, haddr_t *offset, hsize_t *size);
H5_DLL herr_t H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *coord, unsigned *filter_mask,
haddr_t *addr, hsize_t *size);
H5_DLL herr_t H5D__chunk_iter(const H5D_t *dset, H5D_chunk_iter_op_t cb, void *op_data);
H5_DLL haddr_t H5D__get_offset(const H5D_t *dset);
H5_DLL herr_t H5D__vlen_get_buf_size(H5D_t *dset, hid_t type_id, hid_t space_id, hsize_t *size);
H5_DLL herr_t H5D__vlen_get_buf_size_gen(H5VL_object_t *vol_obj, hid_t type_id, hid_t space_id,
Expand Down
53 changes: 53 additions & 0 deletions src/H5Dpublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,14 @@ typedef herr_t (*H5D_scatter_func_t)(const void **src_buf /*out*/, size_t *src_b
typedef herr_t (*H5D_gather_func_t)(const void *dst_buf, size_t dst_buf_bytes_used, void *op_data);
//! <!-- [H5D_gather_func_t_snip] -->

//! <!-- [H5D_chunk_iter_op_t_snip] -->
/**
 * \brief Define the operator function pointer for H5Dchunk_iter()
 *
 * \param[in]     offset      Array of starting logical coordinates of chunk
 * \param[in]     filter_mask Filter mask of chunk
 * \param[in]     addr        Offset in file of chunk data
 * \param[in]     nbytes      Size in number of bytes of chunk data in file
 * \param[in,out] op_data     Pointer to any user-defined data associated with
 *                            the operation
 *
 * \return Zero (H5_ITER_CONT) to continue iterating, a positive value
 *         (H5_ITER_STOP) to stop iterating with short-circuit success, or a
 *         negative value (H5_ITER_ERROR) to stop iterating with failure.
 */
typedef int (*H5D_chunk_iter_op_t)(const hsize_t *offset, uint32_t filter_mask, haddr_t addr, uint32_t nbytes,
void *op_data);
//! <!-- [H5D_chunk_iter_op_t_snip] -->

/********************/
/* Public Variables */
/********************/
Expand Down Expand Up @@ -601,6 +609,51 @@ H5_DLL herr_t H5Dget_num_chunks(hid_t dset_id, hid_t fspace_id, hsize_t *nchunks
H5_DLL herr_t H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, unsigned *filter_mask,
haddr_t *addr, hsize_t *size);

/**
 * --------------------------------------------------------------------------
 * \ingroup H5D
 *
 * \brief Iterate over all chunks
 *
 * \dset_id
 * \param[in] cb User callback function, called for every chunk.
 * \param[in,out] op_data User-defined pointer to data passed on to \p cb
 *
 * \return \herr_t
 *
 * \details H5Dchunk_iter() iterates over all chunks in the dataset, calling the
 *          user supplied callback with the details of the chunk and the supplied
 *          \p op_data.
 *
 *          Callback information:
 *          H5D_chunk_iter_op_t is defined as:
 *
 *          typedef int (*H5D_chunk_iter_op_t)(
 *              const hsize_t *offset,
 *              uint32_t filter_mask,
 *              haddr_t addr,
 *              uint32_t nbytes,
 *              void *op_data);
 *
 *          H5D_chunk_iter_op_t parameters:
 *              const hsize_t *offset;  IN: Array of starting logical coordinates of chunk.
 *              uint32_t filter_mask;   IN: Filter mask of chunk.
 *              haddr_t addr;           IN: Offset in file of chunk data.
 *              uint32_t nbytes;        IN: Size in number of bytes of chunk data in file.
 *              void *op_data;          IN/OUT: Pointer to any user-defined data
 *                                          associated with the operation.
 *
 *          The return values from an operator are:
 *              Zero (H5_ITER_CONT) causes the iterator to continue, returning zero when all
 *                  elements have been processed.
 *              Positive (H5_ITER_STOP) causes the iterator to immediately return that positive
 *                  value, indicating short-circuit success.
 *              Negative (H5_ITER_ERROR) causes the iterator to immediately return that value,
 *                  indicating failure.
 *
 */
H5_DLL herr_t H5Dchunk_iter(hid_t dset_id, H5D_chunk_iter_op_t cb, void *op_data);

/**
* --------------------------------------------------------------------------
* \ingroup H5D
Expand Down
4 changes: 3 additions & 1 deletion src/H5VLconnector.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ typedef enum H5VL_dataset_get_t {
typedef enum H5VL_dataset_specific_t {
H5VL_DATASET_SET_EXTENT, /* H5Dset_extent */
H5VL_DATASET_FLUSH, /* H5Dflush */
H5VL_DATASET_REFRESH /* H5Drefresh */
H5VL_DATASET_REFRESH, /* H5Drefresh */
H5VL_DATASET_WAIT, /* H5Dwait */
H5VL_DATASET_CHUNK_ITER /* H5Dchunk_iter */
} H5VL_dataset_specific_t;

/* Typedef for VOL connector dataset optional VOL operations */
Expand Down
25 changes: 25 additions & 0 deletions src/H5VLnative_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,31 @@ H5VL__native_dataset_specific(void *obj, H5VL_dataset_specific_t specific_type,
break;
}

case H5VL_DATASET_WAIT: { /* H5Dwait */
/* The native VOL connector doesn't support asynchronous
* operations, so this is a no-op.
*/
break;
}

case H5VL_DATASET_CHUNK_ITER: { /* H5Dchunk_iter */
H5D_chunk_iter_op_t cb = HDva_arg(arguments, H5D_chunk_iter_op_t);
void * op_data = HDva_arg(arguments, void *);

HDassert(dset->shared);

/* Make sure the dataset is chunked */
if (H5D_CHUNKED != dset->shared->layout.type) {
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a chunked dataset")
}

/* Call private function */
if (H5D__chunk_iter(dset, cb, op_data) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't iterate over chunks")

break;
}

default:
HGOTO_ERROR(H5E_VOL, H5E_UNSUPPORTED, FAIL, "invalid specific operation")
} /* end switch */
Expand Down
Loading

0 comments on commit 53e1139

Please sign in to comment.