Skip to content

Commit

Permalink
Suppress filters on variables with non-fixed-size types.
Browse files Browse the repository at this point in the history
re: Discussion Unidata#2554
re: PR Unidata#2231
re: Issue Unidata#2189

After some discussion, the issue of applying filters on variables
whose type is not fixed size, was resolved as follows:
1. A call to nc_def_var_filter will ignore such filters, but will issue a log warning.
2. Loading (from an existing file) a variable whose type is not fixed-size and which has filters, will cause the variable to be suppressed.

This PR enforces those rules.

### Misc. Other changes
* Add a test case to test the vlen change.
* Make some minor clean-ups in various cmake and automake files.
* Remove unused test
  • Loading branch information
DennisHeimbigner committed Jun 21, 2023
1 parent 9a5a6aa commit 8cab468
Show file tree
Hide file tree
Showing 15 changed files with 541 additions and 160 deletions.
1 change: 1 addition & 0 deletions include/nc4internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ extern int NC4_inq_type_fixed_size(int ncid, nc_type xtype, int* isfixedsizep);
/* Manage the fixed/var sized'ness of a type */
extern int NC4_recheck_varsize(NC_TYPE_INFO_T* parenttype, nc_type addedtype);
extern int NC4_set_varsize(NC_TYPE_INFO_T* parenttype);
extern int NC4_var_varsized(NC_VAR_INFO_T* var);

/* Close the file. */
extern int nc4_close_netcdf4_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio);
Expand Down
6 changes: 5 additions & 1 deletion libdispatch/dfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "netcdf_filter.h"
#include "ncdispatch.h"
#include "nc4internal.h"
#include "nclog.h"

#ifdef USE_HDF5
#include "hdf5internal.h"
Expand Down Expand Up @@ -134,7 +135,10 @@ nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const un
if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat;
/* If the variable's type is not fixed-size, then signal error */
if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat;
if(!fixedsize) return NC_EFILTER;
if(!fixedsize) {
nclog(NCLOGWARN,"Filters cannot be applied to variable length data types.");
return NC_NOERR; /* Deliberately suppress */
}
if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done;
done:
return stat;
Expand Down
14 changes: 12 additions & 2 deletions libhdf5/hdf5open.c
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,7 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
int f;
int stat = NC_NOERR;
NC_HDF5_VAR_INFO_T *hdf5_var;
int varsized = 0;

assert(var);

Expand All @@ -1070,12 +1071,16 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
if ((num_filters = H5Pget_nfilters(propid)) < 0)
{stat = NC_EHDFERR; goto done;}

/* If the type of the variable is variable length, and
it has filters defined, suppress the variable. */
varsized = NC4_var_varsized(var);

for (f = 0; f < num_filters; f++)
{
int flags = 0;
htri_t avail = -1;
unsigned flags = 0;
cd_nelems = 0;
if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, NULL, 0, NULL, NULL)) < 0)
if ((filter = H5Pget_filter2(propid, f, &flags, &cd_nelems, NULL, 0, NULL, NULL)) < 0)
{stat = NC_ENOFILTER; goto done;} /* Assume this means an unknown filter */
if((avail = H5Zfilter_avail(filter)) < 0)
{stat = NC_EHDFERR; goto done;} /* Something in HDF5 went wrong */
Expand All @@ -1084,6 +1089,11 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var)
/* mark variable as unreadable */
hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING;
}
/* If variable type is varsized and filter is mandatory then this variable is unreadable */
if(varsized && (flags & H5Z_FLAG_MANDATORY) != 0) {
/* mark variable as unreadable */
hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING;
}
if((cd_values = calloc(sizeof(unsigned int),cd_nelems))==NULL)
{stat = NC_ENOMEM; goto done;}
if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, cd_values, 0, NULL, NULL)) < 0)
Expand Down
72 changes: 46 additions & 26 deletions libnczarr/zclose.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,49 @@ zclose_gatts(NC_GRP_INFO_T* grp)
return stat;
}

/**
* @internal Close resources for single var
*
* @param var var to reclaim
*
* @return ::NC_NOERR No error.
* @author Dennis Heimbigner
*/
int
NCZ_zclose_var1(NC_VAR_INFO_T* var)
{
int stat = NC_NOERR;
NCZ_VAR_INFO_T* zvar;
NC_ATT_INFO_T* att;
int a;

assert(var && var->format_var_info);
zvar = var->format_var_info;;
for(a = 0; a < ncindexsize(var->att); a++) {
NCZ_ATT_INFO_T* zatt;
att = (NC_ATT_INFO_T*)ncindexith(var->att, a);
assert(att && att->format_att_info);
zatt = att->format_att_info;
nullfree(zatt);
att->format_att_info = NULL; /* avoid memory errors */
}
#ifdef ENABLE_NCZARR_FILTERS
/* Reclaim filters */
if(var->filters != NULL) {
(void)NCZ_filter_freelists(var);
}
var->filters = NULL;
#endif
/* Reclaim the type */
if(var->type_info) (void)zclose_type(var->type_info);
if(zvar->cache) NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
if(zvar->xarray) nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
return stat;
}

/**
* @internal Close resources for vars in a group.
*
Expand All @@ -148,37 +191,14 @@ zclose_vars(NC_GRP_INFO_T* grp)
{
int stat = NC_NOERR;
NC_VAR_INFO_T* var;
NCZ_VAR_INFO_T* zvar;
NC_ATT_INFO_T* att;
int a, i;
int i;

for(i = 0; i < ncindexsize(grp->vars); i++) {
var = (NC_VAR_INFO_T*)ncindexith(grp->vars, i);
assert(var && var->format_var_info);
zvar = var->format_var_info;;
for(a = 0; a < ncindexsize(var->att); a++) {
NCZ_ATT_INFO_T* zatt;
att = (NC_ATT_INFO_T*)ncindexith(var->att, a);
assert(att && att->format_att_info);
zatt = att->format_att_info;
nullfree(zatt);
att->format_att_info = NULL; /* avoid memory errors */
}
#ifdef ENABLE_NCZARR_FILTERS
/* Reclaim filters */
if(var->filters != NULL) {
(void)NCZ_filter_freelists(var);
}
var->filters = NULL;
#endif
/* Reclaim the type */
if(var->type_info) (void)zclose_type(var->type_info);
if(zvar->cache) NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
if(zvar->xarray) nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
if((stat = NCZ_zclose_var1(var))) goto done;
}
done:
return stat;
}

Expand Down
1 change: 1 addition & 0 deletions libnczarr/zinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ int ncz_closeorabort(NC_FILE_INFO_T*, void* params, int abort);

/* zclose.c */
int ncz_close_ncz_file(NC_FILE_INFO_T* file, int abort);
int NCZ_zclose_var1(NC_VAR_INFO_T* var);

/* zattr.c */
int ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **attlist);
Expand Down
21 changes: 18 additions & 3 deletions libnczarr/zsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
NCjson* jfilter = NULL;
int chainindex;
#endif
int varsized;
int suppressvar = 0; /* 1 => make this variable invisible */

ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames));

Expand Down Expand Up @@ -1533,6 +1535,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}
}

/* See if this variable is variable sized */
varsized = NC4_var_varsized(var);

if(!purezarr) {
/* Extract the _NCZARR_ARRAY values */
/* Do this first so we know about storage esp. scalar */
Expand Down Expand Up @@ -1719,7 +1724,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* compressor key */
/* From V2 Spec: A JSON object identifying the primary compression codec and providing
configuration parameters, or ``null`` if no compressor is to be used. */
{
if(!varsized) { /* Only process if variable is fixed-size */
#ifdef ENABLE_NCZARR_FILTERS
if(var->filters == NULL) var->filters = (void*)nclistnew();
if((stat = NCZ_filter_initialize())) goto done;
Expand All @@ -1730,6 +1735,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}
#endif
}
/* Suppress variable if there are filters and var is not fixed-size */
if(varsized && nclistlength((NClist*)var->filters) > 0)
suppressvar = 1;

if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim)))
goto done;
Expand All @@ -1741,9 +1749,16 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}

#ifdef ENABLE_NCZARR_FILTERS
/* At this point, we can finalize the filters */
if((stat = NCZ_filter_setup(var))) goto done;
if(!suppressvar) {
/* At this point, we can finalize the filters */
if((stat = NCZ_filter_setup(var))) goto done;
}
#endif

if(suppressvar) {
if((stat = NCZ_zclose_var1(var))) goto done;
}

/* Clean up from last cycle */
nclistfreeall(dimnames); dimnames = nclistnew();
nullfree(varpath); varpath = NULL;
Expand Down
18 changes: 18 additions & 0 deletions libsrc4/nc4type.c
Original file line number Diff line number Diff line change
Expand Up @@ -799,3 +799,21 @@ NC4_set_varsize(NC_TYPE_INFO_T* typ)
done:
return stat;
}

/**
* Test if a variable's type is fixed sized or not.
* @param var - to test
* @return 0 if fixed size, 1 otherwise.
*/
int
NC4_var_varsized(NC_VAR_INFO_T* var)
{
NC_TYPE_INFO_T* vtype = NULL;

/* Check the variable type */
vtype = var->type_info;
if(vtype->hdr.id < NC_STRING) return 0;
if(vtype->hdr.id == NC_STRING) return 1;
return vtype->varsized;
}

9 changes: 7 additions & 2 deletions nc_test4/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,20 @@ IF(USE_HDF5 AND ENABLE_FILTER_TESTING)
build_bin_test(tst_multifilter)
build_bin_test(test_filter_order)
build_bin_test(test_filter_repeat)
build_bin_test(tst_filter_avail)
build_bin_test(test_filter_vlen)
build_bin_test(tst_filter_vlen)
ADD_SH_TEST(nc_test4 tst_filter)
ADD_SH_TEST(nc_test4 tst_specific_filters)
ADD_SH_TEST(nc_test4 tst_bloscfail)
ADD_SH_TEST(nc_test4 tst_filter_vlen)
IF(FALSE)
# This test is too dangerous to run in a parallel make environment.
# It causes race conditions. So suppress and only test by hand.
ADD_SH_TEST(nc_test4 tst_unknown)
# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
ADD_SH_TEST(nc_test4 tst_filterinstall)
ENDIF(FALSE)
ENDIF(USE_HDF5 AND ENABLE_FILTER_TESTING)

Expand Down
26 changes: 17 additions & 9 deletions nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,29 @@ if USE_HDF5
if ENABLE_FILTER_TESTING
extradir =
check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat test_filter_vlen
check_PROGRAMS += tst_multifilter tst_filter_avail
check_PROGRAMS += tst_multifilter tst_filter_vlen
TESTS += tst_filter.sh
TESTS += tst_specific_filters.sh
TESTS += tst_bloscfail.sh
TESTS += tst_filter_vlen.sh
if ISMINGW
XFAIL_TESTS += tst_filter.sh
endif # ISMINGW

if AX_MANUAL
# This test is too dangerous to run in a parallel make environment.
# It causes race conditions. So suppress and only test by hand.
#TESTS += tst_unknown.sh
TESTS += tst_unknown.sh
endif

if AX_MANUAL
# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
TESTS += tst_filterinstall.sh
endif

endif # ENABLE_FILTER_TESTING
endif # USE_HDF5
endif # BUILD_UTILITIES
Expand Down Expand Up @@ -120,13 +133,8 @@ ref_filter_order_create.txt ref_filter_order_read.txt \
ref_any.cdl tst_specific_filters.sh tst_unknown.sh \
tst_virtual_datasets.c noop1.cdl unknown.cdl \
tst_broken_files.c ref_bloscx.cdl tst_bloscfail.sh \
tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl

# The tst_filterinstall test can only be run after an install
# occurred with --with-plugin-dir enabled. So there is no point
#in running it via make check. It is kept here so it can be
# manually invoked if desired
EXTRA_DIST += tst_filterinstall.sh
tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl \
tst_filterinstall.sh tst_filter_vlen.sh

CLEANFILES = tst_mpi_parallel.bin cdm_sea_soundings.nc bm_chunking.nc \
tst_floats_1D.cdl floats_1D_3.nc floats_1D.cdl tst_*.nc tmp_*.txt \
Expand Down
Loading

0 comments on commit 8cab468

Please sign in to comment.