From 878dc465c3f0c615ea98c234b10f5d6cd2995039 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Mon, 19 Jun 2023 18:47:02 -0600 Subject: [PATCH 1/4] mam2 --- plugins/Makefile.am | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/Makefile.am b/plugins/Makefile.am index 5ad2a51435..0a6e50b750 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -39,6 +39,7 @@ plugins_to_install = # These libraries are for testing only check_LTLIBRARIES = +test_plugins = if ISMINGW LDADD = ${top_builddir}/liblib/libnetcdf.la @@ -81,7 +82,7 @@ endif endif # ENABLE_NCZARR_FILTERS -if ENABLE_PLUGINS +if ENABLE_NCZARR_FILTERS # The NCZarr codec libraries (they need libnetcdf) lib__nczstdfilters_la_SOURCES = NCZstdfilters.c @@ -102,8 +103,6 @@ lib__nch5zstd_la_SOURCES = H5Zzstd.c H5Zzstd.h plugins_to_install += lib__nch5zstd.la endif -endif #ENABLE_PLUGINS - # The noop filter is to allow testing of multifilters and filter order # Need two distinct instances lib__nch5noop_la_SOURCES = H5Znoop.c H5Zutil.c h5noop.h @@ -121,8 +120,8 @@ lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH} lib__nch5unknown_la_SOURCES = H5Zunknown.c lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH} -check_LTLIBRARIES += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la -check_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la +test_plugins += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la +test_plugins += lib__nch5misc.la lib__nczmisc.la # Bzip2 is used to test more complex filters lib__nch5bzip2_la_SOURCES = H5Zbzip2.c h5bzip2.h @@ -133,12 +132,14 @@ lib__nch5bzip2_la_SOURCES += ${BZIP2SRC} endif plugins_to_install += lib__nch5bzip2.la +endif # ENABLE_NCZARR_FILTERS + endif #ENABLE_FILTER_TESTING if ENABLE_PLUGIN_DIR plugin_LTLIBRARIES += $(plugins_to_install) else -check_LTLIBRARIES += $(plugins_to_install) +check_LTLIBRARIES += $(plugins_to_install) $(test_libraries) endif BUILT_SOURCES = H5Znoop1.c From 9a5a6aa961cf9c7b624f263e79b311ba3065be55 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Mon, 19 Jun 2023 18:49:36 -0600 Subject: [PATCH 2/4] revert --- plugins/Makefile.am | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/plugins/Makefile.am b/plugins/Makefile.am index 0a6e50b750..5ad2a51435 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -39,7 +39,6 @@ plugins_to_install = # These libraries are for testing only check_LTLIBRARIES = -test_plugins = if ISMINGW LDADD = ${top_builddir}/liblib/libnetcdf.la @@ -82,7 +81,7 @@ endif endif # ENABLE_NCZARR_FILTERS -if ENABLE_NCZARR_FILTERS +if ENABLE_PLUGINS # The NCZarr codec libraries (they need libnetcdf) lib__nczstdfilters_la_SOURCES = NCZstdfilters.c @@ -103,6 +102,8 @@ lib__nch5zstd_la_SOURCES = H5Zzstd.c H5Zzstd.h plugins_to_install += lib__nch5zstd.la endif +endif #ENABLE_PLUGINS + # The noop filter is to allow testing of multifilters and filter order # Need two distinct instances lib__nch5noop_la_SOURCES = H5Znoop.c H5Zutil.c h5noop.h @@ -120,8 +121,8 @@ lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH} lib__nch5unknown_la_SOURCES = H5Zunknown.c lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH} -test_plugins += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la -test_plugins += lib__nch5misc.la lib__nczmisc.la +check_LTLIBRARIES += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la +check_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la # Bzip2 is used to test more complex filters lib__nch5bzip2_la_SOURCES = H5Zbzip2.c h5bzip2.h @@ -132,14 +133,12 @@ lib__nch5bzip2_la_SOURCES += ${BZIP2SRC} endif plugins_to_install += lib__nch5bzip2.la -endif # ENABLE_NCZARR_FILTERS - endif #ENABLE_FILTER_TESTING if ENABLE_PLUGIN_DIR plugin_LTLIBRARIES += $(plugins_to_install) else -check_LTLIBRARIES += $(plugins_to_install) $(test_libraries) +check_LTLIBRARIES += $(plugins_to_install) endif BUILT_SOURCES = H5Znoop1.c From 8cab468169e3c2aa9bca25a2f9a046f4d2a38b83 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 21 Jun 2023 14:46:22 -0600 Subject: [PATCH 3/4] Suppress filters on variables with non-fixed-size types. re: Discussion https://github.com/Unidata/netcdf-c/discussions/2554 re: PR https://github.com/Unidata/netcdf-c/pull/2231 re: Issue https://github.com/Unidata/netcdf-c/issues/2189 After some discussion, the issue of applying filters on variables whose type is not fixed size, was resolved as follows: 1. A call to nc_def_var_filter will ignore such filters, but will issue a log warning. 2. Loading (from an existing file) a variable whose type is not fixed-size and which has filters, will cause the variable to be suppressed. This PR enforces those rules. ### Misc. Other changes * Add a test case to test the vlen change. * Make some minor clean-ups in various cmake and automake files. * Remove unused test --- include/nc4internal.h | 1 + libdispatch/dfilter.c | 6 +- libhdf5/hdf5open.c | 14 ++- libnczarr/zclose.c | 72 +++++++++------ libnczarr/zinternal.h | 1 + libnczarr/zsync.c | 21 ++++- libsrc4/nc4type.c | 18 ++++ nc_test4/CMakeLists.txt | 9 +- nc_test4/Makefile.am | 26 ++++-- nc_test4/tst_filter_avail.c | 106 ---------------------- nc_test4/tst_filter_misc.sh | 116 ++++++++++++++++++++++++ nc_test4/tst_filter_vlen.c | 172 ++++++++++++++++++++++++++++++++++++ nc_test4/tst_filter_vlen.sh | 100 +++++++++++++++++++++ nczarr_test/CMakeLists.txt | 19 ++-- nczarr_test/Makefile.am | 20 +++-- 15 files changed, 541 insertions(+), 160 deletions(-) delete mode 100644 nc_test4/tst_filter_avail.c create mode 100755 nc_test4/tst_filter_misc.sh create mode 100644 nc_test4/tst_filter_vlen.c create mode 100755 nc_test4/tst_filter_vlen.sh diff --git a/include/nc4internal.h b/include/nc4internal.h index 797fbd32d0..f9925b1dea 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -451,6 +451,7 @@ extern int NC4_inq_type_fixed_size(int ncid, nc_type xtype, int* isfixedsizep); /* Manage the fixed/var sized'ness of a type */ extern int NC4_recheck_varsize(NC_TYPE_INFO_T* parenttype, nc_type addedtype); extern int NC4_set_varsize(NC_TYPE_INFO_T* parenttype); +extern int NC4_var_varsized(NC_VAR_INFO_T* var); /* Close the file. */ extern int nc4_close_netcdf4_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio); diff --git a/libdispatch/dfilter.c b/libdispatch/dfilter.c index 3780019e69..7c2cfe8c07 100644 --- a/libdispatch/dfilter.c +++ b/libdispatch/dfilter.c @@ -19,6 +19,7 @@ #include "netcdf_filter.h" #include "ncdispatch.h" #include "nc4internal.h" +#include "nclog.h" #ifdef USE_HDF5 #include "hdf5internal.h" @@ -134,7 +135,10 @@ nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const un if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat; /* If the variable's type is not fixed-size, then signal error */ if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat; - if(!fixedsize) return NC_EFILTER; + if(!fixedsize) { + nclog(NCLOGWARN,"Filters cannot be applied to variable length data types."); + return NC_NOERR; /* Deliberately suppress */ + } if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done; done: return stat; diff --git a/libhdf5/hdf5open.c b/libhdf5/hdf5open.c index caeec0c1e5..b6e5607920 100644 --- a/libhdf5/hdf5open.c +++ b/libhdf5/hdf5open.c @@ -1061,6 +1061,7 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var) int f; int stat = NC_NOERR; NC_HDF5_VAR_INFO_T *hdf5_var; + int varsized = 0; assert(var); @@ -1070,12 +1071,16 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var) if ((num_filters = H5Pget_nfilters(propid)) < 0) {stat = NC_EHDFERR; goto done;} + /* If the type of the variable is variable length, and + it has filters defined, suppress the variable. */ + varsized = NC4_var_varsized(var); + for (f = 0; f < num_filters; f++) { - int flags = 0; htri_t avail = -1; + unsigned flags = 0; cd_nelems = 0; - if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, NULL, 0, NULL, NULL)) < 0) + if ((filter = H5Pget_filter2(propid, f, &flags, &cd_nelems, NULL, 0, NULL, NULL)) < 0) {stat = NC_ENOFILTER; goto done;} /* Assume this means an unknown filter */ if((avail = H5Zfilter_avail(filter)) < 0) {stat = NC_EHDFERR; goto done;} /* Something in HDF5 went wrong */ @@ -1084,6 +1089,11 @@ static int get_filter_info(hid_t propid, NC_VAR_INFO_T *var) /* mark variable as unreadable */ hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING; } + /* If variable type is varsized and filter is mandatory then this variable is unreadable */ + if(varsized && (flags & H5Z_FLAG_MANDATORY) != 0) { + /* mark variable as unreadable */ + hdf5_var->flags |= NC_HDF5_VAR_FILTER_MISSING; + } if((cd_values = calloc(sizeof(unsigned int),cd_nelems))==NULL) {stat = NC_ENOMEM; goto done;} if ((filter = H5Pget_filter2(propid, f, NULL, &cd_nelems, cd_values, 0, NULL, NULL)) < 0) diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index cc8b4d0064..eda6fcc2b0 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -135,6 +135,49 @@ zclose_gatts(NC_GRP_INFO_T* grp) return stat; } +/** + * @internal Close resources for single var + * + * @param var var to reclaim + * + * @return ::NC_NOERR No error. + * @author Dennis Heimbigner + */ +int +NCZ_zclose_var1(NC_VAR_INFO_T* var) +{ + int stat = NC_NOERR; + NCZ_VAR_INFO_T* zvar; + NC_ATT_INFO_T* att; + int a; + + assert(var && var->format_var_info); + zvar = var->format_var_info;; + for(a = 0; a < ncindexsize(var->att); a++) { + NCZ_ATT_INFO_T* zatt; + att = (NC_ATT_INFO_T*)ncindexith(var->att, a); + assert(att && att->format_att_info); + zatt = att->format_att_info; + nullfree(zatt); + att->format_att_info = NULL; /* avoid memory errors */ + } +#ifdef ENABLE_NCZARR_FILTERS + /* Reclaim filters */ + if(var->filters != NULL) { + (void)NCZ_filter_freelists(var); + } + var->filters = NULL; +#endif + /* Reclaim the type */ + if(var->type_info) (void)zclose_type(var->type_info); + if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); + /* reclaim xarray */ + if(zvar->xarray) nclistfreeall(zvar->xarray); + nullfree(zvar); + var->format_var_info = NULL; /* avoid memory errors */ + return stat; +} + /** * @internal Close resources for vars in a group. * @@ -148,37 +191,14 @@ zclose_vars(NC_GRP_INFO_T* grp) { int stat = NC_NOERR; NC_VAR_INFO_T* var; - NCZ_VAR_INFO_T* zvar; - NC_ATT_INFO_T* att; - int a, i; + int i; for(i = 0; i < ncindexsize(grp->vars); i++) { var = (NC_VAR_INFO_T*)ncindexith(grp->vars, i); assert(var && var->format_var_info); - zvar = var->format_var_info;; - for(a = 0; a < ncindexsize(var->att); a++) { - NCZ_ATT_INFO_T* zatt; - att = (NC_ATT_INFO_T*)ncindexith(var->att, a); - assert(att && att->format_att_info); - zatt = att->format_att_info; - nullfree(zatt); - att->format_att_info = NULL; /* avoid memory errors */ - } -#ifdef ENABLE_NCZARR_FILTERS - /* Reclaim filters */ - if(var->filters != NULL) { - (void)NCZ_filter_freelists(var); - } - var->filters = NULL; -#endif - /* Reclaim the type */ - if(var->type_info) (void)zclose_type(var->type_info); - if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); - /* reclaim xarray */ - if(zvar->xarray) nclistfreeall(zvar->xarray); - nullfree(zvar); - var->format_var_info = NULL; /* avoid memory errors */ + if((stat = NCZ_zclose_var1(var))) goto done; } +done: return stat; } diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index c5fc008cef..4a441c49fd 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -253,6 +253,7 @@ int ncz_closeorabort(NC_FILE_INFO_T*, void* params, int abort); /* zclose.c */ int ncz_close_ncz_file(NC_FILE_INFO_T* file, int abort); +int NCZ_zclose_var1(NC_VAR_INFO_T* var); /* zattr.c */ int ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **attlist); diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 075d91dbf5..cf3b61ae60 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -1451,6 +1451,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) NCjson* jfilter = NULL; int chainindex; #endif + int varsized; + int suppressvar = 0; /* 1 => make this variable invisible */ ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames)); @@ -1533,6 +1535,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) } } + /* See if this variable is variable sized */ + varsized = NC4_var_varsized(var); + if(!purezarr) { /* Extract the _NCZARR_ARRAY values */ /* Do this first so we know about storage esp. scalar */ @@ -1719,7 +1724,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* compressor key */ /* From V2 Spec: A JSON object identifying the primary compression codec and providing configuration parameters, or ``null`` if no compressor is to be used. */ - { + if(!varsized) { /* Only process if variable is fixed-size */ #ifdef ENABLE_NCZARR_FILTERS if(var->filters == NULL) var->filters = (void*)nclistnew(); if((stat = NCZ_filter_initialize())) goto done; @@ -1730,6 +1735,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) } #endif } + /* Suppress variable if there are filters and var is not fixed-size */ + if(varsized && nclistlength((NClist*)var->filters) > 0) + suppressvar = 1; if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim))) goto done; @@ -1741,9 +1749,16 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) } #ifdef ENABLE_NCZARR_FILTERS - /* At this point, we can finalize the filters */ - if((stat = NCZ_filter_setup(var))) goto done; + if(!suppressvar) { + /* At this point, we can finalize the filters */ + if((stat = NCZ_filter_setup(var))) goto done; + } #endif + + if(suppressvar) { + if((stat = NCZ_zclose_var1(var))) goto done; + } + /* Clean up from last cycle */ nclistfreeall(dimnames); dimnames = nclistnew(); nullfree(varpath); varpath = NULL; diff --git a/libsrc4/nc4type.c b/libsrc4/nc4type.c index f0b827b0e2..1aef7ea0e7 100644 --- a/libsrc4/nc4type.c +++ b/libsrc4/nc4type.c @@ -799,3 +799,21 @@ NC4_set_varsize(NC_TYPE_INFO_T* typ) done: return stat; } + +/** + * Test if a variable's type is fixed sized or not. + * @param var - to test + * @return 0 if fixed size, 1 otherwise. + */ +int +NC4_var_varsized(NC_VAR_INFO_T* var) +{ + NC_TYPE_INFO_T* vtype = NULL; + + /* Check the variable type */ + vtype = var->type_info; + if(vtype->hdr.id < NC_STRING) return 0; + if(vtype->hdr.id == NC_STRING) return 1; + return vtype->varsized; +} + diff --git a/nc_test4/CMakeLists.txt b/nc_test4/CMakeLists.txt index 08c4e27d5e..7b98fc4f4a 100644 --- a/nc_test4/CMakeLists.txt +++ b/nc_test4/CMakeLists.txt @@ -48,15 +48,20 @@ IF(USE_HDF5 AND ENABLE_FILTER_TESTING) build_bin_test(tst_multifilter) build_bin_test(test_filter_order) build_bin_test(test_filter_repeat) - build_bin_test(tst_filter_avail) - build_bin_test(test_filter_vlen) + build_bin_test(tst_filter_vlen) ADD_SH_TEST(nc_test4 tst_filter) ADD_SH_TEST(nc_test4 tst_specific_filters) ADD_SH_TEST(nc_test4 tst_bloscfail) + ADD_SH_TEST(nc_test4 tst_filter_vlen) IF(FALSE) # This test is too dangerous to run in a parallel make environment. # It causes race conditions. So suppress and only test by hand. ADD_SH_TEST(nc_test4 tst_unknown) + # The tst_filterinstall test can only be run after an install + # occurred with --with-plugin-dir enabled. So there is no point + #in running it via make check. It is kept here so it can be + # manually invoked if desired + ADD_SH_TEST(nc_test4 tst_filterinstall) ENDIF(FALSE) ENDIF(USE_HDF5 AND ENABLE_FILTER_TESTING) diff --git a/nc_test4/Makefile.am b/nc_test4/Makefile.am index feed5c204e..6af747dcfd 100644 --- a/nc_test4/Makefile.am +++ b/nc_test4/Makefile.am @@ -82,16 +82,29 @@ if USE_HDF5 if ENABLE_FILTER_TESTING extradir = check_PROGRAMS += test_filter test_filter_misc test_filter_order test_filter_repeat test_filter_vlen -check_PROGRAMS += tst_multifilter tst_filter_avail +check_PROGRAMS += tst_multifilter tst_filter_vlen TESTS += tst_filter.sh TESTS += tst_specific_filters.sh TESTS += tst_bloscfail.sh +TESTS += tst_filter_vlen.sh if ISMINGW XFAIL_TESTS += tst_filter.sh endif # ISMINGW + +if AX_MANUAL # This test is too dangerous to run in a parallel make environment. # It causes race conditions. So suppress and only test by hand. -#TESTS += tst_unknown.sh +TESTS += tst_unknown.sh +endif + +if AX_MANUAL +# The tst_filterinstall test can only be run after an install +# occurred with --with-plugin-dir enabled. So there is no point +#in running it via make check. It is kept here so it can be +# manually invoked if desired +TESTS += tst_filterinstall.sh +endif + endif # ENABLE_FILTER_TESTING endif # USE_HDF5 endif # BUILD_UTILITIES @@ -120,13 +133,8 @@ ref_filter_order_create.txt ref_filter_order_read.txt \ ref_any.cdl tst_specific_filters.sh tst_unknown.sh \ tst_virtual_datasets.c noop1.cdl unknown.cdl \ tst_broken_files.c ref_bloscx.cdl tst_bloscfail.sh \ -tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl - -# The tst_filterinstall test can only be run after an install -# occurred with --with-plugin-dir enabled. So there is no point -#in running it via make check. It is kept here so it can be -# manually invoked if desired -EXTRA_DIST += tst_filterinstall.sh +tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl \ +tst_filterinstall.sh tst_filter_vlen.sh CLEANFILES = tst_mpi_parallel.bin cdm_sea_soundings.nc bm_chunking.nc \ tst_floats_1D.cdl floats_1D_3.nc floats_1D.cdl tst_*.nc tmp_*.txt \ diff --git a/nc_test4/tst_filter_avail.c b/nc_test4/tst_filter_avail.c deleted file mode 100644 index 39fa323f95..0000000000 --- a/nc_test4/tst_filter_avail.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright 2018, UCAR/Unidata - See COPYRIGHT file for copying and redistribution conditions. -*/ - -#include "config.h" -#include -#include -#include - -#ifdef USE_HDF5 -#include -#endif - -#include "netcdf.h" -#include "netcdf_aux.h" -#include "netcdf_filter.h" - -#undef DEBUG - -#define MAXPARAMS 32 - -#ifdef TESTNCZARR -#define DFALT_TESTFILE "file://tmp_filter_avail.file#mode=nczarr,file" -#else -#define DFALT_TESTFILE "tmp_filter_avail.nc" -#endif - -static const char* testfile = NULL; -static int nerrs = 0; - -static int ncid; - -/* Forward */ -static int test_test1(void); -static void init(int argc, char** argv); - -#define ERRR do { \ -fflush(stdout); /* Make sure our stdout is synced with stderr. */ \ -fprintf(stderr, "Sorry! Unexpected result, %s, line: %d\n", \ - __FILE__, __LINE__); \ -nerrs++;\ -} while (0) - -static int -check(int err,int line) -{ - if(err != NC_NOERR) { - fprintf(stderr,"fail (%d): %s\n",line,nc_strerror(err)); - } - return NC_NOERR; -} - -#define CHECK(x) check(x,__LINE__) - -static int -test_test1(void) -{ - int stat = NC_NOERR; - - printf("test1: bzip2 availability\n"); - CHECK(nc_create(testfile,NC_NETCDF4|NC_CLOBBER,&ncid)); - CHECK(nc_enddef(ncid)); - switch (stat = nc_inq_filter_avail(ncid,H5Z_FILTER_BZIP2)) { - case NC_NOERR: break; - case NC_ENOFILTER: break; - default: CHECK(stat); goto done; - } - if(stat == NC_ENOFILTER) { - printf("*** FAIL: filter %d not available\n",H5Z_FILTER_BZIP2); - } else { - printf("*** PASS: filter %d available\n",H5Z_FILTER_BZIP2); - } - - CHECK(nc_abort(ncid)); -done: - return stat; -} - -/**************************************************/ -/* Utilities */ - -static void -init(int argc, char** argv) -{ - /* get the testfile path */ - if(argc > 1) - testfile = argv[1]; - else - testfile = DFALT_TESTFILE; -} - -/**************************************************/ -int -main(int argc, char **argv) -{ -#ifdef USE_HDF5 -#ifdef DEBUG - H5Eprint1(stderr); - nc_set_log_level(1); -#endif -#endif - init(argc,argv); - if(test_test1() != NC_NOERR) ERRR; - exit(nerrs > 0?1:0); -} diff --git a/nc_test4/tst_filter_misc.sh b/nc_test4/tst_filter_misc.sh new file mode 100755 index 0000000000..7e8ad8bef8 --- /dev/null +++ b/nc_test4/tst_filter_misc.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# Test the filter install +# This cannot be run as a regular test +# because installation will not have occurred + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +if test "x$TESTNCZARR" = x1; then +. $srcdir/test_nczarr.sh +fi + +set -e + +isolate "testdir_filter_misc" +THISDIR=`pwd` +cd $ISOPATH + +if test "x$TESTNCZARR" = x1; then +s3isolate +fi + +# Load the findplugins function +. ${builddir}/findplugin.sh +echo "findplugin.sh loaded" + +if ! filteravail bzip2 ; then + echo ">>> Filter bzip2 not available; discontinuing test" + exit 0; +fi + +# Function to remove selected -s attributes from file; +# These attributes might be platform dependent +sclean() { + cat $1 \ + | sed -e '/:_IsNetcdf4/d' \ + | sed -e '/:_Endianness/d' \ + | sed -e '/_NCProperties/d' \ + | sed -e '/_SuperblockVersion/d' \ + | sed -e '/_Format/d' \ + | sed -e '/global attributes:/d' \ + | cat > $2 +} + +# Function to extract _Filter attribute from a file +# These attributes might be platform dependent +getfilterattr() { +V="$1" +sed -e '/${V}.*:_Filter/p' -ed <$2 >$3 +} + +# Function to extract _Codecs attribute from a file +# These attributes might be platform dependent +getcodecsattr() { +V="$1" +sed -e '/${V}.*:_Codecs/p' -ed <$2 >$3 +} + +trimleft() { +sed -e 's/[ ]*\([^ ].*\)/\1/' <$1 >$2 +} + +setfilter() { + FF="$1" + FSRC="$2" + FDST="$3" + FIH5="$4" + FICX="$5" + FFH5="$6" + FFCX="$7" + if test "x$FFH5" = x ; then FFH5="$FIH5" ; fi + if test "x$FFCX" = x ; then FFCX="$FICX" ; fi + rm -f $FDST + cat ${srcdir}/$FSRC \ + | sed -e "s/ref_any/${FF}/" \ + | sed -e "s/IH5/${FIH5}/" -e "s/FH5/${FFH5}/" \ + | sed -e "s/ICX/${FICX}/" -e "s/FCX/${FFCX}/" \ + | sed -e 's/"/\\"/g' -e 's/@/"/g' \ + | cat > $FDST +} + +# Execute the specified tests + +testavail() { + zext=$1 + if ! filteravail bzip2; then return 0; fi + if test "x$TESTNCZARR" = x1 ; then + ${execdir}/test_filter_avail + else + ${execdir}/tst_filter_avail + fi +} + +testvlen() { + zext=$1 + if test "x$TESTNCZARR" = x1 ; then + ${execdir}/test_filter_vlen + else + ${execdir}/tst_filter_vlen + fi +} + +testset() { +# Which test cases to exercise + testavail $1 + testvlen $1 +} + +if test "x$TESTNCZARR" = x1 ; then + testset file + if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testset zip ; fi + if test "x$FEATURE_S3TESTS" = xyes ; then testset s3 ; fi +else + testset nc +fi diff --git a/nc_test4/tst_filter_vlen.c b/nc_test4/tst_filter_vlen.c new file mode 100644 index 0000000000..0248623963 --- /dev/null +++ b/nc_test4/tst_filter_vlen.c @@ -0,0 +1,172 @@ +/* + Copyright 2018, UCAR/Unidata + See COPYRIGHT file for copying and redistribution conditions. +*/ + +#include "config.h" +#include +#include +#include + +#ifdef HAVE_HDF5_H +#include +#endif +#include "netcdf.h" +#include "netcdf_aux.h" +#include "netcdf_filter.h" + +#undef DEBUG + +#ifndef H5Z_FILTER_BZIP2 +#define H5Z_FILTER_BZIP2 307 +#endif + +#define TEST_ID 32768 + +#define MAXERRS 8 + +#define MAXPARAMS 32 + +#define NPARAMS 14 + +static const char* testfile = NULL; + +#define MAXDIMS 8 + +#ifdef TESTNCZARR +#define DFALT_TESTFILE "file://tmp_filter_vlen.nc#mode=nczarr,file" +#else +#define DFALT_TESTFILE "tmp_filter_vlen.nc" +#endif + +#define NDIMS 4 +static size_t dimsize[NDIMS] = {4,4,4,4}; + +static size_t ndims = NDIMS; + +static size_t totalproduct = 1; /* x-product over max dims */ +static size_t actualproduct = 1; /* x-product over actualdims */ + +static int nerrs = 0; + +static int ncid, varid; +static int dimids[MAXDIMS]; +static float* array = NULL; +static float* expected = NULL; + +/* Forward */ +static int test_test1(void); +static void init(int argc, char** argv); +static void reset(void); + +#define ERRR do { \ +fflush(stdout); /* Make sure our stdout is synced with stderr. */ \ +fprintf(stderr, "Sorry! Unexpected result, %s, line: %d\n", \ + __FILE__, __LINE__); \ +nerrs++;\ +} while (0) + +static int +check(int err,int line) +{ + if(err != NC_NOERR) { + fprintf(stderr,"fail (%d): %s\n",line,nc_strerror(err)); + } + return NC_NOERR; +} +#define CHECK(x) check(x,__LINE__) + +static int +create(void) +{ + /* Create a file with one variable */ + CHECK(nc_create(testfile, NC_NETCDF4|NC_CLOBBER, &ncid)); + CHECK(nc_set_fill(ncid, NC_NOFILL, NULL)); + return NC_NOERR; +} + +static int +defvar(nc_type xtype) +{ + size_t i; + + /* Create a file with one variable-sized variable */ + for(i=0;i 0) { + fprintf(stderr,"*** id=%d\n",id); + ok = 0; + } + CHECK(nc_abort(ncid)); + return ok; +} + +/**************************************************/ +/* Utilities */ + +static void +reset() +{ + memset(array,0,sizeof(float)*actualproduct); +} + +static void +init(int argc, char** argv) +{ + size_t i; + + /* get the testfile path */ + if(argc > 1) + testfile = argv[1]; + else + testfile = DFALT_TESTFILE; + + /* Setup various variables */ + totalproduct = 1; + actualproduct = 1; + for(i=0;i 0 ? "FAILED" : "PASS")); + exit(nerrs > 0?1:0); +} diff --git a/nc_test4/tst_filter_vlen.sh b/nc_test4/tst_filter_vlen.sh new file mode 100755 index 0000000000..03beb080a5 --- /dev/null +++ b/nc_test4/tst_filter_vlen.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# Test the filter install +# This cannot be run as a regular test +# because installation will not have occurred + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +if test "x$TESTNCZARR" = x1; then +. $srcdir/test_nczarr.sh +fi + +set -e + +isolate "testdir_filter_vlen" +THISDIR=`pwd` +cd $ISOPATH + +if test "x$TESTNCZARR" = x1; then +s3isolate +fi + +# Load the findplugins function +. ${builddir}/findplugin.sh +echo "findplugin.sh loaded" + +# Function to remove selected -s attributes from file; +# These attributes might be platform dependent +sclean() { + cat $1 \ + | sed -e '/:_IsNetcdf4/d' \ + | sed -e '/:_Endianness/d' \ + | sed -e '/_NCProperties/d' \ + | sed -e '/_SuperblockVersion/d' \ + | sed -e '/_Format/d' \ + | sed -e '/global attributes:/d' \ + | cat > $2 +} + +# Function to extract _Filter attribute from a file +# These attributes might be platform dependent +getfilterattr() { +V="$1" +sed -e '/${V}.*:_Filter/p' -ed <$2 >$3 +} + +# Function to extract _Codecs attribute from a file +# These attributes might be platform dependent +getcodecsattr() { +V="$1" +sed -e '/${V}.*:_Codecs/p' -ed <$2 >$3 +} + +trimleft() { +sed -e 's/[ ]*\([^ ].*\)/\1/' <$1 >$2 +} + +setfilter() { + FF="$1" + FSRC="$2" + FDST="$3" + FIH5="$4" + FICX="$5" + FFH5="$6" + FFCX="$7" + if test "x$FFH5" = x ; then FFH5="$FIH5" ; fi + if test "x$FFCX" = x ; then FFCX="$FICX" ; fi + rm -f $FDST + cat ${srcdir}/$FSRC \ + | sed -e "s/ref_any/${FF}/" \ + | sed -e "s/IH5/${FIH5}/" -e "s/FH5/${FFH5}/" \ + | sed -e "s/ICX/${FICX}/" -e "s/FCX/${FFCX}/" \ + | sed -e 's/"/\\"/g' -e 's/@/"/g' \ + | cat > $FDST +} + +# Execute the specified tests + +testvlen() { + zext=$1 + if test "x$TESTNCZARR" = x1 ; then + ${execdir}/test_filter_vlen + else + ${execdir}/tst_filter_vlen + fi +} + +testset() { +# Which test cases to exercise + testvlen $1 +} + +if test "x$TESTNCZARR" = x1 ; then + testset file + if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testset zip ; fi + if test "x$FEATURE_S3TESTS" = xyes ; then testset s3 ; fi +else + testset nc +fi diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index d98ef4aa54..dc66e96bba 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -13,9 +13,9 @@ FILE(READ ${CMAKE_CURRENT_SOURCE_DIR}/../nc_test4/tst_quantize.c QSOURCE) FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_quantize.c "#define TESTNCZARR\n") FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/test_quantize.c "${QSOURCE}") -FILE(READ ${CMAKE_CURRENT_SOURCE_DIR}/../nc_test4/tst_filter_avail.c ASOURCE) +FILE(READ ${CMAKE_CURRENT_SOURCE_DIR}/../nc_test4/tst_filter_vlen.c ASOURCE) STRING(PREPEND ASOURCE "#define TESTNCZARR\n") -FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_filter_avail.c "${ASOURCE}") +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_filter_vlen.c "${ASOURCE}") FILE(READ ${CMAKE_CURRENT_SOURCE_DIR}/../nc_test4/tst_specific_filters.sh SPSOURCE) STRING(PREPEND SPSOURCE "#!/bin/bash\n") @@ -35,6 +35,14 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/run_unknown.1 "${SPSOURCE}") CONFIGURE_FILE(${CMAKE_CURRENT_BINARY_DIR}/run_unknown.1 ${CMAKE_CURRENT_BINARY_DIR}/run_unknown.sh FILE_PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE @ONLY NEWLINE_STYLE LF) FILE(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/run_unknown.1) +FILE(READ ${CMAKE_CURRENT_SOURCE_DIR}/../nc_test4/tst_filter_vlen.sh SPSOURCE) +STRING(PREPEND SPSOURCE "#!/bin/bash\n") +STRING(PREPEND SPSOURCE "TESTNCZARR=1\n") +# Replace with FILE(CONFIGURE) when cmake 3.18 is in common use +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/run_filter_vlen.1 "${SPSOURCE}") +CONFIGURE_FILE(${CMAKE_CURRENT_BINARY_DIR}/run_filter_vlen.1 ${CMAKE_CURRENT_BINARY_DIR}/run_filter_vlen.sh FILE_PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE @ONLY NEWLINE_STYLE LF) +FILE(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/run_filter_vlen.1) + FILE(GLOB COPY_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.sh ${CMAKE_CURRENT_SOURCE_DIR}/ref*.cdl ${CMAKE_CURRENT_SOURCE_DIR}/ref*.txt @@ -139,14 +147,13 @@ IF(ENABLE_TESTS) build_bin_test(testfilter_multi) build_bin_test(testfilter_order) build_bin_test(testfilter_repeat) - build_bin_test(test_filter_avail) ADD_SH_TEST(nczarr_test run_nczfilter) ADD_SH_TEST(nczarr_test run_filter) ADD_SH_TEST(nczarr_test run_specific_filters) IF(FALSE) - # This test is too dangerous to run in a parallel make environment. - # It causes race conditions. So suppress and only test by hand. - ADD_SH_TEST(nczarr_test run_unknown) + # This test is too dangerous to run in a parallel make environment. + # It causes race conditions. So suppress and only test by hand. + ADD_SH_TEST(nczarr_test run_unknown) ENDIF(FALSE) ENDIF(ENABLE_FILTER_TESTING) if(ENABLE_NCZARR_ZIP) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index de0a66a637..7b972fe3e4 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -96,7 +96,7 @@ check_PROGRAMS += tst_nczfilter TESTS += run_nczfilter.sh # Echo filter tests from nc_test4 -check_PROGRAMS += testfilter testfilter_misc testfilter_order testfilter_repeat testfilter_multi test_filter_avail +check_PROGRAMS += testfilter testfilter_misc testfilter_order testfilter_repeat testfilter_multi TESTS += run_filter.sh TESTS += run_specific_filters.sh @@ -170,16 +170,17 @@ ref_zarr_test_data.cdl.gz CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc tmp*.dump tmp*.tmp tmp*.zmap tmp_ngc.c ref_zarr_test_data.cdl tst_*.nc.zip ref_quotes.zip ref_power_901_constants.zip -BUILT_SOURCES = test_quantize.c test_filter_avail.c run_specific_filters.sh run_filterinstall.sh run_unknown.sh +BUILT_SOURCES = test_quantize.c run_specific_filters.sh run_filterinstall.sh run_unknown.sh test_filter_vlen.c run_filter_vlen.sh + test_quantize.c: $(top_srcdir)/nc_test4/tst_quantize.c rm -f $@ echo "#define TESTNCZARR" > $@ cat $(top_srcdir)/nc_test4/tst_quantize.c >> $@ -test_filter_avail.c: $(top_srcdir)/nc_test4/tst_filter_avail.c +test_filter_vlen.c: $(top_srcdir)/nc_test4/tst_filter_vlen.c rm -f $@ echo "#define TESTNCZARR" > $@ - cat $(top_srcdir)/nc_test4/tst_filter_avail.c >> $@ + cat $(top_srcdir)/nc_test4/tst_filter_vlen.c >> $@ run_unknown.sh: $(top_srcdir)/nc_test4/tst_unknown.sh rm -f $@ run_unknown.tmp @@ -199,6 +200,15 @@ run_specific_filters.sh: $(top_srcdir)/nc_test4/tst_specific_filters.sh chmod a+x $@ rm -f run_specific_filters.tmp +run_filter_vlen.sh: $(top_srcdir)/nc_test4/tst_filter_vlen.sh + rm -f $@ run_filter_vlen.tmp + echo "#!/bin/bash" > run_filter_vlen.tmp + echo "TESTNCZARR=1" >> run_filter_vlen.tmp + cat $(top_srcdir)/nc_test4/tst_filter_vlen.sh >> run_filter_vlen.tmp + tr -d '\r' < run_filter_vlen.tmp > $@ + chmod a+x $@ + rm -f run_filter_vlen.tmp + run_filterinstall.sh: $(top_srcdir)/nc_test4/tst_filterinstall.sh rm -f $@ run_filterinstall.tmp echo "#!/bin/bash" > run_filterinstall.tmp @@ -215,7 +225,7 @@ clean-local: rm -fr rcmiscdir ref_power_901_constants.file -DISTCLEANFILES = findplugin.sh test_quantize.c run_specific_filters.sh run_filterinstall.sh run_unknown.sh test_filter_avail.c +DISTCLEANFILES = findplugin.sh test_quantize.c run_specific_filters.sh run_filterinstall.sh run_unknown.sh test_filter_vlen.c run_filter_vlen.sh # Provide a specific s3 cleanup action s3cleanup:: From fb422e696b1d1f8ebddad8a54c66ba9bde84bb85 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Fri, 23 Jun 2023 13:42:16 -0600 Subject: [PATCH 4/4] Update docs/filters.md and RELEASENOTES.md --- RELEASE_NOTES.md | 1 + docs/filters.md | 65 ++++++++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index ef88feb4ac..1ab194fc0b 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release ## 4.9.3 - TBD +* Suppress filters on variables with non-fixed-size types. See [Github #2716](https://github.com/Unidata/netcdf-c/pull/2716). * Provide a single option to disable all network access and testing. See [Github #2708](https://github.com/Unidata/netcdf-c/pull/2708). * Fix some problems with earthdata authorization and data access. See [Github #2709](https://github.com/Unidata/netcdf-c/pull/2709). * Fix a race condition in some ncdump tests. See [Github #2682](https://github.com/Unidata/netcdf-c/pull/2682). diff --git a/docs/filters.md b/docs/filters.md index e68fcd4e2d..995544b82d 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -33,6 +33,9 @@ The most common kind of filter is a compression-decompression filter, and that is the focus of this document. But non-compression filters – fletcher32, for example – also exist. +This document describes the support for HDF5 filters and also +the newly added support for NCZarr filters. + The netCDF enhanced (aka netCDF-4) library inherits this capability since it depends on the HDF5 library. The HDF5 library (1.8.11 and later) supports filters, and netCDF is based @@ -45,30 +48,22 @@ multiple filters are defined on a variable, they are applied in first-defined order on writing and on the reverse order when reading. -This document describes the support for HDF5 filters and also -the newly added support for NCZarr filters. - -## A Warning on Backward Compatibility {#filters_compatibility} - -The API defined in this document should accurately reflect the -current state of filters in the netCDF-c library. Be aware that -there was a short period in which the filter code was undergoing -some revision and extension. Those extensions have largely been -reverted. Unfortunately, some users may experience some -compilation problems for previously working code because of -these reversions. In that case, please revise your code to -adhere to this document. Apologies are extended for any -inconvenience. - -A user may encounter an incompatibility if any of the following appears in user code. - -* The function *\_nc\_inq\_var\_filter* was returning the error value NC\_ENOFILTER if a variable had no associated filters. - It has been reverted to the previous case where it returns NC\_NOERR and the returned filter id was set to zero if the variable had no filters. -* The function *nc\_inq\_var\_filterids* was renamed to *nc\_inq\_var\_filter\_ids*. -* Some auxilliary functions for parsing textual filter specifications have been moved to the file *netcdf\_aux.h*. See [Appendix A](#filters_appendixa). -* All of the "filterx" functions have been removed. This is unlikely to cause problems because they had limited visibility. - -For additional information, see [Appendix B](#filters_appendixb). +There is an important "caveat" with respect to filters +and their application to variables. +If the type of the variable is variable-sized, then attempts +to define a filter on such a variable will not be allowed. +In this case, the call to *nc\_def\_var\_filter* will succeed +but the filter will be suppressed and a warning will be logged. +Similarly, if an existing file is opened, and there is a +variable-sized variable with a filter, then that variable will be +suppressed and will be inaccessible through the netcdf-c API. + +The concept of a variable-sized type is defined as follows: +1. The type NC_STRING is variable-sized. +2. Any user defined type of the class NC_VLEN is variable sized. +3. If a compound type has any field that is (transitively) variable-sized, + then that compound type is variable-sized. +4. All other types are fixed-size. ## Enabling A HDF5 Compression Filter {#filters_enable} @@ -1150,6 +1145,28 @@ The important thing to note is that at run-time, there are several cases to cons 3. HDF5_PLUGIN_DIR is not defined at either run-time or build-time -- no action needed 4. HDF5_PLUGIN_DIR is not defined at run-time but was defined at build-time -- this will probably fail +## Appendix I. A Warning on Backward Compatibility {#filters_appendixi} + +The API defined in this document should accurately reflect the +current state of filters in the netCDF-c library. Be aware that +there was a short period in which the filter code was undergoing +some revision and extension. Those extensions have largely been +reverted. Unfortunately, some users may experience some +compilation problems for previously working code because of +these reversions. In that case, please revise your code to +adhere to this document. Apologies are extended for any +inconvenience. + +A user may encounter an incompatibility if any of the following appears in user code. + +* The function *\_nc\_inq\_var\_filter* was returning the error value NC\_ENOFILTER if a variable had no associated filters. + It has been reverted to the previous case where it returns NC\_NOERR and the returned filter id was set to zero if the variable had no filters. +* The function *nc\_inq\_var\_filterids* was renamed to *nc\_inq\_var\_filter\_ids*. +* Some auxilliary functions for parsing textual filter specifications have been moved to the file *netcdf\_aux.h*. See [Appendix A](#filters_appendixa). +* All of the "filterx" functions have been removed. This is unlikely to cause problems because they had limited visibility. + +For additional information, see [Appendix B](#filters_appendixb). + ## Point of Contact {#filters_poc} *Author*: Dennis Heimbigner