From a4a969f0fd1c3b10c2a5458295d5d689e097a58c Mon Sep 17 00:00:00 2001
From: David Young <dyoung@hdfgroup.org>
Date: Thu, 3 Sep 2020 14:33:41 -0500
Subject: [PATCH 1/4] Give an overview of the bigsets test in a mega-comment at
 the top.  Also, describe the test pattern.  And while I'm here, repair a
 comment, s/writer/reader/.

---
 test/vfd_swmr_bigset_writer.c | 80 ++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/test/vfd_swmr_bigset_writer.c b/test/vfd_swmr_bigset_writer.c
index 39e8d7878c6..0a7c79b7ddc 100644
--- a/test/vfd_swmr_bigset_writer.c
+++ b/test/vfd_swmr_bigset_writer.c
@@ -11,6 +11,58 @@
  * help@hdfgroup.org.
  */
 
+/* This program performs "bigset" tests for VFD SWMR.  In VFD SWMR mode,
+ * the bigset tests exercise
+ *
+ * 1 the two major indices for extensible, chunked datasets: the extensible
+ *   array and the version-2 B-tree, with VFD SWMR active.
+ *
+ * 2 reading and writing virtual datasets with source datasets residing in
+ *   the same HDF5 file
+ *
+ * 3 virtual datasets with source datasets spread over a small number of
+ *   HDF5 files
+ *
+ * The program selects between two personalities, reader or writer, using
+ * the name it is invoked with (the last component of argv[0]).  Reader and
+ * writer should run simultaneously.
+ *
+ * The bigset tests use datasets extensible in one dimension to exercise the
+ * extensible array, and in two dimensions to exercise the v2 B-tree.
+ *
+ * The writer opens an HDF5 file and creates `n` chunked datasets extensible in `1 <= d <= 2`
+ * dimensions and runs for `i` iterations.  The chunk size, `w` x `h`, is
+ * user-selectable, as are `d`, `i`, and `n`.  In each iteration, the writer extends
+ * each dataset by the width (or the width and height) of a chunk, and
+ * writes a test pattern to the dataset on chunk boundaries.
+ *
+ * The reader should be started with the same user-selectable parameters
+ * as the writer: iterations, number of datasets, chunk width and height,
+ * dimensions.
+ *
+ * The reader opens the same HDF5 file, reads and re-reads it until all
+ * `n` datasets appear, and then reads and re-reads the datasets until
+ * all iteration 0 data is available and contains the expected test pattern.
+ * The reader repeats for the iteration 1 data, iteration 2, and so on,
+ * until `i` iterations are complete.
+ *
+ * The reader reads datasets in chunk-sized units.  To challenge the
+ * chunk index a bit, the reader reads on a chunk boundary on even
+ * iterations and reads with a small offset from a chunk boundary on
+ * odd iterations.
+ *
+ * The writer adds an attribute to every `a`th dataset, where `a` is
+ * a user-selectable parameter.  The reader reads and verifies an
+ * attribute on every `a`th dataset.
+ *
+ * To help ensure that the reader and the writer are simultaneously
+ * reading and writing the HDF5 file, both reader and writer pause
+ * between each dataset written/verified (if there are at least as many
+ * iterations as datasets) or between each iteration (if there are
+ * fewer iterations than datasets).  The duration of the pause is
+ * user-selectable.
+ */
+
 #include <err.h>
 #include <libgen.h>
 #include <time.h> /* nanosleep(2) */
@@ -743,6 +795,32 @@ open_extensible_dset(state_t *s, unsigned int which)
     s->dataset[which] = ds;
 }
 
+/* Write or verify the dataset test pattern in the matrix `mat`.
+ * `mat` is a "subview" of the `which`th dataset with origin
+ * `(base.row, base.col)`.
+ *
+ * If `do_set` is true, write the pattern; otherwise, verify.
+ *
+ * The basic test pattern consists of increasing
+ * integers written in nested corners of the dataset
+ * starting at element (0, 0):
+ *
+ *  0
+ *
+ *  0  1
+ *  3  2
+ *
+ *  0  1  4
+ *  3  2  5
+ *  8  7  6
+ *
+ *  0  1  4  9
+ *  3  2  5 10
+ *  8  7  6 11
+ * 15 14 13 12
+ *
+ * In an actual pattern, the dataset number, `which`, is added to each integer.
+ */
 static void
 set_or_verify_matrix(mat_t *mat, unsigned int which, base_t base, bool do_set)
 {
@@ -929,7 +1007,7 @@ verify_extensible_dset(state_t *s, unsigned int which, mat_t *mat,
             }
 
             /* Across the bottom, stopping before the last column to
-             * avoid re-writing the bottom-right chunk.
+             * avoid re-reading the bottom-right chunk.
              */
             base.row = last.row;
             for (base.col = ofs; base.col < last.col;

From 101f040b80a1dcee79fe2bc70c0c59021871b09b Mon Sep 17 00:00:00 2001
From: David Young <dyoung@hdfgroup.org>
Date: Thu, 3 Sep 2020 14:34:53 -0500
Subject: [PATCH 2/4] Add Dana's words about MS Windows support.

---
 doc/vfd-swmr-user-guide.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/vfd-swmr-user-guide.md b/doc/vfd-swmr-user-guide.md
index 80c53ee7a0a..806e34f547f 100644
--- a/doc/vfd-swmr-user-guide.md
+++ b/doc/vfd-swmr-user-guide.md
@@ -448,8 +448,11 @@ Improvements to VFD SWMR may also alleviate the problem.
 
 ## Microsoft Windows 
 
-VFD SWMR does not support Microsoft Windows at this time.  We are
-investigating to see when we can add Windows support.
+VFD SWMR is not officially supported on Microsoft Windows at this time.  The
+feature should, in theory, work on Windows and NTFS; however, it has not been
+tested, as the existing VFD SWMR tests rely on shell scripts.  Note that Windows
+file shares are not supported, as there is no write-ordering guarantee (as with
+NFS, et al.).
 
 ## Supported filesystems
 

From 68643772e30d9be7b2d123714cc7202e9d0991cf Mon Sep 17 00:00:00 2001
From: David Young <dyoung@hdfgroup.org>
Date: Thu, 3 Sep 2020 14:35:38 -0500
Subject: [PATCH 3/4] Mention that creating new objects is not possible with
 legacy SWMR. Add abbreviation TBD where we need to add a hyperlink.

---
 doc/vfd-swmr-user-guide.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/vfd-swmr-user-guide.md b/doc/vfd-swmr-user-guide.md
index 806e34f547f..650d869fb7a 100644
--- a/doc/vfd-swmr-user-guide.md
+++ b/doc/vfd-swmr-user-guide.md
@@ -18,13 +18,14 @@ better-performing replacement for the existing SWMR feature.
 
 * VFD SWMR allows HDF5 objects (groups, datasets, attributes) to be
   created and destroyed in the course of a reader-writer session.
+  Creating objects is not possible using the existing SWMR feature.
 * It compartmentalizes much of the SWMR functionality in a virtual-file
   driver (VFD), thus easing The HDF Group's software-maintenance burden.
 * And it makes guarantees for the maximum time from write to availability
   of data for read, provided that the reading and writing systems and
   their interconnections can keep up with the data flow.
 
-For details on how VFD SWMR is implemented, see [LINK to RFC].
+For details on how VFD SWMR is implemented, see [TBD: LINK to RFC].
 
 # Quick start
 

From 9bb743136ed3a262eca6e6fcbe0f0d8f5b78cd09 Mon Sep 17 00:00:00 2001
From: David Young <dyoung@hdfgroup.org>
Date: Thu, 3 Sep 2020 14:44:56 -0500
Subject: [PATCH 4/4] Individually track the steps verified on each dataset. 
 Take care not to skip a step.

---
 test/vfd_swmr_bigset_writer.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/test/vfd_swmr_bigset_writer.c b/test/vfd_swmr_bigset_writer.c
index 0a7c79b7ddc..7105e212706 100644
--- a/test/vfd_swmr_bigset_writer.c
+++ b/test/vfd_swmr_bigset_writer.c
@@ -1021,7 +1021,7 @@ verify_extensible_dset(state_t *s, unsigned int which, mat_t *mat,
             verify_dset_attribute(ds, which, step);
     }
 
-    *stepp = last_step;
+    *stepp = step;
 
 out:
     if (H5Sclose(filespace) < 0)
@@ -1196,19 +1196,33 @@ main(int argc, char **argv)
                 nanosleep(&s.update_interval, NULL);
         }
     } else {
+        unsigned *nextstep = calloc(s.ndatasets, sizeof(*nextstep));
+        unsigned finished_step;
+
+        if (nextstep == NULL)
+            err(EXIT_FAILURE, "could not allocate `nextstep` array");
+
         for (which = s.ndatasets; which > 0; which--)
             open_extensible_dset(&s, which - 1);
 
-        for (step = 0; hang_back + step < s.nsteps;) {
+        do {
+            finished_step = UINT_MAX;   /* the greatest step finished on
+                                         * *all* datasets
+                                         */
+
             for (which = s.ndatasets; which-- > 0; ) {
-                dbgf(2, "step %d which %d\n", step, which);
-                verify_extensible_dset(&s, which, mat, &step);
+                dbgf(2, "step %d which %d\n", nextstep[which], which);
+                verify_extensible_dset(&s, which, mat, &nextstep[which]);
+                if (nextstep[which] < finished_step)
+                    finished_step = nextstep[which];
                 if (s.ndatasets <= s.nsteps)
                     nanosleep(&s.update_interval, NULL);
             }
             if (s.ndatasets > s.nsteps)
                 nanosleep(&s.update_interval, NULL);
-        }
+        } while (hang_back + finished_step < s.nsteps);
+
+        free(nextstep);
     }
 
     for (which = 0; which < s.ndatasets; which++)