Issue #12975 Data availability shows data available where no data ex…

…ists (#8) * Issue #12975 Data availability shows data available where no data exists - refined data available algorithm to identify time spans having sparse data - modified data availability return to differentiate degrees of sparseness * Issue #12975 Data availability shows data available where no data exists - refined data available algorithm to identify time spans having sparse data - modified data availability return to differentiate degrees of sparseness - removed unneeded TODO comments * Issue #12975 Data availability shows data available where no data exists - refined data available algorithm to identify time spans having sparse data - modified data availability return to differentiate degrees of sparseness - added information on data availability query and interpretation to README - addressed review comments - removed unneeded TODO comments * Issue #12975 Data availability shows data available where no data exists - refined data available algorithm to identify time spans having sparse data - modified data availability return to differentiate degrees of sparseness - added information on data availability query and interpretation to README - moved sparsity tool tip text and break points to default_settings.py * Issue #12975 Data availability shows data available where no data exists - refined data available algorithm to identify time spans having sparse data - modified data availability return to differentiate degrees of sparseness - added information on data availability query and interpretation to README - moved sparsity tool tip text and break points to default_settings.py - moved additional tool tip and color values into default_settings.py * Issue #12975 Data availability shows data available where no data exists - updated based on integration testing - corrected edge case that was missing some available data - added sparse data identification for small data sets - use shorter labels for sparseness * Issue #12975 Data availability shows data 
available where no data exists - updated based on integration testing - corrected edge case that was missing some available data - added sparse data identification for small data sets - use shorter labels for sparseness - addressed review comments
oceanobservatories · Aug 3, 2018 · 553bdb6 · 553bdb6
1 parent bb355de
commit 553bdb6
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 15 deletions.
diff --git a/ooi_status/default_settings.py b/ooi_status/default_settings.py
@@ -14,9 +14,9 @@
 DATA_NOT_EXPECTED = 'Not Expected'
 DATA_MISSING = 'Missing'
 DATA_PRESENT = 'Present'
-DATA_SPARSE_1 = 'Sparsity Level 1'
-DATA_SPARSE_2 = 'Sparsity Level 2'
-DATA_SPARSE_3 = 'Sparsity Level 3'
+DATA_SPARSE_1 = 'Sparse 1'
+DATA_SPARSE_2 = 'Sparse 2'
+DATA_SPARSE_3 = 'Sparse 3'
 
 # Define colors for display of data availability results
 COLOR_NOT_EXPECTED = '#ffffff'

diff --git a/ooi_status/metadata_queries.py b/ooi_status/metadata_queries.py
@@ -120,23 +120,23 @@ def find_data_spans(session, subsite, node, sensor, method, stream, lower_bound,
             if last_first > lower_bound:
                 available.append((lower_bound, MISSING, last_first))
 
-            # create spans for all gaps
-            first_row = True
+            # create spans for gaps and sparse data
             for row in gaps_df.itertuples(index=False):
                 if row.pre_gap:
+                    if last_first < row.last_last:
+                        # only report current row if it is an interval
+                        available.append((last_first, PRESENT, row.last_last))
                     # report a data gap before the current row
-                    available.append((last_first, PRESENT, row.last_last))
                     available.append((row.last_last, MISSING, row.first))
                     last_first = row.first
                 else:
-                    # report data gap(s) within the first row
-                    if not first_row:
-                        # special handling -- no previous for first row
+                    if last_first < row.first:
+                        # special handling -- only report data before if valid
                         available.append((last_first, PRESENT, row.first))
-                    sparseness = compute_sparseness(row,overall_interval)
+                    # determine and report sparsity level of current row
+                    sparseness = compute_sparseness(row, overall_interval)
                     available.append((row.first, sparseness, row.last))
                     last_first = row.last
-                first_row = False
 
             # create an available span for the tail end
             available.append((last_first, PRESENT, last))
@@ -157,8 +157,9 @@ def find_data_spans(session, subsite, node, sensor, method, stream, lower_bound,
                 else:
                     first = row.first
                     last = row.last
-
-                available.append((first, PRESENT, last))
+                # row has data, determine sparseness
+                sparseness = compute_sparseness(row, overall_interval)
+                available.append((first, sparseness, last))
 
     return available
 
@@ -169,12 +170,21 @@ def compute_sparseness(row,ds_sep):
     SPARSE2: SPARSITY_MID <= row.mean_sep / ds_sep < SPARSITY_MAX
     SPARSE3: SPARSITY_MAX <= row.mean_sep / ds_sep
     :param row: The dataset row containing the sparse data
-    :param ds_sep: avaerage separation of data points in the dataset
+    :param ds_sep: average separation of data points in the dataset
     :return: The appropriate "sparsness" level
     """
+    # calculate the row's data density ratio
+    if 'mean_sep' in row:
+        sep_ratio = row.mean_sep / pd.to_timedelta(ds_sep, 's')
+    else:
+        # calculate the density from the row
+        interval = row.last - row.first
+        mean_sep = interval / row.count
+        sep_ratio = mean_sep / pd.to_timedelta(ds_sep, 's')
+
     # in case this method is called on a row that isn't sparse, default to having data present.
     ret_val = PRESENT
-    sep_ratio = row.mean_sep / pd.to_timedelta(ds_sep, 's')
+
     if sep_ratio >= SPARSITY_MAX:
         ret_val = SPARSE3
     elif sep_ratio >= SPARSITY_MID: