Skip to content

Commit

Permalink
Issue #12975 Data availability shows data available where no data ex…
Browse files Browse the repository at this point in the history
…ists (#8)

* Issue #12975 Data availability shows data available where no data exists

- refined data availability algorithm to identify time spans having sparse data
- modified data availability return to differentiate degrees of sparseness

* Issue #12975 Data availability shows data available where no data exists

- refined data availability algorithm to identify time spans having sparse data
- modified data availability return to differentiate degrees of sparseness
- removed unneeded TODO comments

* Issue #12975 Data availability shows data available where no data exists

- refined data availability algorithm to identify time spans having sparse data
- modified data availability return to differentiate degrees of sparseness
- added information on data availability query and interpretation to README
- addressed review comments
- removed unneeded TODO comments

* Issue #12975 Data availability shows data available where no data exists

- refined data availability algorithm to identify time spans having sparse data
- modified data availability return to differentiate degrees of sparseness
- added information on data availability query and interpretation to README
- moved sparsity tool tip text and break points to default_settings.py

* Issue #12975 Data availability shows data available where no data exists

- refined data availability algorithm to identify time spans having sparse data
- modified data availability return to differentiate degrees of sparseness
- added information on data availability query and interpretation to README
- moved sparsity tool tip text and break points to default_settings.py
- moved additional tool tip and color values into default_settings.py

* Issue #12975 Data availability shows data available where no data exists

- updated based on integration testing
- corrected edge case that was missing some available data
- added sparse data identification for small data sets
- use shorter labels for sparseness

* Issue #12975 Data availability shows data available where no data exists

- updated based on integration testing
- corrected edge case that was missing some available data
- added sparse data identification for small data sets
- use shorter labels for sparseness
- addressed review comments
  • Loading branch information
mfegan authored and danmergens committed Aug 3, 2018
1 parent bb355de commit 553bdb6
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
6 changes: 3 additions & 3 deletions ooi_status/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
DATA_NOT_EXPECTED = 'Not Expected'
DATA_MISSING = 'Missing'
DATA_PRESENT = 'Present'
DATA_SPARSE_1 = 'Sparsity Level 1'
DATA_SPARSE_2 = 'Sparsity Level 2'
DATA_SPARSE_3 = 'Sparsity Level 3'
DATA_SPARSE_1 = 'Sparse 1'
DATA_SPARSE_2 = 'Sparse 2'
DATA_SPARSE_3 = 'Sparse 3'

# Define colors for display of data availability results
COLOR_NOT_EXPECTED = '#ffffff'
Expand Down
34 changes: 22 additions & 12 deletions ooi_status/metadata_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,23 @@ def find_data_spans(session, subsite, node, sensor, method, stream, lower_bound,
if last_first > lower_bound:
available.append((lower_bound, MISSING, last_first))

# create spans for all gaps
first_row = True
# create spans for gaps and sparse data
for row in gaps_df.itertuples(index=False):
if row.pre_gap:
if last_first < row.last_last:
# only report current row if it is an interval
available.append((last_first, PRESENT, row.last_last))
# report a data gap before the current row
available.append((last_first, PRESENT, row.last_last))
available.append((row.last_last, MISSING, row.first))
last_first = row.first
else:
# report data gap(s) within the first row
if not first_row:
# special handling -- no previous for first row
if last_first < row.first:
# special handling -- only report data before if valid
available.append((last_first, PRESENT, row.first))
sparseness = compute_sparseness(row,overall_interval)
# determine and report sparsity level of current row
sparseness = compute_sparseness(row, overall_interval)
available.append((row.first, sparseness, row.last))
last_first = row.last
first_row = False

# create an available span for the tail end
available.append((last_first, PRESENT, last))
Expand All @@ -157,8 +157,9 @@ def find_data_spans(session, subsite, node, sensor, method, stream, lower_bound,
else:
first = row.first
last = row.last

available.append((first, PRESENT, last))
# row has data, determine sparseness
sparseness = compute_sparseness(row, overall_interval)
available.append((first, sparseness, last))

return available

Expand All @@ -169,12 +170,21 @@ def compute_sparseness(row,ds_sep):
SPARSE2: SPARSITY_MID <= row.mean_sep / ds_sep < SPARSITY_MAX
SPARSE3: SPARSITY_MAX <= row.mean_sep / ds_sep
:param row: The dataset row containing the sparse data
:param ds_sep: avaerage separation of data points in the dataset
:param ds_sep: average separation of data points in the dataset
:return: The appropriate "sparsness" level
"""
# calculate the row's data density ratio
if 'mean_sep' in row:
sep_ratio = row.mean_sep / pd.to_timedelta(ds_sep, 's')
else:
# calculate the density from the row
interval = row.last - row.first
mean_sep = interval / row.count
sep_ratio = mean_sep / pd.to_timedelta(ds_sep, 's')

# in case this method is called on a row that isn't sparse, default to having data present.
ret_val = PRESENT
sep_ratio = row.mean_sep / pd.to_timedelta(ds_sep, 's')

if sep_ratio >= SPARSITY_MAX:
ret_val = SPARSE3
elif sep_ratio >= SPARSITY_MID:
Expand Down

0 comments on commit 553bdb6

Please sign in to comment.