Skip to content

Commit

Permalink
Fix #116
Browse files Browse the repository at this point in the history
  • Loading branch information
fpavogt committed Apr 23, 2024
1 parent 3ccbd80 commit e344261
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [v2.0.0]
### Added:
- [fpavogt, 2024-04-16] Improve input data consistency check (fix #116).
- [regDaniel, 2024-04-09] Add flag for clouds above (MSA + MSA_HIT_BUFFER) and allow for NSC in METAR message.
- [fpavogt, 2024-03-26] Add option to reset only a single parameter.
### Fixed:
Expand Down
17 changes: 17 additions & 0 deletions src/ampycloud/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,23 @@ def check_data_consistency(pdf: pd.DataFrame,
logger.warning('Dropping the superfluous %s column from the input data.', key)
data.drop((key), axis=1, inplace=True)

# Check for any duplicated entry, which would make no sense.
if (duplic := data.duplicated()).any():
raise AmpycloudError('Duplicated hits in the input data:\n'
f'{data[duplic].to_string(index=False)}')

# Check for inconsistencies between entries sharing the same 'dt' and 'ceilo':
# 1 - A non-detection (type 0) should not be coincident with a detection
# 2 - A VV hit (type -1) should not be coincident with a hit or a non-detection
for hit_type in [0, -1]:
nodets = data[data['type'] == hit_type][['dt', 'ceilo']]
dets = data[data['type'] != hit_type][['dt', 'ceilo']]
merged = dets.merge(nodets, how='inner', on=['dt', 'ceilo'])
if len(merged) > 0:
raise AmpycloudError('Inconsistent input data '
f'(simultaneous type {hit_type} and !{hit_type}):\n'
f'{merged.to_string(index=False)}')

# A brief sanity check of the heights. We do not issue Errors, since the code can cope
# with those elements: we simply raise Warnings.
msgs = []
Expand Down
4 changes: 2 additions & 2 deletions test/ampycloud/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def test_group_separation(

reset_prms()


def test_bad_layer_sep_lims():
""" Test that giving problematic layer separation limits does raise an error. """

Expand Down Expand Up @@ -321,8 +322,7 @@ def test_layering_singlepts():

mock_data = pd.DataFrame(np.array([['dummy', -1, 2300, 1],
['dummy', -1, 4000, 2],
['dummy', -1, 4500, 3],
['dummy', -1, np.nan, 0]]),
['dummy', -1, 4500, 3]]),
columns=['ceilo', 'dt', 'height', 'type'])

# Set the proper column types
Expand Down
24 changes: 24 additions & 0 deletions test/ampycloud/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_check_data_consistency():
assert np.all(out == canonical_demo_data())

# Now, let's check specific elements that should raise errors or warnings
### ERRORS ###
with raises(AmpycloudError):
# Empty DataFrame
data = pd.DataFrame(columns=['ceilo', 'dt', 'height', 'type'])
Expand All @@ -45,6 +46,29 @@ def test_check_data_consistency():
for col in ['ceilo', 'height', 'type']:
data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
check_data_consistency(data)
with raises(AmpycloudError):
# Duplicated hit
data = pd.DataFrame(np.array([['a', 0., 1, 1], ['a', 0., 1, 1]]),
columns=['ceilo', 'dt', 'height', 'type'])
for col in ['ceilo', 'dt', 'height', 'type']:
data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
check_data_consistency(data)
with raises(AmpycloudError):
# Inconsistent hits - type 0 vs type !0
data = pd.DataFrame(np.array([['a', 0, 1, 1], ['a', 0, np.nan, 0]]),
columns=['ceilo', 'dt', 'height', 'type'])
for col in ['ceilo', 'dt', 'height', 'type']:
data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
check_data_consistency(data)
with raises(AmpycloudError):
# Inconsistent VV hits - an entry must be either a VV hit or a regular hit, but not both.
data = pd.DataFrame(np.array([['a', 0, 1, -1], ['a', 0, 2, 1]]),
columns=['ceilo', 'dt', 'height', 'type'])
for col in ['ceilo', 'dt', 'height', 'type']:
data[col] = data.loc[:, col].astype(hardcoded.REQ_DATA_COLS[col])
check_data_consistency(data)

### WARNINGS ###
with warns(AmpycloudWarning):
# Bad data type
data = pd.DataFrame(np.array([['a', 0, 1, 1]]), columns=['ceilo', 'dt', 'height', 'type'])
Expand Down

0 comments on commit e344261

Please sign in to comment.