Skip to content

Commit

Permalink
Merge pull request #146 from BritishGeologicalSurvey/geojson-api
Browse files Browse the repository at this point in the history
Report AGS summary information within additional metadata

Looks good to me and working with selection of files
  • Loading branch information
KoalaGeo authored Mar 11, 2024
2 parents fe4cebf + 6c5628f commit 336bdca
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 79 deletions.
28 changes: 25 additions & 3 deletions app/checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,33 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) ->
errors = {'File read error': [{'line': line_no, 'group': '', 'desc': description}]}
dictionary = ''

# Use summary from errors dictionary is available
summary = errors.pop('Summary of data', [])
additional_metadata = convert_to_additional_metadata(summary)

return dict(checker=f'python_ags4 v{python_ags4.__version__}',
errors=errors, dictionary=dictionary, summary=summary)
errors=errors, dictionary=dictionary,
additional_metadata=additional_metadata)


def convert_to_additional_metadata(summary: list[dict]) -> dict:
if not summary:
return {}

descriptions = [item['desc'].replace('group present?', 'group present:')
for item in summary]

additional_metadata = {'bgs_projects': None}
for text in descriptions:
if 'groups identified in file' in text:
additional_metadata["bgs_all_groups"] = text
elif 'DICT' in text:
additional_metadata["bgs_dict"] = text
elif 'FILE' in text:
additional_metadata["bgs_file"] = text
elif 'LOCA' in text:
additional_metadata["bgs_loca_rows"] = text

return additional_metadata


def check_bgs(filename: Path, **kwargs) -> dict:
Expand Down Expand Up @@ -120,7 +142,7 @@ def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict:
'bgs_all_groups': f'{len(groups)} groups identified in file: {" ".join(groups)}',
'bgs_file': f'Optional FILE group present: {"FILE" in groups}',
'bgs_dict': f'Optional DICT group present: {"DICT" in groups}',
'bgs_loca_rows': f'{loca_rows} data rows in LOCA group',
'bgs_loca_rows': f'{loca_rows} data row(s) in LOCA group',
'bgs_projects': f'{len(projects)} projects found: {"; ".join(projects)}',
}
return bgs_metadata
Expand Down
1 change: 0 additions & 1 deletion app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class Validation(BaseModel):
message: str = Field(None, example="7 error(s) found in file!")
errors: Dict[str, List[LineError]] = Field(..., example="Rule 1a")
valid: bool = Field(..., example='false')
summary: List[dict] = list()
additional_metadata: dict = Field(...)
geojson: dict = dict()
geojson_error: str = None
Expand Down
31 changes: 13 additions & 18 deletions app/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def validate(filename: Path,

all_errors = {}
all_checkers = []
bgs_additional_metadata = {}
ags_summary = []
additional_metadata_responses = {'bgs': {}, 'ags': {}}
# Don't process if file is not .ags format
if filename.suffix.lower() != '.ags':
all_errors.update(
Expand All @@ -61,29 +60,25 @@ def validate(filename: Path,
# result is a dictionary with 'errors', 'checker' and other keys
result: dict = checker(filename, standard_AGS4_dictionary=dictionary_file)

# Pull 'errors' out to add to running total
# Extract 'common' data
all_errors.update(result.pop('errors'))
all_checkers.append(result.pop('checker'))
current_checker = result.pop('checker')
all_checkers.append(current_checker)

# Extract checker-dependent additional metadata
try:
bgs_additional_metadata = result.pop('additional_metadata')
except KeyError:
pass

try:
ags_summary = result.pop('summary')
except KeyError:
pass
additional_metadata = result.pop('additional_metadata')
if current_checker.startswith('bgs_rules'):
additional_metadata_responses['bgs'] = additional_metadata
else:
additional_metadata_responses['ags'] = additional_metadata

# Add remaining keys to response
response.update(result)

# We only want one checker-dependent metadata; use BGS if available.
if bgs_additional_metadata:
response['additional_metadata'] = bgs_additional_metadata
# Use BGS metadata where available, as it contains more data
if additional_metadata_responses['bgs']:
response['additional_metadata'] = additional_metadata_responses['bgs']
else:
response['summary'] = ags_summary
response['additional_metadata'] = additional_metadata_responses['ags']

error_count = len(reduce(lambda total, current: total + current, all_errors.values(), []))
if error_count > 0:
Expand Down
29 changes: 14 additions & 15 deletions test/fixtures_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
'message': 'All checks passed!',
'errors': {},
'valid': True,
'summary': [],
'additional_metadata': {},
'additional_metadata': {'bgs_all_groups': '7 groups identified in file: PROJ '
'ABBR TRAN TYPE UNIT LOCA SAMP',
'bgs_dict': 'Optional DICT group present: False',
'bgs_file': 'Optional FILE group present: False',
'bgs_loca_rows': '1 data row(s) in LOCA group',
'bgs_projects': None},
'geojson': {},
'geojson_error': None
},
Expand Down Expand Up @@ -98,7 +102,6 @@
],
},
"valid": False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand All @@ -125,8 +128,10 @@
'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT group not found.'}],
'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE group not found.'}]},
'valid': False,
'summary': [],
'additional_metadata': {},
'additional_metadata': {'bgs_all_groups': '0 groups identified in file: ',
'bgs_dict': 'Optional DICT group present: False',
'bgs_file': 'Optional FILE group present: False',
'bgs_projects': None},
'geojson': {},
'geojson_error': None
},
Expand Down Expand Up @@ -290,8 +295,10 @@
"If not 'utf-8', then the encoding is most likely to be 'windows-1252' "
"aka 'cp1252')"}]},
'valid': False,
'summary': [],
'additional_metadata': {},
'additional_metadata': {'bgs_all_groups': '0 groups identified in file: ',
'bgs_dict': 'Optional DICT group present: False',
'bgs_file': 'Optional FILE group present: False',
'bgs_projects': None},
'geojson': {},
'geojson_error': None
},
Expand All @@ -309,7 +316,6 @@
'group': '',
'line': 1}]},
'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand Down Expand Up @@ -710,7 +716,6 @@
'It is highly recommended that the file be saved without BOM encoding '
'to avoid issues with other software.'}]},
'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand All @@ -730,7 +735,6 @@
'line': 1}]},

'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand All @@ -745,7 +749,6 @@
'errors': {'File read error': [
{'line': '-', 'group': '', 'desc': 'extension_is.bad is not an .ags file'}]},
'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand All @@ -762,7 +765,6 @@
'message': 'All checks passed!',
'errors': {},
'valid': True,
'summary': [],
'additional_metadata': {},
'geojson': {
'features': [{
Expand Down Expand Up @@ -905,7 +907,6 @@
],
},
"valid": False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': 'Line 31 does not have the same number of entries as the HEADING row in TYPE.'
Expand All @@ -927,7 +928,6 @@
'desc': ''}],
},
'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand All @@ -945,7 +945,6 @@
'desc': ''}],
},
'valid': False,
'summary': [],
'additional_metadata': {},
'geojson': {},
'geojson_error': None
Expand Down
14 changes: 13 additions & 1 deletion test/fixtures_plain_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
Dictionary: Standard_dictionary_v4_1_1.ags
Time: 2021-08-23 14:25:43+00:00
7 groups identified in file: PROJ ABBR TRAN TYPE UNIT LOCA SAMP
Optional DICT group present: False
Optional FILE group present: False
1 data row(s) in LOCA group
None
================================================================================
""",
'example_broken_ags.ags': """
Expand Down Expand Up @@ -66,6 +70,10 @@
Dictionary: Standard_dictionary_v4_1_1.ags
Time: 2021-08-23 14:25:43+00:00
0 groups identified in file:
Optional DICT group present: False
Optional FILE group present: False
None
# Errors
Expand Down Expand Up @@ -110,6 +118,10 @@
Dictionary: Standard_dictionary_v4_1_1.ags
Time: 2021-08-23 14:25:43+00:00
0 groups identified in file:
Optional DICT group present: False
Optional FILE group present: False
None
# Errors
Expand Down
23 changes: 16 additions & 7 deletions test/unit/test_checkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,32 +97,41 @@ def test_check_bgs(filename, expected_rules, file_read_message):
'bgs_all_groups': '0 groups identified in file: ',
'bgs_dict': 'Optional DICT group present: False',
'bgs_file': 'Optional FILE group present: False',
'bgs_loca_rows': '0 data rows in LOCA group',
'bgs_loca_rows': '0 data row(s) in LOCA group',
'bgs_projects': '0 projects found: '
}),
('example_ags.ags', {
'bgs_all_groups': '7 groups identified in file: '
'PROJ ABBR TRAN TYPE UNIT LOCA SAMP',
'bgs_dict': 'Optional DICT group present: False',
'bgs_file': 'Optional FILE group present: False',
'bgs_loca_rows': '1 data rows in LOCA group',
'bgs_loca_rows': '1 data row(s) in LOCA group',
'bgs_projects': '1 projects found: 121415 (ACME Gas Works Redevelopment)'
}),
('real/Southwark.ags', {
'bgs_all_groups': '12 groups identified in file: '
'PROJ ABBR DICT TRAN TYPE UNIT CHIS GEOL ISPT LOCA SAMP WSTG',
'bgs_dict': 'Optional DICT group present: True',
'bgs_file': 'Optional FILE group present: False',
'bgs_loca_rows': '2 data rows in LOCA group',
'bgs_loca_rows': '2 data row(s) in LOCA group',
'bgs_projects': '1 projects found: 7500/75 (Southwark)'}),
])
def test_check_bgs_additional_metadata(filename, expected_metadata):
"""Check addtional metadata is added correctly."""
def test_check_additional_metadata(filename, expected_metadata):
"""
Check addtional metadata is added correctly. The AGS results don't
contain as much data as the BGS results.
"""
# Arrange
filename = TEST_FILE_DIR / filename

# Act
result = check_bgs(filename)
result_bgs = check_bgs(filename)['additional_metadata']
result_ags = check_ags(filename)['additional_metadata']

# Assert
assert result['additional_metadata'] == expected_metadata
assert result_bgs == expected_metadata

# AGS metadata should match BGS metadata, apart from "bgs_projects"
assert result_ags.pop('bgs_projects') is None
for key in result_ags:
assert result_ags[key] == expected_metadata[key]
Loading

0 comments on commit 336bdca

Please sign in to comment.