From e7661c95b1329ba4ec688266cfc62910bd5bf308 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Mon, 11 Mar 2024 09:50:35 +0000 Subject: [PATCH 1/3] Pluralise row(s) in LOCA message --- app/checkers.py | 2 +- test/unit/test_checkers.py | 6 +++--- test/unit/test_validation.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/checkers.py b/app/checkers.py index 04f2145..861abbd 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -120,7 +120,7 @@ def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict: 'bgs_all_groups': f'{len(groups)} groups identified in file: {" ".join(groups)}', 'bgs_file': f'Optional FILE group present: {"FILE" in groups}', 'bgs_dict': f'Optional DICT group present: {"DICT" in groups}', - 'bgs_loca_rows': f'{loca_rows} data rows in LOCA group', + 'bgs_loca_rows': f'{loca_rows} data row(s) in LOCA group', 'bgs_projects': f'{len(projects)} projects found: {"; ".join(projects)}', } return bgs_metadata diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 0ddc666..87c6997 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -97,7 +97,7 @@ def test_check_bgs(filename, expected_rules, file_read_message): 'bgs_all_groups': '0 groups identified in file: ', 'bgs_dict': 'Optional DICT group present: False', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '0 data rows in LOCA group', + 'bgs_loca_rows': '0 data row(s) in LOCA group', 'bgs_projects': '0 projects found: ' }), ('example_ags.ags', { @@ -105,7 +105,7 @@ def test_check_bgs(filename, expected_rules, file_read_message): 'PROJ ABBR TRAN TYPE UNIT LOCA SAMP', 'bgs_dict': 'Optional DICT group present: False', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '1 data rows in LOCA group', + 'bgs_loca_rows': '1 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 121415 (ACME Gas Works Redevelopment)' }), ('real/Southwark.ags', { @@ -113,7 +113,7 @@ def test_check_bgs(filename, expected_rules, file_read_message): 'PROJ ABBR DICT TRAN TYPE UNIT CHIS GEOL ISPT LOCA SAMP WSTG', 'bgs_dict': 'Optional DICT group present: True', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '2 data rows in LOCA group', + 'bgs_loca_rows': '2 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 7500/75 (Southwark)'}), ]) def test_check_bgs_additional_metadata(filename, expected_metadata): diff --git a/test/unit/test_validation.py b/test/unit/test_validation.py index 43f6e53..8fb4225 100644 --- a/test/unit/test_validation.py +++ b/test/unit/test_validation.py @@ -38,7 +38,7 @@ def mock_check_bgs(filename, **kwargs): 'PROJ ABBR TRAN TYPE UNIT LOCA SAMP', 'bgs_dict': 'Optional DICT group present: False', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '1 data rows in LOCA group', + 'bgs_loca_rows': '1 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 121415 (ACME Gas Works Redevelopment)' }) @@ -95,7 +95,7 @@ def test_validate_bgs_checker(): 'ABBR TRAN TYPE UNIT LOCA SAMP', 'bgs_dict': 'Optional DICT group present: False', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '1 data rows in LOCA group', + 'bgs_loca_rows': '1 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 121415 (ACME Gas ' 'Works Redevelopment)'} } @@ -126,7 +126,7 @@ def test_validate_both_checkers(): 'PROJ ABBR TRAN TYPE UNIT LOCA SAMP', 'bgs_dict': 'Optional DICT group present: False', 'bgs_file': 'Optional FILE group present: False', - 'bgs_loca_rows': '1 data rows in LOCA group', + 'bgs_loca_rows': '1 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 121415 (ACME Gas Works Redevelopment)' }} From f6dc9b35c3460c5589d9ab871b89a52daae77de8 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Mon, 11 Mar 2024 09:54:05 +0000 Subject: [PATCH 2/3] Report AGS checker summary as additional metadata --- app/checkers.py | 23 +++++++++++++++++++++-- test/unit/test_checkers.py | 17 +++++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/app/checkers.py b/app/checkers.py index 861abbd..bfd891b 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -43,11 +43,30 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> errors = {'File read error': [{'line': line_no, 'group': '', 'desc': description}]} dictionary = '' - # Use summary from errors dictionary is available summary = errors.pop('Summary of data', []) + additional_metadata = convert_to_additional_metadata(summary) return dict(checker=f'python_ags4 v{python_ags4.__version__}', - errors=errors, dictionary=dictionary, summary=summary) + errors=errors, dictionary=dictionary, + additional_metadata=additional_metadata) + + +def convert_to_additional_metadata(summary: list[dict]) -> dict: + descriptions = [item['desc'].replace('group present?', 'group present:') + for item in summary] + + additional_metadata = {'bgs_projects': None} + for text in descriptions: + if 'groups identified in file' in text: + additional_metadata["bgs_all_groups"] = text + elif 'DICT' in text: + additional_metadata["bgs_dict"] = text + elif 'FILE' in text: + additional_metadata["bgs_file"] = text + elif 'LOCA' in text: + additional_metadata["bgs_loca_rows"] = text + + return additional_metadata def check_bgs(filename: Path, **kwargs) -> dict: diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 87c6997..3866171 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -116,13 +116,22 @@ def test_check_bgs(filename, expected_rules, file_read_message): 'bgs_loca_rows': '2 data row(s) in LOCA group', 'bgs_projects': '1 projects found: 7500/75 (Southwark)'}), ]) -def test_check_bgs_additional_metadata(filename, expected_metadata): - """Check addtional metadata is added correctly.""" +def test_check_additional_metadata(filename, expected_metadata): + """ + Check addtional metadata is added correctly. The AGS results don't + contain as much data as the BGS results. + """ # Arrange filename = TEST_FILE_DIR / filename # Act - result = check_bgs(filename) + result_bgs = check_bgs(filename)['additional_metadata'] + result_ags = check_ags(filename)['additional_metadata'] # Assert - assert result['additional_metadata'] == expected_metadata + assert result_bgs == expected_metadata + + # AGS metadata should match BGS metadata, apart from "bgs_projects" + assert result_ags.pop('bgs_projects') is None + for key in result_ags: + assert result_ags[key] == expected_metadata[key] From 6c5628f616a41984b5611dcbc8ca461595177cda Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Mon, 11 Mar 2024 12:25:18 +0000 Subject: [PATCH 3/3] Always populate additional_metadata This commit uses data from the AGS summary to populate as many fields as possible in the additional_metadata when BGS data are not available. The summary attribute from AGS validation is no longer returned as a standalone item. --- app/checkers.py | 3 ++ app/schemas.py | 1 - app/validation.py | 31 +++++++++------------ test/fixtures_json.py | 29 ++++++++++--------- test/fixtures_plain_text.py | 14 +++++++++- test/unit/test_validation.py | 54 +++++++++++++++--------------------- 6 files changed, 66 insertions(+), 66 deletions(-) diff --git a/app/checkers.py b/app/checkers.py index bfd891b..2e41854 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -52,6 +52,9 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> def convert_to_additional_metadata(summary: list[dict]) -> dict: + if not summary: + return {} + descriptions = [item['desc'].replace('group present?', 'group present:') for item in summary] diff --git a/app/schemas.py b/app/schemas.py index 859f04f..e1040c3 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -47,7 +47,6 @@ class Validation(BaseModel): message: str = Field(None, example="7 error(s) found in file!") errors: Dict[str, List[LineError]] = Field(..., example="Rule 1a") valid: bool = Field(..., example='false') - summary: List[dict] = list() additional_metadata: dict = Field(...) geojson: dict = dict() geojson_error: str = None diff --git a/app/validation.py b/app/validation.py index 2f5b73a..8ec48a5 100644 --- a/app/validation.py +++ b/app/validation.py @@ -47,8 +47,7 @@ def validate(filename: Path, all_errors = {} all_checkers = [] - bgs_additional_metadata = {} - ags_summary = [] + additional_metadata_responses = {'bgs': {}, 'ags': {}} # Don't process if file is not .ags format if filename.suffix.lower() != '.ags': all_errors.update( @@ -61,29 +60,25 @@ def validate(filename: Path, # result is a dictionary with 'errors', 'checker' and other keys result: dict = checker(filename, standard_AGS4_dictionary=dictionary_file) - # Pull 'errors' out to add to running total + # Extract 'common' data all_errors.update(result.pop('errors')) - all_checkers.append(result.pop('checker')) + current_checker = result.pop('checker') + all_checkers.append(current_checker) - # Extract checker-dependent additional metadata - try: - bgs_additional_metadata = result.pop('additional_metadata') - except KeyError: - pass - - try: - ags_summary = result.pop('summary') - except KeyError: - pass + additional_metadata = result.pop('additional_metadata') + if current_checker.startswith('bgs_rules'): + additional_metadata_responses['bgs'] = additional_metadata + else: + additional_metadata_responses['ags'] = additional_metadata # Add remaining keys to response response.update(result) - # We only want one checker-dependent metadata; use BGS if available. - if bgs_additional_metadata: - response['additional_metadata'] = bgs_additional_metadata + # Use BGS metadata where available, as it contains more data + if additional_metadata_responses['bgs']: + response['additional_metadata'] = additional_metadata_responses['bgs'] else: - response['summary'] = ags_summary + response['additional_metadata'] = additional_metadata_responses['ags'] error_count = len(reduce(lambda total, current: total + current, all_errors.values(), [])) if error_count > 0: diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 4b2f939..f63aebd 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -10,8 +10,12 @@ 'message': 'All checks passed!', 'errors': {}, 'valid': True, - 'summary': [], - 'additional_metadata': {}, + 'additional_metadata': {'bgs_all_groups': '7 groups identified in file: PROJ ' + 'ABBR TRAN TYPE UNIT LOCA SAMP', + 'bgs_dict': 'Optional DICT group present: False', + 'bgs_file': 'Optional FILE group present: False', + 'bgs_loca_rows': '1 data row(s) in LOCA group', + 'bgs_projects': None}, 'geojson': {}, 'geojson_error': None }, @@ -98,7 +102,6 @@ ], }, "valid": False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -125,8 +128,10 @@ 'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT group not found.'}], 'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE group not found.'}]}, 'valid': False, - 'summary': [], - 'additional_metadata': {}, + 'additional_metadata': {'bgs_all_groups': '0 groups identified in file: ', + 'bgs_dict': 'Optional DICT group present: False', + 'bgs_file': 'Optional FILE group present: False', + 'bgs_projects': None}, 'geojson': {}, 'geojson_error': None }, @@ -290,8 +295,10 @@ "If not 'utf-8', then the encoding is most likely to be 'windows-1252' " "aka 'cp1252')"}]}, 'valid': False, - 'summary': [], - 'additional_metadata': {}, + 'additional_metadata': {'bgs_all_groups': '0 groups identified in file: ', + 'bgs_dict': 'Optional DICT group present: False', + 'bgs_file': 'Optional FILE group present: False', + 'bgs_projects': None}, 'geojson': {}, 'geojson_error': None }, @@ -309,7 +316,6 @@ 'group': '', 'line': 1}]}, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -710,7 +716,6 @@ 'It is highly recommended that the file be saved without BOM encoding ' 'to avoid issues with other software.'}]}, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -730,7 +735,6 @@ 'line': 1}]}, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -745,7 +749,6 @@ 'errors': {'File read error': [ {'line': '-', 'group': '', 'desc': 'extension_is.bad is not an .ags file'}]}, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -762,7 +765,6 @@ 'message': 'All checks passed!', 'errors': {}, 'valid': True, - 'summary': [], 'additional_metadata': {}, 'geojson': { 'features': [{ @@ -905,7 +907,6 @@ ], }, "valid": False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': 'Line 31 does not have the same number of entries as the HEADING row in TYPE.' @@ -927,7 +928,6 @@ 'desc': ''}], }, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None @@ -945,7 +945,6 @@ 'desc': ''}], }, 'valid': False, - 'summary': [], 'additional_metadata': {}, 'geojson': {}, 'geojson_error': None diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py index 119a185..3e7b320 100644 --- a/test/fixtures_plain_text.py +++ b/test/fixtures_plain_text.py @@ -11,7 +11,11 @@ Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 - +7 groups identified in file: PROJ ABBR TRAN TYPE UNIT LOCA SAMP +Optional DICT group present: False +Optional FILE group present: False +1 data row(s) in LOCA group +None ================================================================================ """, 'example_broken_ags.ags': """ @@ -66,6 +70,10 @@ Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 +0 groups identified in file: +Optional DICT group present: False +Optional FILE group present: False +None # Errors @@ -110,6 +118,10 @@ Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 +0 groups identified in file: +Optional DICT group present: False +Optional FILE group present: False +None # Errors diff --git a/test/unit/test_validation.py b/test/unit/test_validation.py index 8fb4225..1c988bd 100644 --- a/test/unit/test_validation.py +++ b/test/unit/test_validation.py @@ -14,24 +14,20 @@ def mock_check_ags(filename, standard_AGS4_dictionary=None): - return dict(checker='ags', dictionary='some_dict', + return dict(checker='python_ags4 v123', dictionary='some_dict', errors={}, - summary=[{'desc': '7 groups identified in file: PROJ ABBR TRAN TYPE UNIT ' - 'LOCA SAMP', - 'group': '', - 'line': ''}, - {'desc': '1 data row(s) in LOCA group', 'group': '', 'line': ''}, - {'desc': 'Optional DICT group present? False', - 'group': '', - 'line': ''}, - {'desc': 'Optional FILE group present? False', - 'group': '', - 'line': ''}] - ) + additional_metadata={ + 'bgs_all_groups': '7 groups identified in file: ' + 'PROJ ABBR TRAN TYPE UNIT LOCA SAMP', + 'bgs_dict': 'Optional DICT group present: False', + 'bgs_file': 'Optional FILE group present: False', + 'bgs_loca_rows': '1 data row(s) in LOCA group', + 'bgs_projects': None + }) def mock_check_bgs(filename, **kwargs): - return dict(checker='bgs', + return dict(checker='bgs_rules v123', errors={'BGS': [{}]}, additional_metadata={ 'bgs_all_groups': '7 groups identified in file: ' @@ -49,26 +45,22 @@ def test_validate_default_checker(): # Arrange filename = TEST_FILE_DIR / 'does_not_matter.ags' expected = { - 'checkers': ['ags'], + 'checkers': ['python_ags4 v123'], 'dictionary': 'some_dict', 'errors': {}, 'filename': filename.name, 'filesize': 0, 'message': 'All checks passed!', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), - 'summary': [{'desc': '7 groups identified in file: PROJ ABBR TRAN TYPE UNIT ' - 'LOCA SAMP', - 'group': '', - 'line': ''}, - {'desc': '1 data row(s) in LOCA group', 'group': '', 'line': ''}, - {'desc': 'Optional DICT group present? False', - 'group': '', - 'line': ''}, - {'desc': 'Optional FILE group present? False', - 'group': '', - 'line': ''}], 'valid': True, - 'additional_metadata': {}} + 'additional_metadata': { + 'bgs_all_groups': '7 groups identified in file: ' + 'PROJ ABBR TRAN TYPE UNIT LOCA SAMP', + 'bgs_dict': 'Optional DICT group present: False', + 'bgs_file': 'Optional FILE group present: False', + 'bgs_loca_rows': '1 data row(s) in LOCA group', + 'bgs_projects': None + }} # Act response = validation.validate(filename, checkers=[mock_check_ags]) @@ -83,7 +75,7 @@ def test_validate_bgs_checker(): # Arrange filename = TEST_FILE_DIR / 'does_not_matter.ags' expected = { - 'checkers': ['bgs'], + 'checkers': ['bgs_rules v123'], 'dictionary': '', 'errors': {'BGS': [{}]}, 'filename': filename.name, @@ -113,7 +105,7 @@ def test_validate_both_checkers(): # Arrange filename = TEST_FILE_DIR / 'does_not_matter.ags' expected = { - 'checkers': ['bgs', 'ags'], + 'checkers': ['bgs_rules v123', 'python_ags4 v123'], 'dictionary': 'some_dict', 'errors': {'BGS': [{}]}, 'filename': filename.name, @@ -151,8 +143,8 @@ def test_validate_non_ags(): 'message': '1 error(s) found in file!', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'valid': False, - 'summary': [], - 'additional_metadata': {}} + 'additional_metadata': {} + } # Act response = validation.validate(filename)