From 34c65db037eab349e1a2a5c8b9d0d8772ff4ff45 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Mon, 4 Mar 2024 18:59:55 +0000 Subject: [PATCH 01/20] Update FastAPI container and dependencies This commit also adds pip-tools for dependency management. The versions of FastAPI and related core libraries (starlette, pydantic) were set to match versions in the container. --- .github/workflows/lint_and_test.yml | 6 ++ Dockerfile | 2 +- README.md | 18 +++- requirements.in | 18 ++++ requirements.txt | 131 +++++++++++++++++++++++++--- requirements_dev.in | 9 ++ requirements_dev.txt | 114 ++++++++++++++++++++++-- 7 files changed, 272 insertions(+), 26 deletions(-) create mode 100644 requirements.in create mode 100644 requirements_dev.in diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index f26a4b6c..ed8d822f 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -23,6 +23,12 @@ jobs: python-version: 3.11 architecture: x64 + - name: Ensure Nexus mirror configuration is not committed + run: | + # Grep checks for Nexus mirror in the index-url parameter, + # the ! negates the grep exit code, which would be 0 if URL was present + ! grep -E '.*index-url.*nexus.*' requirements*.txt + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index b65585ca..af1ce78e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim-2023-02-20 +FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim-2024-03-04 ## Install python-ags4 COPY requirements.txt . diff --git a/README.md b/README.md index 6ed5afe8..8195e1c6 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,20 @@ pytest -vs test The test environment is configured so that adding `--pdb` to the test command will start an IPython debugger session in the event of test failure. 
+### Updating dependencies + +We are using [pip-tools](https://pip-tools.readthedocs.io/en/stable/) to create a pinned list of all dependencies from the ones that we need to specify. + +To refresh the dependency list, update `requirements.in` and `requirements_dev.in` then run the following: + +```bash +pip-compile -o requirements.txt requirements.in +pip-compile -o requirements_dev.txt requirements_dev.in +``` + +The updated requirements files must be edited to remove reference to the Nexus mirror before they can then be committed. + + ### GUI Customisation To ammend the GUI HTML we recommend running via `Docker` using your own `Dockerfile` like the below to `COPY` in your own templates. @@ -108,7 +122,7 @@ COPY content/templates /app/app/templates Containers for the application are hosted in the GitHub Container Registry -Every push to `Main` branch commits builds `pyagsapi:latest`. +Every push to `main` branch commits builds `pyagsapi:latest`. Push Tagged Releases with `^v?[0-9]+[.][0-9]+([.][0-9])?` (v* == v2.0) builds `pyagsapi:2.0` (the "v" gets dropped for the tag). @@ -116,7 +130,7 @@ You can also push release candidates using the format `/^v?[0-9]+[.][0-9]+([.][0 ## Example Files -Files in [https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real](https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real) are a random collection of real AGS files which have been submitted to the BGS and are available under OGL, we have included them here as example files for testing pyagsapi. +Files in [https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real](https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real) are a collection of real AGS files which have been submitted to the BGS and are available under OGL, we have included them here as example files for testing pyagsapi. 
## Licence diff --git a/requirements.in b/requirements.in new file mode 100644 index 00000000..6bc1a6f2 --- /dev/null +++ b/requirements.in @@ -0,0 +1,18 @@ +Fiona +Jinja2 +Shapely +aiofiles +colorlog +geopandas +numpy +pyproj +python-ags4==0.4.1 +requests +shortuuid +# These libraries are already in FastAPI container but need updated +fastapi==0.88.0 +h11==0.14.0 +pydantic==1.10.14 +python-multipart==0.0.9 +starlette==0.22.0 +uvicorn==0.20.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6b00ee99..565aae92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,119 @@ -aiofiles==23.1.0 -numpy==1.24.2 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=requirements.txt requirements.in +# + +aiofiles==23.2.1 + # via -r requirements.in +anyio==4.3.0 + # via starlette +attrs==23.2.0 + # via fiona +certifi==2024.2.2 + # via + # fiona + # pyproj + # requests +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via + # click-plugins + # cligj + # fiona + # python-ags4 + # uvicorn +click-plugins==1.1.1 + # via fiona +cligj==0.7.2 + # via fiona +colorlog==6.8.2 + # via -r requirements.in +commonmark==0.9.1 + # via rich +defusedxml==0.7.1 + # via python-ags4 +et-xmlfile==1.1.0 + # via openpyxl +fastapi==0.88.0 + # via -r requirements.in +fiona==1.9.5 + # via + # -r requirements.in + # geopandas +geopandas==0.14.3 + # via -r requirements.in +h11==0.14.0 + # via + # -r requirements.in + # uvicorn +idna==3.6 + # via + # anyio + # requests +jinja2==3.1.3 + # via -r requirements.in +markupsafe==2.1.5 + # via jinja2 +numpy==1.26.4 + # via + # -r requirements.in + # pandas + # shapely +openpyxl==3.1.2 + # via python-ags4 +packaging==23.2 + # via geopandas +pandas==1.5.3 + # via + # geopandas + # python-ags4 +pydantic==1.10.14 + # via + # -r requirements.in + # fastapi +pygments==2.17.2 + # via rich +pyproj==3.6.1 + # via + # -r requirements.in + # 
geopandas python-ags4==0.4.1 -python-multipart==0.0.5 -colorlog==6.7.0 -shortuuid==1.0.11 -Jinja2==3.1.2 -Fiona==1.9.1 -Shapely==2.0.1 -pyproj==3.4.1 -geopandas==0.12.2 -requests==2.28.2 -# These libraries are already in FastAPI container but need updated -fastapi==0.92.0 + # via -r requirements.in +python-dateutil==2.9.0.post0 + # via pandas +python-multipart==0.0.9 + # via -r requirements.in +pytz==2024.1 + # via pandas +requests==2.31.0 + # via -r requirements.in +rich==12.6.0 + # via python-ags4 +shapely==2.0.3 + # via + # -r requirements.in + # geopandas +shortuuid==1.0.12 + # via -r requirements.in +six==1.16.0 + # via + # fiona + # python-dateutil +sniffio==1.3.1 + # via anyio +starlette==0.22.0 + # via + # -r requirements.in + # fastapi +typing-extensions==4.10.0 + # via pydantic +urllib3==2.2.1 + # via requests uvicorn==0.20.0 -h11==0.14.0 + # via -r requirements.in + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements_dev.in b/requirements_dev.in new file mode 100644 index 00000000..c9f00722 --- /dev/null +++ b/requirements_dev.in @@ -0,0 +1,9 @@ +ipython +ipdb +pytest +flake8 +requests +httpx +pytest-asyncio +requests-toolbelt +freezegun diff --git a/requirements_dev.txt b/requirements_dev.txt index d27f7670..5d4342c3 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,9 +1,105 @@ -ipython==8.10.0 -ipdb==0.13.11 -pytest==7.2.1 -flake8==6.0.0 -requests==2.28.2 -httpx==0.23.3 -pytest-asyncio==0.20.3 -requests-toolbelt==0.10.1 -freezegun==1.2.2 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=requirements_dev.txt requirements_dev.in +# + +anyio==4.3.0 + # via httpx +asttokens==2.4.1 + # via stack-data +certifi==2024.2.2 + # via + # httpcore + # httpx + # requests +charset-normalizer==3.3.2 + # via requests +decorator==5.1.1 + # via + # ipdb + # ipython +executing==2.0.1 + # via stack-data 
+flake8==7.0.0 + # via -r requirements_dev.in +freezegun==1.4.0 + # via -r requirements_dev.in +h11==0.14.0 + # via httpcore +httpcore==1.0.4 + # via httpx +httpx==0.27.0 + # via -r requirements_dev.in +idna==3.6 + # via + # anyio + # httpx + # requests +iniconfig==2.0.0 + # via pytest +ipdb==0.13.13 + # via -r requirements_dev.in +ipython==8.22.2 + # via + # -r requirements_dev.in + # ipdb +jedi==0.19.1 + # via ipython +matplotlib-inline==0.1.6 + # via ipython +mccabe==0.7.0 + # via flake8 +packaging==23.2 + # via pytest +parso==0.8.3 + # via jedi +pexpect==4.9.0 + # via ipython +pluggy==1.4.0 + # via pytest +prompt-toolkit==3.0.43 + # via ipython +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pycodestyle==2.11.1 + # via flake8 +pyflakes==3.2.0 + # via flake8 +pygments==2.17.2 + # via ipython +pytest==8.1.0 + # via + # -r requirements_dev.in + # pytest-asyncio +pytest-asyncio==0.23.5 + # via -r requirements_dev.in +python-dateutil==2.9.0.post0 + # via freezegun +requests==2.31.0 + # via + # -r requirements_dev.in + # requests-toolbelt +requests-toolbelt==1.0.0 + # via -r requirements_dev.in +six==1.16.0 + # via + # asttokens + # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx +stack-data==0.6.3 + # via ipython +traitlets==5.14.1 + # via + # ipython + # matplotlib-inline +urllib3==2.2.1 + # via requests +wcwidth==0.2.13 + # via prompt-toolkit From aa558f0a79297b9ebaf8a585428b1317361cbbc5 Mon Sep 17 00:00:00 2001 From: John A Stevenson Date: Tue, 5 Mar 2024 14:52:47 +0000 Subject: [PATCH 02/20] Update to python-ags4==0.5.0 --- requirements.in | 2 +- requirements.txt | 15 ++++++++++----- requirements_dev.in | 2 +- requirements_dev.txt | 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/requirements.in b/requirements.in index 6bc1a6f2..28d70c2e 100644 --- a/requirements.in +++ b/requirements.in @@ -6,7 +6,7 @@ colorlog geopandas numpy pyproj -python-ags4==0.4.1 +python-ags4==0.5.0 requests shortuuid # These 
libraries are already in FastAPI container but need updated diff --git a/requirements.txt b/requirements.txt index 565aae92..388e5b6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ # # pip-compile --output-file=requirements.txt requirements.in # +--index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple aiofiles==23.2.1 # via -r requirements.in @@ -31,8 +32,6 @@ cligj==0.7.2 # via fiona colorlog==6.8.2 # via -r requirements.in -commonmark==0.9.1 - # via rich defusedxml==0.7.1 # via python-ags4 et-xmlfile==1.1.0 @@ -55,8 +54,12 @@ idna==3.6 # requests jinja2==3.1.3 # via -r requirements.in +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 # via jinja2 +mdurl==0.1.2 + # via markdown-it-py numpy==1.26.4 # via # -r requirements.in @@ -66,7 +69,7 @@ openpyxl==3.1.2 # via python-ags4 packaging==23.2 # via geopandas -pandas==1.5.3 +pandas==2.2.1 # via # geopandas # python-ags4 @@ -80,7 +83,7 @@ pyproj==3.6.1 # via # -r requirements.in # geopandas -python-ags4==0.4.1 +python-ags4==0.5.0 # via -r requirements.in python-dateutil==2.9.0.post0 # via pandas @@ -90,7 +93,7 @@ pytz==2024.1 # via pandas requests==2.31.0 # via -r requirements.in -rich==12.6.0 +rich==13.7.1 # via python-ags4 shapely==2.0.3 # via @@ -110,6 +113,8 @@ starlette==0.22.0 # fastapi typing-extensions==4.10.0 # via pydantic +tzdata==2024.1 + # via pandas urllib3==2.2.1 # via requests uvicorn==0.20.0 diff --git a/requirements_dev.in b/requirements_dev.in index c9f00722..ea87f77d 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -1,6 +1,6 @@ ipython ipdb -pytest +pytest==8.0.2 # 8.1.0 contains breaking changes flake8 requests httpx diff --git a/requirements_dev.txt b/requirements_dev.txt index 5d4342c3..e65ffe3e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,6 +4,7 @@ # # pip-compile --output-file=requirements_dev.txt requirements_dev.in # +--index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple anyio==4.3.0 # via httpx @@ 
-71,7 +72,7 @@ pyflakes==3.2.0 # via flake8 pygments==2.17.2 # via ipython -pytest==8.1.0 +pytest==8.0.2 # via # -r requirements_dev.in # pytest-asyncio From 1e172f61b5a0f594d54328bdfaf9ec6ce5562e0d Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 15:16:31 +0000 Subject: [PATCH 03/20] Update ags4 version in fixtures --- test/fixtures_json.py | 16 ++++++++-------- test/fixtures_plain_text.py | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 3925cb1b..cfec9396 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -4,7 +4,7 @@ 'example_ags.ags': { 'filename': 'example_ags.ags', 'filesize': 4039, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': 'All checks passed!', @@ -15,7 +15,7 @@ 'nonsense.AGS': { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', @@ -39,7 +39,7 @@ 'random_binary.ags': { 'filename': 'random_binary.ags', 'filesize': 1024, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '36 error(s) found in file!', @@ -182,7 +182,7 @@ 'real/CG014058_F.ags': { 'filename': 'CG014058_F.ags', 'filesize': 50574, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '1 error(s) found in file!', @@ -198,7 +198,7 @@ 'real/Blackburn Southern Bypass.ags': { 'filename': 'Blackburn Southern 
Bypass.ags', 'filesize': 6566, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '95 error(s) found in file!', @@ -592,7 +592,7 @@ 'real/A3040_03.ags': { 'filename': 'A3040_03.ags', 'filesize': 264526, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '1 error(s) found in file!', @@ -625,7 +625,7 @@ { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', @@ -640,7 +640,7 @@ { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py index 3f24e0f7..7acceffa 100644 --- a/test/fixtures_plain_text.py +++ b/test/fixtures_plain_text.py @@ -7,7 +7,7 @@ # Metadata File size: 4039 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -21,7 +21,7 @@ # Metadata File size: 9 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -65,7 +65,7 @@ # Metadata File size: 1024 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -141,7 +141,7 @@ # Metadata File size: 6566 bytes 
-Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 From 147cf0faad2584950a09e8fa4197980ca4ff3c6a Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 16:11:13 +0000 Subject: [PATCH 04/20] Use sorting_strategy: alphabetical & None --- app/conversion.py | 4 ++-- test/unit/test_conversion.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/conversion.py b/app/conversion.py index 0888d8b0..db4ed17c 100644 --- a/app/conversion.py +++ b/app/conversion.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -def convert(filename: Path, results_dir: Path, sort_tables=False) -> Tuple[Optional[Path], dict]: +def convert(filename: Path, results_dir: Path, sorting_strategy: Optional[str]=None) -> Tuple[Optional[Path], dict]: """ Convert filename between .ags and .xlsx. Write output to file in results_dir and return path alongside job status data in dictionary.""" @@ -28,7 +28,7 @@ def convert(filename: Path, results_dir: Path, sort_tables=False) -> Tuple[Optio success = False if filename.suffix.lower() == '.ags': try: - AGS4.AGS4_to_excel(filename, converted_file, sort_tables=sort_tables) + AGS4.AGS4_to_excel(filename, converted_file, sorting_strategy=sorting_strategy) success = True except IndexError: error_message = "ERROR: File does not have AGS4 format layout" diff --git a/test/unit/test_conversion.py b/test/unit/test_conversion.py index b780ca86..619c5d03 100644 --- a/test/unit/test_conversion.py +++ b/test/unit/test_conversion.py @@ -30,8 +30,8 @@ def test_convert(tmp_path, filename, expected): assert re.search(expected_message, response['message']) -@pytest.mark.parametrize('sort_tables', [True, False, None]) -def test_convert_sort_tables(tmp_path, sort_tables): +@pytest.mark.parametrize('sorting_strategy', ['alphabetical', None]) +def test_convert_sort_tables(tmp_path, sorting_strategy): # Arrange filename = 
Path(__file__).parent.parent / 'files' / 'example_ags.ags' tables, headings = AGS4.AGS4_to_dataframe(filename) @@ -41,8 +41,8 @@ def test_convert_sort_tables(tmp_path, sort_tables): results_dir.mkdir() # Act - if sort_tables is not None: - converted_file, response = convert(filename, results_dir, sort_tables=sort_tables) + if sorting_strategy is not None: + converted_file, response = convert(filename, results_dir, sorting_strategy=sorting_strategy) else: converted_file, response = convert(filename, results_dir) @@ -51,7 +51,7 @@ def test_convert_sort_tables(tmp_path, sort_tables): assert response['filename'] == filename.name xl = pd.ExcelFile(converted_file) - if sort_tables: + if sorting_strategy == 'alphabetical': assert xl.sheet_names == sorted(groups) else: assert xl.sheet_names == groups From d9fbb5bd8ebc85592afe69bd6bac1fa6459c1f17 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 17:23:12 +0000 Subject: [PATCH 05/20] Fix sort_tables in API and tests --- app/routes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/routes.py b/app/routes.py index 354170a4..cca008c3 100644 --- a/app/routes.py +++ b/app/routes.py @@ -261,6 +261,9 @@ async def convert(background_tasks: BackgroundTasks, if not files[0].filename: raise InvalidPayloadError(request) RESULTS = 'results' + sorting_strategy = None + if sort_tables: + sorting_strategy = 'alphabetical' tmp_dir = Path(tempfile.mkdtemp()) results_dir = tmp_dir / RESULTS results_dir.mkdir() @@ -271,7 +274,7 @@ async def convert(background_tasks: BackgroundTasks, contents = await file.read() local_file = tmp_dir / file.filename local_file.write_bytes(contents) - converted, result = conversion.convert(local_file, results_dir, sort_tables=sort_tables) + converted, result = conversion.convert(local_file, results_dir, sorting_strategy=sorting_strategy) log = validation.to_plain_text(result) f.write(log) f.write('\n' + '=' * 80 + '\n') From 4e2fcb7a5ab37504eae049970cccc2320a18ab34 Mon 
Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 17:43:43 +0000 Subject: [PATCH 06/20] Discard unecessary summary from errors dictionary --- app/checkers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/checkers.py b/app/checkers.py index e01417eb..9eff809f 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -47,6 +47,9 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> errors = {'File read error': [{'line': '-', 'group': '', 'desc': description}]} dictionary = '' + # Discard unecessary summary from errors dictionary + errors.pop('Summary of data', None) + return dict(checker=f'python_ags4 v{python_ags4.__version__}', errors=errors, dictionary=dictionary) From 9d4b9d910f5ee98553c6d951c8663ea356f95ef2 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 17:47:42 +0000 Subject: [PATCH 07/20] Add Warning and FYI to VALID_KEYS --- app/schemas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/schemas.py b/app/schemas.py index 0f8c3efa..b12af8d3 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -15,6 +15,8 @@ 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 16', 'AGS Format Rule 17', 'AGS Format Rule 18', 'AGS Format Rule 19', 'AGS Format Rule 19a', 'AGS Format Rule 19b', 'AGS Format Rule 20', 'General', + # Warnings and FYIs + 'Warning (Related to Rule 16)', 'FYI (Related to Rule 1)', # Errors 'File read error' ] From 6a7621f0139bbfbd8c82f1087af5f469b41cfd53 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:00:22 +0000 Subject: [PATCH 08/20] Change expected results for check_ags --- test/unit/test_checkers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 48b3a659..132c021f 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -18,15 +18,15 @@ @pytest.mark.parametrize('filename, expected_rules', [ - ('example_ags.ags', 
set()), - ('random_binary.ags', {'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', + ('example_ags.ags', {'Warning (Related to Rule 16)'}), + ('random_binary.ags', {'General', 'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('nonsense.AGS', {'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('empty.ags', {'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('real/A3040_03.ags', {'AGS Format Rule 3'}), - ('real/43370.ags', {'AGS Format Rule 2a', 'AGS Format Rule 1'}), - ('real/JohnStPrimarySchool.ags', {'File read error'}), + ('real/43370.ags', {'General', 'AGS Format Rule 2a', 'AGS Format Rule 1'}), + ('real/JohnStPrimarySchool.ags', {'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule ?', 'AGS Format Rule 4', 'AGS Format Rule 2a'}), ('real/19684.ags', {'AGS Format Rule 3'}), ('real/E52A4379 (2).ags', {'AGS Format Rule 3'}), ]) From 7692f2025de372f2385a6673bb754d7905ece7ad Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:02:55 +0000 Subject: [PATCH 09/20] Fix data in valid example AGS file --- test/files/example_ags.ags | 4 ++-- test/fixtures_json.py | 2 +- test/unit/test_checkers.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/files/example_ags.ags b/test/files/example_ags.ags index d69f669a..acdf65ad 100644 --- a/test/files/example_ags.ags +++ b/test/files/example_ags.ags @@ -8,8 +8,8 @@ "HEADING","ABBR_HDNG","ABBR_CODE","ABBR_DESC","ABBR_LIST","ABBR_REM","FILE_FSET" "UNIT","","","","","","" "TYPE","X","X","X","X","X","X" -"DATA","DICT_TYPE","GROUP","Group","","","" -"DATA","DICT_TYPE","HEADING","Heading","","","" +"DATA","DICT_TYPE","GROUP","Flag to indicate definition is a GROUP","","","" 
+"DATA","DICT_TYPE","HEADING","Flag to indicate definition is a HEADING","","","" "DATA","DICT_STAT","OTHER","Other Field","","","" "DATA","DICT_STAT","KEY","Key Field","","","" "DATA","SAMP_TYPE","U","Undisturbed sample - open drive","","","" diff --git a/test/fixtures_json.py b/test/fixtures_json.py index cfec9396..38f06908 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -3,7 +3,7 @@ JSON_RESPONSES = { 'example_ags.ags': { 'filename': 'example_ags.ags', - 'filesize': 4039, + 'filesize': 4105, 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 132c021f..81e68760 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize('filename, expected_rules', [ - ('example_ags.ags', {'Warning (Related to Rule 16)'}), + ('example_ags.ags', set()), ('random_binary.ags', {'General', 'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('nonsense.AGS', {'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', From e9c6783d0530147e8ab1e7272164839a56054d3a Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:25:02 +0000 Subject: [PATCH 10/20] Switch table to group --- test/fixtures_json.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 38f06908..e25c0f1b 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -29,10 +29,10 @@ 'AGS Format Rule 5': [{'line': 1, 'group': '', 'desc': 'Contains fields that are not enclosed in double quotes.'}], - 'AGS Format Rule 13': [{'line': '-', 'group': 'PROJ', 'desc': 'PROJ table not found.'}], - 'AGS Format Rule 14': [{'line': '-', 
'group': 'TRAN', 'desc': 'TRAN table not found.'}], - 'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT table not found.'}], - 'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE table not found.'}]}, + 'AGS Format Rule 13': [{'line': '-', 'group': 'PROJ', 'desc': 'PROJ group not found.'}], + 'AGS Format Rule 14': [{'line': '-', 'group': 'TRAN', 'desc': 'TRAN group not found.'}], + 'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT group not found.'}], + 'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE group not found.'}]}, 'valid': False, 'additional_metadata': {} }, @@ -68,16 +68,16 @@ {'desc': 'Has Non-ASCII character(s).', 'group': '', 'line': 8}], - 'AGS Format Rule 13': [{'desc': 'PROJ table not found.', + 'AGS Format Rule 13': [{'desc': 'PROJ group not found.', 'group': 'PROJ', 'line': '-'}], - 'AGS Format Rule 14': [{'desc': 'TRAN table not found.', + 'AGS Format Rule 14': [{'desc': 'TRAN group not found.', 'group': 'TRAN', 'line': '-'}], - 'AGS Format Rule 15': [{'desc': 'UNIT table not found.', + 'AGS Format Rule 15': [{'desc': 'UNIT group not found.', 'group': 'UNIT', 'line': '-'}], - 'AGS Format Rule 17': [{'desc': 'TYPE table not found.', + 'AGS Format Rule 17': [{'desc': 'TYPE group not found.', 'group': 'TYPE', 'line': '-'}], 'AGS Format Rule 2a': [{'desc': 'Is not terminated by and ' From 693cfc12967c0b249d558dd12131b4d19e7d6544 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:25:37 +0000 Subject: [PATCH 11/20] Add utf-8 warning text --- test/fixtures_json.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index e25c0f1b..18f59ae3 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -43,29 +43,29 @@ 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '36 error(s) found in file!', - 
'errors': {'AGS Format Rule 1': [{'desc': 'Has Non-ASCII character(s) and/or ' - 'a byte-order-mark (BOM).', + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or " + "a byte-order-mark (BOM).", 'group': '', 'line': 1}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 2}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 3}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 4}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 5}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 6}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 7}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", 'group': '', 'line': 8}], 'AGS Format Rule 13': [{'desc': 'PROJ group not found.', @@ -202,8 +202,8 @@ 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '95 error(s) found in file!', - 'errors': {'AGS Format Rule 1': [{'desc': 'Has Non-ASCII character(s) and/or ' - 'a byte-order-mark (BOM).', + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or " + "a byte-order-mark (BOM).", 'group': '', 'line': 1}], 'AGS Format Rule 2a': [{'desc': 'Is not terminated by and ' From ee9c2042a0b55cc0db72bc2989760eb5584fd614 Mon Sep 17 00:00:00 2001 From: 
Colin Blackburn Date: Tue, 5 Mar 2024 18:26:54 +0000 Subject: [PATCH 12/20] Add extra General item, switching dict order --- test/fixtures_json.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 18f59ae3..aa0d6f10 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -579,13 +579,20 @@ '...LOCA_ORID|LOCA_ORJO|LOCA_ORCO|LOCA_CHKG|LOCA_APPG|LOCA_PDEP', 'group': 'LOCA', 'line': 86}], - 'General': [{'desc': 'This file seems to be encoded with a ' - 'byte-order-mark (BOM). It is highly ' - 'recommended that the file be saved without ' - 'BOM encoding to avoid issues with other ' - 'sofware.', + 'General': [{'line': '', 'group': '', - 'line': ''}]}, + 'desc': "AGS4 Rule 1 is interpreted as allowing both standard ASCII characters " + "(Unicode code points 0-127) and extended ASCII characters (Unicode code points " + "160-255). Please beware that extended ASCII characters differ based on the " + "encoding used when the file was created. The validator defaults to 'utf-8' " + "encoding as it is the most widely used encoding compatible with Unicode. The user " + "can override this default if the file encoding is different but, it is highly " + "recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: " + "If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252')"}, + {'line': '', + 'group': '', + 'desc': 'This file seems to be encoded with a byte-order-mark (BOM). 
It is highly ' + 'recommended that the file be saved without BOM encoding to avoid issues with other software.'}]}, 'valid': False, 'additional_metadata': {} }, From 265c57a1e5bf54367fe5a4932b2044410b9d8a4e Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:36:17 +0000 Subject: [PATCH 13/20] Add extra General item --- test/fixtures_json.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index aa0d6f10..691f3bb0 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -42,7 +42,7 @@ 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), - 'message': '36 error(s) found in file!', + 'message': '37 error(s) found in file!', 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or " "a byte-order-mark (BOM).", 'group': '', @@ -175,7 +175,17 @@ {'desc': 'Contains fields that are not ' 'enclosed in double quotes.', 'group': '', - 'line': 8}]}, + 'line': 8}], + 'General': [{'line': '', + 'group': '', + 'desc': "AGS4 Rule 1 is interpreted as allowing both standard ASCII characters " + "(Unicode code points 0-127) and extended ASCII characters (Unicode code points " + "160-255). Please beware that extended ASCII characters differ based on the " + "encoding used when the file was created. The validator defaults to 'utf-8' " + "encoding as it is the most widely used encoding compatible with Unicode. The user " + "can override this default if the file encoding is different but, it is highly " + "recommended that the 'utf-8' encoding be used when creating AGS4 files. 
(Hint: " + "If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252')"}]}, 'valid': False, 'additional_metadata': {} }, From a670eae47e21dc691a67bc1cfc0a9784a02c51b6 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:36:43 +0000 Subject: [PATCH 14/20] Rule 7 is no longer broken --- test/fixtures_json.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 691f3bb0..bd43eabf 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -584,11 +584,6 @@ 'enclosed in double quotes.', 'group': '', 'line': 1}], - 'AGS Format Rule 7': [{'desc': 'Headings not in order starting ' - 'from LOCA_CHKG. Expected order: ' - '...LOCA_ORID|LOCA_ORJO|LOCA_ORCO|LOCA_CHKG|LOCA_APPG|LOCA_PDEP', - 'group': 'LOCA', - 'line': 86}], 'General': [{'line': '', 'group': '', 'desc': "AGS4 Rule 1 is interpreted as allowing both standard ASCII characters " From dcc2ca387ad4963919554e431be5d0fb2c54e8ce Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 18:52:13 +0000 Subject: [PATCH 15/20] Equivalent changes to expected plain text responses --- test/fixtures_plain_text.py | 49 +++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py index 7acceffa..becc188b 100644 --- a/test/fixtures_plain_text.py +++ b/test/fixtures_plain_text.py @@ -6,7 +6,7 @@ # Metadata -File size: 4039 bytes +File size: 4105 bytes Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -42,25 +42,25 @@ ## AGS Format Rule 13 -Group: PROJ - PROJ table not found. +Group: PROJ - PROJ group not found. ## AGS Format Rule 14 -Group: TRAN - TRAN table not found. +Group: TRAN - TRAN group not found. ## AGS Format Rule 15 -Group: UNIT - UNIT table not found. +Group: UNIT - UNIT group not found. ## AGS Format Rule 17 -Group: TYPE - TYPE table not found. 
+Group: TYPE - TYPE group not found. ================================================================================ """, 'random_binary.ags': """ ================================================================================ -random_binary.ags: 36 error(s) found in file! +random_binary.ags: 37 error(s) found in file! # Metadata @@ -74,30 +74,30 @@ ## AGS Format Rule 1 -Line: 1 - Has Non-ASCII character(s) and/or a byte-order-mark (BOM). -Line: 2 - Has Non-ASCII character(s). -Line: 3 - Has Non-ASCII character(s). -Line: 4 - Has Non-ASCII character(s). -Line: 5 - Has Non-ASCII character(s). -Line: 6 - Has Non-ASCII character(s). -Line: 7 - Has Non-ASCII character(s). -Line: 8 - Has Non-ASCII character(s). +Line: 1 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM). +Line: 2 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 3 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 4 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 5 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 6 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 7 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 8 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). ## AGS Format Rule 13 -Group: PROJ - PROJ table not found. +Group: PROJ - PROJ group not found. ## AGS Format Rule 14 -Group: TRAN - TRAN table not found. +Group: TRAN - TRAN group not found. ## AGS Format Rule 15 -Group: UNIT - UNIT table not found. +Group: UNIT - UNIT group not found. ## AGS Format Rule 17 -Group: TYPE - TYPE table not found. +Group: TYPE - TYPE group not found. ## AGS Format Rule 2a @@ -132,6 +132,10 @@ Line: 7 - Contains fields that are not enclosed in double quotes. Line: 8 - Contains fields that are not enclosed in double quotes. 
+## General + +Line: - AGS4 Rule 1 is interpreted as allowing both standard ASCII characters (Unicode code points 0-127) and extended ASCII characters (Unicode code points 160-255). Please beware that extended ASCII characters differ based on the encoding used when the file was created. The validator defaults to 'utf-8' encoding as it is the most widely used encoding compatible with Unicode. The user can override this default if the file encoding is different but, it is highly recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252') + ================================================================================ """, 'real/Blackburn Southern Bypass.ags': """ @@ -150,7 +154,7 @@ ## AGS Format Rule 1 -Line: 1 - Has Non-ASCII character(s) and/or a byte-order-mark (BOM). +Line: 1 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM). ## AGS Format Rule 2a @@ -253,13 +257,10 @@ Line: 1 - Contains fields that are not enclosed in double quotes. -## AGS Format Rule 7 - -Line: 86 - Headings not in order starting from LOCA_CHKG. Expected order: ...LOCA_ORID|LOCA_ORJO|LOCA_ORCO|LOCA_CHKG|LOCA_APPG|LOCA_PDEP - ## General -Line: - This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the file be saved without BOM encoding to avoid issues with other sofware. +Line: - AGS4 Rule 1 is interpreted as allowing both standard ASCII characters (Unicode code points 0-127) and extended ASCII characters (Unicode code points 160-255). Please beware that extended ASCII characters differ based on the encoding used when the file was created. The validator defaults to 'utf-8' encoding as it is the most widely used encoding compatible with Unicode. The user can override this default if the file encoding is different but, it is highly recommended that the 'utf-8' encoding be used when creating AGS4 files. 
(Hint: If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252') +Line: - This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the file be saved without BOM encoding to avoid issues with other software. ================================================================================ """ From 650a8c325418c53f9a3992e6f1664a2ae1a0cc10 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Tue, 5 Mar 2024 19:03:17 +0000 Subject: [PATCH 16/20] Fix flake8 errors, mainly long lines --- app/conversion.py | 2 +- test/fixtures_json.py | 39 ++++++++++++++++++++++++-------------- test/unit/test_checkers.py | 8 +++++--- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/app/conversion.py b/app/conversion.py index db4ed17c..ef666126 100644 --- a/app/conversion.py +++ b/app/conversion.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -def convert(filename: Path, results_dir: Path, sorting_strategy: Optional[str]=None) -> Tuple[Optional[Path], dict]: +def convert(filename: Path, results_dir: Path, sorting_strategy: Optional[str] = None) -> Tuple[Optional[Path], dict]: """ Convert filename between .ags and .xlsx. 
Write output to file in results_dir and return path alongside job status data in dictionary.""" diff --git a/test/fixtures_json.py b/test/fixtures_json.py index bd43eabf..42878ca4 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -43,29 +43,37 @@ 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '37 error(s) found in file!', - 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or " - "a byte-order-mark (BOM).", + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8') " + "and/or a byte-order-mark (BOM).", 'group': '', 'line': 1}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 2}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 3}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 4}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 5}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 6}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 7}, - {'desc': "Has Non-ASCII character(s) (assuming that file encoding 
is 'utf-8').", + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 8}], 'AGS Format Rule 13': [{'desc': 'PROJ group not found.', @@ -185,7 +193,8 @@ "encoding as it is the most widely used encoding compatible with Unicode. The user " "can override this default if the file encoding is different but, it is highly " "recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: " - "If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252')"}]}, + "If not 'utf-8', then the encoding is most likely to be 'windows-1252' " + "aka 'cp1252')"}]}, 'valid': False, 'additional_metadata': {} }, @@ -212,8 +221,9 @@ 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '95 error(s) found in file!', - 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or " - "a byte-order-mark (BOM).", + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8') " + "and/or a byte-order-mark (BOM).", 'group': '', 'line': 1}], 'AGS Format Rule 2a': [{'desc': 'Is not terminated by and ' @@ -596,8 +606,9 @@ "If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252')"}, {'line': '', 'group': '', - 'desc': 'This file seems to be encoded with a byte-order-mark (BOM). It is highly ' - 'recommended that the file be saved without BOM encoding to avoid issues with other software.'}]}, + 'desc': 'This file seems to be encoded with a byte-order-mark (BOM). 
' + 'It is highly recommended that the file be saved without BOM encoding ' + 'to avoid issues with other software.'}]}, 'valid': False, 'additional_metadata': {} }, diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 81e68760..548357e6 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -19,14 +19,16 @@ @pytest.mark.parametrize('filename, expected_rules', [ ('example_ags.ags', set()), - ('random_binary.ags', {'General', 'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', - 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), + ('random_binary.ags', {'General', 'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', + 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', + 'AGS Format Rule 17'}), ('nonsense.AGS', {'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('empty.ags', {'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('real/A3040_03.ags', {'AGS Format Rule 3'}), ('real/43370.ags', {'General', 'AGS Format Rule 2a', 'AGS Format Rule 1'}), - ('real/JohnStPrimarySchool.ags', {'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule ?', 'AGS Format Rule 4', 'AGS Format Rule 2a'}), + ('real/JohnStPrimarySchool.ags', {'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule ?', + 'AGS Format Rule 4', 'AGS Format Rule 2a'}), ('real/19684.ags', {'AGS Format Rule 3'}), ('real/E52A4379 (2).ags', {'AGS Format Rule 3'}), ]) From bcfdc74d025c2ccbc0035158736f248020da2983 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Wed, 6 Mar 2024 10:33:05 +0000 Subject: [PATCH 17/20] Remove nexus URL from requirements --- requirements.txt | 1 - requirements_dev.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 388e5b6c..2a2f35cd 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ # # pip-compile --output-file=requirements.txt requirements.in # ---index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple aiofiles==23.2.1 # via -r requirements.in diff --git a/requirements_dev.txt b/requirements_dev.txt index e65ffe3e..8f1abe34 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,7 +4,6 @@ # # pip-compile --output-file=requirements_dev.txt requirements_dev.in # ---index-url https://nexus-internal.bgs.ac.uk/repository/pypi-all/simple anyio==4.3.0 # via httpx From 86de9800b2c1b66dfd2f61f0aca0b8ad2d0ed185 Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Wed, 6 Mar 2024 11:36:39 +0000 Subject: [PATCH 18/20] Update gui version numbers and sorting strategy input --- app/static/js/htmlView.js | 3 --- app/templates/_base.html | 2 +- app/templates/landing_page.html | 16 ++++++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/app/static/js/htmlView.js b/app/static/js/htmlView.js index ef94d480..b8d344d0 100644 --- a/app/static/js/htmlView.js +++ b/app/static/js/htmlView.js @@ -207,9 +207,6 @@ agsHtml.displayFileResult=function(fileResult){ if(fileResult.additional_metadata.bgs_loca_rows){ xhtml=xhtml + "
  • " + fileResult.additional_metadata.bgs_loca_rows + "
  • "; }; - if(fileResult.additional_metadata.bgs_loca_rows){ - xhtml=xhtml + "
  • " + fileResult.additional_metadata.bgs_loca_rows + "
  • "; - }; xhtml=xhtml + ""; }; diff --git a/app/templates/_base.html b/app/templates/_base.html index 1704d162..2d5d47e3 100644 --- a/app/templates/_base.html +++ b/app/templates/_base.html @@ -409,7 +409,7 @@

    - Powered by pyagsapi 4.6. pyagsapi was created by and is maintained by the British Geological Survey and is distributed under the LGPL v3.0 licence, code is available on GitHub. pyagsapi uses the Official AGS Python Library. + Powered by pyagsapi 5.0. pyagsapi was created by and is maintained by the British Geological Survey and is distributed under the LGPL v3.0 licence, code is available on GitHub. pyagsapi uses the Official AGS Python Library v0.5.0.
    diff --git a/app/templates/landing_page.html b/app/templates/landing_page.html index 731e77e7..3b013c7d 100644 --- a/app/templates/landing_page.html +++ b/app/templates/landing_page.html @@ -30,7 +30,7 @@

    AGS Data Submission

    AGS4 Schema & Data Validation


    AGS4 Validation

    -

    Performs validation using the Official AGS Python Library version 0.4.1, this implements checks of the rules as written in the AGS data format standard v4.x.

    +

    Performs validation using the Official AGS Python Library version 0.5.0, this implements checks of the rules as written in the AGS data format standard v4.x.

    If you're using AGS Data Format Standard v3.x use our legacy AGS Validator


    BGS Data Validation

    @@ -165,11 +165,15 @@

    AGS Converter


    - Sort worksheets in .xlsx file in alphabetical order (Warning: .ags to .xlsx only. The original group order will be lost) - - - -
    + Sort worksheets in .xlsx file using sorting strategy (Warning: .ags to .xlsx only. The original group order will be lost) + + + + + + + +

    From 6ce297d1849c97130f30aa11780b03479e999b1a Mon Sep 17 00:00:00 2001 From: KoalaGeo Date: Wed, 6 Mar 2024 13:59:29 +0000 Subject: [PATCH 19/20] rename sort value maintain order to default --- app/templates/landing_page.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/landing_page.html b/app/templates/landing_page.html index 3b013c7d..8d9098bd 100644 --- a/app/templates/landing_page.html +++ b/app/templates/landing_page.html @@ -172,7 +172,7 @@

    AGS Converter

    - +

    From de0a089804364ccb1db3004fdeeab904c30d6ad1 Mon Sep 17 00:00:00 2001 From: Colin Blackburn Date: Wed, 6 Mar 2024 14:20:06 +0000 Subject: [PATCH 20/20] Update sort tables input from GUI --- app/routes.py | 11 +++++------ test/integration/test_api.py | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/app/routes.py b/app/routes.py index cca008c3..7c813fee 100644 --- a/app/routes.py +++ b/app/routes.py @@ -110,7 +110,7 @@ class ResponseType(StrEnum): ) sort_tables_form = Form( - default=False, + default='default', title='Sort worksheets', description=('Sort the worksheets into alphabetical order ' 'or leave in the order found in the AGS file. ' @@ -240,7 +240,7 @@ def prepare_validation_response(request, data): " sort worksheets in .xlsx file in alphabetical order.")) async def convert(background_tasks: BackgroundTasks, files: List[UploadFile] = conversion_file, - sort_tables: bool = sort_tables_form, + sort_tables: str = sort_tables_form, request: Request = None): """ Convert files between .ags and .xlsx format. Option to sort worksheets in .xlsx file in alphabetical order. @@ -258,12 +258,11 @@ async def convert(background_tasks: BackgroundTasks, :raises Exception: If the conversion fails or an unexpected error occurs. 
""" + if sort_tables == 'default': + sort_tables = None if not files[0].filename: raise InvalidPayloadError(request) RESULTS = 'results' - sorting_strategy = None - if sort_tables: - sorting_strategy = 'alphabetical' tmp_dir = Path(tempfile.mkdtemp()) results_dir = tmp_dir / RESULTS results_dir.mkdir() @@ -274,7 +273,7 @@ async def convert(background_tasks: BackgroundTasks, contents = await file.read() local_file = tmp_dir / file.filename local_file.write_bytes(contents) - converted, result = conversion.convert(local_file, results_dir, sorting_strategy=sorting_strategy) + converted, result = conversion.convert(local_file, results_dir, sorting_strategy=sort_tables) log = validation.to_plain_text(result) f.write(log) f.write('\n' + '=' * 80 + '\n') diff --git a/test/integration/test_api.py b/test/integration/test_api.py index 589e0d01..6111d50f 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -225,7 +225,7 @@ async def test_convert_good_files(async_client, tmp_path): @pytest.mark.asyncio -@pytest.mark.parametrize('sort_tables', [True, False, None]) +@pytest.mark.parametrize('sort_tables', ['alphabetical', 'default']) async def test_convert_sort_tables(async_client, tmp_path, sort_tables): # Arrange fields = [] @@ -257,7 +257,7 @@ async def test_convert_sort_tables(async_client, tmp_path, sort_tables): assert (zipfile.Path(ags_zip) / 'example_ags.xlsx').is_file() with ags_zip.open('example_ags.xlsx') as xl_file: xl = pd.ExcelFile(xl_file) - if sort_tables: + if sort_tables == 'alphabetical': assert xl.sheet_names == sorted(groups) else: assert xl.sheet_names == groups