diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index f26a4b6..ed8d822 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -23,6 +23,12 @@ jobs: python-version: 3.11 architecture: x64 + - name: Ensure Nexus mirror configuration is not committed + run: | + # Grep checks for Nexus mirror in the index-url parameter, + # the ! negates the grep exit code, which would be 0 if URL was present + ! grep -E '.*index-url.*nexus.*' requirements*.txt + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index b65585c..af1ce78 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim-2023-02-20 +FROM tiangolo/uvicorn-gunicorn-fastapi:python3.11-slim-2024-03-04 ## Install python-ags4 COPY requirements.txt . diff --git a/README.md b/README.md index 6ed5afe..8195e1c 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,20 @@ pytest -vs test The test environment is configured so that adding `--pdb` to the test command will start an IPython debugger session in the event of test failure. +### Updating dependencies + +We are using [pip-tools](https://pip-tools.readthedocs.io/en/stable/) to create a pinned list of all dependencies from the ones that we need to specify. + +To refresh the dependency list, update `requirements.in` and `requirements_dev.in` then run the following: + +```bash +pip-compile -o requirements.txt requirements.in +pip-compile -o requirements_dev.txt requirements_dev.in +``` + +The updated requirements files must be edited to remove reference to the Nexus mirror before they can then be committed. + + ### GUI Customisation To ammend the GUI HTML we recommend running via `Docker` using your own `Dockerfile` like the below to `COPY` in your own templates. 
@@ -108,7 +122,7 @@ COPY content/templates /app/app/templates Containers for the application are hosted in the GitHub Container Registry -Every push to `Main` branch commits builds `pyagsapi:latest`. +Every push to the `main` branch builds `pyagsapi:latest`. Push Tagged Releases with `^v?[0-9]+[.][0-9]+([.][0-9])?` (v* == v2.0) builds `pyagsapi:2.0` (the "v" gets dropped for the tag). @@ -116,7 +130,7 @@ You can also push release candidates using the format `/^v?[0-9]+[.][0-9]+([.][0 ## Example Files -Files in [https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real](https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real) are a random collection of real AGS files which have been submitted to the BGS and are available under OGL, we have included them here as example files for testing pyagsapi. +Files in [https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real](https://github.com/BritishGeologicalSurvey/pyagsapi/tree/main/test/files/real) are a collection of real AGS files which have been submitted to the BGS and are available under OGL; we have included them here as example files for testing pyagsapi. 
## Licence diff --git a/app/checkers.py b/app/checkers.py index e01417e..9eff809 100644 --- a/app/checkers.py +++ b/app/checkers.py @@ -47,6 +47,9 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> errors = {'File read error': [{'line': '-', 'group': '', 'desc': description}]} dictionary = '' + # Discard unnecessary summary from errors dictionary + errors.pop('Summary of data', None) + return dict(checker=f'python_ags4 v{python_ags4.__version__}', errors=errors, dictionary=dictionary) diff --git a/app/conversion.py b/app/conversion.py index 0888d8b..ef66612 100644 --- a/app/conversion.py +++ b/app/conversion.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -def convert(filename: Path, results_dir: Path, sort_tables=False) -> Tuple[Optional[Path], dict]: +def convert(filename: Path, results_dir: Path, sorting_strategy: Optional[str] = None) -> Tuple[Optional[Path], dict]: """ Convert filename between .ags and .xlsx. Write output to file in results_dir and return path alongside job status data in dictionary.""" @@ -28,7 +28,7 @@ def convert(filename: Path, results_dir: Path, sort_tables=False) -> Tuple[Optio success = False if filename.suffix.lower() == '.ags': try: - AGS4.AGS4_to_excel(filename, converted_file, sort_tables=sort_tables) + AGS4.AGS4_to_excel(filename, converted_file, sorting_strategy=sorting_strategy) success = True except IndexError: error_message = "ERROR: File does not have AGS4 format layout" diff --git a/app/routes.py b/app/routes.py index 354170a..7c813fe 100644 --- a/app/routes.py +++ b/app/routes.py @@ -110,7 +110,7 @@ class ResponseType(StrEnum): ) sort_tables_form = Form( - default=False, + default='default', title='Sort worksheets', description=('Sort the worksheets into alphabetical order ' 'or leave in the order found in the AGS file. 
' @@ -240,7 +240,7 @@ def prepare_validation_response(request, data): " sort worksheets in .xlsx file in alphabetical order.")) async def convert(background_tasks: BackgroundTasks, files: List[UploadFile] = conversion_file, - sort_tables: bool = sort_tables_form, + sort_tables: str = sort_tables_form, request: Request = None): """ Convert files between .ags and .xlsx format. Option to sort worksheets in .xlsx file in alphabetical order. @@ -258,6 +258,8 @@ async def convert(background_tasks: BackgroundTasks, :raises Exception: If the conversion fails or an unexpected error occurs. """ + if sort_tables == 'default': + sort_tables = None if not files[0].filename: raise InvalidPayloadError(request) RESULTS = 'results' @@ -271,7 +273,7 @@ async def convert(background_tasks: BackgroundTasks, contents = await file.read() local_file = tmp_dir / file.filename local_file.write_bytes(contents) - converted, result = conversion.convert(local_file, results_dir, sort_tables=sort_tables) + converted, result = conversion.convert(local_file, results_dir, sorting_strategy=sort_tables) log = validation.to_plain_text(result) f.write(log) f.write('\n' + '=' * 80 + '\n') diff --git a/app/schemas.py b/app/schemas.py index 0f8c3ef..b12af8d 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -15,6 +15,8 @@ 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 16', 'AGS Format Rule 17', 'AGS Format Rule 18', 'AGS Format Rule 19', 'AGS Format Rule 19a', 'AGS Format Rule 19b', 'AGS Format Rule 20', 'General', + # Warnings and FYIs + 'Warning (Related to Rule 16)', 'FYI (Related to Rule 1)', # Errors 'File read error' ] diff --git a/app/static/js/htmlView.js b/app/static/js/htmlView.js index ef94d48..b8d344d 100644 --- a/app/static/js/htmlView.js +++ b/app/static/js/htmlView.js @@ -207,9 +207,6 @@ agsHtml.displayFileResult=function(fileResult){ if(fileResult.additional_metadata.bgs_loca_rows){ xhtml=xhtml + "
  • " + fileResult.additional_metadata.bgs_loca_rows + "
  • "; }; - if(fileResult.additional_metadata.bgs_loca_rows){ - xhtml=xhtml + "
  • " + fileResult.additional_metadata.bgs_loca_rows + "
  • "; - }; xhtml=xhtml + ""; }; diff --git a/app/templates/_base.html b/app/templates/_base.html index 1704d16..2d5d47e 100644 --- a/app/templates/_base.html +++ b/app/templates/_base.html @@ -409,7 +409,7 @@

    - Powered by pyagsapi 4.6. pyagsapi was created by and is maintained by the British Geological Survey and is distributed under the LGPL v3.0 licence, code is available on GitHub. pyagsapi uses the Official AGS Python Library. + Powered by pyagsapi 5.0. pyagsapi was created by and is maintained by the British Geological Survey and is distributed under the LGPL v3.0 licence, code is available on GitHub. pyagsapi uses the Official AGS Python Library v0.5.0.
    diff --git a/app/templates/landing_page.html b/app/templates/landing_page.html index 731e77e..8d9098b 100644 --- a/app/templates/landing_page.html +++ b/app/templates/landing_page.html @@ -30,7 +30,7 @@

    AGS Data Submission

    AGS4 Schema & Data Validation


    AGS4 Validation

    -

    Performs validation using the Official AGS Python Library version 0.4.1, this implements checks of the rules as written in the AGS data format standard v4.x.

    +

    Performs validation using the Official AGS Python Library version 0.5.0, this implements checks of the rules as written in the AGS data format standard v4.x.

    If you're using AGS Data Format Standard v3.x use our legacy AGS Validator


    BGS Data Validation

    @@ -165,11 +165,15 @@

    AGS Converter


    - Sort worksheets in .xlsx file in alphabetical order (Warning: .ags to .xlsx only. The original group order will be lost) - - - -
    + Sort worksheets in .xlsx file using sorting strategy(Warning: .ags to .xlsx only. The original group order will be lost) + + + + + + + +

    diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..28d70c2 --- /dev/null +++ b/requirements.in @@ -0,0 +1,18 @@ +Fiona +Jinja2 +Shapely +aiofiles +colorlog +geopandas +numpy +pyproj +python-ags4==0.5.0 +requests +shortuuid +# These libraries are already in FastAPI container but need updated +fastapi==0.88.0 +h11==0.14.0 +pydantic==1.10.14 +python-multipart==0.0.9 +starlette==0.22.0 +uvicorn==0.20.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6b00ee9..2a2f35c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,123 @@ -aiofiles==23.1.0 -numpy==1.24.2 -python-ags4==0.4.1 -python-multipart==0.0.5 -colorlog==6.7.0 -shortuuid==1.0.11 -Jinja2==3.1.2 -Fiona==1.9.1 -Shapely==2.0.1 -pyproj==3.4.1 -geopandas==0.12.2 -requests==2.28.2 -# These libraries are already in FastAPI container but need updated -fastapi==0.92.0 -uvicorn==0.20.0 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=requirements.txt requirements.in +# + +aiofiles==23.2.1 + # via -r requirements.in +anyio==4.3.0 + # via starlette +attrs==23.2.0 + # via fiona +certifi==2024.2.2 + # via + # fiona + # pyproj + # requests +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via + # click-plugins + # cligj + # fiona + # python-ags4 + # uvicorn +click-plugins==1.1.1 + # via fiona +cligj==0.7.2 + # via fiona +colorlog==6.8.2 + # via -r requirements.in +defusedxml==0.7.1 + # via python-ags4 +et-xmlfile==1.1.0 + # via openpyxl +fastapi==0.88.0 + # via -r requirements.in +fiona==1.9.5 + # via + # -r requirements.in + # geopandas +geopandas==0.14.3 + # via -r requirements.in h11==0.14.0 + # via + # -r requirements.in + # uvicorn +idna==3.6 + # via + # anyio + # requests +jinja2==3.1.3 + # via -r requirements.in +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +numpy==1.26.4 + # via + # -r 
requirements.in + # pandas + # shapely +openpyxl==3.1.2 + # via python-ags4 +packaging==23.2 + # via geopandas +pandas==2.2.1 + # via + # geopandas + # python-ags4 +pydantic==1.10.14 + # via + # -r requirements.in + # fastapi +pygments==2.17.2 + # via rich +pyproj==3.6.1 + # via + # -r requirements.in + # geopandas +python-ags4==0.5.0 + # via -r requirements.in +python-dateutil==2.9.0.post0 + # via pandas +python-multipart==0.0.9 + # via -r requirements.in +pytz==2024.1 + # via pandas +requests==2.31.0 + # via -r requirements.in +rich==13.7.1 + # via python-ags4 +shapely==2.0.3 + # via + # -r requirements.in + # geopandas +shortuuid==1.0.12 + # via -r requirements.in +six==1.16.0 + # via + # fiona + # python-dateutil +sniffio==1.3.1 + # via anyio +starlette==0.22.0 + # via + # -r requirements.in + # fastapi +typing-extensions==4.10.0 + # via pydantic +tzdata==2024.1 + # via pandas +urllib3==2.2.1 + # via requests +uvicorn==0.20.0 + # via -r requirements.in + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements_dev.in b/requirements_dev.in new file mode 100644 index 0000000..ea87f77 --- /dev/null +++ b/requirements_dev.in @@ -0,0 +1,9 @@ +ipython +ipdb +pytest==8.0.2 # 8.1.0 contains breaking changes +flake8 +requests +httpx +pytest-asyncio +requests-toolbelt +freezegun diff --git a/requirements_dev.txt b/requirements_dev.txt index d27f767..8f1abe3 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,9 +1,105 @@ -ipython==8.10.0 -ipdb==0.13.11 -pytest==7.2.1 -flake8==6.0.0 -requests==2.28.2 -httpx==0.23.3 -pytest-asyncio==0.20.3 -requests-toolbelt==0.10.1 -freezegun==1.2.2 +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --output-file=requirements_dev.txt requirements_dev.in +# + +anyio==4.3.0 + # via httpx +asttokens==2.4.1 + # via stack-data +certifi==2024.2.2 + # via + # httpcore + # httpx + # requests 
+charset-normalizer==3.3.2 + # via requests +decorator==5.1.1 + # via + # ipdb + # ipython +executing==2.0.1 + # via stack-data +flake8==7.0.0 + # via -r requirements_dev.in +freezegun==1.4.0 + # via -r requirements_dev.in +h11==0.14.0 + # via httpcore +httpcore==1.0.4 + # via httpx +httpx==0.27.0 + # via -r requirements_dev.in +idna==3.6 + # via + # anyio + # httpx + # requests +iniconfig==2.0.0 + # via pytest +ipdb==0.13.13 + # via -r requirements_dev.in +ipython==8.22.2 + # via + # -r requirements_dev.in + # ipdb +jedi==0.19.1 + # via ipython +matplotlib-inline==0.1.6 + # via ipython +mccabe==0.7.0 + # via flake8 +packaging==23.2 + # via pytest +parso==0.8.3 + # via jedi +pexpect==4.9.0 + # via ipython +pluggy==1.4.0 + # via pytest +prompt-toolkit==3.0.43 + # via ipython +ptyprocess==0.7.0 + # via pexpect +pure-eval==0.2.2 + # via stack-data +pycodestyle==2.11.1 + # via flake8 +pyflakes==3.2.0 + # via flake8 +pygments==2.17.2 + # via ipython +pytest==8.0.2 + # via + # -r requirements_dev.in + # pytest-asyncio +pytest-asyncio==0.23.5 + # via -r requirements_dev.in +python-dateutil==2.9.0.post0 + # via freezegun +requests==2.31.0 + # via + # -r requirements_dev.in + # requests-toolbelt +requests-toolbelt==1.0.0 + # via -r requirements_dev.in +six==1.16.0 + # via + # asttokens + # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx +stack-data==0.6.3 + # via ipython +traitlets==5.14.1 + # via + # ipython + # matplotlib-inline +urllib3==2.2.1 + # via requests +wcwidth==0.2.13 + # via prompt-toolkit diff --git a/test/files/example_ags.ags b/test/files/example_ags.ags index d69f669..acdf65a 100644 --- a/test/files/example_ags.ags +++ b/test/files/example_ags.ags @@ -8,8 +8,8 @@ "HEADING","ABBR_HDNG","ABBR_CODE","ABBR_DESC","ABBR_LIST","ABBR_REM","FILE_FSET" "UNIT","","","","","","" "TYPE","X","X","X","X","X","X" -"DATA","DICT_TYPE","GROUP","Group","","","" -"DATA","DICT_TYPE","HEADING","Heading","","","" +"DATA","DICT_TYPE","GROUP","Flag to indicate definition 
is a GROUP","","","" +"DATA","DICT_TYPE","HEADING","Flag to indicate definition is a HEADING","","","" "DATA","DICT_STAT","OTHER","Other Field","","","" "DATA","DICT_STAT","KEY","Key Field","","","" "DATA","SAMP_TYPE","U","Undisturbed sample - open drive","","","" diff --git a/test/fixtures_json.py b/test/fixtures_json.py index 3925cb1..42878ca 100644 --- a/test/fixtures_json.py +++ b/test/fixtures_json.py @@ -3,8 +3,8 @@ JSON_RESPONSES = { 'example_ags.ags': { 'filename': 'example_ags.ags', - 'filesize': 4039, - 'checkers': ['python_ags4 v0.4.1'], + 'filesize': 4105, + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': 'All checks passed!', @@ -15,7 +15,7 @@ 'nonsense.AGS': { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', @@ -29,55 +29,63 @@ 'AGS Format Rule 5': [{'line': 1, 'group': '', 'desc': 'Contains fields that are not enclosed in double quotes.'}], - 'AGS Format Rule 13': [{'line': '-', 'group': 'PROJ', 'desc': 'PROJ table not found.'}], - 'AGS Format Rule 14': [{'line': '-', 'group': 'TRAN', 'desc': 'TRAN table not found.'}], - 'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT table not found.'}], - 'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE table not found.'}]}, + 'AGS Format Rule 13': [{'line': '-', 'group': 'PROJ', 'desc': 'PROJ group not found.'}], + 'AGS Format Rule 14': [{'line': '-', 'group': 'TRAN', 'desc': 'TRAN group not found.'}], + 'AGS Format Rule 15': [{'line': '-', 'group': 'UNIT', 'desc': 'UNIT group not found.'}], + 'AGS Format Rule 17': [{'line': '-', 'group': 'TYPE', 'desc': 'TYPE group not found.'}]}, 'valid': False, 'additional_metadata': {} }, 
'random_binary.ags': { 'filename': 'random_binary.ags', 'filesize': 1024, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), - 'message': '36 error(s) found in file!', - 'errors': {'AGS Format Rule 1': [{'desc': 'Has Non-ASCII character(s) and/or ' - 'a byte-order-mark (BOM).', + 'message': '37 error(s) found in file!', + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8') " + "and/or a byte-order-mark (BOM).", 'group': '', 'line': 1}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 2}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 3}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 4}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 5}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 6}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 7}, - {'desc': 'Has Non-ASCII character(s).', + {'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8').", 'group': '', 'line': 8}], - 'AGS Format Rule 13': [{'desc': 'PROJ table not found.', + 'AGS Format Rule 13': [{'desc': 'PROJ group not found.', 'group': 'PROJ', 'line': '-'}], - 'AGS Format Rule 14': [{'desc': 'TRAN table not found.', + 'AGS Format Rule 14': [{'desc': 'TRAN group not 
found.', 'group': 'TRAN', 'line': '-'}], - 'AGS Format Rule 15': [{'desc': 'UNIT table not found.', + 'AGS Format Rule 15': [{'desc': 'UNIT group not found.', 'group': 'UNIT', 'line': '-'}], - 'AGS Format Rule 17': [{'desc': 'TYPE table not found.', + 'AGS Format Rule 17': [{'desc': 'TYPE group not found.', 'group': 'TYPE', 'line': '-'}], 'AGS Format Rule 2a': [{'desc': 'Is not terminated by and ' @@ -175,14 +183,25 @@ {'desc': 'Contains fields that are not ' 'enclosed in double quotes.', 'group': '', - 'line': 8}]}, + 'line': 8}], + 'General': [{'line': '', + 'group': '', + 'desc': "AGS4 Rule 1 is interpreted as allowing both standard ASCII characters " + "(Unicode code points 0-127) and extended ASCII characters (Unicode code points " + "160-255). Please beware that extended ASCII characters differ based on the " + "encoding used when the file was created. The validator defaults to 'utf-8' " + "encoding as it is the most widely used encoding compatible with Unicode. The user " + "can override this default if the file encoding is different but, it is highly " + "recommended that the 'utf-8' encoding be used when creating AGS4 files. 
(Hint: " + "If not 'utf-8', then the encoding is most likely to be 'windows-1252' " + "aka 'cp1252')"}]}, 'valid': False, 'additional_metadata': {} }, 'real/CG014058_F.ags': { 'filename': 'CG014058_F.ags', 'filesize': 50574, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '1 error(s) found in file!', @@ -198,12 +217,13 @@ 'real/Blackburn Southern Bypass.ags': { 'filename': 'Blackburn Southern Bypass.ags', 'filesize': 6566, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '95 error(s) found in file!', - 'errors': {'AGS Format Rule 1': [{'desc': 'Has Non-ASCII character(s) and/or ' - 'a byte-order-mark (BOM).', + 'errors': {'AGS Format Rule 1': [{'desc': "Has Non-ASCII character(s) " + "(assuming that file encoding is 'utf-8') " + "and/or a byte-order-mark (BOM).", 'group': '', 'line': 1}], 'AGS Format Rule 2a': [{'desc': 'Is not terminated by and ' @@ -574,25 +594,28 @@ 'enclosed in double quotes.', 'group': '', 'line': 1}], - 'AGS Format Rule 7': [{'desc': 'Headings not in order starting ' - 'from LOCA_CHKG. Expected order: ' - '...LOCA_ORID|LOCA_ORJO|LOCA_ORCO|LOCA_CHKG|LOCA_APPG|LOCA_PDEP', - 'group': 'LOCA', - 'line': 86}], - 'General': [{'desc': 'This file seems to be encoded with a ' - 'byte-order-mark (BOM). It is highly ' - 'recommended that the file be saved without ' - 'BOM encoding to avoid issues with other ' - 'sofware.', + 'General': [{'line': '', + 'group': '', + 'desc': "AGS4 Rule 1 is interpreted as allowing both standard ASCII characters " + "(Unicode code points 0-127) and extended ASCII characters (Unicode code points " + "160-255). 
Please beware that extended ASCII characters differ based on the " + "encoding used when the file was created. The validator defaults to 'utf-8' " + "encoding as it is the most widely used encoding compatible with Unicode. The user " + "can override this default if the file encoding is different but, it is highly " + "recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: " + "If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252')"}, + {'line': '', 'group': '', - 'line': ''}]}, + 'desc': 'This file seems to be encoded with a byte-order-mark (BOM). ' + 'It is highly recommended that the file be saved without BOM encoding ' + 'to avoid issues with other software.'}]}, 'valid': False, 'additional_metadata': {} }, 'real/A3040_03.ags': { 'filename': 'A3040_03.ags', 'filesize': 264526, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '1 error(s) found in file!', @@ -625,7 +648,7 @@ { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', @@ -640,7 +663,7 @@ { 'filename': 'nonsense.AGS', 'filesize': 9, - 'checkers': ['python_ags4 v0.4.1'], + 'checkers': ['python_ags4 v0.5.0'], 'dictionary': 'Standard_dictionary_v4_1_1.ags', 'time': dt.datetime(2021, 8, 23, 14, 25, 43, tzinfo=dt.timezone.utc), 'message': '7 error(s) found in file!', diff --git a/test/fixtures_plain_text.py b/test/fixtures_plain_text.py index 3f24e0f..becc188 100644 --- a/test/fixtures_plain_text.py +++ b/test/fixtures_plain_text.py @@ -6,8 +6,8 @@ # Metadata -File size: 4039 bytes -Checkers: ['python_ags4 v0.4.1'] +File size: 4105 bytes +Checkers: ['python_ags4 v0.5.0'] Dictionary: 
Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -21,7 +21,7 @@ # Metadata File size: 9 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -42,30 +42,30 @@ ## AGS Format Rule 13 -Group: PROJ - PROJ table not found. +Group: PROJ - PROJ group not found. ## AGS Format Rule 14 -Group: TRAN - TRAN table not found. +Group: TRAN - TRAN group not found. ## AGS Format Rule 15 -Group: UNIT - UNIT table not found. +Group: UNIT - UNIT group not found. ## AGS Format Rule 17 -Group: TYPE - TYPE table not found. +Group: TYPE - TYPE group not found. ================================================================================ """, 'random_binary.ags': """ ================================================================================ -random_binary.ags: 36 error(s) found in file! +random_binary.ags: 37 error(s) found in file! # Metadata File size: 1024 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -74,30 +74,30 @@ ## AGS Format Rule 1 -Line: 1 - Has Non-ASCII character(s) and/or a byte-order-mark (BOM). -Line: 2 - Has Non-ASCII character(s). -Line: 3 - Has Non-ASCII character(s). -Line: 4 - Has Non-ASCII character(s). -Line: 5 - Has Non-ASCII character(s). -Line: 6 - Has Non-ASCII character(s). -Line: 7 - Has Non-ASCII character(s). -Line: 8 - Has Non-ASCII character(s). +Line: 1 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM). +Line: 2 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 3 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 4 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 5 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). 
+Line: 6 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 7 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). +Line: 8 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8'). ## AGS Format Rule 13 -Group: PROJ - PROJ table not found. +Group: PROJ - PROJ group not found. ## AGS Format Rule 14 -Group: TRAN - TRAN table not found. +Group: TRAN - TRAN group not found. ## AGS Format Rule 15 -Group: UNIT - UNIT table not found. +Group: UNIT - UNIT group not found. ## AGS Format Rule 17 -Group: TYPE - TYPE table not found. +Group: TYPE - TYPE group not found. ## AGS Format Rule 2a @@ -132,6 +132,10 @@ Line: 7 - Contains fields that are not enclosed in double quotes. Line: 8 - Contains fields that are not enclosed in double quotes. +## General + +Line: - AGS4 Rule 1 is interpreted as allowing both standard ASCII characters (Unicode code points 0-127) and extended ASCII characters (Unicode code points 160-255). Please beware that extended ASCII characters differ based on the encoding used when the file was created. The validator defaults to 'utf-8' encoding as it is the most widely used encoding compatible with Unicode. The user can override this default if the file encoding is different but, it is highly recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252') + ================================================================================ """, 'real/Blackburn Southern Bypass.ags': """ @@ -141,7 +145,7 @@ # Metadata File size: 6566 bytes -Checkers: ['python_ags4 v0.4.1'] +Checkers: ['python_ags4 v0.5.0'] Dictionary: Standard_dictionary_v4_1_1.ags Time: 2021-08-23 14:25:43+00:00 @@ -150,7 +154,7 @@ ## AGS Format Rule 1 -Line: 1 - Has Non-ASCII character(s) and/or a byte-order-mark (BOM). +Line: 1 - Has Non-ASCII character(s) (assuming that file encoding is 'utf-8') and/or a byte-order-mark (BOM). 
## AGS Format Rule 2a @@ -253,13 +257,10 @@ Line: 1 - Contains fields that are not enclosed in double quotes. -## AGS Format Rule 7 - -Line: 86 - Headings not in order starting from LOCA_CHKG. Expected order: ...LOCA_ORID|LOCA_ORJO|LOCA_ORCO|LOCA_CHKG|LOCA_APPG|LOCA_PDEP - ## General -Line: - This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the file be saved without BOM encoding to avoid issues with other sofware. +Line: - AGS4 Rule 1 is interpreted as allowing both standard ASCII characters (Unicode code points 0-127) and extended ASCII characters (Unicode code points 160-255). Please beware that extended ASCII characters differ based on the encoding used when the file was created. The validator defaults to 'utf-8' encoding as it is the most widely used encoding compatible with Unicode. The user can override this default if the file encoding is different but, it is highly recommended that the 'utf-8' encoding be used when creating AGS4 files. (Hint: If not 'utf-8', then the encoding is most likely to be 'windows-1252' aka 'cp1252') +Line: - This file seems to be encoded with a byte-order-mark (BOM). It is highly recommended that the file be saved without BOM encoding to avoid issues with other software. 
================================================================================ """ diff --git a/test/integration/test_api.py b/test/integration/test_api.py index 589e0d0..6111d50 100644 --- a/test/integration/test_api.py +++ b/test/integration/test_api.py @@ -225,7 +225,7 @@ async def test_convert_good_files(async_client, tmp_path): @pytest.mark.asyncio -@pytest.mark.parametrize('sort_tables', [True, False, None]) +@pytest.mark.parametrize('sort_tables', ['alphabetical', 'default']) async def test_convert_sort_tables(async_client, tmp_path, sort_tables): # Arrange fields = [] @@ -257,7 +257,7 @@ async def test_convert_sort_tables(async_client, tmp_path, sort_tables): assert (zipfile.Path(ags_zip) / 'example_ags.xlsx').is_file() with ags_zip.open('example_ags.xlsx') as xl_file: xl = pd.ExcelFile(xl_file) - if sort_tables: + if sort_tables == 'alphabetical': assert xl.sheet_names == sorted(groups) else: assert xl.sheet_names == groups diff --git a/test/unit/test_checkers.py b/test/unit/test_checkers.py index 48b3a65..548357e 100644 --- a/test/unit/test_checkers.py +++ b/test/unit/test_checkers.py @@ -19,14 +19,16 @@ @pytest.mark.parametrize('filename, expected_rules', [ ('example_ags.ags', set()), - ('random_binary.ags', {'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', - 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), + ('random_binary.ags', {'General', 'AGS Format Rule 1', 'AGS Format Rule 2a', 'AGS Format Rule 3', + 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', + 'AGS Format Rule 17'}), ('nonsense.AGS', {'AGS Format Rule 2a', 'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('empty.ags', {'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 17'}), ('real/A3040_03.ags', {'AGS Format Rule 3'}), - ('real/43370.ags', {'AGS Format Rule 
2a', 'AGS Format Rule 1'}), - ('real/JohnStPrimarySchool.ags', {'File read error'}), + ('real/43370.ags', {'General', 'AGS Format Rule 2a', 'AGS Format Rule 1'}), + ('real/JohnStPrimarySchool.ags', {'AGS Format Rule 3', 'AGS Format Rule 5', 'AGS Format Rule ?', + 'AGS Format Rule 4', 'AGS Format Rule 2a'}), ('real/19684.ags', {'AGS Format Rule 3'}), ('real/E52A4379 (2).ags', {'AGS Format Rule 3'}), ]) diff --git a/test/unit/test_conversion.py b/test/unit/test_conversion.py index b780ca8..619c5d0 100644 --- a/test/unit/test_conversion.py +++ b/test/unit/test_conversion.py @@ -30,8 +30,8 @@ def test_convert(tmp_path, filename, expected): assert re.search(expected_message, response['message']) -@pytest.mark.parametrize('sort_tables', [True, False, None]) -def test_convert_sort_tables(tmp_path, sort_tables): +@pytest.mark.parametrize('sorting_strategy', ['alphabetical', None]) +def test_convert_sort_tables(tmp_path, sorting_strategy): # Arrange filename = Path(__file__).parent.parent / 'files' / 'example_ags.ags' tables, headings = AGS4.AGS4_to_dataframe(filename) @@ -41,8 +41,8 @@ def test_convert_sort_tables(tmp_path, sort_tables): results_dir.mkdir() # Act - if sort_tables is not None: - converted_file, response = convert(filename, results_dir, sort_tables=sort_tables) + if sorting_strategy is not None: + converted_file, response = convert(filename, results_dir, sorting_strategy=sorting_strategy) else: converted_file, response = convert(filename, results_dir) @@ -51,7 +51,7 @@ def test_convert_sort_tables(tmp_path, sort_tables): assert response['filename'] == filename.name xl = pd.ExcelFile(converted_file) - if sort_tables: + if sorting_strategy == 'alphabetical': assert xl.sheet_names == sorted(groups) else: assert xl.sheet_names == groups