Add GeoJSON select & update github actions #139

Closed · wants to merge 25 commits

Commits (25)
1e05446  Extract load_tables_reporting_errors method (volcan01010, Mar 6, 2024)
dd24ca1  Extract create_location_gpd function (volcan01010, Mar 6, 2024)
91d2e89  Add extract_geojson function (volcan01010, Mar 6, 2024)
da4f468  Refine output columns (volcan01010, Mar 6, 2024)
fb1bb86  Add tests for exception cases (volcan01010, Mar 6, 2024)
04899d1  Add concatenate_feature_collection function (volcan01010, Mar 6, 2024)
7cfe4f4  Flake8 fixes (volcan01010, Mar 6, 2024)
eed8922  Add geojson response option (KoalaGeo, Mar 7, 2024)
d58a8cc  Update GitHub Actions Versions (KoalaGeo, Mar 7, 2024)
9e6ba29  Python version 3.11 in actions (KoalaGeo, Mar 7, 2024)
4c7a57e  Add default selection for sorting strategy (KoalaGeo, Mar 7, 2024)
66bc41e  Update test file paths (ximenesuk, Mar 7, 2024)
fb4b411  Format Text (KoalaGeo, Mar 7, 2024)
8f84a7d  Remove nexus mirror configuration (ximenesuk, Mar 7, 2024)
4ba8e73  Merge pull request #137 from BritishGeologicalSurvey/32-geojson-extract (ximenesuk, Mar 7, 2024)
a0004e4  Add two missing rules (ximenesuk, Mar 6, 2024)
033d7d8  Add test case for rules 3, 4 & 5 (ximenesuk, Mar 7, 2024)
09a6ca3  Exception no longer thrown (ximenesuk, Mar 7, 2024)
f2c4ef3  Merge pull request #138 from BritishGeologicalSurvey/missing-rules (KoalaGeo, Mar 7, 2024)
91b016b  Add geojson response option (KoalaGeo, Mar 7, 2024)
eddb960  Update GitHub Actions Versions (KoalaGeo, Mar 7, 2024)
27a070a  Python version 3.11 in actions (KoalaGeo, Mar 7, 2024)
ce6eab8  Add default selection for sorting strategy (KoalaGeo, Mar 7, 2024)
4e062d9  Format Text (KoalaGeo, Mar 7, 2024)
b12d63f  Merge branch 'add-geojson-gui' of https://github.com/BritishGeologica… (KoalaGeo, Mar 7, 2024)
4 changes: 2 additions & 2 deletions .github/workflows/lint_and_test.yml
@@ -15,10 +15,10 @@ jobs:

     steps:
       - name: Checkout source repository
-        uses: actions/checkout@v3.3.0
+        uses: actions/checkout@v4

       - name: Setup Python
-        uses: actions/setup-python@v4.5.0
+        uses: actions/setup-python@v5
         with:
           python-version: 3.11
           architecture: x64
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -30,7 +30,7 @@ jobs:
       contents: read

     steps:
-      - uses: actions/checkout@v3.3.0
+      - uses: actions/checkout@v4

       - name: Build image
         run: docker build . --file Dockerfile --tag $IMAGE_NAME --label "runnumber=${GITHUB_RUN_ID}"
4 changes: 2 additions & 2 deletions .github/workflows/pages.yml
@@ -14,11 +14,11 @@ jobs:
         run: |
           git config user.name github-actions[bot]
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
        with:
          key: mkdocs-material-${{ env.cache_id }}
          path: .cache
17 changes: 11 additions & 6 deletions app/bgs_rules.py
@@ -184,16 +184,11 @@ def check_loca_within_great_britain(tables: dict) -> List[dict]:

     # Read data into geodataframe
     try:
-        location = tables['LOCA'].set_index('LOCA_ID')
-        location['geometry'] = list(zip(location['LOCA_NATE'], location['LOCA_NATN']))
+        location = create_location_gpd(tables)
     except KeyError:
         # LOCA not present, already checked in earlier rule
         return errors

-    location['geometry'] = location['geometry'].apply(Point)
-    location = gpd.GeoDataFrame(location, geometry='geometry', crs='EPSG:27700')
-    location['line_no'] = range(1, len(location) + 1)
-
     inside_uk_eea_mask = location.intersects(uk_eea_outline)
     inside_gb_mask = location.intersects(gb_outline)
     as_irish_grid = location.to_crs("EPSG:29903")

@@ -225,6 +220,16 @@ def check_loca_within_great_britain(tables: dict) -> List[dict]:
     return errors


+def create_location_gpd(tables: dict[pd.DataFrame]) -> gpd.GeoDataFrame:
+    location: pd.DataFrame = tables['LOCA'].set_index('LOCA_ID')
+    location['geometry'] = list(zip(location['LOCA_NATE'], location['LOCA_NATN']))
+    location['geometry'] = location['geometry'].apply(Point)
+    location = gpd.GeoDataFrame(location, geometry='geometry', crs='EPSG:27700')
+    location['line_no'] = range(1, len(location) + 1)
+
+    return location
+
+
 def check_locx_is_not_duplicate_of_other_column(tables: dict) -> List[dict]:
     """LOCA_LOCX and LOCA_LOCY are not duplicates of other columns"""
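For orientation, here is a minimal sketch of how the extracted create_location_gpd helper behaves. The single-row LOCA table is fabricated for the example; in the app it comes from AGS4.AGS4_to_dataframe via the loader in app/checkers.py:

import pandas as pd

from app.bgs_rules import create_location_gpd

# Fabricated LOCA table with British National Grid coordinates in metres
tables = {
    'LOCA': pd.DataFrame({
        'LOCA_ID': ['327-16A'],
        'LOCA_NATE': [523145.00],
        'LOCA_NATN': [178456.12],
    })
}

location = create_location_gpd(tables)
print(location.crs)                  # EPSG:27700 (British National Grid)
print(location['line_no'].tolist())  # [1]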
89 changes: 89 additions & 0 deletions app/borehole_map.py
@@ -0,0 +1,89 @@
"""
Functions used to generate a map of borehole locations by extracting a GeoJSON
representation of their metadata from the AGS files.
"""
from copy import copy
import json
from functools import reduce
import logging
from pathlib import Path

import pandas as pd
import geopandas as gpd

from app.checkers import load_tables_reporting_errors
from app.bgs_rules import create_location_gpd

logger = logging.getLogger(__name__)


def extract_geojson(filepath: Path) -> dict:
    """
    Read an AGS4 file and extract geojson representation of LOCA table and
    metadata.
    """
    logger.info("Extracting geojson from %s", filepath.name)

    # Read data file
    tables, load_error, _ = load_tables_reporting_errors(filepath)
    if load_error:
        raise ValueError(load_error)

    # Convert to geodataframe
    try:
        location: gpd.GeoDataFrame = create_location_gpd(tables)
    except KeyError:
        msg = f"ERROR: LOCA group missing from {filepath}"
        raise ValueError(msg)

    # Add project columns and drop unwanted columns
    try:
        project: pd.DataFrame = tables['PROJ']
    except KeyError:
        msg = f"ERROR: PROJ group missing from {filepath}"
        raise ValueError(msg)

    for column in project.columns:
        if column.startswith('PROJ_'):
            # We assume that each file contains just one project
            location[column] = project.loc[0, column]

    try:
        location['PROJ_FILE_FSET'] = project.loc[0, 'FILE_FSET']
        location.rename(columns={'FILE_FSET': 'LOCA_FILE_FSET'}, inplace=True)
    except KeyError:
        logger.debug("No FILE_FSET for either/both PROJ and LOCA groups for %s",
                     filepath)
    del location['HEADING']

    # Create new ID from project and location IDs
    location.reset_index(inplace=True)
    location['ID'] = location['PROJ_ID'].str.cat(location['LOCA_ID'], sep='.')
    location.set_index('ID', inplace=True)

    # Reproject to WGS84
    location = location.to_crs('EPSG:4326')

    # Return dict representation of geojson
    return json.loads(location.to_json())


def concantenate_feature_collections(feature_collections: list[dict]) -> dict:
    """
    Concatenate feature collections, assuming collection metadata are all
    the same.
    """

    def join_two(first_collection: dict, next_collection: dict) -> dict:
        """
        Join collections by extending the features list. Use copy because
        lists and dictionaries are mutable and we don't want to change the
        input values.
        """
        new_features: list[dict] = copy(first_collection['features'])
        new_features.extend(next_collection['features'])
        new_collection = first_collection.copy()
        new_collection['features'] = new_features
        return new_collection

    return reduce(join_two, feature_collections)
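As a sketch of how these two functions combine: each extract_geojson call yields one FeatureCollection dict, and the reducer merges their features. The file paths below are hypothetical; any AGS v4.x files with PROJ and LOCA groups should work:

from pathlib import Path

from app.borehole_map import extract_geojson, concantenate_feature_collections

# Hypothetical AGS files to plot on the borehole map
paths = [Path('data/site_a.ags'), Path('data/site_b.ags')]

collections = [extract_geojson(path) for path in paths]
combined = concantenate_feature_collections(collections)

print(combined['type'])           # 'FeatureCollection'
print(len(combined['features']))  # boreholes from both files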
49 changes: 28 additions & 21 deletions app/checkers.py
@@ -42,10 +42,6 @@ def check_ags(filename: Path, standard_AGS4_dictionary: Optional[str] = None) -> dict:
         description = f"UnicodeDecodeError: {err.reason}"
         errors = {'File read error': [{'line': line_no, 'group': '', 'desc': description}]}
         dictionary = ''
-    except AGS4.AGS4Error as err:
-        description = f"AGS4Error: {err}"
-        errors = {'File read error': [{'line': '-', 'group': '', 'desc': description}]}
-        dictionary = ''

     # Discard unnecessary summary from errors dictionary
     errors.pop('Summary of data', None)
@@ -61,26 +57,15 @@ def check_bgs(filename: Path, **kwargs) -> dict:
     """
     logger.info("Checking %s against BGS rules.", filename.name)
     errors = {}
-    error_message = None
+    load_error = None
     bgs_metadata = {}

-    try:
-        # Try to load and convert the file. Coordinate type errors replace
-        # empty dictionary from outer scope
-        tables, headers, errors = load_AGS4_as_numeric(filename)
-    except UnboundLocalError:
-        # This error is thrown in response to a bug in the upstream code,
-        # which in turn is only triggered if the AGS file has duplicate
-        # headers.
-        error_message = "ERROR: File contains duplicate headers"
-    except AGS4.AGS4Error as err:
-        error_message = str(err)
-    except IndexError:
-        error_message = "ERROR: File cannot be read, please use AGS checker to confirm format errors"
+    tables, load_error, ags4_errors = load_tables_reporting_errors(filename)

-    if error_message:
-        errors['File read error'] = [{'line': '-', 'group': '', 'desc': error_message}]
+    if load_error:
+        errors['File read error'] = [{'line': '-', 'group': '', 'desc': load_error}]
     else:
+        errors.update(ags4_errors)
         # Get additional metadata
         bgs_metadata = generate_bgs_metadata(tables)

@@ -96,6 +81,28 @@ def check_bgs(filename: Path, **kwargs) -> dict:
                      additional_metadata=bgs_metadata)


+def load_tables_reporting_errors(filename):
+    tables = None
+    ags4_errors = {}
+
+    try:
+        # Try to load and convert the file. Coordinate type errors replace
+        # empty dictionary from outer scope
+        tables, _, ags4_errors = load_ags4_as_numeric(filename)
+        load_error = None
+    except UnboundLocalError:
+        # This error is thrown in response to a bug in the upstream code,
+        # which in turn is only triggered if the AGS file has duplicate
+        # headers.
+        load_error = "ERROR: File contains duplicate headers"
+    except AGS4.AGS4Error as err:
+        load_error = str(err)
+    except IndexError:
+        load_error = "ERROR: File cannot be read, please use AGS checker to confirm format errors"
+
+    return tables, load_error, ags4_errors
+
+
 def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict:
     """Generate additional metadata from groups."""
     try:
@@ -119,7 +126,7 @@ def generate_bgs_metadata(tables: Dict[str, pd.DataFrame]) -> dict:
     return bgs_metadata


-def load_AGS4_as_numeric(filename: Path) -> Tuple[dict, dict, List[dict]]:
+def load_ags4_as_numeric(filename: Path) -> Tuple[dict, dict, List[dict]]:
     """Read AGS4 file and convert to numeric data types."""
     tables, headings = AGS4.AGS4_to_dataframe(filename)
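The refactored loader gives callers a single three-part return value. A minimal sketch of consuming it, with a hypothetical file path:

from pathlib import Path

from app.checkers import load_tables_reporting_errors

tables, load_error, ags4_errors = load_tables_reporting_errors(Path('data/site_a.ags'))

if load_error:
    # Fatal problem, e.g. duplicate headers or an unreadable file
    print(load_error)
else:
    # tables maps group names (PROJ, LOCA, ...) to dataframes;
    # ags4_errors holds any non-fatal conversion errors
    print(list(tables.keys()), ags4_errors)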
4 changes: 2 additions & 2 deletions app/schemas.py
@@ -8,13 +8,13 @@
 VALID_KEYS = [
     # AGS schema rules
     'AGS Format Rule 1', 'AGS Format Rule 2', 'AGS Format Rule 2a', 'AGS Format Rule 2b',
-    'AGS Format Rule 2c', 'AGS Format Rule 3', 'AGS Format Rule 4a', 'AGS Format Rule 4b',
+    'AGS Format Rule 2c', 'AGS Format Rule 3', 'AGS Format Rule 4', 'AGS Format Rule 4a', 'AGS Format Rule 4b',
     'AGS Format Rule 5', 'AGS Format Rule 6', 'AGS Format Rule 7', 'AGS Format Rule 8',
     'AGS Format Rule 9', 'AGS Format Rule 10a', 'AGS Format Rule 10b', 'AGS Format Rule 10c',
     'AGS Format Rule 11a', 'AGS Format Rule 11b', 'AGS Format Rule 11c', 'AGS Format Rule 12',
     'AGS Format Rule 13', 'AGS Format Rule 14', 'AGS Format Rule 15', 'AGS Format Rule 16',
     'AGS Format Rule 17', 'AGS Format Rule 18', 'AGS Format Rule 19', 'AGS Format Rule 19a',
-    'AGS Format Rule 19b', 'AGS Format Rule 20', 'General',
+    'AGS Format Rule 19b', 'AGS Format Rule 20', 'AGS Format Rule ?', 'General',
     # Warnings and FYIs
     'Warning (Related to Rule 16)', 'FYI (Related to Rule 1)',
     # Errors
26 changes: 17 additions & 9 deletions app/templates/landing_page.html
@@ -123,12 +123,20 @@ <h4>Future data validation rules: (Coming Soon)</h4>
     <br>
     <fieldset>
         <legend>Select response format:</legend>
-        <input type="radio" id="text" name="fmt" value="text">
-        <label for="text">Plain Text</label>
         <input type="radio" id="json" name="fmt" value="json">
-        <label for="json">JSON</label><br>
+        <label for="json">JSON</label>
+        <input type="radio" id="text" name="fmt" value="text">
+        <label for="text">Plain Text</label><br>
     </fieldset>
     <br>
+    <fieldset>
+        <legend>If HTML show LOCA features on a map / If JSON include GeoJSON</legend>
+        <input type="radio" id="return_geometry" name="return_geometry" value="true" checked="checked">
+        <label for="true">Yes</label>
+        <input type="radio" id="return_geometry" name="return_geometry" value="false">
+        <label for="false">No</label><br>
+    </fieldset>
+    <br>
     <fieldset>
         <legend>Select .ags / .AGS file(s) for validation (v4.x only) <b>(50 Mb Maximum)</b></legend>
         <input name="files" type="file" multiple>
@@ -165,15 +173,15 @@ <h2>AGS Converter</h2>
     <br>
     <form action="/convert/" enctype="multipart/form-data" method="post" id="convertForm">
         <fieldset>
-            <legend>Sort worksheets in .xlsx file using sorting strategy<strong>(Warning: .ags to .xlsx only. The original group order will be lost)</strong></legend>
+            <legend>Sort worksheets in .xlsx file using a sorting strategy <strong>(Warning: .ags to .xlsx only. The original group order will be lost)</strong></legend>
+            <input type="radio" id="default" name="sort_tables" value="default" checked="checked">
+            <label for="default">None (Maintain Input File Order)</label><br>
             <input type="radio" id="dictionary" name="sort_tables" value="dictionary">
-            <label for="dictionary">Dictionary</label>
+            <label for="dictionary">File Dictionary</label><br>
             <input type="radio" id="alphabetical" name="sort_tables" value="alphabetical">
-            <label for="alphabetical">Alphabetical</label>
+            <label for="alphabetical">Alphabetical</label><br>
             <input type="radio" id="hierarchical" name="sort_tables" value="hierarchical">
-            <label for="hierarchical">Hierarchical</label>
-            <input type="radio" id="default" name="sort_tables" value="default" checked>
-            <label for="default">None (Maintain Input File Order)</label><br>
+            <label for="hierarchical">AGS Standard Hierarchy</label><br>
         </fieldset>
         <br>
         <fieldset>
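For reference, the new form fields can also be posted programmatically. A sketch using requests, where the /validate/ endpoint and localhost URL are assumptions for illustration:

import requests

# Endpoint and host are assumed; 'files', 'fmt' and the new
# 'return_geometry' field mirror the form inputs above
with open('example.ags', 'rb') as ags_file:
    response = requests.post(
        'http://localhost:8080/validate/',
        files={'files': ags_file},
        data={'fmt': 'json', 'return_geometry': 'true'},
    )

print(response.json())  # validation report, with GeoJSON when requested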
1 change: 1 addition & 0 deletions requirements.in
@@ -5,6 +5,7 @@ aiofiles
 colorlog
 geopandas
 numpy
+geojson-pydantic
 pyproj
 python-ags4==0.5.0
 requests
3 changes: 3 additions & 0 deletions requirements.txt
@@ -41,6 +41,8 @@ fiona==1.9.5
     # via
     #   -r requirements.in
     #   geopandas
+geojson-pydantic==0.6.3
+    # via -r requirements.in
 geopandas==0.14.3
     # via -r requirements.in
 h11==0.14.0

@@ -76,6 +78,7 @@ pydantic==1.10.14
     # via
     #   -r requirements.in
     #   fastapi
+    #   geojson-pydantic
 pygments==2.17.2
     # via rich
 pyproj==3.6.1
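The new geojson-pydantic dependency allows the GeoJSON emitted by extract_geojson to be validated against typed models. A minimal sketch with fabricated WGS84 coordinates and properties:

from geojson_pydantic import FeatureCollection

# Fabricated single-borehole collection in the shape extract_geojson returns
data = {
    'type': 'FeatureCollection',
    'features': [{
        'type': 'Feature',
        'geometry': {'type': 'Point', 'coordinates': [-0.1276, 51.5072]},
        'properties': {'PROJ_ID': '121415', 'LOCA_ID': '327-16A'},
    }],
}

collection = FeatureCollection(**data)  # raises ValidationError if malformed
print(collection.features[0].geometry.type)  # 'Point'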
68 changes: 68 additions & 0 deletions test/files/example_broken_ags.ags
@@ -0,0 +1,68 @@
"GROUP","PROJ"
"HEADING","PROJ_ID","PROJ_NAME","PROJ_LOC","PROJ_CLNT","PROJ_CONT","PROJ_ENG","PROJ_MEMO","FILE_FSET"
"UNIT","","","","","","","",""
"TYPE","X","X","X","X","X","X","X","X"
"DATA","121415","ACME Gas Works Redevelopment","Anytown","ACME Enterprises","ACME Drilling Ltd","","",""

"GROUP","ABBR"
"HEADING","ABBR_HDNG","ABBR_CODE","ABBR_DESC","ABBR_LIST","ABBR_REM","FILE_FSET"
"UNIT","","","","","",""
"TYPE","X","X","X","X","X","X"
"DATA","DICT_TYPE","GROUP","Flag to indicate definition is a GROUP","","",""
"DATA","DICT_TYPE","HEADING","Flag to indicate definition is a HEADING","","",""
"DATA","DICT_STAT","OTHER","Other Field","","",""
"DATA","DICT_STAT","KEY","Key Field","","",""
"DATA","SAMP_TYPE","U","Undisturbed sample - open drive","","",""

"GROUP","TRAN"
"HEADING","TRAN_ISNO","TRAN_DATE","TRAN_PROD","TRAN_STAT","TRAN_DESC","TRAN_AGS","TRAN_RECV","TRAN_DLIM","TRAN_RCON","TRAN_REM","FILE_FSET"
"UNIT","","yyyy-mm-dd","","","","","","","","",""
"TYPE","X","DT","X","X","X","X","X","X","X","X","X"
"DATA","1","2021-01-18","ACME Drilling Ltd","Preliminary","Draft Logs only","4.1","ACME Consulting","|","+","",""

"GROUP","TYPE"
"HEADING","TYPE_TYPE","TYPE_DESC","FILE_FSET"
"UNIT","","",""
"TYPE","X","X","X"
"DATA","U","Undefined",""
"DATA","X","Text",""
"DATA","ID","Unique identifier",""
"DATA","PA","ABBR pick list",""
"DATA","2DP","Value; required
number of decimal places, 2",""
"DATA","DT","Date Time (ISO 8601:2004)",""
"DATA","0DP","Value; required
number of decimal places, 0",""
"DATA","1DP","Value; required
number of decimal places, 1",""
"DATA","PT","TYPE pick list",""
"DATA","PU","UNIT pick list",""
"DATA","DMS","Degrees:Minutes:Seconds",""
"DATA","T","Elapsed time",""
"DATA","RL","Record link",""

"GROUP","UNIT"
"HEADING","UNIT_UNIT","UNIT_DESC","UNIT_REM","FILE_FSET"
"UNIT","","","",""
"TYPE","X","X","X","X"
"DATA","-","No unit","",""
"DATA","m","metre","",""
"DATA","yyyy-mm-dd","Date (ISO8601)","",""
"DATA","yyyy-mm-ddThh:mm:ss.sss","Date Time (ISO8601)","",""
"DATA","mm","millimetre","",""
"DATA","%","percentage","",""
"DATA","l/min","litres per minute","",""
"DATA","hh:mm:ss","hh:mm:ss","",""

"GROUP","LOCA"
"HEADING","LOCA_ID","LOCA_TYPE","LOCA_STAT","LOCA_NATE","LOCA_NATN","LOCA_GREF","LOCA_GL","LOCA_REM","LOCA_FDEP","LOCA_STAR","LOCA_PURP","LOCA_TERM","LOCA_ENDD","LOCA_LETT","LOCA_LOCX","LOCA_LOCY","LOCA_LOCZ","LOCA_LREF","LOCA_DATM","LOCA_ETRV","LOCA_NTRV","LOCA_LTRV","LOCA_XTRL","LOCA_YTRL","LOCA_ZTRL","LOCA_LAT","LOCA_LON","LOCA_ELAT","LOCA_ELON","LOCA_LLZ","LOCA_LOCM","LOCA_LOCA","LOCA_CLST","LOCA_ALID","LOCA_OFFS","LOCA_CNGE","LOCA_TRAN","FILE_FSET"
"UNIT","","","","m","m","","m","","m","yyyy-mm-dd","","","yyyy-mm-dd","","m","m","m","","","m","m","m","m","m","m","","","","","","","","","","","","",""
"TYPE","ID","PA","PA","2DP","2DP","PA","2DP","X","2DP","DT","X","X","DT","X","2DP","2DP","2DP","X","X","2DP","2DP","2DP","2DP","2DP","2DP","DMS","DMS","DMS","DMS","X","X","X","X","X","2DP","X","X","X"
"DATA","327-16A","","","523145.00","178456.12","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","",""

"GROUP","SAMP"
"HEADING","LOCA_ID","SAMP_TOP","SAMP_REF","SAMP_TYPE","SAMP_ID","SAMP_BASE","SAMP_DTIM","SAMP_UBLO","SAMP_CONT","SAMP_PREP","SAMP_SDIA","SAMP_WDEP","SAMP_RECV","SAMP_TECH","SAMP_MATX","SAMP_TYPC","SAMP_WHO","SAMP_WHY","SAMP_REM","SAMP_DESC","SAMP_DESD","SAMP_LOG","SAMP_COND","SAMP_CLSS","SAMP_BAR","SAMP_TEMP","SAMP_PRES","SAMP_FLOW","SAMP_ETIM","SAMP_DURN","SAMP_CAPT","SAMP_LINK","GEOL_STAT","FILE_FSET"
"UNIT","","m","","","","m","yyyy-mm-ddThh:mm:ss.sss","","","","mm","m","%","","","","","","","","yyyy-mm-dd","","","","","","","l/min","yyyy-mm-ddThh:mm:ss.sss","hh:mm:ss","","","",""
"TYPE","ID","2DP","X","PA","ID","2DP","DT","0DP","X","X","0DP","X","0DP","X","X","X","X","X","X","X","DT","X","X","X","1DP","0DP","1DP","1DP","DT","T","X","X","X","X"
"DATA","327-16A","24.55","24","U","ABC121415010","25.00","","","","","","","","","","","","","","","","","","","","","","","","","","","",""
