From 0309eac973dd109b16992ac08ef5f07ab138986f Mon Sep 17 00:00:00 2001 From: Jacob Wasserman Date: Mon, 11 Mar 2024 11:51:13 -0400 Subject: [PATCH] Introduce bounding box column definition (#191) --- examples/example.parquet | Bin 27814 -> 29838 bytes examples/example_metadata.json | 20 ++++++++ format-specs/geoparquet.md | 34 +++++++++++++ format-specs/schema.json | 44 +++++++++++++++++ scripts/generate_example.py | 22 +++++++-- scripts/test_json_schema.py | 86 ++++++++++++++++++++++++++++++++- 6 files changed, 200 insertions(+), 6 deletions(-) diff --git a/examples/example.parquet b/examples/example.parquet index 481e66b38952aac5ee9e2737bc454da3fc1c2794..7c653bb259012832e65eea60506a4c1984117239 100644 GIT binary patch delta 2541 zcmcIkeP~-%6u)`xV#wA}bT1^MD_zhSShqB7TAOJh_a#mG($*$f>`PnrN0Pq2ye=PI z+9b`SM8_y)6Lkc{i7*fmks?Y(6cmG?PLR<*MrF>as2eKV94Z6BuXFE9(%BaT|LCQ; z{hf2q*YBK@ymDFd^pBc1J{~Wz0X8(k>REIzLRN${nw2MJTwK*2cEH*EuqKhnJ&>Q~H7)~;lJ69OX$`$5+WGTNn&NgIL zx!~N*>V*N)9}Bv2F^FspggIaUZWwN}mS%WoPmlhb!G zyYMw*`Qj_WOdYRr7B#c`4}G-%bYw?YMVirQrf+?KcHLgy`Rc>2^Y;{_n-{~br(Wwh zcWoo+QqQbrVbs+^keq6*n%V2$tC$u4J+l|SV=SxR5@zanjkDC4h3nh+VqZD)_Pcy{ z<#Or3r|ridtVBM%Q~CLChpS8b>*w-@$7Q^+oI3MUcLQp53tcS+NvhUvp*D^zCx2)_ zZF6X+7|X5ejODLugqk{D<19eU(DGR?GHU1J2{9FAcb6}X%LBcPk?uRIL#+M*5Xj!w zp`5Nl(%Cu$twC>UGp(LI6q7!>)9~&y87}?;zeg6G z;iXH=9)wJt#wJP37;m(!wNA;}QfG%bp2>?@IW3tFcRX$;zhV?DrsOn$DMbTjRB;qQ zaqA0JkQcHATYNyF_-uJ0t6@I6)V>4<8F%4OXiPY)?3nQ#lZ_k)_LrZF4m7#(PXKbxG_N=bdHP5^Kv$3AZWIUD3O&0k+erR5vNcr=kTfw5wJ{OY#d+U1Q z zpFJVE{nJzs2v#M6;9duxv^!@a@ClOIv4>of-Q#(T<3#MopwHn!jQIkv1Ck)~L=F&3 z0x%v8JY=&K00g_B2^bHtP>(tc%t=+xmnmGTJaz?;z$n_hKesCz@;Yz z=fY$pR1|4F98dNy6o8AioTpkpb=WM}O()@-J{I?av)7r7_mD@4445agbt@U*R)!&! zpE=f1A_;xT6EV9pF$&SMBuihtO0QC7!?xxo1bdRkd^Ai743Z0EjtfyOb-HVgwBU(u zXKvaD-wvU-i&&Mw0CG?oLCgh7l|E7}T}$PWM#Dj6PCy0DCT-fmAubQBtBi@iL{Y}n#w`FiA_nJs9~&uS?6$zJZHs2$kK0H zvAVV*w`?VUMwjEj9DoB_7czbs`Yk%8U@%a#yw`SA-~vka`mwF(XPEC$zU?h`Ci_%x zsP%GzcY;hRk}K#En9x_bG)~=DN#y{>> import json, pprint, pyarrow.parquet as pq >>> pprint.pprint(json.loads(pq.read_schema("example.parquet").metadata[b"geo"])) """ +from collections import OrderedDict import json import pathlib @@ -19,6 +20,14 @@ df = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) df = df.to_crs("ogc:84") + +geometry_bbox = df.bounds.rename( + OrderedDict( + [("minx", "xmin"), ("miny", "ymin"), ("maxx", "xmax"), ("maxy", "ymax")] + ), + axis=1, +) +df["bbox"] = geometry_bbox.to_dict("records") table = pa.Table.from_pandas(df.head().to_wkb()) @@ -39,14 +48,19 @@ def get_version() -> str: "crs": json.loads(df.crs.to_json()), "edges": "planar", "bbox": [round(x, 4) for x in df.total_bounds], + "covering": { + "bbox": { + "xmin": ["bbox", "xmin"], + "ymin": ["bbox", "ymin"], + "xmax": ["bbox", "xmax"], + "ymax": ["bbox", "ymax"], + }, + }, }, }, } -schema = ( - table.schema - .with_metadata({"geo": json.dumps(metadata)}) -) +schema = table.schema.with_metadata({"geo": json.dumps(metadata)}) table = table.cast(schema) pq.write_table(table, HERE / "../examples/example.parquet") diff --git a/scripts/test_json_schema.py b/scripts/test_json_schema.py index cda7fb1..f0217c1 100644 --- a/scripts/test_json_schema.py +++ b/scripts/test_json_schema.py @@ -40,8 +40,8 @@ def get_version() -> str: "columns": { "geometry": { "encoding": "WKB", - "geometry_types": [], - }, + "geometry_types": [] + } }, } @@ -210,6 +210,88 @@ def get_version() -> str: metadata["columns"]["geometry"]["epoch"] = "2015.1" invalid_cases["epoch_string"] = metadata +# Geometry Bbox +metadata_covering_template = copy.deepcopy(metadata_template) +metadata_covering_template["columns"]["geometry"]["covering"] = { + "bbox": { + "xmin": ["bbox", "xmin"], + "ymin": ["bbox", "ymin"], + "xmax": ["bbox", "xmax"], + "ymax": ["bbox", "ymax"], + }, +} + + +# Allow "any_column.xmin" etc. +metadata = copy.deepcopy(metadata_covering_template) +valid_cases["valid_default_bbox"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"] = { + "xmin": ["any_column", "xmin"], + "ymin": ["any_column", "ymin"], + "xmax": ["any_column", "xmax"], + "ymax": ["any_column", "ymax"], +} +valid_cases["valid_but_not_bbox_struct_name"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"].pop("bbox") +invalid_cases["empty_geometry_bbox"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"] = {} +invalid_cases["empty_geometry_bbox_missing_fields"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"].pop("xmin") +invalid_cases["covering_bbox_missing_xmin"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"].pop("ymin") +invalid_cases["covering_bbox_missing_ymin"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"].pop("xmax") +invalid_cases["covering_bbox_missing_xmax"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"].pop("ymax") +invalid_cases["covering_bbox_missing_ymax"] = metadata + +# Invalid bbox xmin/xmax/ymin/ymax values +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = ["bbox", "not_xmin"] +invalid_cases["covering_bbox_invalid_xmin"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["xmax"] = ["bbox", "not_xmax"] +invalid_cases["covering_bbox_invalid_xmax"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["ymin"] = ["bbox", "not_ymin"] +invalid_cases["covering_bbox_invalid_ymin"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["ymax"] = ["bbox", "not_ymax"] +invalid_cases["covering_bbox_invalid_ymax"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["xmin"] = ["bbox", "xmin", "invalid_extra"] +invalid_cases["covering_bbox_extra_xmin_elements"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["xmax"] = ["bbox", "xmax", "invalid_extra"] +invalid_cases["covering_bbox_extra_xmax_elements"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["ymin"] = ["bbox", "ymin", "invalid_extra"] +invalid_cases["covering_bbox_extra_ymin_elements"] = metadata + +metadata = copy.deepcopy(metadata_covering_template) +metadata["columns"]["geometry"]["covering"]["bbox"]["ymax"] = ["bbox", "ymax", "invalid_extra"] +invalid_cases["covering_bbox_extra_ymax_elements"] = metadata + # # Tests