Skip to content

Commit

Permalink
fix: changed geometry orientation for hashing algorithm (#75)
Browse files Browse the repository at this point in the history
* fix: changed geometry orientation for hashing

* chore: added changelog entry

* chore: change test values

* chore: simplify duplication reduction algorithm

* chore: change resource usage charts
  • Loading branch information
RaczeQ authored Apr 5, 2024
1 parent 176c088 commit 21e28bb
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 56 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- Made the geometry hash algorithm orientation-agnostic

## [0.5.2] - 2024-04-03

### Added
Expand Down
Binary file modified docs/assets/images/estonia_disk_spillage.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/assets/images/monaco_disk_spillage.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/assets/images/poland_disk_spillage.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 7 additions & 1 deletion quackosm/pbf_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ def get_features_gdf_from_geometry(
def _drop_duplicated_features_in_pyarrow_table(
self, parsed_geoparquet_files: list[Path]
) -> pa.Table:
if len(parsed_geoparquet_files) == 1:
return pq.read_table(parsed_geoparquet_files[0])

with TaskProgressSpinner("Combining results", "", self.silent_mode, skip_step_number=True):
parquet_tables = [
pq.read_table(parsed_parquet_file)
Expand Down Expand Up @@ -796,7 +799,10 @@ def _get_oriented_geometry_filter(
if isinstance(geometry, LinearRing):
# https://stackoverflow.com/a/73073112/7766101
new_coords = []
perimeter = list(geometry.coords)
if geometry.is_ccw:
perimeter = list(geometry.coords)
else:
perimeter = list(geometry.coords)[::-1]
smallest_point = sorted(perimeter)[0]
double_iteration = itertools.chain(perimeter[:-1], perimeter)
for point in double_iteration:
Expand Down
30 changes: 30 additions & 0 deletions tests/base/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Common components for tests."""
from pathlib import Path

from shapely import to_geojson, to_wkt
from shapely.geometry import Polygon, box


def geometry_box() -> Polygon:
    """Build the rectangular polygon shared by the geometry-filter tests."""
    # Keep the bounds in one mapping so the corner values read as a unit.
    bounds = {
        "minx": 7.416486207767861,
        "miny": 43.7310867041912,
        "maxx": 7.421931388477276,
        "maxy": 43.73370705597216,
    }
    return box(**bounds)


def geometry_wkt() -> str:
    """Return the shared test box serialized as a WKT string."""
    test_box = geometry_box()
    wkt_text = to_wkt(test_box)
    return str(wkt_text)


def geometry_geojson() -> str:
    """Return the shared test box serialized as a GeoJSON string."""
    test_box = geometry_box()
    geojson_text = to_geojson(test_box)
    return str(geojson_text)


def geometry_boundary_file_path() -> str:
    """Return the path of the Monaco boundary GeoJSON test file as a string."""
    # The file sits in the "test_files" directory next to this module's package directory.
    tests_root = Path(__file__).parent.parent
    boundary_file = tests_root / "test_files" / "monaco_boundary.geojson"
    return str(boundary_file)
82 changes: 28 additions & 54 deletions tests/base/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@

import pytest
from parametrization import Parametrization as P
from shapely import to_geojson, to_wkt
from shapely.geometry import Polygon, box
from typer.testing import CliRunner

from quackosm import __app_name__, __version__, cli
from tests.base.conftest import geometry_boundary_file_path, geometry_geojson, geometry_wkt

runner = CliRunner()

Expand All @@ -20,31 +19,6 @@ def monaco_pbf_file_path() -> str:
return str(Path(__file__).parent.parent / "test_files" / "monaco.osm.pbf")


def geometry_box() -> Polygon:
"""Geometry box."""
return box(
minx=7.416486207767861,
miny=43.7310867041912,
maxx=7.421931388477276,
maxy=43.73370705597216,
)


def geometry_wkt() -> str:
"""Geometry box in WKT form."""
return str(to_wkt(geometry_box()))


def geometry_geojson() -> str:
"""Geometry box in GeoJSON form."""
return str(to_geojson(geometry_box()))


def geometry_boundary_file_path() -> str:
"""Geometry Monaco boundary file path."""
return str(Path(__file__).parent.parent / "test_files" / "monaco_boundary.geojson")


def osm_tags_filter_file_path() -> str:
"""OSM tags filter file path."""
return str(Path(__file__).parent.parent / "test_files" / "osm_tags_filter.json")
Expand Down Expand Up @@ -182,7 +156,7 @@ def test_silent_mode(monaco_pbf_file_path: str) -> None:
@P.case(
"Geometry file filter",
["--geom-filter-file", geometry_boundary_file_path()],
"files/monaco_nofilter_6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_compact.geoparquet",
"files/monaco_nofilter_6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry geocode filter",
Expand All @@ -197,7 +171,7 @@ def test_silent_mode(monaco_pbf_file_path: str) -> None:
@P.case(
"Geometry Geohash filter multiple",
["--geom-filter-index-geohash", "spv2bc,spv2bfr"],
"files/monaco_nofilter_72883922e5d6e81ab15e5bc2104bd7318d28ffe897a1858f64ba77bafdf62d66_compact.geoparquet",
"files/monaco_nofilter_1bd33e0afc3cd0efcb4740185b8a05ecaf1bac916d571403768939b82844b43e_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry H3 filter",
Expand All @@ -207,7 +181,7 @@ def test_silent_mode(monaco_pbf_file_path: str) -> None:
@P.case(
"Geometry H3 filter multiple",
["--geom-filter-index-h3", "8a3969a40ac7fff,893969a4037ffff"],
"files/monaco_nofilter_9671a25e17150e6171275ed3dc3a96099386644dbbb93d6f4222360e840b7799_compact.geoparquet",
"files/monaco_nofilter_e50e6489d4faba664a3c7f9729b7a3bedafb7a396ae826521f32b556d0b554f1_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry S2 filter",
Expand All @@ -217,7 +191,7 @@ def test_silent_mode(monaco_pbf_file_path: str) -> None:
@P.case(
"Geometry S2 filter multiple",
["--geom-filter-index-s2", "12cdc28bc,12cdc28f"],
"files/monaco_nofilter_35e189b968b4f8d8c27c25b0dab484a6623745d1c85dcb77d19599ed0f7ba802_compact.geoparquet",
"files/monaco_nofilter_cda5d65e169e3ff04970e66e80b021937a5ae141ed72428cc0d9a3764bf076db_compact.geoparquet",
) # type: ignore
@P.case(
"Filter OSM features IDs",
Expand Down Expand Up @@ -302,7 +276,7 @@ def test_proper_args_with_pbf(
@P.case(
"Geometry Geohash filter multiple",
["--geom-filter-index-geohash", "spv2bc,spv2bfr"],
"files/72883922e5d6e81ab15e5bc2104bd7318d28ffe897a1858f64ba77bafdf62d66_nofilter_compact.geoparquet",
"files/1bd33e0afc3cd0efcb4740185b8a05ecaf1bac916d571403768939b82844b43e_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry H3 filter",
Expand All @@ -312,7 +286,7 @@ def test_proper_args_with_pbf(
@P.case(
"Geometry H3 filter multiple",
["--geom-filter-index-h3", "8a3969a40ac7fff,893969a4037ffff"],
"files/9671a25e17150e6171275ed3dc3a96099386644dbbb93d6f4222360e840b7799_nofilter_compact.geoparquet",
"files/e50e6489d4faba664a3c7f9729b7a3bedafb7a396ae826521f32b556d0b554f1_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry S2 filter",
Expand All @@ -322,47 +296,47 @@ def test_proper_args_with_pbf(
@P.case(
"Geometry S2 filter multiple",
["--geom-filter-index-s2", "12cdc28bc,12cdc28f"],
"files/35e189b968b4f8d8c27c25b0dab484a6623745d1c85dcb77d19599ed0f7ba802_nofilter_compact.geoparquet",
"files/cda5d65e169e3ff04970e66e80b021937a5ae141ed72428cc0d9a3764bf076db_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Geometry file filter with different OSM source",
["--geom-filter-file", geometry_boundary_file_path(), "--osm-extract-source", "OSMfr"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Explode",
["--geom-filter-file", geometry_boundary_file_path(), "--explode-tags"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_exploded.geoparquet",
) # type: ignore
@P.case(
"Explode short",
["--geom-filter-file", geometry_boundary_file_path(), "--explode"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_exploded.geoparquet",
) # type: ignore
@P.case(
"Compact",
["--geom-filter-file", geometry_boundary_file_path(), "--compact-tags"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Compact short",
["--geom-filter-file", geometry_boundary_file_path(), "--compact"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Working directory",
["--geom-filter-file", geometry_boundary_file_path(), "--working-directory", "files/workdir"],
"files/workdir/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/workdir/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Ignore cache",
["--geom-filter-file", geometry_boundary_file_path(), "--ignore-cache"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Ignore cache short",
["--geom-filter-file", geometry_boundary_file_path(), "--no-cache"],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Output",
Expand Down Expand Up @@ -399,7 +373,7 @@ def test_proper_args_with_pbf(
"--osm-tags-filter",
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_exploded.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter compact",
Expand All @@ -410,7 +384,7 @@ def test_proper_args_with_pbf(
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
"--compact",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter file",
Expand All @@ -420,7 +394,7 @@ def test_proper_args_with_pbf(
"--osm-tags-filter-file",
osm_tags_filter_file_path(),
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_exploded.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter file compact",
Expand All @@ -431,7 +405,7 @@ def test_proper_args_with_pbf(
osm_tags_filter_file_path(),
"--compact",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter grouped",
Expand All @@ -441,7 +415,7 @@ def test_proper_args_with_pbf(
"--osm-tags-filter",
'{"group": {"building": true, "highway": ["primary", "secondary"], "amenity": "bench"} }',
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_654daac5550b95c8c0e3c57a75a1e16dfa638946461e0977af8f9ca98039db06_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_654daac5550b95c8c0e3c57a75a1e16dfa638946461e0977af8f9ca98039db06_exploded.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter grouped compact",
Expand All @@ -452,7 +426,7 @@ def test_proper_args_with_pbf(
'{"group": {"building": true, "highway": ["primary", "secondary"], "amenity": "bench"} }',
"--compact",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_654daac5550b95c8c0e3c57a75a1e16dfa638946461e0977af8f9ca98039db06_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_654daac5550b95c8c0e3c57a75a1e16dfa638946461e0977af8f9ca98039db06_compact.geoparquet",
) # type: ignore
@P.case(
"Filter OSM features IDs",
Expand All @@ -462,7 +436,7 @@ def test_proper_args_with_pbf(
"--filter-osm-ids",
"way/94399646,node/3617982224,relation/36990",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact_c740a1597e53ae8c5e98c5119eaa1893ddc177161afe8642addcbe54a6dc089d.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact_c740a1597e53ae8c5e98c5119eaa1893ddc177161afe8642addcbe54a6dc089d.geoparquet",
) # type: ignore
@P.case(
"Keep all tags",
Expand All @@ -471,7 +445,7 @@ def test_proper_args_with_pbf(
geometry_boundary_file_path(),
"--keep-all-tags",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags",
Expand All @@ -482,7 +456,7 @@ def test_proper_args_with_pbf(
"--osm-tags-filter",
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags compact",
Expand All @@ -494,7 +468,7 @@ def test_proper_args_with_pbf(
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
"--compact",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_compact.geoparquet",
) # type: ignore
@P.case(
"OSM tags filter with keep all tags exploded",
Expand All @@ -506,7 +480,7 @@ def test_proper_args_with_pbf(
'{"building": true, "highway": ["primary", "secondary"], "amenity": "bench"}',
"--explode",
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_exploded.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_a9dd1c3c2e3d6a94354464e9a1a536ef44cca77eebbd882f48ca52799eb4ca91_alltags_exploded.geoparquet",
) # type: ignore
@P.case(
"OSM way polygon config",
Expand All @@ -516,7 +490,7 @@ def test_proper_args_with_pbf(
"--osm-way-polygon-config",
osm_way_config_file_path(),
],
"files/6e3ec5872bf41c2c44698fcf71266971c552d13feea19c3714e171bcd7a2b2c8_nofilter_compact.geoparquet",
"files/6a869bcfa1a49ade8b76569e48e4142bce29098815bf37e57155a18204f2bbbc_nofilter_compact.geoparquet",
) # type: ignore
@P.case(
"Allow not covered geometry",
Expand Down
28 changes: 27 additions & 1 deletion tests/base/test_pbf_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import six
from parametrization import Parametrization as P
from shapely import from_wkt, hausdorff_distance
from shapely.geometry import MultiPolygon, Polygon, box
from shapely.geometry import LinearRing, MultiPolygon, Polygon, box, polygon
from shapely.geometry.base import BaseGeometry
from shapely.ops import unary_union
from srai.geometry import remove_interiors
Expand All @@ -35,6 +35,7 @@
)
from quackosm.osm_extracts import OsmExtractSource
from quackosm.pbf_file_reader import PbfFileReader
from tests.base.conftest import geometry_box

ut = TestCase()
LFS_DIRECTORY_URL = "https://github.com/kraina-ai/srai-test-files/raw/main/files/"
Expand Down Expand Up @@ -137,6 +138,31 @@ def test_pbf_reader_geometry_filtering(): # type: ignore
assert len(features_gdf) == 0


@pytest.mark.parametrize(
    "geometry",
    [
        geometry_box(),
        from_wkt(
            "POLYGON ((-43.064 29.673, -43.064 29.644, -43.017 29.644,"
            " -43.017 29.673, -43.064 29.673))"
        ),
    ],
)  # type: ignore
def test_geometry_hash_calculation(geometry: BaseGeometry) -> None:
    """Test if geometry hash is orientation-agnostic.

    Two variants of the same geometry with opposite ring orientation are
    passed as ``geometry_filter`` to separate ``PbfFileReader`` instances,
    and the results of ``_get_oriented_geometry_filter()`` must be equal.

    Args:
        geometry: Geometry to check; a ``Polygon`` or a ``LinearRing``.
    """
    if isinstance(geometry, Polygon):
        # Re-orient the same polygon with both possible signs.
        oriented_a = polygon.orient(geometry, sign=1.0)
        oriented_b = polygon.orient(geometry, sign=-1.0)
    elif isinstance(geometry, LinearRing):
        # Reversing the coordinate sequence flips the ring direction.
        oriented_a = geometry
        oriented_b = LinearRing(list(geometry.coords)[::-1])
    else:
        # Fail explicitly instead of hitting an UnboundLocalError below.
        pytest.fail(f"Unsupported geometry type: {type(geometry).__name__}")

    assert (
        PbfFileReader(geometry_filter=oriented_a)._get_oriented_geometry_filter()
        == PbfFileReader(geometry_filter=oriented_b)._get_oriented_geometry_filter()
    )


def test_unique_osm_ids_duplicated_file(): # type: ignore
"""Test if function returns results without duplicated features."""
monaco_file_path = Path(__file__).parent.parent / "test_files" / "monaco.osm.pbf"
Expand Down

0 comments on commit 21e28bb

Please sign in to comment.