Skip to content

Commit

Permalink
feat: add automatic resolution finding logic
Browse files Browse the repository at this point in the history
  • Loading branch information
RaczeQ committed May 13, 2024
1 parent bc3ba4e commit 59fb4f2
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 16 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Nodes intersection from `ST_Intersects` to an approximation using H3 cells for bigger scale transformations [#109](https://github.com/kraina-ai/quackosm/issues/109)

## [0.8.1] - 2024-05-11

### Added
Expand Down
48 changes: 38 additions & 10 deletions quackosm/_h3.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,55 @@
import platform
from pathlib import Path
from typing import Optional

import duckdb
import pyarrow as pa
import pyarrow.parquet as pq
from h3ronpy.arrow.vector import ContainmentMode, wkb_to_cells
from pooch import Decompress, retrieve
from shapely.geometry.base import BaseGeometry
from shapely.geometry.base import BaseGeometry, BaseMultipartGeometry

START_H3_RESOLUTION = 4
MAX_H3_RESOLUTION = 11
MAX_H3_CELLS = 10_000_000


def _transform_geometry_filter_to_h3(
geometry: BaseGeometry, working_directory: Path, h3_resolution: int = 11
) -> Path:
geometry: BaseGeometry, working_directory: Path, h3_resolution: Optional[int] = None
) -> tuple[Path, int]:
"""Fill geometry filter with H3 polygons and save them in a dedicated parquet file."""
result_file_path = working_directory / "geometry_filter_h3_indexes.parquet"
h3_indexes = wkb_to_cells(
[sub_geometry.wkb for sub_geometry in geometry.geoms],
resolution=h3_resolution,
containment_mode=ContainmentMode.Covers,
flatten=True,
).unique()
if isinstance(geometry, BaseMultipartGeometry):
wkb = [sub_geometry.wkb for sub_geometry in geometry.geoms]
else:
wkb = [geometry.wkb]

search_for_matching_resolution = False
if h3_resolution is None:
h3_resolution = START_H3_RESOLUTION
search_for_matching_resolution = True

finished_searching = False
while not finished_searching:
h3_indexes = wkb_to_cells(
wkb,
resolution=h3_resolution,
containment_mode=ContainmentMode.Covers,
flatten=True,
).unique()

if not search_for_matching_resolution:
finished_searching = True
else:
number_of_cells = len(h3_indexes)
# Multiplying by 7, because thats the number of children in a parent cell.
if (number_of_cells * 7) > MAX_H3_CELLS or h3_resolution == MAX_H3_RESOLUTION:
finished_searching = True
else:
h3_resolution += 1

pq.write_table(pa.table(dict(h3=h3_indexes)), result_file_path)
return result_file_path
return result_file_path, h3_resolution


# Based on https://github.com/fusedio/udfs/blob/main/public/DuckDB_H3_Example/utils.py
Expand Down
11 changes: 5 additions & 6 deletions quackosm/pbf_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __init__(
] = None,
parquet_compression: str = "snappy",
osm_extract_source: Union[OsmExtractSource, str] = OsmExtractSource.geofabrik,
geometry_filter_intersection_h3_resolution: int = 10,
geometry_filter_intersection_h3_resolution: Optional[int] = None,
verbosity_mode: Literal["silent", "transient", "verbose"] = "transient",
allow_uncovered_geometry: bool = False,
debug_memory: bool = False,
Expand Down Expand Up @@ -149,7 +149,8 @@ def __init__(
intersections. First H3 cells are generated to cover given geometry and then
intersection is done based on H3 indexes, not using ST_Intersects. Higher resolution
will result in finer approximation of the original geometry, but might require more
memory during operations. Defaults to 11.
memory during operations. If `None`, will automatically determine the appropriate
value to cover geometries using less than 10 million H3 cells. Defaults to `None`.
verbosity_mode (Literal["silent", "transient", "verbose"], optional): Set progress
verbosity mode. Can be one of: silent, transient and verbose. Silent disables
output completely. Transient tracks progress, but removes output after finished.
Expand Down Expand Up @@ -1109,7 +1110,7 @@ def _prefilter_elements_ids(
# - select all from NI with tags filter
filter_osm_node_ids_filter = self._generate_elements_filter(filter_osm_ids, "node")
if is_intersecting:
h3_cells_path = _transform_geometry_filter_to_h3(
h3_cells_path, determined_h3_resolution = _transform_geometry_filter_to_h3(
geometry=self.geometry_filter,
working_directory=self.tmp_dir_path,
h3_resolution=self.geometry_filter_intersection_h3_resolution,
Expand All @@ -1122,9 +1123,7 @@ def _prefilter_elements_ids(
sql_query=f"""
SELECT DISTINCT id FROM ({nodes_valid_with_tags.sql_query()}) n
SEMI JOIN '{h3_cells_path}'
ON h3_latlng_to_cell(
lat, lon, {self.geometry_filter_intersection_h3_resolution}
) = h3
ON h3_latlng_to_cell(lat, lon, {determined_h3_resolution}) = h3
""",
file_path=self.tmp_dir_path / "nodes_intersecting_ids",
)
Expand Down

0 comments on commit 59fb4f2

Please sign in to comment.