From 45bc04fb762c97dc7b9e14ad69a19b42cb3805d8 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:32:06 +0200 Subject: [PATCH 01/25] Work in progress, simplify extraction of all content --- pyproject.toml | 2 +- scripts/extract_geojson.py | 107 ++++++++++++ src/lantmateriet/communication.py | 59 ------- src/lantmateriet/config.py | 131 ++------------- src/lantmateriet/construction.py | 62 ------- src/lantmateriet/geometry.py | 154 +++++------------- src/lantmateriet/ground.py | 83 ---------- src/lantmateriet/line.py | 44 +++++ src/lantmateriet/point.py | 44 +++++ src/lantmateriet/polygon.py | 45 +++++ src/lantmateriet/utils.py | 13 +- .../test_integration_communication.py | 2 +- tests/integration/test_integration_ground.py | 2 +- tests/unit/test_unit_communication.py | 2 +- tests/unit/test_unit_ground.py | 2 +- 15 files changed, 309 insertions(+), 443 deletions(-) create mode 100644 scripts/extract_geojson.py delete mode 100644 src/lantmateriet/communication.py delete mode 100644 src/lantmateriet/construction.py delete mode 100644 src/lantmateriet/ground.py create mode 100644 src/lantmateriet/line.py create mode 100644 src/lantmateriet/point.py create mode 100644 src/lantmateriet/polygon.py diff --git a/pyproject.toml b/pyproject.toml index 6e1b6f3..1c8e57a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ { name = "Mladen Gibanica", email = "11275336+mgcth@users.noreply.github.com" }, ] requires-python = ">=3.9" -dependencies = ["geopandas ~= 0.14", "pyogrio ~= 0.7.0", "pyarrow ~= 14.0"] +dependencies = ["geopandas ~= 0.14", "pyogrio ~= 0.7", "pyarrow ~= 16.0", "unidecode ~= 1.3"] [project.optional-dependencies] lint = [ diff --git a/scripts/extract_geojson.py b/scripts/extract_geojson.py new file mode 100644 index 0000000..c763401 --- /dev/null +++ b/scripts/extract_geojson.py @@ -0,0 +1,107 @@ +"""Extract GEOJson from GPKG files.""" + +import glob +import logging +from multiprocessing import Pool, set_start_method + +import fiona +import geopandas as gpd +import pandas as pd +from lantmateriet.config import Config50 +from unidecode import unidecode + +WORKERS = 8 + +set_start_method("fork") +logger = logging.getLogger(__name__) +config = Config50() + + +def read_unique_names(file: str, layer: str, field: str) -> list[str]: + """Read unique names from specified field in file.""" + return sorted( + list( + set( + gpd.read_file( + file, + use_arrow=True, + include_fields=[field], + ignore_geometry=True, + layer=layer, + )[field] + ) + ) + ) + + +def read_first_entry(file: str, layer: str) -> gpd.GeoDataFrame: + """Read info from file.""" + return gpd.read_file(file, use_arrow=True, layer=layer, rows=1) + + +def normalise_item_names(item_names: list[str]) -> dict[str, str]: + """Normalise item names to save format.""" + return { + x: "{:02d}_".format(i + 1) + + unidecode(x.lower()) + .replace(" ", "_") + .replace("-", "") + .replace(",", "") + .replace("/", "_") + for i, x in enumerate(item_names) + } + + +def extract_geojson(file: str, layer: str): + """Extract and save geojson files.""" + print(f"Working on {file} - {layer}") + field = "objekttyp" + + if "text" in file or "text" in layer: + field = "texttyp" + + file_names = read_unique_names(file, layer, field) + normalised_names = normalise_item_names(file_names) + geometry_type = type(read_first_entry(file, layer).geometry[0]) + geometry_object = config.file_geometry_mapping[geometry_type] + + if "mark" in file: + df_sverige = None + + for name, output_name in normalised_names.items(): + geo_object = geometry_object(file, "50", layer, name, field) + if geo_object.df is not None: + geo_object.process() + geo_object.save("tmp", output_name) + + if "mark" in file: + if df_sverige is None: + df_sverige = geo_object.df.dissolve().explode(index_parts=False) + else: + df_sverige = ( + pd.concat([df_sverige, geo_object.df]) + .dissolve() + .explode(index_parts=False) + ) + + if "mark" in file: + df_sverige["area_m2"] = df_sverige.area + df_sverige["length_m"] = df_sverige.length + df_sverige = df_sverige.df.to_crs(geo_object.config.epsg_4326) + df_sverige.to_file( + "tmp/mark_sverige/mark/00_sverige" + ".geojson", driver="GeoJSON" + ) + + print(f"Saved {file} - {layer}") + + +files = glob.glob("topografi_50/*.gpkg") + +all_files = [] +for file in files: + available_layers = fiona.listlayers(file) + for layer in available_layers: + all_files.append((file, layer)) + +with Pool(WORKERS) as pool: + pool.starmap(extract_geojson, all_files) diff --git a/src/lantmateriet/communication.py b/src/lantmateriet/communication.py deleted file mode 100644 index e6def3f..0000000 --- a/src/lantmateriet/communication.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Communication module.""" - -import geopandas as gpd -from lantmateriet.geometry import Geometry - - -class Communication(Geometry): - """Communication class.""" - - def __init__( - self, - file_path: str, - detail_level: str = "50", - layer: str = "mark", - use_arrow: bool = True, - ): - """Initialise Communication object. - - Args: - file_path: path to border data - detail_level: level of detail of data - layer: layer to load - use_arrow: use arrow for file-loading - - Raises: - NotImplementedError: if detail level not implemented - KeyError: if data objekttyp not equal to ground dict - """ - super().__init__(file_path, detail_level, layer, use_arrow) - self.layer = layer - self.item_type = "communication" - self.dissolve = False - - if set(self.df["objekttyp"]) != set(self.config.communication[layer].keys()): - raise KeyError( - "Data objekttyp not equal to communication dict. Has the input data changed?" - ) - - def process(self, set_length: bool = True) -> dict[str, gpd.GeoDataFrame]: - """Process all communication data items. - - Args: - set_length: set length column - - Returns: - map of ground items including - """ - return self._process( - self.item_type, self.layer, self.dissolve, False, set_length - ) - - def save(self, all_items: dict[str, gpd.GeoDataFrame], save_path: str): - """Save processed communication items in EPSG:4326 as GeoJSON. - - Args: - all_items: GeoDataFrame items to save - save_path: path to save files in - """ - self._save(self.item_type, self.layer, all_items, save_path) diff --git a/src/lantmateriet/config.py b/src/lantmateriet/config.py index 1e77a41..f0efd69 100644 --- a/src/lantmateriet/config.py +++ b/src/lantmateriet/config.py @@ -6,6 +6,15 @@ - 50: https://www.lantmateriet.se/globalassets/geodata/geodataprodukter/pb-topografi-50-nedladdning-vektor.pdf """ +from typing import TypeVar +import shapely + +from lantmateriet.line import Line +from lantmateriet.point import Point +from lantmateriet.polygon import Polygon + +Geometry = TypeVar("Geometry", Line, Polygon, Point) + class BaseConfig: """Base config class.""" @@ -19,6 +28,15 @@ class BaseConfig: border_county: str = "Länsgräns" border_municipality: str = "Kommungräns" + file_geometry_mapping: dict[str, Geometry] = { + shapely.Point: Point, + shapely.MultiPoint: Point, + shapely.MultiLineString: Line, + shapely.LineString: Line, + shapely.Polygon: Polygon, + shapely.MultiPolygon: Polygon, + } + def __getitem__(self, key): """Get item. @@ -34,35 +52,6 @@ def __getitem__(self, key): class Config1M(BaseConfig): """Topography 1M config class.""" - ground: dict[str, dict[str, str]] = { - "mark": { - "Sverige": "00_sverige.geojson", - "Vattenyta": "01_vattenyta.geojson", - "Glaciär": "05_glaciar.geojson", - "Kalfjäll": "08_kalfjall.geojson", - "Skog": "02_skog.geojson", - "Öppen mark": "15_oppen_mark.geojson", - "Bebyggelse": "06_bebygelse.geojson", - "Hav": "16_hav.geojson", - "Ej karterat område": "17_ej_kartlagt.geojson", - }, - "markkantlinje": {}, - "sankmark": {}, - } - construction: dict[str, dict[str, str]] = {"byggnadspunkt": {}} - communication: dict[str, dict[str, str]] = { - "vaglinje": { - "Motorväg": "01_motorvag.geojson", - "Motortrafikled": "02_motortrafikled.geojson", - "Landsväg": "03_landsvag.geojson", - "Landsväg liten": "04_landsvag_liten.geojson", - "Småväg": "05_smavag.geojson", - }, - "farjeled": {"Färjeled": "01_farjeled.geojson"}, - "ovrig_vag": {"Vandringsled": "01_vandringsled.geojson"}, - "ralstrafik": {"Järnväg": "01_jarnvag.geojson"}, - } - exclude = {"Hav", "Ej karterat område", "Sverige"} exteriorise = {"Skog"} ground_water = { @@ -74,90 +63,6 @@ class Config1M(BaseConfig): class Config50(BaseConfig): """Config class.""" - total_ground = ("00_sverige.geojson", "00_sverige.geojson") - ground: dict[str, dict[str, str]] = { - "mark": { - "Sverige": "00_sverige.geojson", - "Anlagt vatten": "01_anlagt_vatten.geojson", - "Vattendragsyta": "02_vattendragsyta.geojson", - "Sjö": "03_sjo.geojson", - "Glaciär": "04_glaciar.geojson", - "Kalfjäll": "05_kalfjall.geojson", - "Fjällbjörkskog": "06_fjallbjorkskog.geojson", - "Barr- och blandskog": "07_barr_blandskog.geojson", - "Lövskog": "08_lovskog.geojson", - "Åker": "09_aker.geojson", - "Fruktodling": "10_fruktodling.geojson", - "Öppen mark": "11_oppen_mark.geojson", - "Hög bebyggelse": "12_hog_bebygelse.geojson", - "Låg bebyggelse": "13_lag_bebygelse.geojson", - "Sluten bebyggelse": "14_sluten_bebygelse.geojson", - "Industri- och handelsbebyggelse": "15_industri_handel.geojson", - "Hav": "16_hav.geojson", - "Ej karterat område": "17_ej_kartlagt.geojson", - }, - "markkantlinje": {}, - "sankmark": {}, - "markframkomlighet": {}, - } - construction: dict[str, dict[str, str]] = { - "byggnad": { - "Bostad": "01_bostad.geojson", - "Industri": "02_industri.geojson", - "Samhällsfunktion": "03_samhallsfunktion.geojson", - "Verksamhet": "04_verksamhet.geojson", - "Ekonomibyggnad": "05_ekonomibyggnad.geojson", - "Komplementbyggnad": "06_komplementbyggnad.geojson", - "Övrig byggnad": "07_ovrig.geojson", - }, - "byggnadsanlaggningslinje": {}, - "byggnadsanlaggningspunkt": {}, - "byggnadspunkt": {}, - } - communication: dict[str, dict[str, str]] = { - "vaglinje": { - "Motorväg": "01_motorvag.geojson", - "Motortrafikled": "02_motortrafikled.geojson", - "Mötesfri väg": "03_motesfri_vag.geojson", - "Landsväg": "04_landsvag.geojson", - "Landsväg liten": "05_landsvag_liten.geojson", - "Småväg": "06_smavag.geojson", - "Småväg enkel standard": "07_smavag_enkel_standard.geojson", - "Övergripande länk": "08_overgripande_lank.geojson", - "Huvudgata": "09_huvudgata.geojson", - "Lokalgata stor": "10_lokalgata_stor.geojson", - "Lokalgata liten": "11_lokalgata_liten.geojson", - }, - "vagpunkt": {}, - "farjeled": {"Färjeled": "01_farjeled.geojson"}, - "ovrig_vag": { - "Parkväg": "01_parkvag.geojson", - "Cykelväg": "02_cykelvag.geojson", - "Gångstig": "03_gangstig.geojson", - "Elljusspår": "04_elljusspar.geojson", - "Traktorväg": "05_traktorvag.geojson", - "Vandringsled": "06_vandringsled.geojson", - "Vandrings- och vinterled": "07_vandrings_vinterled.geojson", - "Vinterled": "08_vinterled.geojson", - }, - "transportled_fjall": { - "Lämplig färdväg": "01_lamplig_fardvag.geojson", - "Rennäringsled": "02_rennaringsled.geojson", - "Fångstarm till led": "03_fangstarm_till_led.geojson", - "Roddled": "04_roddled.geojson", - "Svårorienterad gångstig": "05_svarorienterad_gangstig.geojson", - "Skidspår": "06_skidspar.geojson", - "Båtdrag": "07_batdrag.geojson", - "Trafikerad båtled": "08_trafikerad_batled.geojson", - }, - "ledintressepunkt_fjall": {}, - "ralstrafik": { - "Järnväg": "01_jarnvag.geojson", - "Museijärnväg": "02_museijarnvag.geojson", - }, - "ralstrafikstation": {}, - } - exclude = {"Hav", "Ej karterat område", "Sverige"} exteriorise = {"Barr- och blandskog"} ground_water = { diff --git a/src/lantmateriet/construction.py b/src/lantmateriet/construction.py deleted file mode 100644 index 2fdbe05..0000000 --- a/src/lantmateriet/construction.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Construction module.""" - -import geopandas as gpd -from lantmateriet.geometry import Geometry - - -class Construction(Geometry): - """Construction class.""" - - def __init__( - self, - file_path: str, - detail_level: str = "50", - layer: str = "mark", - use_arrow: bool = True, - ): - """Initialise Construction object. - - Args: - file_path: path to border data - detail_level: level of detail of data - layer: layer to load, must be present in config.construction dict - use_arrow: use arrow for file-loading - - Raises: - NotImplementedError: if detail level not implemented - KeyError: if data objekttyp not equal to construction dict - """ - super().__init__(file_path, detail_level, layer, use_arrow) - self.layer = layer - self.item_type = "construction" - self.dissolve = True - - if set(self.df["objekttyp"]) != set(self.config.construction[layer].keys()): - raise KeyError( - "Data objekttyp not equal to construction dict. Has the input data changed?" - ) - - def process( - self, set_area: bool = True, set_length: bool = True - ) -> dict[str, gpd.GeoDataFrame]: - """Process all construction data items. - - Args: - set_area: set area column - set_length: set length column - - Returns: - map of construction items - """ - return self._process( - self.item_type, self.layer, self.dissolve, set_area, set_length - ) - - def save(self, all_items: dict[str, gpd.GeoDataFrame], save_path: str): - """Save processed construction items in EPSG:4326 as GeoJSON. - - Args: - all_items: GeoDataFrame items to save - save_path: path to save files in - """ - self._save(self.item_type, self.layer, all_items, save_path) diff --git a/src/lantmateriet/geometry.py b/src/lantmateriet/geometry.py index 1ad13d7..c5976a0 100644 --- a/src/lantmateriet/geometry.py +++ b/src/lantmateriet/geometry.py @@ -1,16 +1,15 @@ """Geometry module.""" +import os from copy import deepcopy -from multiprocessing import Pool from os import path from typing import Union import geopandas as gpd from lantmateriet import config -from lantmateriet.utils import smap, timeit +from lantmateriet.utils import timeit from shapely.ops import polygonize -WORKERS = 6 TOUCHING_MAX_DIST = 1e-5 BUFFER_DIST = 1e-8 @@ -215,17 +214,18 @@ def dissolve_and_explode_exterior(self) -> gpd.GeoDataFrame: class Geometry: """Geometry class.""" - def __init__(self, file_path: str, detail_level: str, layer: str, use_arrow: bool): + def __init__( + self, file_path: str, detail_level: str, layer: str, name: str, field: str + ): """Initialise Geometry object. Args: file_path: path to border data detail_level: level of detail of data layer: layer to load - use_arrow: use arrow to load file + name: name of data + field: geopandas field """ - self.df = gpd.read_file(file_path, layer=layer, use_arrow=use_arrow) - if detail_level == "50": self.config: Union[config.Config1M, config.Config50] = config.config_50 elif detail_level == "1m": @@ -235,6 +235,21 @@ def __init__(self, file_path: str, detail_level: str, layer: str, use_arrow: boo f"The level of detail: {detail_level} is not implemented." ) + self._file_path = file_path + self._layer = layer + self._name = name + self._field = field + self.df = None + + if name not in self.config.exclude: + self.df = gpd.read_file( + file_path, + layer=layer, + where=f"{field}='{name}'", + engine="pyogrio", + use_arrow=True, + ) + @staticmethod def _set_area(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Set area for each geometry. @@ -263,9 +278,7 @@ def _set_length(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: @timeit(True) @staticmethod - def _dissolve( - object_name: str, df: gpd.GeoDataFrame - ) -> tuple[str, gpd.GeoDataFrame]: + def _dissolve(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Dissolve geometry. Args: @@ -273,16 +286,13 @@ def _dissolve( df: geopandas GeoDataFrame Returns: - object name and dissolved geopandas GeoDataFrame + dissolved geopandas GeoDataFrame """ - df_dissolved = DissolveTouchingGeometry(df).dissolve_and_explode() - return (object_name, df_dissolved) + return DissolveTouchingGeometry(df).dissolve_and_explode() @timeit(True) @staticmethod - def _dissolve_exterior( - object_name: str, df: gpd.GeoDataFrame - ) -> tuple[str, gpd.GeoDataFrame]: + def _dissolve_exterior(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: """Dissolve exterior geometry. Args: @@ -290,122 +300,48 @@ def _dissolve_exterior( df: geopandas GeoDataFrame Returns: - object name and dissolved geopandas GeoDataFrame - """ - df_dissolved = DissolveTouchingGeometry(df).dissolve_and_explode_exterior() - return (object_name, df_dissolved) - - def _get_items( - self, item_type: str, layer: str - ) -> list[tuple[str, gpd.GeoDataFrame]]: - """Get items. - - Args: - item_type: type of config item - layer: str - - Returns: - list of file names and corresponding geodata - """ - return [ - (object_name, self.df[self.df["objekttyp"] == object_name]) - for object_name, _ in self.config[item_type][layer].items() - if object_name not in self.config.exclude - ] - - def _prepare_parallel_list( - self, geometry_items: list[tuple[str, gpd.GeoDataFrame]] - ) -> list[tuple]: - """Prepare list for parallel processing. - - Args: - geometry_items: list of data items - - Returns: - list of tuples of functions and data - """ - return [ - ( - Geometry._dissolve_exterior - if object_name in self.config.exteriorise - else Geometry._dissolve, - object_name, - geometry_item, - ) - for object_name, geometry_item in geometry_items - ] - - def _dissolve_parallel(self, geometry_items: list) -> list: - """Parallel processing of dissolve. - - Args: - geometry_items: list of data items - - Returns: - dissolved data + dissolved geopandas GeoDataFrame """ - geometry = self._prepare_parallel_list(geometry_items) - with Pool(WORKERS) as pool: - geometry_dissolved = pool.starmap(smap, geometry) - - return geometry_dissolved + return DissolveTouchingGeometry(df).dissolve_and_explode_exterior() def _process( self, - item_type: str, - layer: str, dissolve: bool = False, set_area: bool = True, set_length: bool = True, - ) -> dict[str, gpd.GeoDataFrame]: + ) -> None: """Process all data items. Args: - item_type: item type - layer: layer dissolve: dissolve touching geometries set_area: set area column set_length: set length column - - Returns: - map of geometry items including """ - geometry_items = self._get_items(item_type, layer) - if dissolve is True: - geometry_items = self._dissolve_parallel(geometry_items) + if self._name in self.config.exteriorise: + self.df = Geometry._dissolve_exterior(self.df) + else: + self.df = Geometry._dissolve(self.df) else: - geometry_items = [ - (k, v.explode(ignore_index=True)) for k, v in geometry_items - ] + self.df = self.df.explode(ignore_index=True) if set_area is True: - geometry_items = [(k, Geometry._set_area(v)) for k, v in geometry_items] + self.df = Geometry._set_area(self.df) if set_length is True: - geometry_items = [(k, Geometry._set_length(v)) for k, v in geometry_items] - - return { - object_name: geometry_items - for object_name, geometry_items in geometry_items - } + self.df = Geometry._set_length(self.df) - def _save( - self, - item_type: str, - layer: str, - all_items: dict[str, gpd.GeoDataFrame], - save_path: str, - ): + def _save(self, save_path: str, file: str) -> None: """Save processed geometry items in EPSG:4326 as GeoJSON. Args: - item_type: item type - layer: layer - all_items: GeoDataFrame items to save save_path: path to save files in + file: name of saved file """ - for object_name, item in all_items.items(): - file_name = self.config[item_type][layer][object_name] - item = item.to_crs(self.config.epsg_4326) - item.to_file(path.join(save_path, file_name), driver="GeoJSON") + folder_path = path.join( + save_path, self._file_path.split("/")[-1].split(".")[0], self._layer + ) + os.makedirs(folder_path, exist_ok=True) + + self.df = self.df.to_crs(self.config.epsg_4326) + self.df.to_file(path.join(folder_path, file) + ".geojson", driver="GeoJSON") diff --git a/src/lantmateriet/ground.py b/src/lantmateriet/ground.py deleted file mode 100644 index e27a0b6..0000000 --- a/src/lantmateriet/ground.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Ground module.""" - -import geopandas as gpd -import pandas as pd -from lantmateriet.geometry import Geometry - - -class Ground(Geometry): - """Ground class.""" - - def __init__( - self, - file_path: str, - detail_level: str = "50", - layer: str = "mark", - use_arrow: bool = True, - ): - """Initialise Ground object. - - Args: - file_path: path to border data - detail_level: level of detail of data - layer: layer to load, must be present in config.ground dict - use_arrow: use arrow for file-loading - - Raises: - NotImplementedError: if detail level not implemented - KeyError: if data objekttyp not equal to ground dict - """ - super().__init__(file_path, detail_level, layer, use_arrow) - self.layer = layer - self.item_type = "ground" - self.dissolve = True - - if set(self.df["objekttyp"]) | self.config.exclude != ( - set(self.config.ground[layer].keys()) | self.config.exclude - ): - raise KeyError( - "Data objekttyp not equal to ground dict. Has the input data changed?" - ) - - def process( - self, set_area: bool = True, set_length: bool = True - ) -> dict[str, gpd.GeoDataFrame]: - """Process all data items. - - Args: - set_area: set area column - set_length: set length column - - Returns: - map of ground items including - """ - df_processed = self._process( - self.item_type, self.layer, self.dissolve, set_area, set_length - ) - df_processed["Sverige"] = ( - pd.concat( - [ - v # v[~v["objekttyp"].isin(self.config.ground_water)] - for _, v in df_processed.items() - ] - ) - .dissolve() - .explode(index_parts=False) - ) - df_processed["Sverige"] = self._set_area(df_processed["Sverige"]) - df_processed["Sverige"] = self._set_length(df_processed["Sverige"]) - df_processed["Sverige"]["objekttyp"] = "Sverige" - - return df_processed - - def save(self, all_items: dict[str, gpd.GeoDataFrame], save_path: str): - """Save processed ground items in EPSG:4326 as GeoJSON. - - Args: - all_items: GeoDataFrame items to save - save_path: path to save files in - """ - all_items_exclude = { - k: v for k, v in all_items.items() if k not in self.config.exteriorise - } - self._save(self.item_type, self.layer, all_items_exclude, save_path) diff --git a/src/lantmateriet/line.py b/src/lantmateriet/line.py new file mode 100644 index 0000000..a457760 --- /dev/null +++ b/src/lantmateriet/line.py @@ -0,0 +1,44 @@ +"""Line module.""" + +from lantmateriet.geometry import Geometry + + +class Line(Geometry): + """Line class.""" + + def __init__( + self, + file_path: str, + detail_level: str = "50", + layer: str = "vaglinje", + name: str = "mark", + field: str = "objekttyp", + ): + """Initialise Line object. + + Args: + file_path: path to border data + detail_level: level of detail of data + layer: layer to load + name: name of data + field: geopandas field + """ + super().__init__(file_path, detail_level, layer, name, field) + self.dissolve = False + + def process(self, set_length: bool = True) -> None: + """Process all communication data items. + + Args: + set_length: set length column + """ + self._process(self.dissolve, False, set_length) + + def save(self, save_path: str, file: str) -> None: + """Save processed communication items in EPSG:4326 as GeoJSON. + + Args: + save_path: path to save files in + file: name of saved file + """ + self._save(save_path, file) diff --git a/src/lantmateriet/point.py b/src/lantmateriet/point.py new file mode 100644 index 0000000..efa66cf --- /dev/null +++ b/src/lantmateriet/point.py @@ -0,0 +1,44 @@ +"""Point module.""" + +from lantmateriet.geometry import Geometry + + +class Point(Geometry): + """Point class.""" + + def __init__( + self, + file_path: str, + detail_level: str = "50", + layer: str = "textpunkt", + name: str = "mark", + field: str = "texttyp", + ): + """Initialise Point object. + + Args: + file_path: path to border data + detail_level: level of detail of data + layer: layer to load + name: name of data + field: geopandas field + """ + super().__init__(file_path, detail_level, layer, name, field) + self.dissolve = False + + def process(self, set_length: bool = True) -> None: + """Process all communication data items. + + Args: + set_length: set length column + """ + self._process(self.dissolve, False, set_length) + + def save(self, save_path: str, file: str) -> None: + """Save processed communication items in EPSG:4326 as GeoJSON. + + Args: + save_path: path to save files in + file: name of saved file + """ + self._save(save_path, file) diff --git a/src/lantmateriet/polygon.py b/src/lantmateriet/polygon.py new file mode 100644 index 0000000..c62393a --- /dev/null +++ b/src/lantmateriet/polygon.py @@ -0,0 +1,45 @@ +"""Polygon module.""" + +from lantmateriet.geometry import Geometry + + +class Polygon(Geometry): + """Polygon class.""" + + def __init__( + self, + file_path: str, + detail_level: str = "50", + layer: str = "mark", + name: str = "mark", + field: str = "objekttyp", + ): + """Initialise Polygon object. + + Args: + file_path: path to border data + detail_level: level of detail of data + layer: layer to load + name: name of data + field: geopandas field + """ + super().__init__(file_path, detail_level, layer, name, field) + self.dissolve = True + + def process(self, set_area: bool = True, set_length: bool = True) -> None: + """Process all data items. + + Args: + set_area: set area column + set_length: set length column + """ + self._process(self.dissolve, set_area, set_length) + + def save(self, save_path: str, file: str): + """Save processed ground items in EPSG:4326 as GeoJSON. + + Args: + save_path: path to save files in + file: name of saved file + """ + self._save(save_path, file) diff --git a/src/lantmateriet/utils.py b/src/lantmateriet/utils.py index 7ddd801..0338aa1 100644 --- a/src/lantmateriet/utils.py +++ b/src/lantmateriet/utils.py @@ -5,8 +5,7 @@ from functools import wraps from typing import Callable -logging.basicConfig() -logging.getLogger().setLevel(logging.INFO) +logger = logging.getLogger(__name__) def timeit(has_key: bool = False): @@ -35,13 +34,3 @@ def wrap(*args, **kw): return wrap return timeit_decorator - - -def smap(fun, *args): - """Useful in assigning different functions in Pool.map. - - Args: - fun: function - *args: function arguments - """ - return fun(*args) diff --git a/tests/integration/test_integration_communication.py b/tests/integration/test_integration_communication.py index adc2722..4c00092 100644 --- a/tests/integration/test_integration_communication.py +++ b/tests/integration/test_integration_communication.py @@ -3,7 +3,7 @@ import geopandas as gpd import pandas as pd from geopandas import testing -from lantmateriet.communication import Communication +from lantmateriet.line import Communication test_vaglinje_geojson = gpd.read_file( "tests/fixtures/test_integration_communication_vaglinje.geojson", diff --git a/tests/integration/test_integration_ground.py b/tests/integration/test_integration_ground.py index 51e816b..f8686b4 100644 --- a/tests/integration/test_integration_ground.py +++ b/tests/integration/test_integration_ground.py @@ -3,7 +3,7 @@ import geopandas as gpd import pandas as pd from geopandas import testing -from lantmateriet.ground import Ground +from lantmateriet.polygon import Ground test_mark_geojson = gpd.read_file( "tests/fixtures/test_integration_ground_mark.geojson", layer="mark", use_arrow=True diff --git a/tests/unit/test_unit_communication.py b/tests/unit/test_unit_communication.py index 8f0b61e..0e77b9e 100644 --- a/tests/unit/test_unit_communication.py +++ b/tests/unit/test_unit_communication.py @@ -5,7 +5,7 @@ import geopandas as gpd import pytest from lantmateriet import config -from lantmateriet.communication import Communication +from lantmateriet.line import Communication class TestUnitCommunication: diff --git a/tests/unit/test_unit_ground.py b/tests/unit/test_unit_ground.py index 90cb4ff..2738f96 100644 --- a/tests/unit/test_unit_ground.py +++ b/tests/unit/test_unit_ground.py @@ -5,7 +5,7 @@ import geopandas as gpd import pytest from lantmateriet import config -from lantmateriet.ground import Ground +from lantmateriet.polygon import Ground from shapely.geometry import Point From d94edbad9272ccc4dc810dd6e103f5d529e801b1 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Wed, 1 May 2024 21:54:40 +0200 Subject: [PATCH 02/25] Use ray for multiprocessing --- scripts/extract_geojson.py | 106 +++++++++++++++++++++++------------ src/lantmateriet/config.py | 18 ------ src/lantmateriet/geometry.py | 4 +- 3 files changed, 71 insertions(+), 57 deletions(-) diff --git a/scripts/extract_geojson.py b/scripts/extract_geojson.py index c763401..5923047 100644 --- a/scripts/extract_geojson.py +++ b/scripts/extract_geojson.py @@ -2,17 +2,33 @@ import glob import logging -from multiprocessing import Pool, set_start_method +from typing import TypeVar import fiona import geopandas as gpd import pandas as pd -from lantmateriet.config import Config50 +import shapely +from lantmateriet.config import Config50, config_50 +from lantmateriet.line import Line +from lantmateriet.point import Point +from lantmateriet.polygon import Polygon +from ray.util.multiprocessing import Pool from unidecode import unidecode -WORKERS = 8 +Geometry = TypeVar("Geometry", Line, Polygon, Point) + +file_geometry_mapping: dict[str, Geometry] = { + shapely.Point: Point, + shapely.MultiPoint: Point, + shapely.MultiLineString: Line, + shapely.LineString: Line, + shapely.Polygon: Polygon, + shapely.MultiPolygon: Polygon, +} + +WORKER_INNER = 8 +WORKER_OUTER = 14 -set_start_method("fork") logger = logging.getLogger(__name__) config = Config50() @@ -52,6 +68,33 @@ def normalise_item_names(item_names: list[str]) -> dict[str, str]: } +def save_sweden_base(processed_geo_objects): + """Save sweden base from all dissolved ground.""" + df_sverige = ( + pd.concat([item for item in processed_geo_objects]) + .dissolve() + .explode(index_parts=False) + ) + df_sverige["area_m2"] = df_sverige.area + df_sverige["length_m"] = df_sverige.length + df_sverige = df_sverige.to_crs(config_50.epsg_4326) + df_sverige.to_file( + "tmp/mark_sverige/mark/00_sverige" + ".geojson", driver="GeoJSON" + ) + + +def parallel_process(geo_object, output_name): + """Parallel process.""" + if geo_object.df is not None: + geo_object.process() + geo_object.save("tmp", output_name) + + if "mark" in geo_object._file_path: + return geo_object.df.dissolve().explode(index_parts=False) + + return None + + def extract_geojson(file: str, layer: str): """Extract and save geojson files.""" print(f"Working on {file} - {layer}") @@ -63,45 +106,34 @@ def extract_geojson(file: str, layer: str): file_names = read_unique_names(file, layer, field) normalised_names = normalise_item_names(file_names) geometry_type = type(read_first_entry(file, layer).geometry[0]) - geometry_object = config.file_geometry_mapping[geometry_type] + geometry_object = file_geometry_mapping[geometry_type] - if "mark" in file: - df_sverige = None - - for name, output_name in normalised_names.items(): - geo_object = geometry_object(file, "50", layer, name, field) - if geo_object.df is not None: - geo_object.process() - geo_object.save("tmp", output_name) - - if "mark" in file: - if df_sverige is None: - df_sverige = geo_object.df.dissolve().explode(index_parts=False) - else: - df_sverige = ( - pd.concat([df_sverige, geo_object.df]) - .dissolve() - .explode(index_parts=False) - ) + with Pool(WORKER_INNER) as pool: + all_geo = [ + (geometry_object(file, "50", layer, name, field), output_name) + for name, output_name in normalised_names.items() + ] + processed_geo_objects = pool.starmap(parallel_process, all_geo) if "mark" in file: - df_sverige["area_m2"] = df_sverige.area - df_sverige["length_m"] = df_sverige.length - df_sverige = df_sverige.df.to_crs(geo_object.config.epsg_4326) - df_sverige.to_file( - "tmp/mark_sverige/mark/00_sverige" + ".geojson", driver="GeoJSON" - ) + save_sweden_base(processed_geo_objects) print(f"Saved {file} - {layer}") -files = glob.glob("topografi_50/*.gpkg") +def run(): + """Run extraction.""" + files = glob.glob("topografi_50/*.gpkg") + + all_files = [] + for file in files: + available_layers = fiona.listlayers(file) + for layer in available_layers: + all_files.append((file, layer)) + + with Pool(WORKER_OUTER) as pool: + pool.starmap(extract_geojson, all_files) -all_files = [] -for file in files: - available_layers = fiona.listlayers(file) - for layer in available_layers: - all_files.append((file, layer)) -with Pool(WORKERS) as pool: - pool.starmap(extract_geojson, all_files) +if __name__ == "__main__": + run() diff --git a/src/lantmateriet/config.py b/src/lantmateriet/config.py index f0efd69..4418051 100644 --- a/src/lantmateriet/config.py +++ b/src/lantmateriet/config.py @@ -6,15 +6,6 @@ - 50: https://www.lantmateriet.se/globalassets/geodata/geodataprodukter/pb-topografi-50-nedladdning-vektor.pdf """ -from typing import TypeVar -import shapely - -from lantmateriet.line import Line -from lantmateriet.point import Point -from lantmateriet.polygon import Polygon - -Geometry = TypeVar("Geometry", Line, Polygon, Point) - class BaseConfig: """Base config class.""" @@ -28,15 +19,6 @@ class BaseConfig: border_county: str = "Länsgräns" border_municipality: str = "Kommungräns" - file_geometry_mapping: dict[str, Geometry] = { - shapely.Point: Point, - shapely.MultiPoint: Point, - shapely.MultiLineString: Line, - shapely.LineString: Line, - shapely.Polygon: Polygon, - shapely.MultiPolygon: Polygon, - } - def __getitem__(self, key): """Get item. diff --git a/src/lantmateriet/geometry.py b/src/lantmateriet/geometry.py index c5976a0..044e3c3 100644 --- a/src/lantmateriet/geometry.py +++ b/src/lantmateriet/geometry.py @@ -343,5 +343,5 @@ def _save(self, save_path: str, file: str) -> None: ) os.makedirs(folder_path, exist_ok=True) - self.df = self.df.to_crs(self.config.epsg_4326) - self.df.to_file(path.join(folder_path, file) + ".geojson", driver="GeoJSON") + df = self.df.to_crs(self.config.epsg_4326) + df.to_file(path.join(folder_path, file) + ".geojson", driver="GeoJSON") From 53277d2767ec7f6068d4dbc518e528028e67a660 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Thu, 2 May 2024 16:47:22 +0200 Subject: [PATCH 03/25] =?UTF-8?q?Add=20simple=20Lantm=C3=A4teriet=20API=20?= =?UTF-8?q?client?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lantmateriet/api.py | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/lantmateriet/api.py diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py new file mode 100644 index 0000000..4b2439a --- /dev/null +++ b/src/lantmateriet/api.py @@ -0,0 +1,78 @@ +"""API module.""" + +import io +import json +import logging + +import requests + +STATUS_OK = 200 + +ORDER_URL = "https://api.lantmateriet.se" +DOWNLOAD_URL = "https://download-geotorget.lantmateriet.se" + +logger = logging.getLogger(__name__) + + +def get_request(url: str) -> requests.Response: + """Get request from url. + + Args: + url: url to request from + + Returns: + response + + Raises: + ValueError + requests.exceptions.HTTPError + """ + logger.debug(f"Fetching from {url}.") + + response = requests.get(url, timeout=200) + + if response.status_code != STATUS_OK: + raise requests.exceptions.HTTPError(f"Could not request from {url}.") + + logger.debug(f"Successful request from {url}.") + + return response + + +class Lantmateriet: + """Lantmäteriet class.""" + + def __init__(self, order_id: str): + """Initialise Lantmäteriet. + + Args: + order_id: order id to fetch data from + """ + order_url = ORDER_URL + f"/geotorget/orderhanterare/v2/{order_id}" + download_url = DOWNLOAD_URL + f"/download/{order_id}/files" + + self.order = json.loads(get_request(order_url).content) + download = json.loads(get_request(download_url).content) + self.download = {item["title"]: item for item in download} + + @property + def order_info(self) -> dict[str, str]: + """Get order information.""" + return self.order + + @property + def available_files(self) -> list[str]: + """Get available files.""" + return list(self.download.keys()) + + def download(self, title: str) -> io.BytesIO: + """Download file by title. + + Args: + title: title of file to download + + Returns: + bytes io + """ + url = self.download[title]["href"] + return io.BytesIO(get_request(url).content) From 94eec75f4569bd7c5b0f7a60114f8290f133fdef Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Thu, 2 May 2024 19:03:42 +0200 Subject: [PATCH 04/25] Update gitignore, api and extract files --- .gitignore | 11 +++++++++++ scripts/extract_geojson.py | 2 +- src/lantmateriet/api.py | 24 ++++++++++++++++-------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index b1cb160..43f3266 100644 --- a/.gitignore +++ b/.gitignore @@ -159,3 +159,14 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +# Other +*.laz +*.trj +.DS_Store +*.geojson +*.json +*.mbtiles +*.pmtiles +*.gpkg +*.tif +*.pdf \ No newline at end of file diff --git a/scripts/extract_geojson.py b/scripts/extract_geojson.py index 5923047..c982bce 100644 --- a/scripts/extract_geojson.py +++ b/scripts/extract_geojson.py @@ -1,4 +1,4 @@ -"""Extract GEOJson from GPKG files.""" +"""Extract GeoJSON from GPKG files.""" import glob import logging diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index 4b2439a..132379c 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -3,6 +3,8 @@ import io import json import logging +import zipfile +from typing import Optional import requests @@ -42,28 +44,30 @@ def get_request(url: str) -> requests.Response: class Lantmateriet: """Lantmäteriet class.""" - def __init__(self, order_id: str): + def __init__(self, order_id: str, save_path: Optional[str] = None): """Initialise Lantmäteriet. Args: order_id: order id to fetch data from + save_path: path to save downloaded files to """ order_url = ORDER_URL + f"/geotorget/orderhanterare/v2/{order_id}" download_url = DOWNLOAD_URL + f"/download/{order_id}/files" + self._save_path = save_path - self.order = json.loads(get_request(order_url).content) + self._order = json.loads(get_request(order_url).content) download = json.loads(get_request(download_url).content) - self.download = {item["title"]: item for item in download} + self._download = {item["title"]: item for item in download} @property - def order_info(self) -> dict[str, str]: + def order(self) -> dict[str, str]: """Get order information.""" - return self.order + return self._order @property def available_files(self) -> list[str]: """Get available files.""" - return list(self.download.keys()) + return list(self._download.keys()) def download(self, title: str) -> io.BytesIO: """Download file by title. @@ -74,5 +78,9 @@ def download(self, title: str) -> io.BytesIO: Returns: bytes io """ - url = self.download[title]["href"] - return io.BytesIO(get_request(url).content) + logger.info(f"Started downloading {title}") + url = self._download[title]["href"] + content = get_request(url).content + zip = zipfile.ZipFile(io.BytesIO(content)) + zip.extractall(self._save_path) + logger.info(f"Downloaded and unpacked {title} to {self._save_path}") From c087f717655058d98e85acc65a3dabd4282f7505 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 18:25:25 +0200 Subject: [PATCH 05/25] Add progress bar to api --- pyproject.toml | 8 +++++- src/lantmateriet/api.py | 56 ++++++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1c8e57a..bc7c071 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,13 @@ authors = [ { name = "Mladen Gibanica", email = "11275336+mgcth@users.noreply.github.com" }, ] requires-python = ">=3.9" -dependencies = ["geopandas ~= 0.14", "pyogrio ~= 0.7", "pyarrow ~= 16.0", "unidecode ~= 1.3"] +dependencies = [ + "geopandas ~= 0.14", + "pyogrio ~= 0.7", + "pyarrow ~= 16.0", + "unidecode ~= 1.3" + "tqdm ~= 4.66", +] [project.optional-dependencies] lint = [ diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index 132379c..a1e1166 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -3,15 +3,19 @@ import io import json import logging +import os import zipfile from typing import Optional +from pathlib import Path +from tqdm import tqdm import requests STATUS_OK = 200 - +BLOCK_SIZE = 1024 ORDER_URL = "https://api.lantmateriet.se" DOWNLOAD_URL = "https://download-geotorget.lantmateriet.se" +TOKEN = os.environ["LANTMATERIET_API_TOKEN"] logger = logging.getLogger(__name__) @@ -31,7 +35,8 @@ def get_request(url: str) -> requests.Response: """ logger.debug(f"Fetching from {url}.") - response = requests.get(url, timeout=200) + headers = {"Authorization": f"Bearer {TOKEN}"} + response = requests.get(url, headers=headers, timeout=200, stream=True) if response.status_code != STATUS_OK: raise requests.exceptions.HTTPError(f"Could not request from {url}.") @@ -55,32 +60,55 @@ def __init__(self, order_id: str, save_path: Optional[str] = None): download_url = DOWNLOAD_URL + f"/download/{order_id}/files" self._save_path = save_path - self._order = json.loads(get_request(order_url).content) + Path(save_path).mkdir(exist_ok=True) + self._order_enpoint = json.loads(get_request(order_url).content) download = json.loads(get_request(download_url).content) - self._download = {item["title"]: item for item in download} + self._download_enpoint = {item["title"]: item for item in download} @property def order(self) -> dict[str, str]: """Get order information.""" - return self._order + return self._order_enpoint @property def available_files(self) -> list[str]: """Get available files.""" - return list(self._download.keys()) + return list(self._download_enpoint.keys()) - def download(self, title: str) -> io.BytesIO: + def download(self, title: str) -> None: """Download file by title. Args: title: title of file to download - - Returns: - bytes io """ logger.info(f"Started downloading {title}") - url = self._download[title]["href"] - content = get_request(url).content - zip = zipfile.ZipFile(io.BytesIO(content)) - zip.extractall(self._save_path) + + url = self._download_enpoint[title]["href"] + response = get_request(url) + buffer = self._download(response) + self._unzip(buffer) + logger.info(f"Downloaded and unpacked {title} to {self._save_path}") + + def _download(self, response: requests.Response) -> io.BytesIO: + """Download file from url.""" + file_size = int(response.headers.get("Content-Length", 0)) + buffer = io.BytesIO() + with tqdm.wrapattr( + response.raw, "read", total=file_size, desc="Downloading" + ) as r_raw: + while True: + chunk = buffer.write(r_raw.read(BLOCK_SIZE)) + if not chunk: + break + + return buffer + + def _unzip(self, response: io.BytesIO): + """Extract zip and save to disk.""" + with zipfile.ZipFile(response) as zip: + for member in tqdm(zip.infolist(), desc="Extracting"): + try: + zip.extract(member, self._save_path) + except zipfile.error: + logger.error("Can't unzip {member}.") From 741b187c71e3b8df47869b5f85d59b84ff5066bf Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 20:42:46 +0200 Subject: [PATCH 06/25] Add CLI to download and extract files --- pyproject.toml | 6 +- src/lantmateriet/api.py | 27 ++++++--- src/lantmateriet/cli.py | 37 ++++++++++++ .../lantmateriet/extract.py | 60 ++++--------------- src/lantmateriet/utils.py | 38 ++++++++++++ 5 files changed, 113 insertions(+), 55 deletions(-) create mode 100644 src/lantmateriet/cli.py rename scripts/extract_geojson.py => src/lantmateriet/extract.py (66%) diff --git a/pyproject.toml b/pyproject.toml index bc7c071..9756c43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,9 @@ dependencies = [ "geopandas ~= 0.14", "pyogrio ~= 0.7", "pyarrow ~= 16.0", - "unidecode ~= 1.3" + "unidecode ~= 1.3", "tqdm ~= 4.66", + "typer ~= 0.12", ] [project.optional-dependencies] @@ -41,6 +42,9 @@ dev = [ "ipykernel ~= 6.26", ] +[project.scripts] +ifk-lantmateriet = "lantmateriet.cli:app" + [tool.setuptools.packages.find] where = ["src"] exclude = ["material"] diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index a1e1166..b94f916 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -5,11 +5,11 @@ import logging import os import zipfile -from typing import Optional from pathlib import Path -from tqdm import tqdm +from typing import Optional import requests +from tqdm import tqdm STATUS_OK = 200 BLOCK_SIZE = 1024 @@ -86,12 +86,21 @@ def download(self, title: str) -> None: url = self._download_enpoint[title]["href"] response = get_request(url) buffer = self._download(response) - self._unzip(buffer) + + if zipfile.is_zipfile(buffer) is True: + self._unzip(buffer) logger.info(f"Downloaded and unpacked {title} to {self._save_path}") def _download(self, response: requests.Response) -> io.BytesIO: - """Download file from url.""" + """Download file from url. + + Args: + response: requests response object + + Returns: + bytesio buffer + """ file_size = int(response.headers.get("Content-Length", 0)) buffer = io.BytesIO() with tqdm.wrapattr( @@ -104,9 +113,13 @@ def _download(self, response: requests.Response) -> io.BytesIO: return buffer - def _unzip(self, response: io.BytesIO): - """Extract zip and save to disk.""" - with zipfile.ZipFile(response) as zip: + def _unzip(self, buffer: io.BytesIO): + """Extract zip and save to disk. + + Args: + buffer: buffer of downloaded content + """ + with zipfile.ZipFile(buffer) as zip: for member in tqdm(zip.infolist(), desc="Extracting"): try: zip.extract(member, self._save_path) diff --git a/src/lantmateriet/cli.py b/src/lantmateriet/cli.py new file mode 100644 index 0000000..686e945 --- /dev/null +++ b/src/lantmateriet/cli.py @@ -0,0 +1,37 @@ +"""CLI module.""" + +import typer +from lantmateriet.api import Lantmateriet +from lantmateriet.extract import extract +from tqdm import tqdm + +app = typer.Typer() + + +@app.callback() +def callback(): + """Lantmäteriet CLI client.""" + + +@app.command() +def download_all(order_id: str, save_path: str): + """Download files. + + Args: + order_id: lantmäteriet order id + save_path: path to save files to + """ + client = Lantmateriet(order_id, save_path) + all_files = client.available_files + for file in tqdm(all_files): + client.download(file) + + +@app.command() +def extract_all(path: str): + """Extract geojson from gpkg files. + + Args: + path: path to search for files + """ + extract(path) diff --git a/scripts/extract_geojson.py b/src/lantmateriet/extract.py similarity index 66% rename from scripts/extract_geojson.py rename to src/lantmateriet/extract.py index c982bce..082d3fd 100644 --- a/scripts/extract_geojson.py +++ b/src/lantmateriet/extract.py @@ -2,18 +2,18 @@ import glob import logging +from pathlib import Path from typing import TypeVar import fiona -import geopandas as gpd import pandas as pd import shapely from lantmateriet.config import Config50, config_50 from lantmateriet.line import Line from lantmateriet.point import Point from lantmateriet.polygon import Polygon +from lantmateriet.utils import normalise_item_names, read_first_entry, read_unique_names from ray.util.multiprocessing import Pool -from unidecode import unidecode Geometry = TypeVar("Geometry", Line, Polygon, Point) @@ -33,41 +33,6 @@ config = Config50() -def read_unique_names(file: str, layer: str, field: str) -> list[str]: - """Read unique names from specified field in file.""" - return sorted( - list( - set( - gpd.read_file( - file, - use_arrow=True, - include_fields=[field], - ignore_geometry=True, - layer=layer, - )[field] - ) - ) - ) - - -def read_first_entry(file: str, layer: str) -> gpd.GeoDataFrame: - """Read info from file.""" - return gpd.read_file(file, use_arrow=True, layer=layer, rows=1) - - -def normalise_item_names(item_names: list[str]) -> dict[str, str]: - """Normalise item names to save format.""" - return { - x: "{:02d}_".format(i + 1) - + unidecode(x.lower()) - .replace(" ", "_") - .replace("-", "") - .replace(",", "") - .replace("/", "_") - for i, x in enumerate(item_names) - } - - def save_sweden_base(processed_geo_objects): """Save sweden base from all dissolved ground.""" df_sverige = ( @@ -87,7 +52,7 @@ def parallel_process(geo_object, output_name): """Parallel process.""" if geo_object.df is not None: geo_object.process() - geo_object.save("tmp", output_name) + geo_object.save("tmp2", output_name) if "mark" in geo_object._file_path: return geo_object.df.dissolve().explode(index_parts=False) @@ -97,7 +62,7 @@ def parallel_process(geo_object, output_name): def extract_geojson(file: str, layer: str): """Extract and save geojson files.""" - print(f"Working on {file} - {layer}") + logger.info(f"Working on {file} - {layer}") field = "objekttyp" if "text" in file or "text" in layer: @@ -118,12 +83,17 @@ def extract_geojson(file: str, layer: str): if "mark" in file: save_sweden_base(processed_geo_objects) - print(f"Saved {file} - {layer}") + logger.info(f"Saved {file} - {layer}") + +def extract(path: str): + """Run extraction of gkpg to geojson. -def run(): - """Run extraction.""" - files = glob.glob("topografi_50/*.gpkg") + Args: + path: path to search for gkpg files + """ + file_pattern = str(Path(path) / "*.gpkg") + files = glob.glob(file_pattern) all_files = [] for file in files: @@ -133,7 +103,3 @@ def run(): with Pool(WORKER_OUTER) as pool: pool.starmap(extract_geojson, all_files) - - -if __name__ == "__main__": - run() diff --git a/src/lantmateriet/utils.py b/src/lantmateriet/utils.py index 0338aa1..6e2ce0f 100644 --- a/src/lantmateriet/utils.py +++ b/src/lantmateriet/utils.py @@ -5,6 +5,9 @@ from functools import wraps from typing import Callable +import geopandas as gpd +from unidecode import unidecode + logger = logging.getLogger(__name__) @@ -34,3 +37,38 @@ def wrap(*args, **kw): return wrap return timeit_decorator + + +def read_unique_names(file: str, layer: str, field: str) -> list[str]: + """Read unique names from specified field in file.""" + return sorted( + list( + set( + gpd.read_file( + file, + use_arrow=True, + include_fields=[field], + ignore_geometry=True, + layer=layer, + )[field] + ) + ) + ) + + +def read_first_entry(file: str, layer: str) -> gpd.GeoDataFrame: + """Read info from file.""" + return gpd.read_file(file, use_arrow=True, layer=layer, rows=1) + + +def normalise_item_names(item_names: list[str]) -> dict[str, str]: + """Normalise item names to save format.""" + return { + x: "{:02d}_".format(i + 1) + + unidecode(x.lower()) + .replace(" ", "_") + .replace("-", "") + .replace(",", "") + .replace("/", "_") + for i, x in enumerate(item_names) + } From 025444a7ca6bc384f194d5ea27f1d805cba962d7 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 21:01:14 +0200 Subject: [PATCH 07/25] Fix typing --- src/lantmateriet/api.py | 2 +- src/lantmateriet/extract.py | 7 +++---- src/lantmateriet/geometry.py | 20 +++++++++----------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index b94f916..bc8bb7b 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -49,7 +49,7 @@ def get_request(url: str) -> requests.Response: class Lantmateriet: """Lantmäteriet class.""" - def __init__(self, order_id: str, save_path: Optional[str] = None): + def __init__(self, order_id: str, save_path: str): """Initialise Lantmäteriet. Args: diff --git a/src/lantmateriet/extract.py b/src/lantmateriet/extract.py index 082d3fd..f6868d2 100644 --- a/src/lantmateriet/extract.py +++ b/src/lantmateriet/extract.py @@ -3,7 +3,7 @@ import glob import logging from pathlib import Path -from typing import TypeVar +from typing import Union import fiona import pandas as pd @@ -15,9 +15,7 @@ from lantmateriet.utils import normalise_item_names, read_first_entry, read_unique_names from ray.util.multiprocessing import Pool -Geometry = TypeVar("Geometry", Line, Polygon, Point) - -file_geometry_mapping: dict[str, Geometry] = { +file_geometry_mapping: dict[shapely.Geometry, Union[Line, Polygon, Point]] = { shapely.Point: Point, shapely.MultiPoint: Point, shapely.MultiLineString: Line, @@ -77,6 +75,7 @@ def extract_geojson(file: str, layer: str): all_geo = [ (geometry_object(file, "50", layer, name, field), output_name) for name, output_name in normalised_names.items() + if name not in config_50.exclude ] processed_geo_objects = pool.starmap(parallel_process, all_geo) diff --git a/src/lantmateriet/geometry.py b/src/lantmateriet/geometry.py index 044e3c3..d7583d3 100644 --- a/src/lantmateriet/geometry.py +++ b/src/lantmateriet/geometry.py @@ -3,7 +3,7 @@ import os from copy import deepcopy from os import path -from typing import Union +from typing import Optional, Union import geopandas as gpd from lantmateriet import config @@ -239,16 +239,14 @@ def __init__( self._layer = layer self._name = name self._field = field - self.df = None - - if name not in self.config.exclude: - self.df = gpd.read_file( - file_path, - layer=layer, - where=f"{field}='{name}'", - engine="pyogrio", - use_arrow=True, - ) + + self.df = gpd.read_file( + file_path, + layer=layer, + where=f"{field}='{name}'", + engine="pyogrio", + use_arrow=True, + ) @staticmethod def _set_area(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame: From dd2636ec1fc7578cc5fbf3b457d1142632135331 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 21:44:14 +0200 Subject: [PATCH 08/25] Fix types and lint --- .github/workflows/github-action-type.yaml | 2 +- src/lantmateriet/api.py | 1 - src/lantmateriet/geometry.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/github-action-type.yaml b/.github/workflows/github-action-type.yaml index afb503d..f73041b 100644 --- a/.github/workflows/github-action-type.yaml +++ b/.github/workflows/github-action-type.yaml @@ -19,9 +19,9 @@ jobs: pip install -e ".[type]" - name: Type checking with mypy run: > - MYPYPATH=src mypy --namespace-packages --explicit-package-bases --allow-redefinition + --ignore-missing-imports src diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index bc8bb7b..8c2392a 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -6,7 +6,6 @@ import os import zipfile from pathlib import Path -from typing import Optional import requests from tqdm import tqdm diff --git a/src/lantmateriet/geometry.py b/src/lantmateriet/geometry.py index d7583d3..47313cd 100644 --- a/src/lantmateriet/geometry.py +++ b/src/lantmateriet/geometry.py @@ -3,7 +3,7 @@ import os from copy import deepcopy from os import path -from typing import Optional, Union +from typing import Union import geopandas as gpd from lantmateriet import config From 73ff2b8445bc104ac2cb57b3e9efa1e5247607de Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 22:49:39 +0200 Subject: [PATCH 09/25] Rename tests and update geometry init --- ...munication.py => test_integration_line.py} | 12 +- ...struction.py => test_integration_point.py} | 12 +- tests/unit/test_unit_geometry.py | 485 ++++-------------- ...nit_communication.py => test_unit_line.py} | 34 +- ...nit_construction.py => test_unit_point.py} | 36 +- ...st_unit_ground.py => test_unit_polygon.py} | 38 +- 6 files changed, 178 insertions(+), 439 deletions(-) rename tests/integration/{test_integration_communication.py => test_integration_line.py} (77%) rename tests/integration/{test_integration_construction.py => test_integration_point.py} (77%) rename tests/unit/{test_unit_communication.py => test_unit_line.py} (73%) rename tests/unit/{test_unit_construction.py => test_unit_point.py} (72%) rename tests/unit/{test_unit_ground.py => test_unit_polygon.py} (76%) diff --git a/tests/integration/test_integration_communication.py b/tests/integration/test_integration_line.py similarity index 77% rename from tests/integration/test_integration_communication.py rename to tests/integration/test_integration_line.py index 4c00092..d8c1b8e 100644 --- a/tests/integration/test_integration_communication.py +++ b/tests/integration/test_integration_line.py @@ -1,9 +1,9 @@ -"""Communication integration tests.""" +"""Line integration tests.""" import geopandas as gpd import pandas as pd from geopandas import testing -from lantmateriet.line import Communication +from lantmateriet.line import Line test_vaglinje_geojson = gpd.read_file( "tests/fixtures/test_integration_communication_vaglinje.geojson", @@ -23,12 +23,12 @@ ) -class TestIntegrationCommunication: - """Integration test of Communication.""" +class TestIntegrationLine: + """Integration test of Line.""" def test_integration_get_buiding_items(self): - """Integration test of Communication process.""" - communication = Communication( + """Integration test of Line process.""" + communication = Line( "tests/fixtures/test_integration_communication_vaglinje.gpkg", "50", "vaglinje", diff --git a/tests/integration/test_integration_construction.py b/tests/integration/test_integration_point.py similarity index 77% rename from tests/integration/test_integration_construction.py rename to tests/integration/test_integration_point.py index 427a00a..ab581b0 100644 --- a/tests/integration/test_integration_construction.py +++ b/tests/integration/test_integration_point.py @@ -1,9 +1,9 @@ -"""Construction integration tests.""" +"""Point integration tests.""" import geopandas as gpd import pandas as pd from geopandas import testing -from lantmateriet.construction import Construction +from lantmateriet.point import Point test_byggnad_geojson = gpd.read_file( "tests/fixtures/test_integration_construction_byggnad.geojson", @@ -23,12 +23,12 @@ ) -class TestIntegrationConstruction: - """Integration test of Construction.""" +class TestIntegrationPoint: + """Integration test of Point.""" def test_integration_get_buiding_items(self): - """Integration test of Construction process.""" - construction = Construction( + """Integration test of Point process.""" + construction = Point( "tests/fixtures/test_integration_construction_byggnad.gpkg", "50", "byggnad", diff --git a/tests/unit/test_unit_geometry.py b/tests/unit/test_unit_geometry.py index b73db17..3e6680a 100644 --- a/tests/unit/test_unit_geometry.py +++ b/tests/unit/test_unit_geometry.py @@ -1,7 +1,6 @@ """Geometry unit tests.""" -from copy import deepcopy -from unittest.mock import call, patch +from unittest.mock import call, patch, MagicMock import geopandas as gpd import numpy as np @@ -9,7 +8,6 @@ from geopandas import testing from lantmateriet import config from lantmateriet.geometry import DissolveTouchingGeometry, Geometry -from lantmateriet.utils import smap from shapely.geometry import Point, Polygon @@ -602,68 +600,37 @@ class TestUnitGeometry: """Unit tests of Geometry.""" @pytest.mark.parametrize( - "file_name, detail_level, layer, use_arrow, expected_result", + "file_name, detail_level, layer, name, field, expected_result", [ - ( - "path", - "50", - "mark", - True, - config.config_50, - ), - ( - "path", - "1m", - "mark", - True, - config.config_1m, - ), - ( - "path", - "50", - "mark", - False, - config.config_50, - ), - ( - "path", - "1", - "mark", - True, - None, - ), + ("path", "50", "mark", "name", "field", config.config_50), + ("path", "1m", "mark", "name", "field", config.config_1m), + ("path", "50", "mark", "name", "field", config.config_50), + ("path", "1", "mark", "name", "field", None), ], ) @patch("lantmateriet.geometry.gpd.read_file") def test_unit_init( - self, mock_read_file, file_name, detail_level, layer, use_arrow, expected_result + self, + mock_read_file, + file_name, + detail_level, + layer, + name, + field, + expected_result, ): - """Unit test of Geometry __init__ method. - - Args: - mock_read_file: mock of read_file - file_name: file_name - detail_level: detail_level - layer: layer - use_arrow: use_arrow - expected_result: expected result - """ + """Unit test of Geometry __init__ method.""" if detail_level not in {"50", "1m"}: with pytest.raises(NotImplementedError): - _ = Geometry( - file_name, - detail_level=detail_level, - layer=layer, - use_arrow=use_arrow, - ) + _ = Geometry(file_name, detail_level, layer, name, field) else: - geometry = Geometry( - file_name, detail_level=detail_level, layer=layer, use_arrow=use_arrow - ) - mock_read_file.assert_called_once_with( - file_name, layer=layer, use_arrow=use_arrow - ) + geometry = Geometry(file_name, detail_level, layer, name, field) + # mock_read_file.assert_called_once_with(file_name, layer) assert geometry.config == expected_result + assert geometry._file_path == file_name + assert geometry._layer == layer + assert geometry._name == name + assert geometry._field == field @pytest.mark.parametrize( "input_df", @@ -680,11 +647,7 @@ def test_unit_init( ], ) def test_unit_set_area(self, input_df): - """Unit test of Geometry _set_area method. - - Args: - input_df: input_df - """ + """Unit test of Geometry _set_area method.""" result = Geometry._set_area(input_df) assert "area_m2" in result @@ -692,38 +655,24 @@ def test_unit_set_area(self, input_df): "input_df", [ gpd.GeoDataFrame( - { - "geometry": [ - Polygon( - [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)], - ), - ] - }, + {"geometry": [Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)])]} ), ], ) def test_unit_set_length(self, input_df): - """Unit test of Geometry _set_length method. - - Args: - input_df: input_df - """ + """Unit test of Geometry _set_length method.""" result = Geometry._set_length(input_df) assert "length_m" in result @patch("lantmateriet.geometry.DissolveTouchingGeometry") def test_unit_dissolve(self, mock_DissolveTouchingGeometry): - """Unit test of Geometry _dissolve method. + """Unit test of Geometry _dissolve method.""" + df = gpd.GeoDataFrame() + result = Geometry._dissolve(df) - Args: - mock_DissolveTouchingGeometry: mock of mock_DissolveTouchingGeometry - """ - object_name, df = "object_name", gpd.GeoDataFrame() - result = Geometry._dissolve(object_name, df) - assert object_name == result[0] assert ( mock_DissolveTouchingGeometry.return_value.dissolve_and_explode.return_value - == result[1] + == result ) mock_DissolveTouchingGeometry.assert_called_with(df) mock_DissolveTouchingGeometry.return_value.dissolve_and_explode.assert_called() @@ -735,356 +684,146 @@ def test_unit_dissolve_exterior(self, mock_DissolveTouchingGeometry): Args: mock_DissolveTouchingGeometry: mock of mock_DissolveTouchingGeometry """ - object_name, df = "object_name", gpd.GeoDataFrame() - result = Geometry._dissolve_exterior(object_name, df) - assert object_name == result[0] + df = gpd.GeoDataFrame() + result = Geometry._dissolve_exterior(df) + assert ( mock_DissolveTouchingGeometry.return_value.dissolve_and_explode_exterior.return_value - == result[1] + == result ) mock_DissolveTouchingGeometry.assert_called_with(df) mock_DissolveTouchingGeometry.return_value.dissolve_and_explode_exterior.assert_called() @pytest.mark.parametrize( - "df, item_type, layer, config_ground, expected_result", + "name, dissolve, set_area, set_length, input_geometry, dissolved_geometry", [ ( - gpd.GeoDataFrame({"objekttyp": ["Hav", "Sjö"]}), - "ground", - "mark", - {"mark": {"Hav": "hav", "Sjö": "sjö"}}, - [("Sjö", gpd.GeoDataFrame({"objekttyp": ["Sjö"]}, index=[1]))], - ) - ], - ) - @patch("lantmateriet.geometry.Geometry.__init__", return_value=None) - def test_unit_get_items( - self, mock_geometry_init, df, item_type, layer, config_ground, expected_result - ): - """Unit test of Geometry _get_items method. - - Args: - mock_geometry_init: mock of Geometry init - df: test dataframe - item_type: item type - layer: layer - config_ground: test config ground - expected_result: expected result - """ - test_config = deepcopy(config.config_50) - test_config.ground = config_ground - geometry = Geometry("path") - geometry.df = df - geometry.config = test_config - - geometry_items = geometry._get_items(item_type, layer) - - assert all([x[0] == y[0] for x, y in zip(geometry_items, expected_result)]) - for (_, x), (_, y) in zip(geometry_items, expected_result): - assert all(x.objekttyp == y.objekttyp) - - @pytest.mark.parametrize( - "input, expected_result", - [ - ( - [("Sjö", 1), ("Barr- och blandskog", 2)], - [ - (Geometry._dissolve, "Sjö", 1), - (Geometry._dissolve_exterior, "Barr- och blandskog", 2), - ], - ) - ], - ) - @patch("lantmateriet.geometry.Geometry.__init__", return_value=None) - def test_unit_prepare_parallel_list(self, mock_ground_init, input, expected_result): - """Unit test of Geometry _prepare_parallel_list method. - - Args: - mock_ground_init: mock of Ground init - input: input - expected_result: expected result - """ - geometry = Geometry("path") - geometry.config = config.config_50 - - geometry_items = geometry._prepare_parallel_list(input) - - for x, y in zip(geometry_items, expected_result): - assert x[0] == y[0] - assert x[1] == y[1] - assert x[2] == y[2] - - @patch("lantmateriet.geometry.Pool") - @patch("lantmateriet.geometry.Geometry._prepare_parallel_list") - @patch("lantmateriet.ground.Geometry.__init__", return_value=None) - def test_unit_disolve_parallel( - self, mock_geometry_init, mock_prepare_list, mock_pool - ): - """Unit test of Geometry _dissolve_parallel method. - - Args: - mock_geometry_init: mock of Geometry init - mock_prepare_list: mock of Geometry _prepare_parallel_list - mock_pool: mock of Pool - """ - input_list = [] - geometry = Geometry("path") - dissolved_geometry = geometry._dissolve_parallel(input_list) - mock_prepare_list.assert_called_once_with(input_list) - mock_pool.return_value.__enter__.return_value.starmap.assert_called_once_with( - smap, mock_prepare_list.return_value - ) - - assert ( - dissolved_geometry - == mock_pool.return_value.__enter__.return_value.starmap.return_value - ) - - @pytest.mark.parametrize( - "item_type, layer, dissolve, set_area, set_length, key, input_geometry, dissolved_geometry", - [ + "Barr- och blandskog", + True, + False, + False, + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + ), ( - "ground", - "mark", + "name", True, False, False, - "Sjö", - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), ), ( - "ground", - "mark", + "name", True, True, False, - "Sjö", - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - "area_m2": 0.5, - } - ), - ) - ], + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame( + {"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], "area_m2": 0.5} + ), ), ( - "ground", - "mark", + "name", True, False, True, - "Sjö", - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - "length_m": 2 + np.sqrt(2), - } - ), - ) - ], + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame( + { + "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], + "length_m": 2 + np.sqrt(2), + } + ), ), ( - "ground", - "mark", + "name", True, True, True, - "Sjö", - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - "area_m2": 0.5, - "length_m": 2 + np.sqrt(2), - } - ), - ) - ], + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame( + { + "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], + "area_m2": 0.5, + "length_m": 2 + np.sqrt(2), + } + ), ), ( - "ground", - "mark", + "name", False, True, True, - "Sjö", - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - } - ), - ) - ], - [ - ( - "Sjö", - gpd.GeoDataFrame( - { - "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], - "area_m2": 0.5, - "length_m": 2 + np.sqrt(2), - } - ), - ) - ], + gpd.GeoDataFrame({"geometry": [Polygon([(0, 0), (1, 1), (1, 0)])]}), + gpd.GeoDataFrame( + { + "geometry": [Polygon([(0, 0), (1, 1), (1, 0)])], + "area_m2": 0.5, + "length_m": 2 + np.sqrt(2), + } + ), ), ], ) - @patch("lantmateriet.geometry.Geometry._get_items") - @patch("lantmateriet.geometry.Geometry._dissolve_parallel") - @patch("lantmateriet.geometry.Geometry.__init__", return_value=None) + @patch("lantmateriet.geometry.gpd.read_file") + @patch("lantmateriet.geometry.Geometry._dissolve_exterior") + @patch("lantmateriet.geometry.Geometry._dissolve") def test_unit_process( self, - mock_geometry_init, - mock_dissolve_parallel, - mock_get_items, - item_type, - layer, + mock_dissolve, + mock_dissolve_exterior, + mock_df_read_file, + name, dissolve, set_area, set_length, - key, input_geometry, dissolved_geometry, ): - """Unit test of Geometry _process method. - - Args: - mock_geometry_init: mock of Geometry init - mock_dissolve_parallel: mock of Geometry _dissolve_parallel - mock_get_items: mock of Geometry _get_items - item_type: item type - layer: layer - dissolve: dissolve flag - set_area: set area flag - set_length: set length flag - key: key - input_geometry: input geometry - dissolved_geometry: dissolved geometry - """ + """Unit test of Geometry _process method.""" if dissolve: - mock_dissolve_parallel.return_value = input_geometry - else: - mock_get_items.return_value = input_geometry - geometry = Geometry("path") - geometry.df = gpd.GeoDataFrame() + if name == "name": + mock_dissolve.return_value = input_geometry + else: + mock_dissolve_exterior.return_value = input_geometry - result = geometry._process(item_type, layer, dissolve, set_area, set_length) + geometry = Geometry("path", "50", "layer", name, "field") + geometry.df = input_geometry - mock_get_items.assert_called_once_with(item_type, layer) - if dissolve: - mock_dissolve_parallel.assert_called_once() - assert set(result.keys()) == set([x[0] for x in dissolved_geometry]) - testing.assert_geodataframe_equal(result[key], dissolved_geometry[0][1]) + geometry._process(dissolve, set_area, set_length) + + testing.assert_geodataframe_equal(geometry.df, dissolved_geometry) if set_area is True: - assert "area_m2" in result[key] + assert "area_m2" in geometry.df if set_area is False: - assert "area_m2" not in result[key] + assert "area_m2" not in geometry.df if set_length is True: - assert "length_m" in result[key] + assert "length_m" in geometry.df if set_length is False: - assert "length_m" not in result[key] + assert "length_m" not in geometry.df - @patch.object(gpd.GeoDataFrame, "to_file") - @patch("lantmateriet.geometry.Geometry.__init__", return_value=None) - def test_unit_save(self, mock_geometry_init, mock_to_file): - """Unit test of Geometry _save method. - - Args: - mock_geometry_init: mock of Geometry init - mock_to_file: mock of GeoDataFrame to_file - """ - geometry = Geometry("path") - geometry.df = gpd.GeoDataFrame({"objekttyp": ["objekttyp"]}) - geometry.config = config.config_50 - - item_type = "ground" - layer = "mark" - all_geometry = { - k: gpd.GeoDataFrame( - {"objekttyp": ["objekttyp"]}, - geometry=[Polygon([(0, 0), (1, 1), (1, 0)])], - crs=config.config_50.espg_3006, - ) - for k in config.config_50[item_type][layer].keys() - if k not in config.config_50.exclude - } + @patch("lantmateriet.geometry.os.makedirs") + @patch("lantmateriet.geometry.gpd.read_file") + def test_unit_save(self, mock_df_read_file, mock_makedirs): + """Unit test of Geometry _save method.""" + geometry = Geometry("path", "50", "layer", "name", "field") + geometry.df = MagicMock() - geometry._save(item_type, layer, all_geometry, "path_to_save") + path = "path_to_save" + file_name = "file" + geometry._save(path, file_name) - mock_to_file.assert_has_calls( + mock_makedirs.assert_called_once() + geometry.df.to_crs.assert_has_calls( [ - call( - f"path_to_save/{file_name}", - driver="GeoJSON", - ) - for k, file_name in config.config_50.ground[layer].items() - if k not in config.config_50.exclude - ], - any_order=True, + call("EPSG:4326"), + call().to_file( + "path_to_save/path/layer/file.geojson", driver="GeoJSON" + ), + ] ) diff --git a/tests/unit/test_unit_communication.py b/tests/unit/test_unit_line.py similarity index 73% rename from tests/unit/test_unit_communication.py rename to tests/unit/test_unit_line.py index 0e77b9e..c127ca9 100644 --- a/tests/unit/test_unit_communication.py +++ b/tests/unit/test_unit_line.py @@ -1,15 +1,15 @@ -"""Communication unit tests.""" +"""Line unit tests.""" from unittest.mock import patch import geopandas as gpd import pytest from lantmateriet import config -from lantmateriet.line import Communication +from lantmateriet.line import Line -class TestUnitCommunication: - """Unit tests of Communication.""" +class TestUnitLine: + """Unit tests of Line.""" @pytest.mark.parametrize( "file_name, detail_level, layer, use_arrow, df, expected_result", @@ -57,7 +57,7 @@ def test_unit_communication_init( df, expected_result, ): - """Unit test of Communication __init__ method. + """Unit test of Line __init__ method. Args; mock_gpd_read_file: mock of gpd read_file @@ -71,26 +71,26 @@ def test_unit_communication_init( mock_gpd_read_file.return_value = df if expected_result is None: with pytest.raises(KeyError): - communication = Communication(file_name, detail_level, layer, use_arrow) + communication = Line(file_name, detail_level, layer, use_arrow) else: - communication = Communication(file_name, detail_level, layer, use_arrow) + communication = Line(file_name, detail_level, layer, use_arrow) mock_gpd_read_file.assert_called_with( file_name, layer=layer, use_arrow=use_arrow ) assert communication.config == expected_result - @patch("lantmateriet.communication.Communication._process") - @patch("lantmateriet.communication.Communication.__init__", return_value=None) + @patch("lantmateriet.communication.Line._process") + @patch("lantmateriet.communication.Line.__init__", return_value=None) def test_unit_communication_process( self, mock_communication_init, mock_communication_process ): """Unit test of communication process method. Args: - mock_communication_init: mock of Communication __init__ - mock_communication_process: mock of Communication _process + mock_communication_init: mock of Line __init__ + mock_communication_process: mock of Line _process """ - communication = Communication("path") + communication = Line("path") communication.item_type = "communication" communication.layer = "vaglinje" communication.dissolve = False @@ -100,18 +100,18 @@ def test_unit_communication_process( "communication", "vaglinje", False, False, True ) - @patch("lantmateriet.communication.Communication._save") - @patch("lantmateriet.communication.Communication.__init__", return_value=None) + @patch("lantmateriet.communication.Line._save") + @patch("lantmateriet.communication.Line.__init__", return_value=None) def test_unit_communication_save( self, mock_communication_init, mock_communication_save ): """Unit test of communication save method. Args: - mock_communication_init: mock of Communication __init__ - mock_communication_save: mock of Communication _save + mock_communication_init: mock of Line __init__ + mock_communication_save: mock of Line _save """ - communication = Communication("path") + communication = Line("path") communication.item_type = "communication" communication.layer = "vaglinje" diff --git a/tests/unit/test_unit_construction.py b/tests/unit/test_unit_point.py similarity index 72% rename from tests/unit/test_unit_construction.py rename to tests/unit/test_unit_point.py index 23b20c1..9218893 100644 --- a/tests/unit/test_unit_construction.py +++ b/tests/unit/test_unit_point.py @@ -1,15 +1,15 @@ -"""Construction unit tests.""" +"""Point unit tests.""" from unittest.mock import patch import geopandas as gpd import pytest from lantmateriet import config -from lantmateriet.construction import Construction +from lantmateriet.point import Point -class TestUnitConstruction: - """Unit tests of Construction.""" +class TestUnitPoint: + """Unit tests of Point.""" @pytest.mark.parametrize( "file_name, detail_level, layer, use_arrow, df, expected_result", @@ -57,7 +57,7 @@ def test_unit_construction_init( df, expected_result, ): - """Unit test of Construction __init__ method. + """Unit test of Point __init__ method. Args; mock_gpd_read_file: mock of gpd read_file @@ -71,26 +71,26 @@ def test_unit_construction_init( mock_gpd_read_file.return_value = df if expected_result is None: with pytest.raises(KeyError): - construction = Construction(file_name, detail_level, layer, use_arrow) + construction = Point(file_name, detail_level, layer, use_arrow) else: - construction = Construction(file_name, detail_level, layer, use_arrow) + construction = Point(file_name, detail_level, layer, use_arrow) mock_gpd_read_file.assert_called_with( file_name, layer=layer, use_arrow=use_arrow ) assert construction.config == expected_result - @patch("lantmateriet.construction.Construction._process") - @patch("lantmateriet.construction.Construction.__init__", return_value=None) + @patch("lantmateriet.construction.Point._process") + @patch("lantmateriet.construction.Point.__init__", return_value=None) def test_unit_construction_process( self, mock_construction_init, mock_construction_process ): - """Unit test of Construction process method. + """Unit test of Point process method. Args: - mock_construction_init: mock of Construction __init__ - mock_construction_process: mock of Construction _process + mock_construction_init: mock of Point __init__ + mock_construction_process: mock of Point _process """ - construction = Construction("path") + construction = Point("path") construction.item_type = "construction" construction.layer = "byggnad" construction.dissolve = True @@ -100,18 +100,18 @@ def test_unit_construction_process( "construction", "byggnad", True, True, True ) - @patch("lantmateriet.construction.Construction._save") - @patch("lantmateriet.construction.Construction.__init__", return_value=None) + @patch("lantmateriet.construction.Point._save") + @patch("lantmateriet.construction.Point.__init__", return_value=None) def test_unit_construction_save( self, mock_construction_init, mock_construction_save ): """Unit test of construction save method. Args: - mock_construction_init: mock of Construction __init__ - mock_construction_save: mock of Construction _save + mock_construction_init: mock of Point __init__ + mock_construction_save: mock of Point _save """ - construction = Construction("path") + construction = Point("path") construction.item_type = "construction" construction.layer = "byggnad" diff --git a/tests/unit/test_unit_ground.py b/tests/unit/test_unit_polygon.py similarity index 76% rename from tests/unit/test_unit_ground.py rename to tests/unit/test_unit_polygon.py index 2738f96..1fe7542 100644 --- a/tests/unit/test_unit_ground.py +++ b/tests/unit/test_unit_polygon.py @@ -1,16 +1,16 @@ -"""Ground unit tests.""" +"""Polygon unit tests.""" from unittest.mock import patch import geopandas as gpd import pytest from lantmateriet import config -from lantmateriet.polygon import Ground +from lantmateriet.polygon import Polygon from shapely.geometry import Point -class TestUnitGround: - """Unit tests of Ground.""" +class TestUnitPolygon: + """Unit tests of Polygon.""" @pytest.mark.parametrize( "file_name, detail_level, layer, use_arrow, df, expected_result", @@ -54,7 +54,7 @@ def test_unit_ground_init( df, expected_result, ): - """Unit test of Ground __init__ method. + """Unit test of Polygon __init__ method. Args; mcck_gpd_read_file: mock of gpd read_file @@ -68,31 +68,31 @@ def test_unit_ground_init( mcck_gpd_read_file.return_value = df if expected_result is None: with pytest.raises(KeyError): - ground = Ground(file_name, detail_level, layer, use_arrow) + ground = Polygon(file_name, detail_level, layer, use_arrow) else: - ground = Ground(file_name, detail_level, layer, use_arrow) + ground = Polygon(file_name, detail_level, layer, use_arrow) mcck_gpd_read_file.assert_called_with( file_name, layer=layer, use_arrow=use_arrow ) assert ground.config == expected_result @patch( - "lantmateriet.ground.Ground._process", + "lantmateriet.ground.Polygon._process", return_value={ "Sverige": gpd.GeoDataFrame( {"geometry": [Point(0, 0), Point(0, 1)], "objekttyp": "Sverige"} ) }, ) - @patch("lantmateriet.ground.Ground.__init__", return_value=None) + @patch("lantmateriet.ground.Polygon.__init__", return_value=None) def test_unit_ground_process(self, mock_ground_init, mock_ground_process): - """Unit test of Ground process method. + """Unit test of Polygon process method. Args: - mock_ground_init: mock of Ground __init__ - mock_ground_process: mock of Ground _process + mock_ground_init: mock of Polygon __init__ + mock_ground_process: mock of Polygon _process """ - ground = Ground("path") + ground = Polygon("path") ground.item_type = "ground" ground.layer = "mark" ground.dissolve = True @@ -101,16 +101,16 @@ def test_unit_ground_process(self, mock_ground_init, mock_ground_process): ground.process() mock_ground_process.assert_called_once_with("ground", "mark", True, True, True) - @patch("lantmateriet.ground.Ground._save") - @patch("lantmateriet.ground.Ground.__init__", return_value=None) + @patch("lantmateriet.ground.Polygon._save") + @patch("lantmateriet.ground.Polygon.__init__", return_value=None) def test_unit_ground_save(self, mock_ground_init, mock_ground_save): - """Unit test of Ground save method. + """Unit test of Polygon save method. Args: - mock_ground_init: mock of Ground __init__ - mock_ground_save: mock of Ground _save + mock_ground_init: mock of Polygon __init__ + mock_ground_save: mock of Polygon _save """ - ground = Ground("path") + ground = Polygon("path") ground.item_type = "ground" ground.layer = "mark" ground.config = config.config_50 From bed520790eba3cb276d96345c97199d1294bb9a9 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 23:00:10 +0200 Subject: [PATCH 10/25] Update Line, Point and Polygon unit tests --- tests/unit/test_unit_line.py | 133 +++++------------------------- tests/unit/test_unit_point.py | 133 +++++------------------------- tests/unit/test_unit_polygon.py | 139 +++++--------------------------- 3 files changed, 66 insertions(+), 339 deletions(-) diff --git a/tests/unit/test_unit_line.py b/tests/unit/test_unit_line.py index c127ca9..3d73477 100644 --- a/tests/unit/test_unit_line.py +++ b/tests/unit/test_unit_line.py @@ -2,120 +2,31 @@ from unittest.mock import patch -import geopandas as gpd -import pytest -from lantmateriet import config from lantmateriet.line import Line class TestUnitLine: """Unit tests of Line.""" - @pytest.mark.parametrize( - "file_name, detail_level, layer, use_arrow, df, expected_result", - [ - ( - "path", - "50", - "vaglinje", - True, - gpd.GeoDataFrame( - { - "objekttyp": [ - k for k in config.config_50.communication["vaglinje"].keys() - ] - } - ), - config.config_50, - ), - ( - "path", - "50", - "vaglinje", - True, - gpd.GeoDataFrame( - { - "objekttyp": [ - k - for k in config.config_50.communication["vaglinje"].keys() - if k not in {"Motorväg"} - ] - } - ), - None, - ), - ], - ) - @patch("lantmateriet.geometry.gpd.read_file") - def test_unit_communication_init( - self, - mock_gpd_read_file, - file_name, - detail_level, - layer, - use_arrow, - df, - expected_result, - ): - """Unit test of Line __init__ method. - - Args; - mock_gpd_read_file: mock of gpd read_file - file_name: file_name - detail_level: detail_level - layer: layer - use_arrow: arrow flag - df: dataframe - expected_result: expected result - """ - mock_gpd_read_file.return_value = df - if expected_result is None: - with pytest.raises(KeyError): - communication = Line(file_name, detail_level, layer, use_arrow) - else: - communication = Line(file_name, detail_level, layer, use_arrow) - mock_gpd_read_file.assert_called_with( - file_name, layer=layer, use_arrow=use_arrow - ) - assert communication.config == expected_result - - @patch("lantmateriet.communication.Line._process") - @patch("lantmateriet.communication.Line.__init__", return_value=None) - def test_unit_communication_process( - self, mock_communication_init, mock_communication_process - ): - """Unit test of communication process method. - - Args: - mock_communication_init: mock of Line __init__ - mock_communication_process: mock of Line _process - """ - communication = Line("path") - communication.item_type = "communication" - communication.layer = "vaglinje" - communication.dissolve = False - - communication.process() - mock_communication_process.assert_called_once_with( - "communication", "vaglinje", False, False, True - ) - - @patch("lantmateriet.communication.Line._save") - @patch("lantmateriet.communication.Line.__init__", return_value=None) - def test_unit_communication_save( - self, mock_communication_init, mock_communication_save - ): - """Unit test of communication save method. - - Args: - mock_communication_init: mock of Line __init__ - mock_communication_save: mock of Line _save - """ - communication = Line("path") - communication.item_type = "communication" - communication.layer = "vaglinje" - - communication.save({}, "path") - mock_communication_save.assert_called_once_with( - "communication", "vaglinje", {}, "path" - ) + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_line_init(self, mock_geometry): + """Unit test of Line __init__ method.""" + line = Line("path", "50", "layer", "name", "field") + assert line.dissolve is False + mock_geometry.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._process") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_line_process(self, mock_geometry, mock_geometry_process): + """Unit test of Line process method.""" + line = Line("path", "50", "layer", "name", "field") + line.process(False) + mock_geometry_process.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._save") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_line_save(self, mock_geometry, mock_geometry_save): + """Unit test of Line save method.""" + line = Line("path", "50", "layer", "name", "field") + line.save("path", "file") + mock_geometry_save.assert_called_once() diff --git a/tests/unit/test_unit_point.py b/tests/unit/test_unit_point.py index 9218893..2880013 100644 --- a/tests/unit/test_unit_point.py +++ b/tests/unit/test_unit_point.py @@ -2,120 +2,31 @@ from unittest.mock import patch -import geopandas as gpd -import pytest -from lantmateriet import config from lantmateriet.point import Point class TestUnitPoint: """Unit tests of Point.""" - @pytest.mark.parametrize( - "file_name, detail_level, layer, use_arrow, df, expected_result", - [ - ( - "path", - "50", - "byggnad", - True, - gpd.GeoDataFrame( - { - "objekttyp": [ - k for k in config.config_50.construction["byggnad"].keys() - ] - } - ), - config.config_50, - ), - ( - "path", - "50", - "byggnad", - True, - gpd.GeoDataFrame( - { - "objekttyp": [ - k - for k in config.config_50.construction["byggnad"].keys() - if k not in {"Bostad"} - ] - } - ), - None, - ), - ], - ) - @patch("lantmateriet.geometry.gpd.read_file") - def test_unit_construction_init( - self, - mock_gpd_read_file, - file_name, - detail_level, - layer, - use_arrow, - df, - expected_result, - ): - """Unit test of Point __init__ method. - - Args; - mock_gpd_read_file: mock of gpd read_file - file_name: file_name - detail_level: detail_level - layer: layer - use_arrow: arrow flag - df: dataframe - expected_result: expected result - """ - mock_gpd_read_file.return_value = df - if expected_result is None: - with pytest.raises(KeyError): - construction = Point(file_name, detail_level, layer, use_arrow) - else: - construction = Point(file_name, detail_level, layer, use_arrow) - mock_gpd_read_file.assert_called_with( - file_name, layer=layer, use_arrow=use_arrow - ) - assert construction.config == expected_result - - @patch("lantmateriet.construction.Point._process") - @patch("lantmateriet.construction.Point.__init__", return_value=None) - def test_unit_construction_process( - self, mock_construction_init, mock_construction_process - ): - """Unit test of Point process method. - - Args: - mock_construction_init: mock of Point __init__ - mock_construction_process: mock of Point _process - """ - construction = Point("path") - construction.item_type = "construction" - construction.layer = "byggnad" - construction.dissolve = True - - construction.process() - mock_construction_process.assert_called_once_with( - "construction", "byggnad", True, True, True - ) - - @patch("lantmateriet.construction.Point._save") - @patch("lantmateriet.construction.Point.__init__", return_value=None) - def test_unit_construction_save( - self, mock_construction_init, mock_construction_save - ): - """Unit test of construction save method. - - Args: - mock_construction_init: mock of Point __init__ - mock_construction_save: mock of Point _save - """ - construction = Point("path") - construction.item_type = "construction" - construction.layer = "byggnad" - - construction.save({}, "path") - mock_construction_save.assert_called_once_with( - "construction", "byggnad", {}, "path" - ) + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_point_init(self, mock_geometry): + """Unit test of Point __init__ method.""" + point = Point("path", "50", "layer", "name", "field") + assert point.dissolve is False + mock_geometry.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._process") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_point_process(self, mock_geometry, mock_geometry_process): + """Unit test of Point process method.""" + point = Point("path", "50", "layer", "name", "field") + point.process(False) + mock_geometry_process.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._save") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_point_save(self, mock_geometry, mock_geometry_save): + """Unit test of Point save method.""" + point = Point("path", "50", "layer", "name", "field") + point.save("path", "file") + mock_geometry_save.assert_called_once() diff --git a/tests/unit/test_unit_polygon.py b/tests/unit/test_unit_polygon.py index 1fe7542..877a7a1 100644 --- a/tests/unit/test_unit_polygon.py +++ b/tests/unit/test_unit_polygon.py @@ -2,126 +2,31 @@ from unittest.mock import patch -import geopandas as gpd -import pytest -from lantmateriet import config from lantmateriet.polygon import Polygon -from shapely.geometry import Point class TestUnitPolygon: """Unit tests of Polygon.""" - @pytest.mark.parametrize( - "file_name, detail_level, layer, use_arrow, df, expected_result", - [ - ( - "path", - "50", - "mark", - True, - gpd.GeoDataFrame( - {"objekttyp": [k for k in config.config_50.ground["mark"].keys()]} - ), - config.config_50, - ), - ( - "path", - "50", - "mark", - True, - gpd.GeoDataFrame( - { - "objekttyp": [ - k - for k in config.config_50.ground["mark"].keys() - if k not in {"Sjö"} - ] - } - ), - None, - ), - ], - ) - @patch("lantmateriet.geometry.gpd.read_file") - def test_unit_ground_init( - self, - mcck_gpd_read_file, - file_name, - detail_level, - layer, - use_arrow, - df, - expected_result, - ): - """Unit test of Polygon __init__ method. - - Args; - mcck_gpd_read_file: mock of gpd read_file - file_name: file_name - detail_level: detail_level - layer: layer - use_arrow: arrow flag - df: dataframe - expected_result: expected result - """ - mcck_gpd_read_file.return_value = df - if expected_result is None: - with pytest.raises(KeyError): - ground = Polygon(file_name, detail_level, layer, use_arrow) - else: - ground = Polygon(file_name, detail_level, layer, use_arrow) - mcck_gpd_read_file.assert_called_with( - file_name, layer=layer, use_arrow=use_arrow - ) - assert ground.config == expected_result - - @patch( - "lantmateriet.ground.Polygon._process", - return_value={ - "Sverige": gpd.GeoDataFrame( - {"geometry": [Point(0, 0), Point(0, 1)], "objekttyp": "Sverige"} - ) - }, - ) - @patch("lantmateriet.ground.Polygon.__init__", return_value=None) - def test_unit_ground_process(self, mock_ground_init, mock_ground_process): - """Unit test of Polygon process method. - - Args: - mock_ground_init: mock of Polygon __init__ - mock_ground_process: mock of Polygon _process - """ - ground = Polygon("path") - ground.item_type = "ground" - ground.layer = "mark" - ground.dissolve = True - ground.config = config.config_50 - - ground.process() - mock_ground_process.assert_called_once_with("ground", "mark", True, True, True) - - @patch("lantmateriet.ground.Polygon._save") - @patch("lantmateriet.ground.Polygon.__init__", return_value=None) - def test_unit_ground_save(self, mock_ground_init, mock_ground_save): - """Unit test of Polygon save method. - - Args: - mock_ground_init: mock of Polygon __init__ - mock_ground_save: mock of Polygon _save - """ - ground = Polygon("path") - ground.item_type = "ground" - ground.layer = "mark" - ground.config = config.config_50 - expected_data = { - k: v - for k, v in config.config_50.ground["mark"].items() - if k not in config.config_50.exteriorise - } - - ground.save(config.config_50.ground["mark"], "path") - - mock_ground_save.assert_called_once_with( - "ground", "mark", expected_data, "path" - ) + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_polygon_init(self, mock_geometry): + """Unit test of Polygon __init__ method.""" + polygon = Polygon("path", "50", "layer", "name", "field") + assert polygon.dissolve is True + mock_geometry.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._process") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_polygon_process(self, mock_geometry, mock_geometry_process): + """Unit test of Polygon process method.""" + polygon = Polygon("path", "50", "layer", "name", "field") + polygon.process(False) + mock_geometry_process.assert_called_once() + + @patch("lantmateriet.geometry.Geometry._save") + @patch("lantmateriet.geometry.Geometry.__init__") + def test_unit_polygon_save(self, mock_geometry, mock_geometry_save): + """Unit test of Polygon save method.""" + polygon = Polygon("path", "50", "layer", "name", "field") + polygon.save("path", "file") + mock_geometry_save.assert_called_once() From e2e4c8d5243ea63479a98a6a0d28394c274fa73a Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sat, 4 May 2024 23:16:28 +0200 Subject: [PATCH 11/25] Update integration tests --- src/lantmateriet/point.py | 10 ++--- tests/integration/test_integration_ground.py | 33 ----------------- tests/integration/test_integration_line.py | 18 ++++++--- tests/integration/test_integration_point.py | 17 ++++++--- tests/integration/test_integration_polygon.py | 37 +++++++++++++++++++ tests/unit/test_unit_geometry.py | 2 +- tests/unit/test_unit_point.py | 2 +- 7 files changed, 65 insertions(+), 54 deletions(-) delete mode 100644 tests/integration/test_integration_ground.py create mode 100644 tests/integration/test_integration_polygon.py diff --git a/src/lantmateriet/point.py b/src/lantmateriet/point.py index efa66cf..f8b3b2a 100644 --- a/src/lantmateriet/point.py +++ b/src/lantmateriet/point.py @@ -26,13 +26,9 @@ def __init__( super().__init__(file_path, detail_level, layer, name, field) self.dissolve = False - def process(self, set_length: bool = True) -> None: - """Process all communication data items. - - Args: - set_length: set length column - """ - self._process(self.dissolve, False, set_length) + def process(self) -> None: + """Process all communication data items.""" + self._process(self.dissolve, False, False) def save(self, save_path: str, file: str) -> None: """Save processed communication items in EPSG:4326 as GeoJSON. diff --git a/tests/integration/test_integration_ground.py b/tests/integration/test_integration_ground.py deleted file mode 100644 index f8686b4..0000000 --- a/tests/integration/test_integration_ground.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Ground integration tests.""" - -import geopandas as gpd -import pandas as pd -from geopandas import testing -from lantmateriet.polygon import Ground - -test_mark_geojson = gpd.read_file( - "tests/fixtures/test_integration_ground_mark.geojson", layer="mark", use_arrow=True -) -test_mark_geojson.to_file( - "tests/fixtures/test_integration_ground_mark.gpkg", layer="mark", driver="GPKG" -) - -test_mark_result = gpd.read_file( - "tests/fixtures/test_integration_ground_mark_result.geojson", - layer="mark", - use_arrow=True, -) - - -class TestIntegrationGround: - """Integration test of Ground.""" - - def test_integration_get_ground_items(self): - """Integration test of Ground processd.""" - ground = Ground( - "tests/fixtures/test_integration_ground_mark.gpkg", "50", "mark", True - ) - df = ground.process() - df = pd.concat([v for _, v in df.items()], ignore_index=True) - - testing.assert_geodataframe_equal(df, test_mark_result, check_like=True) diff --git a/tests/integration/test_integration_line.py b/tests/integration/test_integration_line.py index d8c1b8e..74e58ac 100644 --- a/tests/integration/test_integration_line.py +++ b/tests/integration/test_integration_line.py @@ -1,7 +1,6 @@ """Line integration tests.""" import geopandas as gpd -import pandas as pd from geopandas import testing from lantmateriet.line import Line @@ -19,8 +18,13 @@ test_vaglinje_result = gpd.read_file( "tests/fixtures/test_integration_communication_vaglinje_result.geojson", layer="vaglinje", + where="objekttyp='Motorväg'", + engine="pyogrio", use_arrow=True, ) +test_vaglinje_result["objekttypnr"] = test_vaglinje_result["objekttypnr"].astype( + "int64" +) class TestIntegrationLine: @@ -28,13 +32,15 @@ class TestIntegrationLine: def test_integration_get_buiding_items(self): """Integration test of Line process.""" - communication = Line( + line = Line( "tests/fixtures/test_integration_communication_vaglinje.gpkg", "50", "vaglinje", - True, + "Motorväg", + "objekttyp", ) - df = communication.process() - df = pd.concat([v for _, v in df.items()], ignore_index=True) + line.process() - testing.assert_geodataframe_equal(df, test_vaglinje_result, check_like=True) + testing.assert_geodataframe_equal( + line.df, test_vaglinje_result, check_like=True + ) diff --git a/tests/integration/test_integration_point.py b/tests/integration/test_integration_point.py index ab581b0..7c2814a 100644 --- a/tests/integration/test_integration_point.py +++ b/tests/integration/test_integration_point.py @@ -1,7 +1,6 @@ """Point integration tests.""" import geopandas as gpd -import pandas as pd from geopandas import testing from lantmateriet.point import Point @@ -19,8 +18,12 @@ test_byggnad_result = gpd.read_file( "tests/fixtures/test_integration_construction_byggnad_result.geojson", layer="byggnad", + where="objekttyp='Bostad'", + engine="pyogrio", use_arrow=True, ) +test_byggnad_result.drop(columns=["area_m2", "length_m"], inplace=True) +test_byggnad_result["objekttypnr"] = test_byggnad_result["objekttypnr"].astype("int64") class TestIntegrationPoint: @@ -28,13 +31,15 @@ class TestIntegrationPoint: def test_integration_get_buiding_items(self): """Integration test of Point process.""" - construction = Point( + point = Point( "tests/fixtures/test_integration_construction_byggnad.gpkg", "50", "byggnad", - True, + "Bostad", + "objekttyp", ) - df = construction.process() - df = pd.concat([v for _, v in df.items()], ignore_index=True) + point.process() - testing.assert_geodataframe_equal(df, test_byggnad_result, check_like=True) + testing.assert_geodataframe_equal( + point.df, test_byggnad_result, check_like=True + ) diff --git a/tests/integration/test_integration_polygon.py b/tests/integration/test_integration_polygon.py new file mode 100644 index 0000000..7816bd2 --- /dev/null +++ b/tests/integration/test_integration_polygon.py @@ -0,0 +1,37 @@ +"""Polygon integration tests.""" + +import geopandas as gpd +from geopandas import testing +from lantmateriet.polygon import Polygon + +test_mark_geojson = gpd.read_file( + "tests/fixtures/test_integration_ground_mark.geojson", layer="mark", use_arrow=True +) +test_mark_geojson.to_file( + "tests/fixtures/test_integration_ground_mark.gpkg", layer="mark", driver="GPKG" +) + +test_mark_result = gpd.read_file( + "tests/fixtures/test_integration_ground_mark_result.geojson", + layer="mark", + where="objekttyp='Sjö'", + engine="pyogrio", + use_arrow=True, +) + + +class TestIntegrationPolygon: + """Integration test of Polygon.""" + + def test_integration_get_ground_items(self): + """Integration test of Polygon processd.""" + polygon = Polygon( + "tests/fixtures/test_integration_ground_mark.gpkg", + "50", + "mark", + "Sjö", + "objekttyp", + ) + polygon.process() + + testing.assert_geodataframe_equal(polygon.df, test_mark_result, check_like=True) diff --git a/tests/unit/test_unit_geometry.py b/tests/unit/test_unit_geometry.py index 3e6680a..4db4a6e 100644 --- a/tests/unit/test_unit_geometry.py +++ b/tests/unit/test_unit_geometry.py @@ -1,6 +1,6 @@ """Geometry unit tests.""" -from unittest.mock import call, patch, MagicMock +from unittest.mock import MagicMock, call, patch import geopandas as gpd import numpy as np diff --git a/tests/unit/test_unit_point.py b/tests/unit/test_unit_point.py index 2880013..de638d6 100644 --- a/tests/unit/test_unit_point.py +++ b/tests/unit/test_unit_point.py @@ -20,7 +20,7 @@ def test_unit_point_init(self, mock_geometry): def test_unit_point_process(self, mock_geometry, mock_geometry_process): """Unit test of Point process method.""" point = Point("path", "50", "layer", "name", "field") - point.process(False) + point.process() mock_geometry_process.assert_called_once() @patch("lantmateriet.geometry.Geometry._save") From a9c64b95b3f0a83e152ccb415cbd38b46484b110 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 5 May 2024 10:10:38 +0200 Subject: [PATCH 12/25] Parametrise target path in extraction --- src/lantmateriet/cli.py | 7 ++++--- src/lantmateriet/extract.py | 23 ++++++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/lantmateriet/cli.py b/src/lantmateriet/cli.py index 686e945..f22c021 100644 --- a/src/lantmateriet/cli.py +++ b/src/lantmateriet/cli.py @@ -28,10 +28,11 @@ def download_all(order_id: str, save_path: str): @app.command() -def extract_all(path: str): +def extract_all(source_path: str, target_path): """Extract geojson from gpkg files. Args: - path: path to search for files + source_path: path to search for files + target_path: path to save extracted files to """ - extract(path) + extract(source_path, target_path) diff --git a/src/lantmateriet/extract.py b/src/lantmateriet/extract.py index f6868d2..793f71d 100644 --- a/src/lantmateriet/extract.py +++ b/src/lantmateriet/extract.py @@ -31,7 +31,7 @@ config = Config50() -def save_sweden_base(processed_geo_objects): +def save_sweden_base(target_path, processed_geo_objects): """Save sweden base from all dissolved ground.""" df_sverige = ( pd.concat([item for item in processed_geo_objects]) @@ -42,15 +42,15 @@ def save_sweden_base(processed_geo_objects): df_sverige["length_m"] = df_sverige.length df_sverige = df_sverige.to_crs(config_50.epsg_4326) df_sverige.to_file( - "tmp/mark_sverige/mark/00_sverige" + ".geojson", driver="GeoJSON" + f"{target_path}/mark_sverige/mark/00_sverige" + ".geojson", driver="GeoJSON" ) -def parallel_process(geo_object, output_name): +def parallel_process(geo_object, target_path, output_name): """Parallel process.""" if geo_object.df is not None: geo_object.process() - geo_object.save("tmp2", output_name) + geo_object.save(target_path, output_name) if "mark" in geo_object._file_path: return geo_object.df.dissolve().explode(index_parts=False) @@ -58,7 +58,7 @@ def parallel_process(geo_object, output_name): return None -def extract_geojson(file: str, layer: str): +def extract_geojson(target_path: str, file: str, layer: str): """Extract and save geojson files.""" logger.info(f"Working on {file} - {layer}") field = "objekttyp" @@ -73,32 +73,33 @@ def extract_geojson(file: str, layer: str): with Pool(WORKER_INNER) as pool: all_geo = [ - (geometry_object(file, "50", layer, name, field), output_name) + (geometry_object(file, "50", layer, name, field), target_path, output_name) for name, output_name in normalised_names.items() if name not in config_50.exclude ] processed_geo_objects = pool.starmap(parallel_process, all_geo) if "mark" in file: - save_sweden_base(processed_geo_objects) + save_sweden_base(target_path, processed_geo_objects) logger.info(f"Saved {file} - {layer}") -def extract(path: str): +def extract(source_path: str, target_path): """Run extraction of gkpg to geojson. Args: - path: path to search for gkpg files + source_path: path to search for files + target_path: path to save extracted files to """ - file_pattern = str(Path(path) / "*.gpkg") + file_pattern = str(Path(source_path) / "*.gpkg") files = glob.glob(file_pattern) all_files = [] for file in files: available_layers = fiona.listlayers(file) for layer in available_layers: - all_files.append((file, layer)) + all_files.append((target_path, file, layer)) with Pool(WORKER_OUTER) as pool: pool.starmap(extract_geojson, all_files) From d52bf72e2cdc5f7296762ce0bce1be4130f4eeed Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 12 May 2024 21:15:03 +0200 Subject: [PATCH 13/25] Add ray --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9756c43..96da9cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ "unidecode ~= 1.3", "tqdm ~= 4.66", "typer ~= 0.12", + "ray ~= 2.12" ] [project.optional-dependencies] From 5518cc9e9cf61a819f0d253f561d9ed89166aa0e Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 12 May 2024 21:15:25 +0200 Subject: [PATCH 14/25] Move constant to a constant variable --- src/lantmateriet/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index 8c2392a..e582e43 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -12,6 +12,7 @@ STATUS_OK = 200 BLOCK_SIZE = 1024 +REQUEST_TIMEOUT = 200 ORDER_URL = "https://api.lantmateriet.se" DOWNLOAD_URL = "https://download-geotorget.lantmateriet.se" TOKEN = os.environ["LANTMATERIET_API_TOKEN"] @@ -35,7 +36,7 @@ def get_request(url: str) -> requests.Response: logger.debug(f"Fetching from {url}.") headers = {"Authorization": f"Bearer {TOKEN}"} - response = requests.get(url, headers=headers, timeout=200, stream=True) + response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, stream=True) if response.status_code != STATUS_OK: raise requests.exceptions.HTTPError(f"Could not request from {url}.") From c15313e0042c9ec5b297421b325ecca8a073b176 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 12 May 2024 21:18:09 +0200 Subject: [PATCH 15/25] Add return types in extract --- src/lantmateriet/extract.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/lantmateriet/extract.py b/src/lantmateriet/extract.py index 793f71d..29f0d5d 100644 --- a/src/lantmateriet/extract.py +++ b/src/lantmateriet/extract.py @@ -3,9 +3,10 @@ import glob import logging from pathlib import Path -from typing import Union +from typing import Optional, Union import fiona +import geopandas as gpd import pandas as pd import shapely from lantmateriet.config import Config50, config_50 @@ -31,7 +32,7 @@ config = Config50() -def save_sweden_base(target_path, processed_geo_objects): +def save_sweden_base(target_path, processed_geo_objects) -> None: """Save sweden base from all dissolved ground.""" df_sverige = ( pd.concat([item for item in processed_geo_objects]) @@ -46,7 +47,9 @@ def save_sweden_base(target_path, processed_geo_objects): ) -def parallel_process(geo_object, target_path, output_name): +def parallel_process( + geo_object, target_path, output_name +) -> Optional[gpd.GeoDataFrame]: """Parallel process.""" if geo_object.df is not None: geo_object.process() @@ -58,7 +61,7 @@ def parallel_process(geo_object, target_path, output_name): return None -def extract_geojson(target_path: str, file: str, layer: str): +def extract_geojson(target_path: str, file: str, layer: str) -> None: """Extract and save geojson files.""" logger.info(f"Working on {file} - {layer}") field = "objekttyp" @@ -85,7 +88,7 @@ def extract_geojson(target_path: str, file: str, layer: str): logger.info(f"Saved {file} - {layer}") -def extract(source_path: str, target_path): +def extract(source_path: str, target_path) -> None: """Run extraction of gkpg to geojson. Args: From b5be58da500a02df34c0b0fada8566c0a1a7c177 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 12 May 2024 21:20:28 +0200 Subject: [PATCH 16/25] Bumpt ray 2.12->2.21 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 96da9cb..1a9c145 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "unidecode ~= 1.3", "tqdm ~= 4.66", "typer ~= 0.12", - "ray ~= 2.12" + "ray ~= 2.21" ] [project.optional-dependencies] From 49a325a92c9633479e2605e36f68c7131ee06420 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Mon, 13 May 2024 21:26:14 +0200 Subject: [PATCH 17/25] Use 3.11 in test action and remove support for python 3.12 --- .github/workflows/github-action-test.yaml | 4 ++-- README.md | 3 +-- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/github-action-test.yaml b/.github/workflows/github-action-test.yaml index c1644e8..39ef5f2 100644 --- a/.github/workflows/github-action-test.yaml +++ b/.github/workflows/github-action-test.yaml @@ -6,7 +6,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v2 @@ -38,4 +38,4 @@ jobs: message: ${{ env.total }}% minColorRange: 50 maxColorRange: 90 - valColorRange: ${{ env.total }} \ No newline at end of file + valColorRange: ${{ env.total }} diff --git a/README.md b/README.md index 8e99a68..ee33184 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@
- + @@ -36,4 +36,3 @@ ## Description Welcome to ifk-lantmateriet. This repo contains code to parse data from Lantmäteriet. - diff --git a/pyproject.toml b/pyproject.toml index 1a9c145..d612f5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ readme = "README.md" authors = [ { name = "Mladen Gibanica", email = "11275336+mgcth@users.noreply.github.com" }, ] -requires-python = ">=3.9" +requires-python = ">=3.10,<3.12" dependencies = [ "geopandas ~= 0.14", "pyogrio ~= 0.7", From 456a1f8f796bedc37651819f5b7ec8cc6d0b0bae Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Tue, 28 May 2024 22:03:12 +0200 Subject: [PATCH 18/25] Skip dtype check in integration tests --- tests/integration/test_integration_line.py | 5 ++++- tests/integration/test_integration_point.py | 5 ++++- tests/integration/test_integration_polygon.py | 7 ++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_integration_line.py b/tests/integration/test_integration_line.py index 74e58ac..c06dabf 100644 --- a/tests/integration/test_integration_line.py +++ b/tests/integration/test_integration_line.py @@ -42,5 +42,8 @@ def test_integration_get_buiding_items(self): line.process() testing.assert_geodataframe_equal( - line.df, test_vaglinje_result, check_like=True + line.df, + test_vaglinje_result, + check_like=True, + check_dtype=False, ) diff --git a/tests/integration/test_integration_point.py b/tests/integration/test_integration_point.py index 7c2814a..f129ac8 100644 --- a/tests/integration/test_integration_point.py +++ b/tests/integration/test_integration_point.py @@ -41,5 +41,8 @@ def test_integration_get_buiding_items(self): point.process() testing.assert_geodataframe_equal( - point.df, test_byggnad_result, check_like=True + point.df, + test_byggnad_result, + check_like=True, + check_dtype=False, ) diff --git a/tests/integration/test_integration_polygon.py b/tests/integration/test_integration_polygon.py index 7816bd2..f74ffb5 100644 --- a/tests/integration/test_integration_polygon.py +++ b/tests/integration/test_integration_polygon.py @@ -34,4 +34,9 @@ def test_integration_get_ground_items(self): ) polygon.process() - testing.assert_geodataframe_equal(polygon.df, test_mark_result, check_like=True) + testing.assert_geodataframe_equal( + polygon.df, + test_mark_result, + check_like=True, + check_dtype=False, + ) From 8e05a5ac4af2390ce9a0598c21a6dd16eb409705 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Tue, 28 May 2024 22:09:30 +0200 Subject: [PATCH 19/25] test only 3.10 on windows --- .github/workflows/github-action-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-action-test.yaml b/.github/workflows/github-action-test.yaml index 39ef5f2..169615b 100644 --- a/.github/workflows/github-action-test.yaml +++ b/.github/workflows/github-action-test.yaml @@ -6,7 +6,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.10", "3.11"] + python-version: ["3.10"] steps: - uses: actions/checkout@v2 From e47a29c09392eec19f6895a2d73608ab2dfe357f Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Tue, 28 May 2024 22:11:20 +0200 Subject: [PATCH 20/25] Ray doesn't work on windows and python 3.10 and 3.11 --- .github/workflows/github-action-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-action-test.yaml b/.github/workflows/github-action-test.yaml index 169615b..c20f2de 100644 --- a/.github/workflows/github-action-test.yaml +++ b/.github/workflows/github-action-test.yaml @@ -5,8 +5,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.10"] + os: [ubuntu-latest, macos-latest] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v2 From ccf97ad2864cbf1cc905a6dfa664fe1e902ffd78 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Tue, 28 May 2024 22:14:41 +0200 Subject: [PATCH 21/25] Try ray 2.20 and windows --- .github/workflows/github-action-test.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/github-action-test.yaml b/.github/workflows/github-action-test.yaml index c20f2de..39ef5f2 100644 --- a/.github/workflows/github-action-test.yaml +++ b/.github/workflows/github-action-test.yaml @@ -5,7 +5,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, windows-latest, macos-latest] python-version: ["3.10", "3.11"] steps: diff --git a/pyproject.toml b/pyproject.toml index d612f5d..46ab539 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "unidecode ~= 1.3", "tqdm ~= 4.66", "typer ~= 0.12", - "ray ~= 2.21" + "ray ~= 2.20" ] [project.optional-dependencies] From da4d6b6c68c4c233ccf708bdddc221ff3a08ee94 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Thu, 30 May 2024 22:45:53 +0200 Subject: [PATCH 22/25] Rename internal api variable --- src/lantmateriet/api.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lantmateriet/api.py b/src/lantmateriet/api.py index e582e43..9405263 100644 --- a/src/lantmateriet/api.py +++ b/src/lantmateriet/api.py @@ -62,8 +62,10 @@ def __init__(self, order_id: str, save_path: str): Path(save_path).mkdir(exist_ok=True) self._order_enpoint = json.loads(get_request(order_url).content) - download = json.loads(get_request(download_url).content) - self._download_enpoint = {item["title"]: item for item in download} + available_files = json.loads(get_request(download_url).content) + self._available_files_enpoint = { + item["title"]: item for item in available_files + } @property def order(self) -> dict[str, str]: @@ -73,7 +75,7 @@ def order(self) -> dict[str, str]: @property def available_files(self) -> list[str]: """Get available files.""" - return list(self._download_enpoint.keys()) + return list(self._available_files_enpoint.keys()) def download(self, title: str) -> None: """Download file by title. @@ -83,7 +85,7 @@ def download(self, title: str) -> None: """ logger.info(f"Started downloading {title}") - url = self._download_enpoint[title]["href"] + url = self._available_files_enpoint[title]["href"] response = get_request(url) buffer = self._download(response) From 74d500750f292d8a1fb159839ef8ae2e7cb80677 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Thu, 30 May 2024 22:48:32 +0200 Subject: [PATCH 23/25] Don't import Config50 class, not used --- src/lantmateriet/extract.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lantmateriet/extract.py b/src/lantmateriet/extract.py index 29f0d5d..318a28a 100644 --- a/src/lantmateriet/extract.py +++ b/src/lantmateriet/extract.py @@ -9,7 +9,7 @@ import geopandas as gpd import pandas as pd import shapely -from lantmateriet.config import Config50, config_50 +from lantmateriet.config import config_50 from lantmateriet.line import Line from lantmateriet.point import Point from lantmateriet.polygon import Polygon @@ -25,11 +25,10 @@ shapely.MultiPolygon: Polygon, } -WORKER_INNER = 8 -WORKER_OUTER = 14 +WORKER_INNER = 3 +WORKER_OUTER = 6 logger = logging.getLogger(__name__) -config = Config50() def save_sweden_base(target_path, processed_geo_objects) -> None: From a5259c4d88364e6f0d29158856dec7a1376ddf15 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:24:34 +0200 Subject: [PATCH 24/25] Add docstrings to extract --- src/lantmateriet/extract.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/lantmateriet/extract.py b/src/lantmateriet/extract.py index 318a28a..bc73e46 100644 --- a/src/lantmateriet/extract.py +++ b/src/lantmateriet/extract.py @@ -10,6 +10,7 @@ import pandas as pd import shapely from lantmateriet.config import config_50 +from lantmateriet.geometry import Geometry from lantmateriet.line import Line from lantmateriet.point import Point from lantmateriet.polygon import Polygon @@ -31,8 +32,13 @@ logger = logging.getLogger(__name__) -def save_sweden_base(target_path, processed_geo_objects) -> None: - """Save sweden base from all dissolved ground.""" +def save_sweden_base(target_path: str, processed_geo_objects: Geometry) -> None: + """Save sweden base from all dissolved ground. + + Args: + target_path: save path of object + processed_geo_objects: geometry objects + """ df_sverige = ( pd.concat([item for item in processed_geo_objects]) .dissolve() @@ -47,9 +53,18 @@ def save_sweden_base(target_path, processed_geo_objects) -> None: def parallel_process( - geo_object, target_path, output_name + geo_object: Geometry, target_path: str, output_name: str ) -> Optional[gpd.GeoDataFrame]: - """Parallel process.""" + """Parallel process. + + Args: + geo_object: geometry object + target_path: save path of object + output_name: name of object to save + + Returns: + processed geodataframe + """ if geo_object.df is not None: geo_object.process() geo_object.save(target_path, output_name) @@ -61,7 +76,13 @@ def parallel_process( def extract_geojson(target_path: str, file: str, layer: str) -> None: - """Extract and save geojson files.""" + """Extract and save geojson files. + + Args: + target_path: path to load from + file: file to load + layer: layer to load from file + """ logger.info(f"Working on {file} - {layer}") field = "objekttyp" @@ -87,7 +108,7 @@ def extract_geojson(target_path: str, file: str, layer: str) -> None: logger.info(f"Saved {file} - {layer}") -def extract(source_path: str, target_path) -> None: +def extract(source_path: str, target_path: str) -> None: """Run extraction of gkpg to geojson. Args: From 7edc9fdd7cc3387a706fdaa02d44204adc7c2368 Mon Sep 17 00:00:00 2001 From: Mladen Gibanica <11275336+mgcth@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:00:51 +0200 Subject: [PATCH 25/25] Transform BaseConfig to dataclass --- src/lantmateriet/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lantmateriet/config.py b/src/lantmateriet/config.py index 4418051..8928fae 100644 --- a/src/lantmateriet/config.py +++ b/src/lantmateriet/config.py @@ -6,7 +6,10 @@ - 50: https://www.lantmateriet.se/globalassets/geodata/geodataprodukter/pb-topografi-50-nedladdning-vektor.pdf """ +from dataclasses import dataclass + +@dataclass class BaseConfig: """Base config class."""