From 4a455c049511095ec23b36b3d2d9b1e2ea94461d Mon Sep 17 00:00:00 2001 From: Dobson Date: Fri, 26 Jan 2024 09:08:09 +0000 Subject: [PATCH 1/5] Rename -downloaders->prepare_data (and in imports) -geospatial_operations->utils_geo (and in imports) --- swmmanywhere/downloaders.py | 238 ------------------ swmmanywhere/geospatial_operations.py | 335 -------------------------- tests/test_downloaders.py | 2 +- tests/test_geospatial.py | 2 +- 4 files changed, 2 insertions(+), 575 deletions(-) delete mode 100644 swmmanywhere/downloaders.py delete mode 100644 swmmanywhere/geospatial_operations.py diff --git a/swmmanywhere/downloaders.py b/swmmanywhere/downloaders.py deleted file mode 100644 index 951e5f38..00000000 --- a/swmmanywhere/downloaders.py +++ /dev/null @@ -1,238 +0,0 @@ -# -*- coding: utf-8 -*- -"""Created 2023-12-20. - -@author: Barnaby Dobson -""" - -import os -import shutil -from typing import cast - -import cdsapi -import networkx as nx -import osmnx as ox -import pandas as pd -import requests -import xarray as xr -import yaml -from geopy.geocoders import Nominatim - -# Some minor comment (to remove) - -def get_country(x: float, - y: float) -> dict[int, str]: - """Identify which country a point (x,y) is in. - - Return the two and three letter ISO code for coordinates x (longitude) - and y (latitude) - - Args: - x (float): Longitude. - y (float): Latitude. - - Returns: - dict: A dictionary containing the two and three letter ISO codes. - Example: {2: 'GB', 3: 'GBR'} - """ - # Get the directory of the current module - current_dir = os.path.dirname(os.path.abspath(__file__)) - - # TODO use 'load_yaml_from_defs' - # Create the path to iso_converter.yml - iso_path = os.path.join(current_dir, - "defs", - "iso_converter.yml") - - - # Initialize geolocator - geolocator = Nominatim(user_agent="get_iso") - - # Load ISO code mapping from YAML file - with open(iso_path, "r") as file: - data = yaml.safe_load(file) - - # Get country ISO code from coordinates - location = geolocator.reverse("{0}, {1}".format(y, x), exactly_one=True) - iso_country_code = location.raw.get("address", {}).get("country_code") - iso_country_code = iso_country_code.upper() - - # Return a dictionary with the two and three letter ISO codes - return {2: iso_country_code, 3: data.get(iso_country_code, '')} - -def download_buildings(file_address: str, - x: float, - y: float) -> int: - """Download buildings data based on coordinates and save to a file. - - Args: - file_address (str): File address to save the downloaded data. - x (float): Longitude. - y (float): Latitude. - - Returns: - status_code (int): Reponse status code - """ - # Get three letter ISO code - iso_country_code = get_country(x, y)[3] - - # Construct API URL - api_url = f"https://data.source.coop/vida/google-microsoft-open-buildings/geoparquet/by_country/country_iso={iso_country_code}/{iso_country_code}.parquet" - - # Download data - response = requests.get(api_url) - if response.status_code == 200: - # Save data to the specified file address - with open(file_address, "wb") as file: - file.write(response.content) - print(f"Data downloaded and saved to {file_address}") - else: - print(f"Error downloading data. Status code: {response.status_code}") - return response.status_code - -def download_street(bbox: tuple[float, float, float, float]) -> nx.MultiDiGraph: - """Get street network within a bounding box using OSMNX. - - [CREDIT: Taher Cheghini busn_estimator package] - - Args: - bbox (tuple[float, float, float, float]): Bounding box as tuple in form - of (west, south, east, north) at EPSG:4326. - - Returns: - nx.MultiDiGraph: Street network with type drive and - ``truncate_by_edge set`` to True. - """ - west, south, east, north = bbox - graph = ox.graph_from_bbox( - north, south, east, west, network_type="drive", truncate_by_edge=True - ) - return cast("nx.MultiDiGraph", graph) - -def download_river(bbox: tuple[float, float, float, float]) -> nx.MultiDiGraph: - """Get water network within a bounding box using OSMNX. - - Args: - bbox (tuple[float, float, float, float]): Bounding box as tuple in form - of (west, south, east, north) at EPSG:4326. - - Returns: - nx.MultiDiGraph: River network with type waterway and - ``truncate_by_edge set`` to True. - """ - west, south, east, north = bbox - graph = ox.graph_from_bbox( - north, - south, - east, - west, - truncate_by_edge=True, - custom_filter='["waterway"]') - - return cast("nx.MultiDiGraph", graph) - -def download_elevation(fid: str, - bbox: tuple[float, float, float, float], - api_key: str ='') -> int: - """Download NASADEM elevation data from OpenTopography API. - - Downloads elevation data in GeoTIFF format from OpenTopography API based on - the specified bounding box. - - Args: - fid (str): File path to save the downloaded elevation data. - bbox (tuple): Bounding box coordinates in the format - (minx, miny, maxx, maxy). - api_key (str, optional): Your OpenTopography API key. - Defaults to ''. - - Returns: - status_code (int): Reponse status code - - Raises: - requests.exceptions.RequestException: If there is an error in the API - request. - - Example: - ``` - bbox = (-120, 35, -118, 37) # Example bounding box coordinates - download_elevation('elevation_data.tif', - bbox, - api_key='your_actual_api_key') - ``` - - Note: - To obtain an API key, you need to sign up on the OpenTopography - website. - - """ - minx, miny, maxx, maxy = bbox - url = f'https://portal.opentopography.org/API/globaldem?demtype=NASADEM&south={miny}&north={maxy}&west={minx}&east={maxx}&outputFormat=GTiff&API_Key={api_key}' - - try: - r = requests.get(url, stream=True) - r.raise_for_status() - - with open(fid, 'wb') as rast_file: - shutil.copyfileobj(r.raw, rast_file) - - print('Elevation data downloaded successfully.') - - except requests.exceptions.RequestException as e: - print(f'Error downloading elevation data: {e}') - - return r.status_code - -def download_precipitation(bbox: tuple[float, float, float, float], - start_date: str = '2015-01-01', - end_date: str = '2015-01-05', - username: str = '', - api_key: str = '') -> pd.DataFrame: - """Download precipitation data within bbox from ERA5. - - Register for an account and API key at: https://cds.climate.copernicus.eu. - Produces hourly gridded data (31km grid) in CRS EPSG:4326. - More information at: https://github.com/ecmwf/cdsapi. - - Args: - bbox (tuple): Bounding box coordinates in the format - (minx, miny, maxx, maxy). - start_date (str, optional): Start date. Defaults to '2015-01-01'. - end_date (str, optional): End date. Defaults to '2015-01-05'. - username (str, optional): CDS api username. - Defaults to ''. - api_key (str, optional): CDS api key. Defaults to ''. - - Returns: - df (DataFrame): DataFrame containing downloaded data. - """ - # Notes for docstring: - # looks like api will give nearest point if not in bbox - - # Define the request parameters - request = { - 'product_type': 'reanalysis', - 'format': 'netcdf', - 'variable': 'total_precipitation', - 'date' : '/'.join([start_date,end_date]), - 'time': '/'.join([f'{str(x).zfill(2)}:00' for x in range(24)]), - 'area': bbox, - } - - c = cdsapi.Client(url='https://cds.climate.copernicus.eu/api/v2', - key='{0}:{1}'.format(username, api_key)) - # Get data - c.retrieve('reanalysis-era5-single-levels', - request, - 'download.nc' - ) - - with xr.open_dataset('download.nc') as data: - # Convert the xarray Dataset to a pandas DataFrame - df = data.to_dataframe() - df['unit'] = 'm/hr' - - - # Delete nc file - os.remove('download.nc') - - return df \ No newline at end of file diff --git a/swmmanywhere/geospatial_operations.py b/swmmanywhere/geospatial_operations.py deleted file mode 100644 index 6749f4c7..00000000 --- a/swmmanywhere/geospatial_operations.py +++ /dev/null @@ -1,335 +0,0 @@ -# -*- coding: utf-8 -*- -"""Created 2024-01-20. - -A module containing functions to perform a variety of geospatial operations, -such as reprojecting coordinates and handling raster data. - -@author: Barnaby Dobson -""" -from functools import lru_cache -from typing import Optional - -import geopandas as gpd -import networkx as nx -import numpy as np -import pandas as pd -import pyproj -import rasterio as rst -import rioxarray -from rasterio import features -from scipy.interpolate import RegularGridInterpolator -from shapely import geometry as sgeom -from shapely.strtree import STRtree - -TransformerFromCRS = lru_cache(pyproj.transformer.Transformer.from_crs) - - -def get_utm_epsg(x: float, - y: float, - crs: str | int | pyproj.CRS = 'EPSG:4326', - datum_name: str = "WGS 84"): - """Get the UTM CRS code for a given coordinate. - - Note, this function is taken from GeoPandas and modified to use - for getting the UTM CRS code for a given coordinate. - - Args: - x (float): Longitude in crs - y (float): Latitude in crs - crs (str | int | pyproj.CRS, optional): The CRS of the input - coordinates. Defaults to 'EPSG:4326'. - datum_name (str, optional): The datum name to use for the UTM CRS - - Returns: - str: Formatted EPSG code for the UTM zone. - - Example: - >>> get_utm_epsg(-0.1276, 51.5074) - 'EPSG:32630' - """ - if not isinstance(x, float) or not isinstance(y, float): - raise TypeError("x and y must be floats") - - try: - crs = pyproj.CRS(crs) - except pyproj.exceptions.CRSError: - raise ValueError("Invalid CRS") - - # ensure using geographic coordinates - if pyproj.CRS(crs).is_geographic: - lon = x - lat = y - else: - transformer = TransformerFromCRS(crs, "EPSG:4326", always_xy=True) - lon, lat = transformer.transform(x, y) - utm_crs_list = pyproj.database.query_utm_crs_info( - datum_name=datum_name, - area_of_interest=pyproj.aoi.AreaOfInterest( - west_lon_degree=lon, - south_lat_degree=lat, - east_lon_degree=lon, - north_lat_degree=lat, - ), - ) - return f"{utm_crs_list[0].auth_name}:{utm_crs_list[0].code}" - - -def interp_with_nans(xy: tuple[float,float], - interp: RegularGridInterpolator, - grid: np.ndarray, - values: list[float]) -> float: - """Wrap the interpolation function to handle NaNs. - - Picks the nearest non NaN grid point if the interpolated value is NaN, - otherwise returns the interpolated value. - - Args: - xy (tuple): Coordinate of interest - interp (RegularGridInterpolator): The interpolator object. - grid (np.ndarray): List of xy coordinates of the grid points. - values (list): The list of values at each point in the grid. - - Returns: - float: The interpolated value. - """ - # Call the interpolator - val = float(interp(xy)) - # If the value is NaN, we need to pick nearest non nan grid point - if np.isnan(val): - # Get the distances to all grid points - distances = np.linalg.norm(grid - xy, axis=1) - # Get the indices of the grid points sorted by distance - indices = np.argsort(distances) - # Iterate over the grid points in order of increasing distance - for index in indices: - # If the value at this grid point is not NaN, return it - if not np.isnan(values[index]): - return values[index] - else: - return val - - raise ValueError("No non NaN values found in grid.") - -def interpolate_points_on_raster(x: list[float], - y: list[float], - elevation_fid: str) -> list[float ]: - """Interpolate points on a raster. - - Args: - x (list): X coordinates. - y (list): Y coordinates. - elevation_fid (str): Filepath to elevation raster. - - Returns: - elevation (float): Elevation at point. - """ - with rst.open(elevation_fid) as src: - # Read the raster data - data = src.read(1).astype(float) # Assuming it's a single-band raster - data[data == src.nodata] = None - - # Get the raster's coordinates - x = np.linspace(src.bounds.left, src.bounds.right, src.width) - y = np.linspace(src.bounds.bottom, src.bounds.top, src.height) - - # Define grid - xx, yy = np.meshgrid(x, y) - grid = np.vstack([xx.ravel(), yy.ravel()]).T - values = data.ravel() - - # Define interpolator - interp = RegularGridInterpolator((y,x), - np.flipud(data), - method='linear', - bounds_error=False, - fill_value=None) - # Interpolate for x,y - return [interp_with_nans((y_, x_), interp, grid, values) for x_, y_ in zip(x,y)] - -def reproject_raster(target_crs: str, - fid: str, - new_fid: Optional[str] = None): - """Reproject a raster to a new CRS. - - Args: - target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). - fid (str): Filepath to the raster to reproject. - new_fid (str, optional): Filepath to save the reprojected raster. - Defaults to None, which will just use fid with '_reprojected'. - """ - # Open the raster - with rioxarray.open_rasterio(fid) as raster: - - # Reproject the raster - reprojected = raster.rio.reproject(target_crs) - - # Define the output filepath - if new_fid is None: - new_fid = fid.replace('.tif','_reprojected.tif') - - # Save the reprojected raster - reprojected.rio.to_raster(new_fid) - -def get_transformer(source_crs: str, - target_crs: str) -> pyproj.Transformer: - """Get a transformer object for reprojection. - - Args: - source_crs (str): Source CRS in EPSG format (e.g., EPSG:32630). - target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). - - Returns: - pyproj.Transformer: Transformer object for reprojection. - - Example: - >>> transformer = get_transformer('EPSG:4326', 'EPSG:32630') - >>> transformer.transform(-0.1276, 51.5074) - (699330.1106898375, 5710164.30300683) - """ - return pyproj.Transformer.from_crs(source_crs, - target_crs, - always_xy=True) - -def reproject_df(df: pd.DataFrame, - source_crs: str, - target_crs: str) -> pd.DataFrame: - """Reproject the coordinates in a DataFrame. - - Args: - df (pd.DataFrame): DataFrame with columns 'longitude' and 'latitude'. - source_crs (str): Source CRS in EPSG format (e.g., EPSG:4326). - target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). - """ - # Function to transform coordinates - pts = gpd.points_from_xy(df["longitude"], - df["latitude"], - crs=source_crs).to_crs(target_crs) - df = df.copy() - df['x'] = pts.x - df['y'] = pts.y - return df - -def reproject_graph(G: nx.Graph, - source_crs: str, - target_crs: str) -> nx.Graph: - """Reproject the coordinates in a graph. - - osmnx.projection.project_graph might be suitable if some other behaviour - needs to be captured, but it currently fails the tests so I will ignore for - now. - - Args: - G (nx.Graph): Graph with nodes containing 'x' and 'y' properties. - source_crs (str): Source CRS in EPSG format (e.g., EPSG:4326). - target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). - - Returns: - nx.Graph: Graph with nodes containing 'x' and 'y' properties. - """ - # Create a PyProj transformer for CRS conversion - transformer = get_transformer(source_crs, target_crs) - - # Create a new graph with the converted nodes and edges - G_new = G.copy() - - # Convert and add nodes with 'x', 'y' properties - for node, data in G_new.nodes(data=True): - x, y = transformer.transform(data['x'], data['y']) - data['x'] = x - data['y'] = y - - # Convert and add edges with 'geometry' property - for u, v, data in G_new.edges(data=True): - if 'geometry' in data.keys(): - data['geometry'] = sgeom.LineString(transformer.transform(x, y) - for x, y in data['geometry'].coords) - else: - data['geometry'] = sgeom.LineString([[G_new.nodes[u]['x'], - G_new.nodes[u]['y']], - [G_new.nodes[v]['x'], - G_new.nodes[v]['y']]]) - - return G_new - - -def nearest_node_buffer(points1: dict[str, sgeom.Point], - points2: dict[str, sgeom.Point], - threshold: float) -> dict: - """Find the nearest node within a given buffer threshold. - - Args: - points1 (dict): A dictionary where keys are labels and values are - Shapely points geometries. - points2 (dict): A dictionary where keys are labels and values are - Shapely points geometries. - threshold (float): The maximum distance for a node to be considered - 'nearest'. If no nodes are within this distance, the node is not - included in the output. - - Returns: - dict: A dictionary where keys are labels from points1 and values are - labels from points2 of the nearest nodes within the threshold. - """ - # Convert the keys of points2 to a list - labels2 = list(points2.keys()) - - # Create a spatial index - tree = STRtree(list(points2.values())) - - # Initialize an empty dictionary to store the matching nodes - matching = {} - - # Iterate over points1 - for key, geom in points1.items(): - # Find the nearest node in the spatial index to the current geometry - nearest = tree.nearest(geom) - nearest_geom = points2[labels2[nearest]] - - # If the nearest node is within the threshold, add it to the - # matching dictionary - if geom.buffer(threshold).intersects(nearest_geom): - matching[key] = labels2[nearest] - - # Return the matching dictionary - return matching - -def burn_shape_in_raster(geoms: list[sgeom.LineString], - depth: float, - raster_fid: str, - new_raster_fid: str): - """Burn a depth into a raster along a list of shapely geometries. - - Args: - geoms (list): List of Shapely geometries. - depth (float): Depth to carve. - raster_fid (str): Filepath to input raster. - new_raster_fid (str): Filepath to save the carved raster. - """ - with rst.open(raster_fid) as src: - # read data - data = src.read(1) - data = data.astype(float) - data_mask = data != src.nodata - bool_mask = np.zeros(data.shape, dtype=bool) - for geom in geoms: - # Create a mask for the line - mask = features.geometry_mask([sgeom.mapping(geom)], - out_shape=src.shape, - transform=src.transform, - invert=True) - # modify masked data - bool_mask[mask] = True # Adjust this multiplier as needed - #modify data - data[bool_mask & data_mask] -= depth - # Create a new GeoTIFF with modified values - with rst.open(new_raster_fid, - 'w', - driver='GTiff', - height=src.height, - width=src.width, - count=1, - dtype=data.dtype, - crs=src.crs, - transform=src.transform, - nodata = src.nodata) as dest: - dest.write(data, 1) \ No newline at end of file diff --git a/tests/test_downloaders.py b/tests/test_downloaders.py index 96e94671..f0ec8fac 100644 --- a/tests/test_downloaders.py +++ b/tests/test_downloaders.py @@ -10,7 +10,7 @@ import geopandas as gpd import rasterio -from swmmanywhere import downloaders +from swmmanywhere import prepare_data as downloaders # Test get_country diff --git a/tests/test_geospatial.py b/tests/test_geospatial.py index fd70f724..d01f825e 100644 --- a/tests/test_geospatial.py +++ b/tests/test_geospatial.py @@ -13,7 +13,7 @@ from scipy.interpolate import RegularGridInterpolator from shapely import geometry as sgeom -from swmmanywhere import geospatial_operations as go +from swmmanywhere import utils_geo as go def test_interp_with_nans(): From 317d7d040ec4e4da62db25f5439da034512539ec Mon Sep 17 00:00:00 2001 From: Dobson Date: Fri, 26 Jan 2024 09:08:43 +0000 Subject: [PATCH 2/5] Rename utils_geo and prepare_data tests --- tests/{test_downloaders.py => test_prepare_data.py} | 0 tests/{test_geospatial.py => test_utils_geo.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{test_downloaders.py => test_prepare_data.py} (100%) rename tests/{test_geospatial.py => test_utils_geo.py} (100%) diff --git a/tests/test_downloaders.py b/tests/test_prepare_data.py similarity index 100% rename from tests/test_downloaders.py rename to tests/test_prepare_data.py diff --git a/tests/test_geospatial.py b/tests/test_utils_geo.py similarity index 100% rename from tests/test_geospatial.py rename to tests/test_utils_geo.py From 3eb965d27f1905e46a114c73a458f22ddf52578d Mon Sep 17 00:00:00 2001 From: Dobson Date: Fri, 26 Jan 2024 09:09:07 +0000 Subject: [PATCH 3/5] Create prepare_data and utils_geo --- swmmanywhere/prepare_data.py | 239 +++++++++++++++++++++++++ swmmanywhere/utils_geo.py | 335 +++++++++++++++++++++++++++++++++++ 2 files changed, 574 insertions(+) create mode 100644 swmmanywhere/prepare_data.py create mode 100644 swmmanywhere/utils_geo.py diff --git a/swmmanywhere/prepare_data.py b/swmmanywhere/prepare_data.py new file mode 100644 index 00000000..02f99e73 --- /dev/null +++ b/swmmanywhere/prepare_data.py @@ -0,0 +1,239 @@ +# -*- coding: utf-8 -*- +"""Created 2023-12-20. + +@author: Barnaby Dobson +""" + +import os +import shutil +from typing import cast + +import cdsapi +import networkx as nx +import osmnx as ox +import pandas as pd +import requests +import xarray as xr +import yaml +from geopy.geocoders import Nominatim + +# Some minor comment (to remove) + +def get_country(x: float, + y: float) -> dict[int, str]: + """Identify which country a point (x,y) is in. + + Return the two and three letter ISO code for coordinates x (longitude) + and y (latitude) + + Args: + x (float): Longitude. + y (float): Latitude. + + Returns: + dict: A dictionary containing the two and three letter ISO codes. + Example: {2: 'GB', 3: 'GBR'} + """ + # Get the directory of the current module + current_dir = os.path.dirname(os.path.abspath(__file__)) + + # TODO use 'load_yaml_from_defs' + # Create the path to iso_converter.yml + iso_path = os.path.join(current_dir, + "defs", + "iso_converter.yml") + + + # Initialize geolocator + geolocator = Nominatim(user_agent="get_iso") + + # Load ISO code mapping from YAML file + with open(iso_path, "r") as file: + data = yaml.safe_load(file) + + # Get country ISO code from coordinates + location = geolocator.reverse("{0}, {1}".format(y, x), exactly_one=True) + iso_country_code = location.raw.get("address", {}).get("country_code") + iso_country_code = iso_country_code.upper() + + # Return a dictionary with the two and three letter ISO codes + return {2: iso_country_code, 3: data.get(iso_country_code, '')} + +def download_buildings(file_address: str, + x: float, + y: float) -> int: + """Download buildings data based on coordinates and save to a file. + + Args: + file_address (str): File address to save the downloaded data. + x (float): Longitude. + y (float): Latitude. + + Returns: + status_code (int): Reponse status code + """ + # Get three letter ISO code + iso_country_code = get_country(x, y)[3] + + # Construct API URL + api_url = f"https://data.source.coop/vida/google-microsoft-open-buildings/geoparquet/by_country/country_iso={iso_country_code}/{iso_country_code}.parquet" + + # Download data + response = requests.get(api_url) + if response.status_code == 200: + # Save data to the specified file address + with open(file_address, "wb") as file: + file.write(response.content) + print(f"Data downloaded and saved to {file_address}") + else: + print(f"Error downloading data. Status code: {response.status_code}") + return response.status_code + +def download_street(bbox: tuple[float, float, float, float]) -> nx.MultiDiGraph: + """Get street network within a bounding box using OSMNX. + + [CREDIT: Taher Cheghini busn_estimator package] + + Args: + bbox (tuple[float, float, float, float]): Bounding box as tuple in form + of (west, south, east, north) at EPSG:4326. + + Returns: + nx.MultiDiGraph: Street network with type drive and + ``truncate_by_edge set`` to True. + """ + west, south, east, north = bbox + graph = ox.graph_from_bbox( + north, south, east, west, network_type="drive", truncate_by_edge=True + ) + return cast("nx.MultiDiGraph", graph) + +def download_river(bbox: tuple[float, float, float, float]) -> nx.MultiDiGraph: + """Get water network within a bounding box using OSMNX. + + Args: + bbox (tuple[float, float, float, float]): Bounding box as tuple in form + of (west, south, east, north) at EPSG:4326. + + Returns: + nx.MultiDiGraph: River network with type waterway and + ``truncate_by_edge set`` to True. + """ + west, south, east, north = bbox + graph = ox.graph_from_bbox( + north, + south, + east, + west, + truncate_by_edge=True, + custom_filter='["waterway"]') + + return cast("nx.MultiDiGraph", graph) + +def download_elevation(fid: str, + bbox: tuple[float, float, float, float], + api_key: str ='') -> int: + """Download NASADEM elevation data from OpenTopography API. + + Downloads elevation data in GeoTIFF format from OpenTopography API based on + the specified bounding box. + + Args: + fid (str): File path to save the downloaded elevation data. + bbox (tuple): Bounding box coordinates in the format + (minx, miny, maxx, maxy). + api_key (str, optional): Your OpenTopography API key. + Defaults to ''. + + Returns: + status_code (int): Reponse status code + + Raises: + requests.exceptions.RequestException: If there is an error in the API + request. + + Example: + ``` + bbox = (-120, 35, -118, 37) # Example bounding box coordinates + download_elevation('elevation_data.tif', + bbox, + api_key='your_actual_api_key') + ``` + + Note: + To obtain an API key, you need to sign up on the OpenTopography + website. + + """ + minx, miny, maxx, maxy = bbox + url = f'https://portal.opentopography.org/API/globaldem?demtype=NASADEM&south={miny}&north={maxy}&west={minx}&east={maxx}&outputFormat=GTiff&API_Key={api_key}' + + try: + r = requests.get(url, stream=True) + r.raise_for_status() + + with open(fid, 'wb') as rast_file: + shutil.copyfileobj(r.raw, rast_file) + + print('Elevation data downloaded successfully.') + + except requests.exceptions.RequestException as e: + print(f'Error downloading elevation data: {e}') + + return r.status_code + +def download_precipitation(bbox: tuple[float, float, float, float], + start_date: str = '2015-01-01', + end_date: str = '2015-01-05', + username: str = '', + api_key: str = '') -> pd.DataFrame: + """Download precipitation data within bbox from ERA5. + + Register for an account and API key at: https://cds.climate.copernicus.eu. + Produces hourly gridded data (31km grid) in CRS EPSG:4326. + More information at: https://github.com/ecmwf/cdsapi. + + Args: + bbox (tuple): Bounding box coordinates in the format + (minx, miny, maxx, maxy). + start_date (str, optional): Start date. Defaults to '2015-01-01'. + end_date (str, optional): End date. Defaults to '2015-01-05'. + username (str, optional): CDS api username. + Defaults to ''. + api_key (str, optional): CDS api key. Defaults to ''. + + Returns: + df (DataFrame): DataFrame containing downloaded data. + """ + # Notes for docstring: + # looks like api will give nearest point if not in bbox + + # Define the request parameters + request = { + 'product_type': 'reanalysis', + 'format': 'netcdf', + 'variable': 'total_precipitation', + 'date' : '/'.join([start_date,end_date]), + 'time': '/'.join([f'{str(x).zfill(2)}:00' for x in range(24)]), + 'area': bbox, + } + + c = cdsapi.Client(url='https://cds.climate.copernicus.eu/api/v2', + key='{0}:{1}'.format(username, api_key)) + # Get data + c.retrieve('reanalysis-era5-single-levels', + request, + 'download.nc' + ) + + with xr.open_dataset('download.nc') as data: + # Convert the xarray Dataset to a pandas DataFrame + df = data.to_dataframe() + df['precipitation'] *= 1000 + df['unit'] = 'mm/hr' + + + # Delete nc file + os.remove('download.nc') + + return df \ No newline at end of file diff --git a/swmmanywhere/utils_geo.py b/swmmanywhere/utils_geo.py new file mode 100644 index 00000000..6749f4c7 --- /dev/null +++ b/swmmanywhere/utils_geo.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +"""Created 2024-01-20. + +A module containing functions to perform a variety of geospatial operations, +such as reprojecting coordinates and handling raster data. + +@author: Barnaby Dobson +""" +from functools import lru_cache +from typing import Optional + +import geopandas as gpd +import networkx as nx +import numpy as np +import pandas as pd +import pyproj +import rasterio as rst +import rioxarray +from rasterio import features +from scipy.interpolate import RegularGridInterpolator +from shapely import geometry as sgeom +from shapely.strtree import STRtree + +TransformerFromCRS = lru_cache(pyproj.transformer.Transformer.from_crs) + + +def get_utm_epsg(x: float, + y: float, + crs: str | int | pyproj.CRS = 'EPSG:4326', + datum_name: str = "WGS 84"): + """Get the UTM CRS code for a given coordinate. + + Note, this function is taken from GeoPandas and modified to use + for getting the UTM CRS code for a given coordinate. + + Args: + x (float): Longitude in crs + y (float): Latitude in crs + crs (str | int | pyproj.CRS, optional): The CRS of the input + coordinates. Defaults to 'EPSG:4326'. + datum_name (str, optional): The datum name to use for the UTM CRS + + Returns: + str: Formatted EPSG code for the UTM zone. + + Example: + >>> get_utm_epsg(-0.1276, 51.5074) + 'EPSG:32630' + """ + if not isinstance(x, float) or not isinstance(y, float): + raise TypeError("x and y must be floats") + + try: + crs = pyproj.CRS(crs) + except pyproj.exceptions.CRSError: + raise ValueError("Invalid CRS") + + # ensure using geographic coordinates + if pyproj.CRS(crs).is_geographic: + lon = x + lat = y + else: + transformer = TransformerFromCRS(crs, "EPSG:4326", always_xy=True) + lon, lat = transformer.transform(x, y) + utm_crs_list = pyproj.database.query_utm_crs_info( + datum_name=datum_name, + area_of_interest=pyproj.aoi.AreaOfInterest( + west_lon_degree=lon, + south_lat_degree=lat, + east_lon_degree=lon, + north_lat_degree=lat, + ), + ) + return f"{utm_crs_list[0].auth_name}:{utm_crs_list[0].code}" + + +def interp_with_nans(xy: tuple[float,float], + interp: RegularGridInterpolator, + grid: np.ndarray, + values: list[float]) -> float: + """Wrap the interpolation function to handle NaNs. + + Picks the nearest non NaN grid point if the interpolated value is NaN, + otherwise returns the interpolated value. + + Args: + xy (tuple): Coordinate of interest + interp (RegularGridInterpolator): The interpolator object. + grid (np.ndarray): List of xy coordinates of the grid points. + values (list): The list of values at each point in the grid. + + Returns: + float: The interpolated value. + """ + # Call the interpolator + val = float(interp(xy)) + # If the value is NaN, we need to pick nearest non nan grid point + if np.isnan(val): + # Get the distances to all grid points + distances = np.linalg.norm(grid - xy, axis=1) + # Get the indices of the grid points sorted by distance + indices = np.argsort(distances) + # Iterate over the grid points in order of increasing distance + for index in indices: + # If the value at this grid point is not NaN, return it + if not np.isnan(values[index]): + return values[index] + else: + return val + + raise ValueError("No non NaN values found in grid.") + +def interpolate_points_on_raster(x: list[float], + y: list[float], + elevation_fid: str) -> list[float ]: + """Interpolate points on a raster. + + Args: + x (list): X coordinates. + y (list): Y coordinates. + elevation_fid (str): Filepath to elevation raster. + + Returns: + elevation (float): Elevation at point. + """ + with rst.open(elevation_fid) as src: + # Read the raster data + data = src.read(1).astype(float) # Assuming it's a single-band raster + data[data == src.nodata] = None + + # Get the raster's coordinates + x = np.linspace(src.bounds.left, src.bounds.right, src.width) + y = np.linspace(src.bounds.bottom, src.bounds.top, src.height) + + # Define grid + xx, yy = np.meshgrid(x, y) + grid = np.vstack([xx.ravel(), yy.ravel()]).T + values = data.ravel() + + # Define interpolator + interp = RegularGridInterpolator((y,x), + np.flipud(data), + method='linear', + bounds_error=False, + fill_value=None) + # Interpolate for x,y + return [interp_with_nans((y_, x_), interp, grid, values) for x_, y_ in zip(x,y)] + +def reproject_raster(target_crs: str, + fid: str, + new_fid: Optional[str] = None): + """Reproject a raster to a new CRS. + + Args: + target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). + fid (str): Filepath to the raster to reproject. + new_fid (str, optional): Filepath to save the reprojected raster. + Defaults to None, which will just use fid with '_reprojected'. + """ + # Open the raster + with rioxarray.open_rasterio(fid) as raster: + + # Reproject the raster + reprojected = raster.rio.reproject(target_crs) + + # Define the output filepath + if new_fid is None: + new_fid = fid.replace('.tif','_reprojected.tif') + + # Save the reprojected raster + reprojected.rio.to_raster(new_fid) + +def get_transformer(source_crs: str, + target_crs: str) -> pyproj.Transformer: + """Get a transformer object for reprojection. + + Args: + source_crs (str): Source CRS in EPSG format (e.g., EPSG:32630). + target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). + + Returns: + pyproj.Transformer: Transformer object for reprojection. + + Example: + >>> transformer = get_transformer('EPSG:4326', 'EPSG:32630') + >>> transformer.transform(-0.1276, 51.5074) + (699330.1106898375, 5710164.30300683) + """ + return pyproj.Transformer.from_crs(source_crs, + target_crs, + always_xy=True) + +def reproject_df(df: pd.DataFrame, + source_crs: str, + target_crs: str) -> pd.DataFrame: + """Reproject the coordinates in a DataFrame. + + Args: + df (pd.DataFrame): DataFrame with columns 'longitude' and 'latitude'. + source_crs (str): Source CRS in EPSG format (e.g., EPSG:4326). + target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). + """ + # Function to transform coordinates + pts = gpd.points_from_xy(df["longitude"], + df["latitude"], + crs=source_crs).to_crs(target_crs) + df = df.copy() + df['x'] = pts.x + df['y'] = pts.y + return df + +def reproject_graph(G: nx.Graph, + source_crs: str, + target_crs: str) -> nx.Graph: + """Reproject the coordinates in a graph. + + osmnx.projection.project_graph might be suitable if some other behaviour + needs to be captured, but it currently fails the tests so I will ignore for + now. + + Args: + G (nx.Graph): Graph with nodes containing 'x' and 'y' properties. + source_crs (str): Source CRS in EPSG format (e.g., EPSG:4326). + target_crs (str): Target CRS in EPSG format (e.g., EPSG:32630). + + Returns: + nx.Graph: Graph with nodes containing 'x' and 'y' properties. + """ + # Create a PyProj transformer for CRS conversion + transformer = get_transformer(source_crs, target_crs) + + # Create a new graph with the converted nodes and edges + G_new = G.copy() + + # Convert and add nodes with 'x', 'y' properties + for node, data in G_new.nodes(data=True): + x, y = transformer.transform(data['x'], data['y']) + data['x'] = x + data['y'] = y + + # Convert and add edges with 'geometry' property + for u, v, data in G_new.edges(data=True): + if 'geometry' in data.keys(): + data['geometry'] = sgeom.LineString(transformer.transform(x, y) + for x, y in data['geometry'].coords) + else: + data['geometry'] = sgeom.LineString([[G_new.nodes[u]['x'], + G_new.nodes[u]['y']], + [G_new.nodes[v]['x'], + G_new.nodes[v]['y']]]) + + return G_new + + +def nearest_node_buffer(points1: dict[str, sgeom.Point], + points2: dict[str, sgeom.Point], + threshold: float) -> dict: + """Find the nearest node within a given buffer threshold. + + Args: + points1 (dict): A dictionary where keys are labels and values are + Shapely points geometries. + points2 (dict): A dictionary where keys are labels and values are + Shapely points geometries. + threshold (float): The maximum distance for a node to be considered + 'nearest'. If no nodes are within this distance, the node is not + included in the output. + + Returns: + dict: A dictionary where keys are labels from points1 and values are + labels from points2 of the nearest nodes within the threshold. + """ + # Convert the keys of points2 to a list + labels2 = list(points2.keys()) + + # Create a spatial index + tree = STRtree(list(points2.values())) + + # Initialize an empty dictionary to store the matching nodes + matching = {} + + # Iterate over points1 + for key, geom in points1.items(): + # Find the nearest node in the spatial index to the current geometry + nearest = tree.nearest(geom) + nearest_geom = points2[labels2[nearest]] + + # If the nearest node is within the threshold, add it to the + # matching dictionary + if geom.buffer(threshold).intersects(nearest_geom): + matching[key] = labels2[nearest] + + # Return the matching dictionary + return matching + +def burn_shape_in_raster(geoms: list[sgeom.LineString], + depth: float, + raster_fid: str, + new_raster_fid: str): + """Burn a depth into a raster along a list of shapely geometries. + + Args: + geoms (list): List of Shapely geometries. + depth (float): Depth to carve. + raster_fid (str): Filepath to input raster. + new_raster_fid (str): Filepath to save the carved raster. + """ + with rst.open(raster_fid) as src: + # read data + data = src.read(1) + data = data.astype(float) + data_mask = data != src.nodata + bool_mask = np.zeros(data.shape, dtype=bool) + for geom in geoms: + # Create a mask for the line + mask = features.geometry_mask([sgeom.mapping(geom)], + out_shape=src.shape, + transform=src.transform, + invert=True) + # modify masked data + bool_mask[mask] = True # Adjust this multiplier as needed + #modify data + data[bool_mask & data_mask] -= depth + # Create a new GeoTIFF with modified values + with rst.open(new_raster_fid, + 'w', + driver='GTiff', + height=src.height, + width=src.width, + count=1, + dtype=data.dtype, + crs=src.crs, + transform=src.transform, + nodata = src.nodata) as dest: + dest.write(data, 1) \ No newline at end of file From dc6040a1feff11d13fd7d0a9ee1ae49bbe2a818a Mon Sep 17 00:00:00 2001 From: Dobson Date: Fri, 26 Jan 2024 10:11:51 +0000 Subject: [PATCH 4/5] Undo unit conversion - that doesn't go here --- swmmanywhere/prepare_data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/swmmanywhere/prepare_data.py b/swmmanywhere/prepare_data.py index 02f99e73..951e5f38 100644 --- a/swmmanywhere/prepare_data.py +++ b/swmmanywhere/prepare_data.py @@ -229,8 +229,7 @@ def download_precipitation(bbox: tuple[float, float, float, float], with xr.open_dataset('download.nc') as data: # Convert the xarray Dataset to a pandas DataFrame df = data.to_dataframe() - df['precipitation'] *= 1000 - df['unit'] = 'mm/hr' + df['unit'] = 'm/hr' # Delete nc file From ca88a1c32f8b4a3a1c55a7ee2fae21a0fdb4e231 Mon Sep 17 00:00:00 2001 From: Dobson Date: Fri, 26 Jan 2024 10:26:56 +0000 Subject: [PATCH 5/5] Update utils_geo to geospatial_utilities --- swmmanywhere/{utils_geo.py => geospatial_utilities.py} | 0 tests/{test_utils_geo.py => test_geospatial_utilities.py} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename swmmanywhere/{utils_geo.py => geospatial_utilities.py} (100%) rename tests/{test_utils_geo.py => test_geospatial_utilities.py} (99%) diff --git a/swmmanywhere/utils_geo.py b/swmmanywhere/geospatial_utilities.py similarity index 100% rename from swmmanywhere/utils_geo.py rename to swmmanywhere/geospatial_utilities.py diff --git a/tests/test_utils_geo.py b/tests/test_geospatial_utilities.py similarity index 99% rename from tests/test_utils_geo.py rename to tests/test_geospatial_utilities.py index d01f825e..fcca0cdc 100644 --- a/tests/test_utils_geo.py +++ b/tests/test_geospatial_utilities.py @@ -13,7 +13,7 @@ from scipy.interpolate import RegularGridInterpolator from shapely import geometry as sgeom -from swmmanywhere import utils_geo as go +from swmmanywhere import geospatial_utilities as go def test_interp_with_nans():