-
Notifications
You must be signed in to change notification settings - Fork 207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix geometry issues #532
Fix geometry issues #532
Changes from all commits
d489e77
59a0e83
9ddb8bf
b15717a
8dd7ed6
5438808
a5e5529
94c5df8
4b35011
21d167b
5c94a02
6b8f98f
1bbbf08
8841088
d42c6eb
05e954f
23f5e7c
2f41dcf
1357965
40c4a3a
2f8fd89
6ccb5ac
97d848b
cb3b14e
ca6d67c
602b62d
075ff01
75fb552
019a49b
152a550
8519d6f
0333cc2
1bf0048
05345ef
076198d
b7026e1
bc2436d
04a4fb1
3606638
aced6fa
1c3b962
d6a49d4
d9f4e33
7d457e9
4603e37
daec9e8
d69e64a
b414a98
21aba6b
a4da003
3b25277
f314100
495477d
ee6f07a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
import xarray as xr | ||
from _helpers import ( | ||
configure_logging, | ||
country_name_2_two_digits, | ||
sets_path_to_root, | ||
three_2_two_digits_country, | ||
two_2_three_digits_country, | ||
|
@@ -80,6 +81,49 @@ def download_GADM(country_code, update=False, out_logging=False): | |
return GADM_inputfile_gpkg, GADM_filename | ||
|
||
|
||
def restore_country_code_by_name(row, country_code): | ||
if row["GID_0"] != country_code: | ||
return country_name_2_two_digits(row["COUNTRY"]) | ||
else: | ||
return row["GID_0"] | ||
|
||
|
||
# file=file_gpkg, layer=layer_id, cc=country_code | ||
def build_gadm_df(file, layer, cc): | ||
# read gpkg file | ||
geodf = gpd.read_file(file, layer="ADM_ADM_" + str(layer)).to_crs(geo_crs) | ||
|
||
# convert country name representation of the main country (GID_0 column) | ||
geodf["GID_0"] = [three_2_two_digits_country(twoD_c) for twoD_c in geodf["GID_0"]] | ||
|
||
# GID_0 may have some exotic values, "COUNTRY" column may be used instead | ||
geodf["GID_0"] = geodf.apply( | ||
lambda x: restore_country_code_by_name(x, country_code=cc), axis=1 | ||
) | ||
|
||
# "not found" hardcoded according to country_converter conventions | ||
geodf.drop(geodf[geodf["GID_0"] == "not found"].index, inplace=True) | ||
|
||
# create a subindex column that is useful | ||
# in the GADM processing of sub-national zones | ||
geodf["GADM_ID"] = geodf[f"GID_{layer}"] | ||
|
||
if layer >= 1: | ||
available_gadm_codes = geodf["GADM_ID"].unique() | ||
code_three_digits = two_2_three_digits_country(cc) | ||
|
||
# normally the GADM code starts the ISO3 | ||
non_std_gadm_codes = [ | ||
w for w in available_gadm_codes if not w.startswith(code_three_digits) | ||
] | ||
|
||
if len(non_std_gadm_codes) > 0: | ||
df_filtered = geodf[geodf["GADM_ID"].isin(non_std_gadm_codes)] | ||
df_filtered.to_csv("non_standard_gadm_" + cc + "_raw.csv", index=False) | ||
|
||
return geodf | ||
|
||
|
||
def get_GADM_layer(country_list, layer_id, geo_crs, update=False, outlogging=False): | ||
""" | ||
Function to retrive a specific layer id of a geopackage for a selection of countries | ||
|
@@ -109,19 +153,7 @@ def get_GADM_layer(country_list, layer_id, geo_crs, update=False, outlogging=Fal | |
# when layer id is negative or larger than the number of layers, select the last layer | ||
layer_id = len(list_layers) - 1 | ||
|
||
# read gpkg file | ||
geodf_temp = gpd.read_file(file_gpkg, layer="ADM_ADM_" + str(layer_id)).to_crs( | ||
geo_crs | ||
) | ||
|
||
# convert country name representation of the main country (GID_0 column) | ||
geodf_temp["GID_0"] = [ | ||
three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] | ||
] | ||
|
||
# create a subindex column that is useful | ||
# in the GADM processing of sub-national zones | ||
geodf_temp["GADM_ID"] = geodf_temp[f"GID_{layer_id}"] | ||
geodf_temp = build_gadm_df(file=file_gpkg, layer=layer_id, cc=country_code) | ||
|
||
# append geodataframes | ||
geodf_list.append(geodf_temp) | ||
|
@@ -267,7 +299,9 @@ def eez(countries, geo_crs, country_shapes, EEZ_gpkg, out_logging=False, distanc | |
) | ||
|
||
ret_df = ret_df.apply(lambda x: make_valid(x)) | ||
country_shapes = country_shapes.apply(lambda x: make_valid(x)) | ||
|
||
# country_shapes may consist of different geometries which need to be united | ||
country_shapes = country_shapes.apply(lambda x: make_valid(x)).unary_union | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By doing this don't we merge all country shapes into one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes! That is applied under a loop, country-by-country. Have tested on a two-countries list (["PK", "KG"]) and it seems to work properly |
||
|
||
country_shapes_with_buffer = country_shapes.buffer(distance) | ||
ret_df_new = ret_df.difference(country_shapes_with_buffer) | ||
|
@@ -783,8 +817,19 @@ def gadm( | |
# set index and simplify polygons | ||
df_gadm.set_index("GADM_ID", inplace=True) | ||
df_gadm["geometry"] = df_gadm["geometry"].map(_simplify_polys) | ||
# df_gadm = df_gadm[df_gadm.geometry.is_valid & ~df_gadm.geometry.is_empty] | ||
df_gadm.geometry = df_gadm.geometry.apply( | ||
lambda r: make_valid(r) if not r.is_valid else r | ||
) | ||
df_gadm = df_gadm[df_gadm.geometry.is_valid & ~df_gadm.geometry.is_empty] | ||
|
||
# gadm_shapes.geometry = gadm_shapes.apply( | ||
# lambda row: make_valid(row.geometry) | ||
# if not row.geometry.is_valid | ||
# else row.geometry, | ||
# axis=1, | ||
# ) | ||
|
||
return df_gadm | ||
|
||
|
||
|
@@ -813,7 +858,13 @@ def gadm( | |
|
||
country_shapes = countries(countries_list, geo_crs, update, out_logging) | ||
|
||
country_shapes.reset_index().to_file(snakemake.output.country_shapes) | ||
# there may be "holes" in the countries geometry which cause troubles along the workflow | ||
# e.g. that is the case for enclaves like Dahagram–Angarpota for IN/BD | ||
country_shapes_valid = ( | ||
country_shapes.apply(lambda x: make_valid(x) if not x.is_valid else x) | ||
.reset_index() | ||
.to_file(snakemake.output.country_shapes) | ||
) | ||
|
||
offshore_shapes = eez( | ||
countries_list, geo_crs, country_shapes, EEZ_gpkg, out_logging | ||
|
@@ -837,4 +888,10 @@ def gadm( | |
year, | ||
nprocesses=nprocesses, | ||
) | ||
# gadm_shapes.geometry = gadm_shapes.apply( | ||
# lambda row: make_valid(row.geometry) | ||
# if not row.geometry.is_valid | ||
# else row.geometry, | ||
# axis=1, | ||
# ) | ||
save_to_geojson(gadm_shapes, out.gadm_shapes) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very interesting work with sets!
Maybe we could do the check if set(country_list) != set(bus_country_list):
This may be more general and if that does not apply, then within the if we calculate the difference as you propose.
in such a case; it may be better to convert no_data_countries as a list; maybe something like
list(set(country_list).difference(set(bus_country_list)))
We may use symmetric_difference instead of simple difference to generalize.
Most likely the difference would be negligible but by checking as proposed we miss whether some countries are included into bus_country_list and they are missing in country_list.
With simmetric_difference we keep this feature
Opinions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you :)
A very beautiful idea regarding
symmetric_difference()
and I'll definitely keep in mind this approach. But not sure if we may generalise this particular case. As far as I understand, we have to treat a situation when there are some unexpectedcountry
entries in the buses dataframe in a different way as we do the countries without busesActually, if there are some unexpected countries in the buses dataframe, I'd rather raise an error as that is likely that something went wrong during OSM data cleaning (e.g. the data folders need some clean-up). Probably, it should be added to #528
Do you agree?