Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

geopandas: Correctly handle columns with integer values bigger than the largest 32-bit integer #2841

Merged
merged 7 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pygmt/helpers/tempfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ def tempfile_from_geojson(geojson):
os.remove(tmpfile.name) # ensure file is deleted first
ogrgmt_kwargs = {"filename": tmpfile.name, "driver": "OGR_GMT", "mode": "w"}
try:
# Map int/int64 to int32 since OGR_GMT only supports 32-bit integer
# OGR_GMT only supports 32-bit integers. We need to map int/int64
# types to int32 or float, depending on whether the column's values
# overflow a 32-bit integer. Related issues:
seisman marked this conversation as resolved.
Show resolved Hide resolved
# https://github.com/geopandas/geopandas/issues/967#issuecomment-842877704
# https://github.com/GenericMappingTools/pygmt/issues/2497
if geojson.index.name is None:
Expand All @@ -140,7 +142,8 @@ def tempfile_from_geojson(geojson):
schema = gpd.io.file.infer_schema(geojson)
for col, dtype in schema["properties"].items():
if dtype in ("int", "int64"):
schema["properties"][col] = "int32"
overflow = geojson[col].abs().max() > 2**31 - 1
schema["properties"][col] = "float" if overflow else "int32"
ogrgmt_kwargs["schema"] = schema
# Using geopandas.to_file to directly export to OGR_GMT format
geojson.to_file(**ogrgmt_kwargs)
Expand Down
72 changes: 57 additions & 15 deletions pygmt/tests/test_geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,27 @@ def fixture_gdf():
index=["multipolygon", "polygon", "linestring"],
geometry=[multipolygon, polygon, linestring],
)
return gdf


@pytest.fixture(scope="module", name="gdf_ridge")
def fixture_gdf_ridge():
"""
Read the @RidgeTest.shp shapefile into a geopandas.GeoDataFrame and
reproject the geometry.
seisman marked this conversation as resolved.
Show resolved Hide resolved
"""
# Read shapefile in geopandas.GeoDataFrame
seisman marked this conversation as resolved.
Show resolved Hide resolved
shapefile = which(
fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
download="c",
)
gdf = gpd.read_file(shapefile[0])
# Reproject geometry
seisman marked this conversation as resolved.
Show resolved Hide resolved
gdf["geometry"] = (
gdf.to_crs(crs="EPSG:3857")
.buffer(distance=100000)
.to_crs(crs="OGC:CRS84") # convert to lon/lat to prevent @null in PROJ CRS
)
return gdf


Expand Down Expand Up @@ -144,40 +164,62 @@ def test_geopandas_plot3d_non_default_circle():
],
)
@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
def test_geopandas_plot_int_dtypes(dtype):
def test_geopandas_plot_int_dtypes(gdf_ridge, dtype):
"""
Check that plotting a geopandas GeoDataFrame with integer columns works,
including int32 and int64 (non-nullable), Int32 and Int64 (nullable).
seisman marked this conversation as resolved.
Show resolved Hide resolved

This is a regression test for
https://github.com/GenericMappingTools/pygmt/issues/2497
"""
# Read shapefile in geopandas.GeoDataFrame
shapefile = which(
fname="@RidgeTest.shp @RidgeTest.shx @RidgeTest.dbf @RidgeTest.prj",
download="c",
)
gdf = gpd.read_file(shapefile[0])
# Convert NPOINTS column to integer type
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype=dtype)

# Reproject geometry and change dtype of NPOINTS column
gdf["geometry"] = (
gdf.to_crs(crs="EPSG:3857")
.buffer(distance=100000)
.to_crs(crs="OGC:CRS84") # convert to lon/lat to prevent @null in PROJ CRS
# Plot figure with three polygons colored based on NPOINTS value
fig = Figure()
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
fig.plot(
data=gdf_ridge,
frame=True,
pen="1p,black",
close=True,
fill="+z",
cmap=True,
aspatial="Z=NPOINTS",
)
seisman marked this conversation as resolved.
Show resolved Hide resolved
gdf["NPOINTS"] = gdf.NPOINTS.astype(dtype=dtype)
fig.colorbar()
return fig


@pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png")
def test_geopandas_plot_int64_as_float(gdf_ridge):
"""
Check that big 64-bit integers are correctly mapped to float type in a
geopandas.GeoDataFrame object.
"""
factor = 2**32
# Convert NPOINTS column to int64 type and make big integers
gdf_ridge["NPOINTS"] = gdf_ridge.NPOINTS.astype(dtype="int64")
gdf_ridge["NPOINTS"] *= factor

# Make sure the column contains values bigger than the largest 32-bit integer
assert gdf_ridge["NPOINTS"].abs().max() > 2**31 - 1

# Plot figure with three polygons colored based on NPOINTS value
fig = Figure()
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
makecpt(
cmap="lisbon", series=[10 * factor, 60 * factor, 10 * factor], continuous=True
)
fig.plot(
data=gdf,
data=gdf_ridge,
frame=True,
pen="1p,black",
close=True,
fill="+z",
seisman marked this conversation as resolved.
Show resolved Hide resolved
cmap=True,
aspatial="Z=NPOINTS",
)
# Generate a CPT for the 10-60 range for the colorbar, so that the baseline image can be reused
makecpt(cmap="lisbon", series=[10, 60, 10], continuous=True)
fig.colorbar()
return fig