diff --git a/doc/api/index.rst b/doc/api/index.rst index 959f9ba3d50..b488a2a4874 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -138,6 +138,9 @@ Operations on raster data grdfill grdfilter grdgradient + grdhisteq + grdhisteq.equalize_grid + grdhisteq.compute_bins grdlandmask grdproject grdsample diff --git a/pygmt/__init__.py b/pygmt/__init__.py index 87e464de8b5..db33925edf2 100644 --- a/pygmt/__init__.py +++ b/pygmt/__init__.py @@ -40,6 +40,7 @@ grdfill, grdfilter, grdgradient, + grdhisteq, grdinfo, grdlandmask, grdproject, diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py index 0acb118bf43..6c1a29b91de 100644 --- a/pygmt/src/__init__.py +++ b/pygmt/src/__init__.py @@ -17,6 +17,7 @@ from pygmt.src.grdfill import grdfill from pygmt.src.grdfilter import grdfilter from pygmt.src.grdgradient import grdgradient +from pygmt.src.grdhisteq import grdhisteq from pygmt.src.grdimage import grdimage from pygmt.src.grdinfo import grdinfo from pygmt.src.grdlandmask import grdlandmask diff --git a/pygmt/src/grdhisteq.py b/pygmt/src/grdhisteq.py new file mode 100644 index 00000000000..61de7503d4e --- /dev/null +++ b/pygmt/src/grdhisteq.py @@ -0,0 +1,349 @@ +""" +grdhisteq - Perform histogram equalization for a grid. +""" +import warnings + +import numpy as np +import pandas as pd +from pygmt.clib import Session +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import ( + GMTTempFile, + build_arg_string, + fmt_docstring, + kwargs_to_strings, + use_alias, +) +from pygmt.io import load_dataarray + +__doctest_skip__ = ["grdhisteq.*"] + + +class grdhisteq: # pylint: disable=invalid-name + r""" + Perform histogram equalization for a grid. + + Two common use cases of :meth:`pygmt.grdhisteq` are to find data values + that divide a grid into patches of equal area + (:meth:`pygmt.grdhisteq.compute_bins`) or to write a grid with + statistics based on some kind of cumulative distribution function + (:meth:`pygmt.grdhisteq.equalize_grid`). + + Histogram equalization provides a way to highlight data that has most + values clustered in a small portion of the dynamic range, such as a + grid of flat topography with a mountain in the middle. Ordinary gray + shading of this grid (using :meth:`pygmt.Figure.grdimage` or + :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to + graytone will result in most of the image being very dark gray, with the + mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins` can + provide a list of data values that divide the data range into divisions + which have an equal area in the image [Default is 16 if ``divisions`` is + not set]. The :class:`pandas.DataFrame` or ASCII file output can be used to + make a colormap with :meth:`pygmt.makecpt` and an image with + :meth:`pygmt.Figure.grdimage` that has all levels of gray occuring + equally. + + :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid with + statistics based on a cumulative distribution function. In this + application, the ``outgrid`` has relative highs and lows in the same + (x,y) locations as the ``grid``, but the values are changed to reflect + their place in the cumulative distribution. + """ + + @staticmethod + @fmt_docstring + @use_alias( + C="divisions", + D="outfile", + G="outgrid", + R="region", + N="gaussian", + Q="quadratic", + V="verbose", + h="header", + ) + @kwargs_to_strings(R="sequence") + def _grdhisteq(grid, output_type, **kwargs): + r""" + Perform histogram equalization for a grid. + + Must provide ``outfile`` or ``outgrid``. + + Full option list at :gmt-docs:`grdhisteq.html` + + {aliases} + + Parameters + ---------- + grid : str or xarray.DataArray + The file name of the input grid or the grid loaded as a DataArray. + outgrid : str or bool or None + The name of the output netCDF file with extension .nc to store the + grid in. + outfile : str or bool or None + The name of the output ASCII file to store the results of the + histogram equalization in. + output_type: str + Determines the output type. Use "file", "xarray", "pandas", or + "numpy". + divisions : int + Set the number of divisions of the data range [Default is 16]. + + {R} + {V} + {h} + + Returns + ------- + ret: pandas.DataFrame or xarray.DataArray or None + Return type depends on whether the ``outgrid`` parameter is set: + + - xarray.DataArray if ``output_type`` is "xarray"" + - numpy.ndarray if ``output_type`` is "numpy" + - pandas.DataFrame if ``output_type`` is "pandas" + - None if ``output_type`` is "file" (output is stored in + ``outgrid`` or ``outfile``) + + See Also + ------- + :meth:`pygmt.grd2cpt` + """ + + with Session() as lib: + file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) + with file_context as infile: + arg_str = " ".join([infile, build_arg_string(kwargs)]) + lib.call_module("grdhisteq", arg_str) + + if output_type == "file": + return None + if output_type == "xarray": + return load_dataarray(kwargs["G"]) + + result = pd.read_csv( + filepath_or_buffer=kwargs["D"], + sep="\t", + header=None, + names=["start", "stop", "bin_id"], + dtype={ + "start": np.float32, + "stop": np.float32, + "bin_id": np.uint32, + }, + ) + if output_type == "numpy": + return result.to_numpy() + + return result.set_index("bin_id") + + @staticmethod + @fmt_docstring + def equalize_grid( + grid, + *, + outgrid=True, + divisions=None, + region=None, + gaussian=None, + quadratic=None, + verbose=None, + ): + r""" + Perform histogram equalization for a grid. + + :meth:`pygmt.grdhisteq.equalize_grid` provides a way to write a grid + with statistics based on a cumulative distribution function. The + ``outgrid`` has relative highs and lows in the same (x,y) locations as + the ``grid``, but the values are changed to reflect their place in the + cumulative distribution. + + Full option list at :gmt-docs:`grdhisteq.html` + + Parameters + ---------- + grid : str or xarray.DataArray + The file name of the input grid or the grid loaded as a DataArray. + outgrid : str or bool or None + The name of the output netCDF file with extension .nc to store the + grid in. + divisions : int + Set the number of divisions of the data range. + gaussian : bool or int or float + *norm*. + Produce an output grid with standard normal scores using + ``gaussian=True`` or force the scores to fall in the ±\ *norm* + range. + quadratic: bool + Perform quadratic equalization [Default is linear]. + {R} + {V} + + Returns + ------- + ret: xarray.DataArray or None + Return type depends on the ``outgrid`` parameter: + + - xarray.DataArray if ``outgrid`` is True or None + - None if ``outgrid`` is a str (grid output is stored in + ``outgrid``) + + Example + ------- + >>> import pygmt + >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to + >>> # 30, and a y-range of 15 to 25 + >>> grid = pygmt.datasets.load_earth_relief( + ... resolution="30m", region=[10, 30, 15, 25] + ... ) + >>> # Create a new grid with a Gaussian data distribution + >>> grid = pygmt.grdhisteq.equalize_grid(grid=grid, gaussian=True) + + See Also + ------- + :meth:`pygmt.grd2cpt` + + Notes + ----- + This method does a weighted histogram equalization for geographic + grids to account for node area varying with latitude. + """ + # Return an xarray.DataArray if ``outgrid`` is not set + with GMTTempFile(suffix=".nc") as tmpfile: + if isinstance(outgrid, str): + output_type = "file" + else: + output_type = "xarray" + outgrid = tmpfile.name + return grdhisteq._grdhisteq( + grid=grid, + output_type=output_type, + outgrid=outgrid, + divisions=divisions, + region=region, + gaussian=gaussian, + quadratic=quadratic, + verbose=verbose, + ) + + @staticmethod + @fmt_docstring + def compute_bins( + grid, + *, + output_type="pandas", + outfile=None, + divisions=None, + quadratic=None, + verbose=None, + region=None, + header=None, + ): + r""" + Perform histogram equalization for a grid. + + Histogram equalization provides a way to highlight data that has most + values clustered in a small portion of the dynamic range, such as a + grid of flat topography with a mountain in the middle. Ordinary gray + shading of this grid (using :meth:`pygmt.Figure.grdimage` or + :meth:`pygmt.Figure.grdview`) with a linear mapping from topography to + graytone will result in most of the image being very dark gray, with + the mountain being almost white. :meth:`pygmt.grdhisteq.compute_bins` + can provide a list of data values that divide the data range into + divisions which have an equal area in the image [Default is 16 if + ``divisions`` is not set]. The :class:`pandas.DataFrame` or ASCII file + output can be used to make a colormap with :meth:`pygmt.makecpt` and an + image with :meth:`pygmt.Figure.grdimage` that has all levels of gray + occuring equally. + + Full option list at :gmt-docs:`grdhisteq.html` + + Parameters + ---------- + grid : str or xarray.DataArray + The file name of the input grid or the grid loaded as a DataArray. + outfile : str or bool or None + The name of the output ASCII file to store the results of the + histogram equalization in. + output_type : str + Determine the format the xyz data will be returned in [Default is + ``pandas``]: + + - ``numpy`` - :class:`numpy.ndarray` + - ``pandas``- :class:`pandas.DataFrame` + - ``file`` - ASCII file (requires ``outfile``) + divisions : int + Set the number of divisions of the data range. + quadratic : bool + Perform quadratic equalization [Default is linear]. + {R} + {V} + {h} + + Returns + ------- + ret: pandas.DataFrame or None + Return type depends on the ``outfile`` parameter: + + - pandas.DataFrame if ``outfile`` is True or None + - None if ``outfile`` is a str (file output is stored in + ``outfile``) + + Example + ------- + >>> import pygmt + >>> # Load a grid of @earth_relief_30m data, with an x-range of 10 to + >>> # 30, and a y-range of 15 to 25 + >>> grid = pygmt.datasets.load_earth_relief( + ... resolution="30m", region=[10, 30, 15, 25] + ... ) + >>> # Find elevation intervals that splits the data range into 5 + >>> # divisions, each of which have an equal area in the original grid. + >>> bins = pygmt.grdhisteq.compute_bins(grid=grid, divisions=5) + >>> print(bins) + start stop + bin_id + 0 179.0 397.5 + 1 397.5 475.5 + 2 475.5 573.5 + 3 573.5 710.5 + 4 710.5 2103.0 + + See Also + ------- + :meth:`pygmt.grd2cpt` + + Notes + ----- + This method does a weighted histogram equalization for geographic + grids to account for node area varying with latitude. + """ + # Return a pandas.DataFrame if ``outfile`` is not set + if output_type not in ["numpy", "pandas", "file"]: + raise GMTInvalidInput( + "Must specify 'output_type' either as 'numpy', 'pandas' or 'file'." + ) + + if header is not None and output_type != "file": + raise GMTInvalidInput("'header' is only allowed with output_type='file'.") + + if isinstance(outfile, str) and output_type != "file": + msg = ( + f"Changing 'output_type' from '{output_type}' to 'file' " + "since 'outfile' parameter is set. Please use output_type='file' " + "to silence this warning." + ) + warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) + output_type = "file" + with GMTTempFile(suffix=".txt") as tmpfile: + if output_type != "file": + outfile = tmpfile.name + return grdhisteq._grdhisteq( + grid, + output_type=output_type, + outfile=outfile, + divisions=divisions, + quadratic=quadratic, + verbose=verbose, + region=region, + header=header, + ) diff --git a/pygmt/tests/test_grdhisteq.py b/pygmt/tests/test_grdhisteq.py new file mode 100644 index 00000000000..9a4fd004b3d --- /dev/null +++ b/pygmt/tests/test_grdhisteq.py @@ -0,0 +1,137 @@ +""" +Tests for grdhisteq. +""" +import os + +import numpy as np +import pandas as pd +import pytest +import xarray as xr +from pygmt import grdhisteq, load_dataarray +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import GMTTempFile +from pygmt.helpers.testing import load_static_earth_relief + + +@pytest.fixture(scope="module", name="grid") +def fixture_grid(): + """ + Load the grid data from the sample earth_relief file. + """ + return load_static_earth_relief() + + +@pytest.fixture(scope="module", name="region") +def fixture_region(): + """ + Load the grid data from the sample earth_relief file. + """ + return [-52, -48, -22, -18] + + +@pytest.fixture(scope="module", name="expected_grid") +def fixture_grid_result(): + """ + Load the expected grdhisteq grid result. + """ + return xr.DataArray( + data=[[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 1, 1], [1, 1, 1, 1]], + coords=dict(lon=[-51.5, -50.5, -49.5, -48.5], lat=[-21.5, -20.5, -19.5, -18.5]), + dims=["lat", "lon"], + ) + + +@pytest.fixture(scope="module", name="expected_df") +def fixture_df_result(): + """ + Load the expected grdhisteq table result. + """ + return pd.DataFrame( + data=np.array([[345.5, 519.5, 0], [519.5, 726.5, 1]]), + columns=["start", "stop", "bin_id"], + ).astype({"start": np.float32, "stop": np.float32, "bin_id": np.uint32}) + + +def test_equalize_grid_outgrid_file(grid, expected_grid, region): + """ + Test grdhisteq.equalize_grid with a set outgrid. + """ + with GMTTempFile(suffix=".nc") as tmpfile: + result = grdhisteq.equalize_grid( + grid=grid, divisions=2, region=region, outgrid=tmpfile.name + ) + assert result is None # return value is None + assert os.path.exists(path=tmpfile.name) # check that outgrid exists + temp_grid = load_dataarray(tmpfile.name) + xr.testing.assert_allclose(a=temp_grid, b=expected_grid) + + +@pytest.mark.parametrize("outgrid", [True, None]) +def test_equalize_grid_outgrid(grid, outgrid, expected_grid, region): + """ + Test grdhisteq.equalize_grid with ``outgrid=True`` and ``outgrid=None``. + """ + temp_grid = grdhisteq.equalize_grid( + grid=grid, divisions=2, region=region, outgrid=outgrid + ) + assert temp_grid.gmt.gtype == 1 # Geographic grid + assert temp_grid.gmt.registration == 1 # Pixel registration + xr.testing.assert_allclose(a=temp_grid, b=expected_grid) + + +def test_compute_bins_no_outfile(grid, expected_df, region): + """ + Test grdhisteq.compute_bins with no ``outfile``. + """ + temp_df = grdhisteq.compute_bins(grid=grid, divisions=2, region=region) + assert isinstance(temp_df, pd.DataFrame) + pd.testing.assert_frame_equal(left=temp_df, right=expected_df.set_index("bin_id")) + + +def test_compute_bins_ndarray_output(grid, expected_df, region): + """ + Test grdhisteq.compute_bins with "numpy" output type. + """ + temp_array = grdhisteq.compute_bins( + grid=grid, divisions=2, region=region, output_type="numpy" + ) + assert isinstance(temp_array, np.ndarray) + np.testing.assert_allclose(temp_array, expected_df.to_numpy()) + + +def test_compute_bins_outfile(grid, expected_df, region): + """ + Test grdhisteq.compute_bins with ``outfile``. + """ + with GMTTempFile(suffix=".txt") as tmpfile: + with pytest.warns(RuntimeWarning) as record: + result = grdhisteq.compute_bins( + grid=grid, + divisions=2, + region=region, + outfile=tmpfile.name, + ) + assert len(record) == 1 # check that only one warning was raised + assert result is None # return value is None + assert os.path.exists(path=tmpfile.name) + temp_df = pd.read_csv( + filepath_or_buffer=tmpfile.name, + sep="\t", + header=None, + names=["start", "stop", "bin_id"], + dtype={"start": np.float32, "stop": np.float32, "bin_id": np.uint32}, + index_col="bin_id", + ) + pd.testing.assert_frame_equal( + left=temp_df, right=expected_df.set_index("bin_id") + ) + + +def test_compute_bins_invalid_format(grid): + """ + Test that compute_bins fails with incorrect format. + """ + with pytest.raises(GMTInvalidInput): + grdhisteq.compute_bins(grid=grid, output_type=1) + with pytest.raises(GMTInvalidInput): + grdhisteq.compute_bins(grid=grid, output_type="pandas", header="o+c")