Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

POC: pygmt.binstats: Make the 'statistic' parameter more Pythonic #3012

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 80 additions & 26 deletions pygmt/src/binstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@
binstats - Bin spatial data and determine statistics per bin
"""

from typing import Literal

import xarray as xr
from pygmt.clib import Session
from pygmt.helpers import build_arg_list, fmt_docstring, kwargs_to_strings, use_alias
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import (
build_arg_list,
fmt_docstring,
kwargs_to_strings,
use_alias,
)


@fmt_docstring
@use_alias(
C="statistic",
E="empty",
I="spacing",
N="normalize",
Expand All @@ -24,7 +31,30 @@
r="registration",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma")
def binstats(data, outgrid: str | None = None, **kwargs) -> xr.DataArray | None:
def binstats(
data,
outgrid: str | None = None,
statistic: Literal[
"mean",
"mad",
"full",
"interquartile",
"min",
"minpos",
"median",
"number",
"lms",
"mode",
"quantile",
"rms",
"stddev",
"max",
"maxneg",
"sum",
] = "number",
quantile_value: float = 50,
**kwargs,
) -> xr.DataArray | None:
r"""
Bin spatial data and determine statistics per bin.

Expand All @@ -46,30 +76,29 @@
A file name of an ASCII data table or a 2-D
{table-classes}.
{outgrid}
statistic : str
**a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\
\|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\
\|\ **U**\|\ **z**.
Choose the statistic that will be computed per node based on the
points that are within *radius* distance of the node. Select one of:
statistic
Choose the statistic that will be computed per node based on the points that are
within *radius* distance of the node. Select one of:

- **a** for mean (average)
- **d** for median absolute deviation (MAD)
- **g** for full (max-min) range
- **i** for 25-75% interquartile range
- **l** for minimum (low)
- **L** for minimum of positive values only
- **m** for median
- **n** the number of values
- **o** for LMS scale
- **p** for mode (maximum likelihood)
- **q** for selected quantile (append desired quantile in
0-100% range [50])
- **r** for the r.m.s.
- **s** for standard deviation
- **u** for maximum (upper)
- **U** for maximum of negative values only
- **z** for the sum
- **mean**: Compute the mean value (average).
- **mad**: Compute the median absolute deviation (MAD).
- **full**: Compute the full (max-min) range.
- **interquartile**: Compute the 25-75% interquartile range.
- **min**: Compute the minimum value.
- **minpos**: Compute the minimum of positive values only.
- **median**: Compute the median value.
- **number**: Compute the number of values.
- **lms**: Compute the LMS scale.
- **mode**: Compute the mode (maximum likelihood).
- **quantile**: Compute the selected quantile. The quantile value is in 0-100%
range and is specified by the ``quantile_value`` parameter.
- **rms**: Compute the root mean square (RMS).
- **stddev**: Compute the standard deviation.
- **max**: Compute the maximum value.
- **maxneg**: Compute the maximum of negative values only.
- **sum**: Compute the sum of values.
quantile_value
The quantile value if ``statistic="quantile"``.
empty : float
Set the value assigned to empty nodes [Default is NaN].
normalize : bool
Expand Down Expand Up @@ -103,6 +132,31 @@
- None if ``outgrid`` is set (grid output will be stored in file set by
``outgrid``)
"""
# The 'statistic' parameter is alised to the C option.
lookup_statistic = {
"mean": "a",
"mad": "d",
"full": "g",
"interquartile": "i",
"min": "l",
"minpos": "L",
"median": "m",
"number": "n",
"lms": "o",
"mode": "p",
"quantile": "q",
"rms": "r",
"stddev": "s",
"max": "u",
"maxneg": "U",
"sum": "z",
}
if statistic not in {*lookup_statistic.keys(), *lookup_statistic.values()}:
raise GMTInvalidInput(f"Unknown 'statistic' method: {statistic}.")

Check warning on line 155 in pygmt/src/binstats.py

View check run for this annotation

Codecov / codecov/patch

pygmt/src/binstats.py#L155

Added line #L155 was not covered by tests
kwargs["C"] = lookup_statistic.get(statistic, statistic)
if statistic == "quantile":
kwargs["C"] += f"{quantile_value}"

Check warning on line 158 in pygmt/src/binstats.py

View check run for this annotation

Codecov / codecov/patch

pygmt/src/binstats.py#L158

Added line #L158 was not covered by tests

with Session() as lib:
with (
lib.virtualfile_in(check_kind="vector", data=data) as vintbl,
Expand Down
4 changes: 2 additions & 2 deletions pygmt/tests/test_binstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_binstats_outgrid():
data="@capitals.gmt",
outgrid=tmpfile.name,
spacing=5,
statistic="z",
statistic="sum",
search_radius="1000k",
aspatial="2=population",
region="g",
Expand All @@ -36,7 +36,7 @@ def test_binstats_no_outgrid():
temp_grid = binstats(
data="@capitals.gmt",
spacing=5,
statistic="z",
statistic="sum",
search_radius="1000k",
aspatial="2=population",
region="g",
Expand Down