From 464b17003a72f4d5ad592de868fad50717a68774 Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Wed, 17 Jan 2024 21:59:34 +0800 Subject: [PATCH] pygmt.binstats: Make the 'statistic' parameter more Pythonic --- pygmt/src/binstats.py | 106 ++++++++++++++++++++++++++--------- pygmt/tests/test_binstats.py | 4 +- 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/pygmt/src/binstats.py b/pygmt/src/binstats.py index 6430d1cf787..6ce6867e763 100644 --- a/pygmt/src/binstats.py +++ b/pygmt/src/binstats.py @@ -2,14 +2,21 @@ binstats - Bin spatial data and determine statistics per bin """ +from typing import Literal + import xarray as xr from pygmt.clib import Session -from pygmt.helpers import build_arg_list, fmt_docstring, kwargs_to_strings, use_alias +from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers import ( + build_arg_list, + fmt_docstring, + kwargs_to_strings, + use_alias, +) @fmt_docstring @use_alias( - C="statistic", E="empty", I="spacing", N="normalize", @@ -24,7 +31,30 @@ r="registration", ) @kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma") -def binstats(data, outgrid: str | None = None, **kwargs) -> xr.DataArray | None: +def binstats( + data, + outgrid: str | None = None, + statistic: Literal[ + "mean", + "mad", + "full", + "interquartile", + "min", + "minpos", + "median", + "number", + "lms", + "mode", + "quantile", + "rms", + "stddev", + "max", + "maxneg", + "sum", + ] = "number", + quantile_value: float = 50, + **kwargs, +) -> xr.DataArray | None: r""" Bin spatial data and determine statistics per bin. @@ -46,30 +76,29 @@ def binstats(data, outgrid: str | None = None, **kwargs) -> xr.DataArray | None: A file name of an ASCII data table or a 2-D {table-classes}. {outgrid} - statistic : str - **a**\|\ **d**\|\ **g**\|\ **i**\|\ **l**\|\ **L**\|\ **m**\|\ **n**\ - \|\ **o**\|\ **p**\|\ **q**\ [*quant*]\|\ **r**\|\ **s**\|\ **u**\ - \|\ **U**\|\ **z**. - Choose the statistic that will be computed per node based on the - points that are within *radius* distance of the node. Select one of: + statistic + Choose the statistic that will be computed per node based on the points that are + within *radius* distance of the node. Select one of: - - **a** for mean (average) - - **d** for median absolute deviation (MAD) - - **g** for full (max-min) range - - **i** for 25-75% interquartile range - - **l** for minimum (low) - - **L** for minimum of positive values only - - **m** for median - - **n** the number of values - - **o** for LMS scale - - **p** for mode (maximum likelihood) - - **q** for selected quantile (append desired quantile in - 0-100% range [50]) - - **r** for the r.m.s. - - **s** for standard deviation - - **u** for maximum (upper) - - **U** for maximum of negative values only - - **z** for the sum + - **mean**: Compute the mean value (average). + - **mad**: Compute the median absolute deviation (MAD). + - **full**: Compute the full (max-min) range. + - **interquartile**: Compute the 25-75% interquartile range. + - **min**: Compute the minimum value. + - **minpos**: Compute the minimum of positive values only. + - **median**: Compute the median value. + - **number**: Compute the number of values. + - **lms**: Compute the LMS scale. + - **mode**: Compute the mode (maximum likelihood). + - **quantile**: Compute the selected quantile. The quantile value is in 0-100% + range and is specified by the ``quantile_value`` parameter. + - **rms**: Compute the root mean square (RMS). + - **stddev**: Compute the standard deviation. + - **max**: Compute the maximum value. + - **maxneg**: Compute the maximum of negative values only. + - **sum**: Compute the sum of values. + quantile_value + The quantile value if ``statistic="quantile"``. empty : float Set the value assigned to empty nodes [Default is NaN]. normalize : bool @@ -103,6 +132,31 @@ def binstats(data, outgrid: str | None = None, **kwargs) -> xr.DataArray | None: - None if ``outgrid`` is set (grid output will be stored in file set by ``outgrid``) """ + # The 'statistic' parameter is alised to the C option. + lookup_statistic = { + "mean": "a", + "mad": "d", + "full": "g", + "interquartile": "i", + "min": "l", + "minpos": "L", + "median": "m", + "number": "n", + "lms": "o", + "mode": "p", + "quantile": "q", + "rms": "r", + "stddev": "s", + "max": "u", + "maxneg": "U", + "sum": "z", + } + if statistic not in {*lookup_statistic.keys(), *lookup_statistic.values()}: + raise GMTInvalidInput(f"Unknown 'statistic' method: {statistic}.") + kwargs["C"] = lookup_statistic.get(statistic, statistic) + if statistic == "quantile": + kwargs["C"] += f"{quantile_value}" + with Session() as lib: with ( lib.virtualfile_in(check_kind="vector", data=data) as vintbl, diff --git a/pygmt/tests/test_binstats.py b/pygmt/tests/test_binstats.py index b8a1a39e8ad..beca8b00c0a 100644 --- a/pygmt/tests/test_binstats.py +++ b/pygmt/tests/test_binstats.py @@ -19,7 +19,7 @@ def test_binstats_outgrid(): data="@capitals.gmt", outgrid=tmpfile.name, spacing=5, - statistic="z", + statistic="sum", search_radius="1000k", aspatial="2=population", region="g", @@ -36,7 +36,7 @@ def test_binstats_no_outgrid(): temp_grid = binstats( data="@capitals.gmt", spacing=5, - statistic="z", + statistic="sum", search_radius="1000k", aspatial="2=population", region="g",