cogeotiff · vincentsarago · Oct 12, 2021 · Sep 28, 2021 · Oct 1, 2021 · Oct 1, 2021
diff --git a/rio_tiler/io/cogeo.py b/rio_tiler/io/cogeo.py
@@ -20,8 +20,14 @@
 from ..constants import WEB_MERCATOR_TMS, WGS84_CRS, BBox, Indexes, NoData
 from ..errors import ExpressionMixingWarning, NoOverviewWarning, TileOutsideBounds
 from ..expression import apply_expression, parse_expression
-from ..models import ImageData, ImageStatistics, Info
-from ..utils import create_cutline, has_alpha_band, has_mask_band
+from ..models import BandStatistics, ImageData, ImageStatistics, Info
+from ..utils import (
+    create_cutline,
+    get_array_statistics,
+    get_bands_names,
+    has_alpha_band,
+    has_mask_band,
+)
 from .base import BaseReader
 
 
@@ -236,6 +242,11 @@ def stats(
             rio_tiler.models.ImageStatistics: bands statistics.
 
         """
+        warnings.warn(
+            "`stats` method will be removed and replaced by `statistics` in rio-tiler v3.0.0",
+            DeprecationWarning,
+        )
+
         kwargs = {**self._kwargs, **kwargs}
 
         hist_options = hist_options or {}
@@ -250,6 +261,46 @@ def stats(
         )
         return {b: ImageStatistics(**s) for b, s in stats.items()}
 
+    def statistics(
+        self,
+        categorical: bool = False,
+        categories: Optional[List[float]] = None,
+        percentiles: List[int] = [2, 98],
+        hist_options: Optional[Dict] = None,
+        max_size: int = 1024,
+        **kwargs: Any,
+    ) -> Dict[str, BandStatistics]:
+        """Return bands statistics from a COG.
+
+        Args:
+
+
+            hist_options (dict, optional): Options to forward to numpy.histogram function.
+            kwargs (optional): Options to forward to `self.preview`.
+
+        Returns:
+            Dict[str, rio_tiler.models.BandStatistics]: bands statistics.
+
+        """
+        kwargs = {**self._kwargs, **kwargs}
+
+        data = self.preview(max_size=max_size, **kwargs)
+
+        hist_options = hist_options or {}
+
+        stats = get_array_statistics(
+            data.as_masked(),
+            categorical=categorical,
+            categories=categories,
+            percentiles=percentiles,
+            **hist_options,
+        )
+
+        return {
+            f"{data.band_names[ix]}": BandStatistics(**stats[ix])
+            for ix in range(len(stats))
+        }
+
     def tile(
         self,
         tile_x: int,
@@ -371,7 +422,16 @@ def part(
         if bounds_crs and bounds_crs != dst_crs:
             bbox = transform_bounds(bounds_crs, dst_crs, *bbox, densify_pts=21)
 
-        return ImageData(data, mask, bounds=bbox, crs=dst_crs, assets=[self.filepath],)
+        return ImageData(
+            data,
+            mask,
+            bounds=bbox,
+            crs=dst_crs,
+            assets=[self.filepath],
+            band_names=get_bands_names(
+                indexes=indexes, expression=expression, count=data.shape[0]
+            ),
+        )
 
     def preview(
         self,
@@ -430,6 +490,9 @@ def preview(
             bounds=self.dataset.bounds,
             crs=self.dataset.crs,
             assets=[self.filepath],
+            band_names=get_bands_names(
+                indexes=indexes, expression=expression, count=data.shape[0]
+            ),
         )
 
     def point(
@@ -572,6 +635,9 @@ def read(
             bounds=self.dataset.bounds,
             crs=self.dataset.crs,
             assets=[self.filepath],
+            band_names=get_bands_names(
+                indexes=indexes, expression=expression, count=data.shape[0]
+            ),
         )
 
 

diff --git a/rio_tiler/models.py b/rio_tiler/models.py
@@ -83,6 +83,30 @@ class ImageStatistics(RioTilerBaseModel):
     valid_percent: float
 
 
+class BandStatistics(RioTilerBaseModel):
+    """Image statistics"""
+
+    min: float
+    max: float
+    mean: float
+    count: float
+    sum: float
+    std: float
+    median: float
+    majority: float
+    minority: float
+    unique: float
+    histogram: List[List[NumType]]
+    valid_percent: float
+    masked_pixels: float
+    valid_pixels: float
+
+    class Config:
+        """Config for model."""
+
+        extra = "allow"
+
+
 class Metadata(Info):
     """Dataset metadata and statistics."""
 
@@ -114,6 +138,7 @@ class ImageData:
     bounds: Optional[BoundingBox] = attr.ib(default=None, converter=to_coordsbbox)
     crs: Optional[CRS] = attr.ib(default=None)
     metadata: Optional[Dict] = attr.ib(factory=dict)
+    band_names: Optional[List[str]] = attr.ib()
 
     @data.validator
     def _validate_data(self, attribute, value):
@@ -123,6 +148,10 @@ def _validate_data(self, attribute, value):
                 "ImageData data has to be an array in form of (count, height, width)"
             )
 
+    @band_names.default
+    def _default_names(self):
+        return [f"{ix + 1}" for ix in range(self.count)]
+
     @mask.default
     def _default_mask(self):
         return numpy.zeros((self.height, self.width), dtype="uint8") + 255

diff --git a/rio_tiler/utils.py b/rio_tiler/utils.py
@@ -2,7 +2,7 @@
 
 import os
 from io import BytesIO
-from typing import Any, Dict, Generator, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union
 
 import numpy
 from affine import Affine
@@ -88,6 +88,116 @@ def _stats(
     )
 
 
+def get_bands_names(
+    indexes: Optional[Sequence[int]] = None,
+    expression: Optional[str] = None,
+    count: Optional[int] = None,
+) -> List[str]:
+    """Define bands names based on expression, indexes or band count."""
+    if expression:
+        return expression.split(",")
+
+    elif indexes:
+        return [str(idx) for idx in indexes]
+
+    elif count:
+        return [str(idx + 1) for idx in range(count)]
+
+    else:
+        raise ValueError(
+            "one of expression or indexes or count must be passed to define band names."
+        )
+
+
+def get_array_statistics(
+    data: numpy.ma.array,
+    categorical: bool = False,
+    categories: Optional[List[float]] = None,
+    percentiles: List[int] = [2, 98],
+    **kwargs: Any,
+) -> List[Dict[Any, Any]]:
+    """Calculate array statistics.
+
+    Args:
+        data (numpy.ndarray): Input array data to get the stats from.
+        categorical (bool): treat input data as categorical data. Defaults to False.
+        categories (list of numbers, optional)
+        percentiles (list of numbers, optional): List of percentile values to calculate. Defaults to `[2, 98]`.
+        kwargs (optional): Options to forward to numpy.histogram function (only applies for non-categorical data).
+
+    Returns:
+        list of dict
+
+    Examples:
+        >>> {
+            'percentiles': [38, 147],
+            'min': 20,
+            'max': 180,
+            'std': 28.123562304138662,
+            'histogram': [
+                [1625, 219241, 28344, 15808, 12325, 10687, 8535, 7348, 4656, 1208],
+                [20.0, 36.0, 52.0, 68.0, 84.0, 100.0, 116.0, 132.0, 148.0, 164.0, 180.0]
+            ],
+            'valid_percent': 0.5
+        }
+
+    """
+    if len(data.shape) < 3:
+        data = numpy.expand_dims(data, axis=0)
+
+    output: List[Dict[Any, Any]] = []
+    percentiles_names = [f"percentile_{int(p)}" for p in percentiles]
+
+    for b in range(data.shape[0]):
+        keys, counts = numpy.unique(data[b].compressed(), return_counts=True)
+
+        valid_pixels = float(numpy.ma.count(data[b]))
+        masked_pixels = float(numpy.ma.count_masked(data[b]))
+        valid_percent = round((valid_pixels / data[b].size) * 100, 2)
+        info_px = {
+            "valid_pixels": valid_pixels,
+            "masked_pixels": masked_pixels,
+            "valid_percent": valid_percent,
+        }
+
+        if categorical:
+            out_dict = dict(zip(keys.tolist(), counts.tolist()))
+            h_keys = (
+                numpy.array(categories).astype(keys.dtype) if categories else keys
+            ).tolist()
+            histogram = [
+                [out_dict[x] for x in h_keys],
+                h_keys,
+            ]
+        else:
+            h_counts, h_keys = numpy.histogram(data[b][~data[b].mask], **kwargs)
+            histogram = [h_counts.tolist(), h_keys.tolist()]
+
+        percentiles_values = numpy.percentile(
+            data[b].compressed(), percentiles
+        ).tolist()
+
+        output.append(
+            {
+                "min": float(data[b].min()),
+                "max": float(data[b].max()),
+                "mean": float(data[b].mean()),
+                "count": float(data[b].count()),
+                "sum": float(data[b].sum()),
+                "std": float(data[b].std()),
+                "median": float(numpy.ma.median(data[b])),
+                "majority": float(keys[counts.tolist().index(counts.max())].tolist()),
+                "minority": float(keys[counts.tolist().index(counts.min())].tolist()),
+                "unique": float(counts.size),
+                **dict(zip(percentiles_names, percentiles_values)),
+                "histogram": histogram,
+                **info_px,
+            }
+        )
+
+    return output
+
+
 # https://github.com/OSGeo/gdal/blob/b1c9c12ad373e40b955162b45d704070d4ebf7b0/gdal/frmts/ingr/IngrTypes.cpp#L191
 def _div_round_up(a: int, b: int) -> int:
     return (a // b) if (a % b) == 0 else (a // b) + 1