-
Notifications
You must be signed in to change notification settings - Fork 108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add new statistics methods and band names in ImageData object #427
Changes from 1 commit
7d2175b
973f560
2d9ed08
fa7eeb2
9c76a51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,8 +20,14 @@ | |
from ..constants import WEB_MERCATOR_TMS, WGS84_CRS, BBox, Indexes, NoData | ||
from ..errors import ExpressionMixingWarning, NoOverviewWarning, TileOutsideBounds | ||
from ..expression import apply_expression, parse_expression | ||
from ..models import ImageData, ImageStatistics, Info | ||
from ..utils import create_cutline, has_alpha_band, has_mask_band | ||
from ..models import BandStatistics, ImageData, ImageStatistics, Info | ||
from ..utils import ( | ||
create_cutline, | ||
get_array_statistics, | ||
get_bands_names, | ||
has_alpha_band, | ||
has_mask_band, | ||
) | ||
from .base import BaseReader | ||
|
||
|
||
|
@@ -236,6 +242,11 @@ def stats( | |
rio_tiler.models.ImageStatistics: bands statistics. | ||
|
||
""" | ||
warnings.warn( | ||
"`stats` method will be removed and replaced by `statistics` in rio-tiler v3.0.0", | ||
DeprecationWarning, | ||
) | ||
|
||
kwargs = {**self._kwargs, **kwargs} | ||
|
||
hist_options = hist_options or {} | ||
|
@@ -250,6 +261,46 @@ def stats( | |
) | ||
return {b: ImageStatistics(**s) for b, s in stats.items()} | ||
|
||
def statistics( | ||
self, | ||
categorical: bool = False, | ||
categories: Optional[List[float]] = None, | ||
percentiles: List[int] = [2, 98], | ||
hist_options: Optional[Dict] = None, | ||
max_size: int = 1024, | ||
**kwargs: Any, | ||
) -> Dict[str, BandStatistics]: | ||
"""Return bands statistics from a COG. | ||
|
||
Args: | ||
|
||
|
||
hist_options (dict, optional): Options to forward to numpy.histogram function. | ||
kwargs (optional): Options to forward to `self.preview`. | ||
|
||
Returns: | ||
Dict[str, rio_tiler.models.BandStatistics]: bands statistics. | ||
|
||
""" | ||
kwargs = {**self._kwargs, **kwargs} | ||
|
||
data = self.preview(max_size=max_size, **kwargs) | ||
|
||
hist_options = hist_options or {} | ||
|
||
stats = get_array_statistics( | ||
data.as_masked(), | ||
categorical=categorical, | ||
categories=categories, | ||
percentiles=percentiles, | ||
**hist_options, | ||
) | ||
|
||
return { | ||
f"{data.band_names[ix]}": BandStatistics(**stats[ix]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we use e.g. with COGReader("cog.tif") as cog:
print(cog.statistics(expression="b1*2,b1"))
>> {
'b1*2': BandStatistics(...),
'b1': BandStatistics(...),
} |
||
for ix in range(len(stats)) | ||
} | ||
|
||
def tile( | ||
self, | ||
tile_x: int, | ||
|
@@ -371,7 +422,16 @@ def part( | |
if bounds_crs and bounds_crs != dst_crs: | ||
bbox = transform_bounds(bounds_crs, dst_crs, *bbox, densify_pts=21) | ||
|
||
return ImageData(data, mask, bounds=bbox, crs=dst_crs, assets=[self.filepath],) | ||
return ImageData( | ||
data, | ||
mask, | ||
bounds=bbox, | ||
crs=dst_crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
def preview( | ||
self, | ||
|
@@ -430,6 +490,9 @@ def preview( | |
bounds=self.dataset.bounds, | ||
crs=self.dataset.crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
def point( | ||
|
@@ -572,6 +635,9 @@ def read( | |
bounds=self.dataset.bounds, | ||
crs=self.dataset.crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,30 @@ class ImageStatistics(RioTilerBaseModel): | |
valid_percent: float | ||
|
||
|
||
class BandStatistics(RioTilerBaseModel): | ||
"""Image statistics""" | ||
|
||
min: float | ||
max: float | ||
mean: float | ||
count: float | ||
sum: float | ||
std: float | ||
median: float | ||
majority: float | ||
minority: float | ||
unique: float | ||
histogram: List[List[NumType]] | ||
valid_percent: float | ||
masked_pixels: float | ||
valid_pixels: float | ||
|
||
class Config: | ||
"""Config for model.""" | ||
|
||
extra = "allow" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we allow extra for |
||
|
||
|
||
class Metadata(Info): | ||
"""Dataset metadata and statistics.""" | ||
|
||
|
@@ -114,6 +138,7 @@ class ImageData: | |
bounds: Optional[BoundingBox] = attr.ib(default=None, converter=to_coordsbbox) | ||
crs: Optional[CRS] = attr.ib(default=None) | ||
metadata: Optional[Dict] = attr.ib(factory=dict) | ||
band_names: Optional[List[str]] = attr.ib() | ||
|
||
@data.validator | ||
def _validate_data(self, attribute, value): | ||
|
@@ -123,6 +148,10 @@ def _validate_data(self, attribute, value): | |
"ImageData data has to be an array in form of (count, height, width)" | ||
) | ||
|
||
@band_names.default | ||
def _default_names(self): | ||
return [f"{ix + 1}" for ix in range(self.count)] | ||
|
||
@mask.default | ||
def _default_mask(self): | ||
return numpy.zeros((self.height, self.width), dtype="uint8") + 255 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
|
||
import os | ||
from io import BytesIO | ||
from typing import Any, Dict, Generator, Optional, Sequence, Tuple, Union | ||
from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union | ||
|
||
import numpy | ||
from affine import Affine | ||
|
@@ -88,6 +88,116 @@ def _stats( | |
) | ||
|
||
|
||
def get_bands_names( | ||
indexes: Optional[Sequence[int]] = None, | ||
expression: Optional[str] = None, | ||
count: Optional[int] = None, | ||
) -> List[str]: | ||
"""Define bands names based on expression, indexes or band count.""" | ||
if expression: | ||
return expression.split(",") | ||
|
||
elif indexes: | ||
return [str(idx) for idx in indexes] | ||
|
||
elif count: | ||
return [str(idx + 1) for idx in range(count)] | ||
|
||
else: | ||
raise ValueError( | ||
"one of expression or indexes or count must be passed to define band names." | ||
) | ||
|
||
|
||
def get_array_statistics( | ||
data: numpy.ma.array, | ||
categorical: bool = False, | ||
categories: Optional[List[float]] = None, | ||
percentiles: List[int] = [2, 98], | ||
**kwargs: Any, | ||
) -> List[Dict[Any, Any]]: | ||
"""Calculate array statistics. | ||
|
||
Args: | ||
data (numpy.ndarray): Input array data to get the stats from. | ||
categorical (bool): treat input data as categorical data. Defaults to False. | ||
categories (list of numbers, optional) | ||
vincentsarago marked this conversation as resolved.
Show resolved
Hide resolved
|
||
percentiles (list of numbers, optional): List of percentile values to calculate. Defaults to `[2, 98]`. | ||
kwargs (optional): Options to forward to numpy.histogram function (only applies for non-categorical data). | ||
|
||
Returns: | ||
list of dict | ||
|
||
Examples: | ||
>>> { | ||
'percentiles': [38, 147], | ||
'min': 20, | ||
'max': 180, | ||
'std': 28.123562304138662, | ||
'histogram': [ | ||
[1625, 219241, 28344, 15808, 12325, 10687, 8535, 7348, 4656, 1208], | ||
[20.0, 36.0, 52.0, 68.0, 84.0, 100.0, 116.0, 132.0, 148.0, 164.0, 180.0] | ||
], | ||
'valid_percent': 0.5 | ||
} | ||
vincentsarago marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
""" | ||
if len(data.shape) < 3: | ||
data = numpy.expand_dims(data, axis=0) | ||
|
||
output: List[Dict[Any, Any]] = [] | ||
percentiles_names = [f"percentile_{int(p)}" for p in percentiles] | ||
|
||
for b in range(data.shape[0]): | ||
keys, counts = numpy.unique(data[b].compressed(), return_counts=True) | ||
|
||
valid_pixels = float(numpy.ma.count(data[b])) | ||
masked_pixels = float(numpy.ma.count_masked(data[b])) | ||
valid_percent = round((valid_pixels / data[b].size) * 100, 2) | ||
info_px = { | ||
"valid_pixels": valid_pixels, | ||
"masked_pixels": masked_pixels, | ||
"valid_percent": valid_percent, | ||
} | ||
|
||
if categorical: | ||
out_dict = dict(zip(keys.tolist(), counts.tolist())) | ||
h_keys = ( | ||
numpy.array(categories).astype(keys.dtype) if categories else keys | ||
).tolist() | ||
histogram = [ | ||
[out_dict[x] for x in h_keys], | ||
h_keys, | ||
] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the data is set as |
||
else: | ||
h_counts, h_keys = numpy.histogram(data[b][~data[b].mask], **kwargs) | ||
histogram = [h_counts.tolist(), h_keys.tolist()] | ||
|
||
percentiles_values = numpy.percentile( | ||
data[b].compressed(), percentiles | ||
).tolist() | ||
|
||
output.append( | ||
{ | ||
"min": float(data[b].min()), | ||
"max": float(data[b].max()), | ||
"mean": float(data[b].mean()), | ||
"count": float(data[b].count()), | ||
"sum": float(data[b].sum()), | ||
"std": float(data[b].std()), | ||
"median": float(numpy.ma.median(data[b])), | ||
"majority": float(keys[counts.tolist().index(counts.max())].tolist()), | ||
"minority": float(keys[counts.tolist().index(counts.min())].tolist()), | ||
"unique": float(counts.size), | ||
**dict(zip(percentiles_names, percentiles_values)), | ||
"histogram": histogram, | ||
**info_px, | ||
} | ||
) | ||
|
||
return output | ||
|
||
|
||
# https://github.com/OSGeo/gdal/blob/b1c9c12ad373e40b955162b45d704070d4ebf7b0/gdal/frmts/ingr/IngrTypes.cpp#L191 | ||
def _div_round_up(a: int, b: int) -> int: | ||
return (a // b) if (a % b) == 0 else (a // b) + 1 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the
statistics
method will only get stats for the full dataset (using preview) but we can setmax_size
to control the resolution the user wants