-
Notifications
You must be signed in to change notification settings - Fork 109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add new statistics methods and band names in ImageData object #427
Changes from 4 commits
7d2175b
973f560
2d9ed08
fa7eeb2
9c76a51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,8 +20,14 @@ | |
from ..constants import WEB_MERCATOR_TMS, WGS84_CRS, BBox, Indexes, NoData | ||
from ..errors import ExpressionMixingWarning, NoOverviewWarning, TileOutsideBounds | ||
from ..expression import apply_expression, parse_expression | ||
from ..models import ImageData, ImageStatistics, Info | ||
from ..utils import create_cutline, has_alpha_band, has_mask_band | ||
from ..models import BandStatistics, ImageData, ImageStatistics, Info | ||
from ..utils import ( | ||
create_cutline, | ||
get_array_statistics, | ||
get_bands_names, | ||
has_alpha_band, | ||
has_mask_band, | ||
) | ||
from .base import BaseReader | ||
|
||
|
||
|
@@ -236,6 +242,11 @@ def stats( | |
rio_tiler.models.ImageStatistics: bands statistics. | ||
|
||
""" | ||
warnings.warn( | ||
"`stats` method will be removed and replaced by `statistics` in rio-tiler v3.0.0", | ||
DeprecationWarning, | ||
) | ||
|
||
kwargs = {**self._kwargs, **kwargs} | ||
|
||
hist_options = hist_options or {} | ||
|
@@ -250,6 +261,48 @@ def stats( | |
) | ||
return {b: ImageStatistics(**s) for b, s in stats.items()} | ||
|
||
def statistics( | ||
self, | ||
categorical: bool = False, | ||
categories: Optional[List[float]] = None, | ||
percentiles: List[int] = [2, 98], | ||
hist_options: Optional[Dict] = None, | ||
max_size: int = 1024, | ||
**kwargs: Any, | ||
) -> Dict[str, BandStatistics]: | ||
"""Return bands statistics from a dataset. | ||
|
||
Args: | ||
categorical (bool): treat input data as categorical data. Defaults to False. | ||
categories (list of numbers, optional): list of caterogies to return value for. | ||
percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`. | ||
hist_options (dict, optional): Options to forward to numpy.histogram function. | ||
max_size (int, optional): Limit the size of the longest dimension of the dataset read, respecting bounds X/Y aspect ratio. Defaults to 1024. | ||
kwargs (optional): Options to forward to `self.preview`. | ||
|
||
Returns: | ||
Dict[str, rio_tiler.models.BandStatistics]: bands statistics. | ||
|
||
""" | ||
kwargs = {**self._kwargs, **kwargs} | ||
|
||
data = self.preview(max_size=max_size, **kwargs) | ||
|
||
hist_options = hist_options or {} | ||
|
||
stats = get_array_statistics( | ||
data.as_masked(), | ||
categorical=categorical, | ||
categories=categories, | ||
percentiles=percentiles, | ||
**hist_options, | ||
) | ||
|
||
return { | ||
f"{data.band_names[ix]}": BandStatistics(**stats[ix]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we use e.g. with COGReader("cog.tif") as cog:
print(cog.statistics(expression="b1*2,b1"))
>> {
'b1*2': BandStatistics(...),
'b1': BandStatistics(...),
} |
||
for ix in range(len(stats)) | ||
} | ||
|
||
def tile( | ||
self, | ||
tile_x: int, | ||
|
@@ -371,7 +424,16 @@ def part( | |
if bounds_crs and bounds_crs != dst_crs: | ||
bbox = transform_bounds(bounds_crs, dst_crs, *bbox, densify_pts=21) | ||
|
||
return ImageData(data, mask, bounds=bbox, crs=dst_crs, assets=[self.filepath],) | ||
return ImageData( | ||
data, | ||
mask, | ||
bounds=bbox, | ||
crs=dst_crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
def preview( | ||
self, | ||
|
@@ -430,6 +492,9 @@ def preview( | |
bounds=self.dataset.bounds, | ||
crs=self.dataset.crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
def point( | ||
|
@@ -572,6 +637,9 @@ def read( | |
bounds=self.dataset.bounds, | ||
crs=self.dataset.crs, | ||
assets=[self.filepath], | ||
band_names=get_bands_names( | ||
indexes=indexes, expression=expression, count=data.shape[0] | ||
), | ||
) | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
|
||
import os | ||
from io import BytesIO | ||
from typing import Any, Dict, Generator, Optional, Sequence, Tuple, Union | ||
from typing import Any, Dict, Generator, List, Optional, Sequence, Tuple, Union | ||
|
||
import numpy | ||
from affine import Affine | ||
|
@@ -88,6 +88,130 @@ def _stats( | |
) | ||
|
||
|
||
def get_bands_names(
    indexes: Optional[Sequence[int]] = None,
    expression: Optional[str] = None,
    count: Optional[int] = None,
) -> List[str]:
    """Build the list of band names for an image.

    The first non-empty input wins, in this order of precedence:

    1. ``expression``: one name per comma-separated expression block.
    2. ``indexes``: the string form of each band index.
    3. ``count``: 1-based band numbers, ``"1"`` to ``str(count)``.

    Raises:
        ValueError: when none of the three inputs is provided (or all are empty/zero).

    """
    if expression:
        return expression.split(",")

    if indexes:
        return list(map(str, indexes))

    if count:
        return [str(band_number) for band_number in range(1, count + 1)]

    raise ValueError(
        "one of expression or indexes or count must be passed to define band names."
    )
|
||
|
||
def get_array_statistics(
    data: numpy.ma.MaskedArray,
    categorical: bool = False,
    categories: Optional[List[float]] = None,
    percentiles: Optional[List[int]] = None,
    **kwargs: Any,
) -> List[Dict[Any, Any]]:
    """Calculate per band array statistics.

    Args:
        data (numpy.ma.ndarray): input masked array data to get the statistics from.
            Arrays with fewer than 3 dimensions get a leading band axis added.
        categorical (bool): treat input data as categorical data. Defaults to False.
        categories (list of numbers, optional): list of categories to return value for.
            A category that is absent from the data is reported with a count of 0.
        percentiles (list of numbers, optional): list of percentile values to calculate. Defaults to `[2, 98]`.
        kwargs (optional): options to forward to `numpy.histogram` function (only applies for non-categorical data).

    Returns:
        list of dict: one dictionary per band. When a band has no valid
            (unmasked) pixel, the value-based statistics (min, max, mean, sum,
            std, median, majority, minority and percentiles) are NaN.

    Examples:
        >>> data = numpy.ma.zeros((1, 256, 256))
        >>> get_array_statistics(data)
        [
            {
                'min': 0.0,
                'max': 0.0,
                'mean': 0.0,
                'count': 65536.0,
                'sum': 0.0,
                'std': 0.0,
                'median': 0.0,
                'majority': 0.0,
                'minority': 0.0,
                'unique': 1.0,
                'percentile_2': 0.0,
                'percentile_98': 0.0,
                'histogram': [
                    [0, 0, 0, 0, 0, 65536, 0, 0, 0, 0],
                    [-0.5, -0.4, -0.3, -0.19999999999999996, -0.09999999999999998, 0.0, 0.10000000000000009, 0.20000000000000007, 0.30000000000000004, 0.4, 0.5]
                ],
                'valid_pixels': 65536.0,
                'masked_pixels': 0.0,
                'valid_percent': 100.0
            }
        ]

    """
    # Resolve the default here instead of in the signature so that no list
    # object is shared between calls.
    if percentiles is None:
        percentiles = [2, 98]

    if data.ndim < 3:
        data = numpy.expand_dims(data, axis=0)

    output: List[Dict[Any, Any]] = []
    percentiles_names = [f"percentile_{int(p)}" for p in percentiles]
    nan = float("nan")

    for b in range(data.shape[0]):
        band = data[b]
        # 1-D array of the unmasked values; computed once and reused for the
        # unique counts, the histogram and the percentiles.
        valid = band.compressed()
        keys, counts = numpy.unique(valid, return_counts=True)

        valid_pixels = float(valid.size)
        masked_pixels = float(band.size - valid.size)
        valid_percent = (
            round((valid_pixels / band.size) * 100, 2) if band.size else 0.0
        )

        if categorical:
            out_dict = dict(zip(keys.tolist(), counts.tolist()))
            h_keys = (
                numpy.array(categories).astype(keys.dtype) if categories else keys
            ).tolist()
            # A requested category may not be present in the data: report 0
            # instead of failing with a KeyError.
            histogram = [[out_dict.get(x, 0) for x in h_keys], h_keys]
        else:
            h_counts, h_edges = numpy.histogram(valid, **kwargs)
            histogram = [h_counts.tolist(), h_edges.tolist()]

        if valid.size:
            percentiles_values = numpy.percentile(valid, percentiles).tolist()
            values_stats = {
                "min": float(band.min()),
                "max": float(band.max()),
                "mean": float(band.mean()),
                "count": float(band.count()),
                "sum": float(band.sum()),
                "std": float(band.std()),
                "median": float(numpy.ma.median(band)),
                # argmax/argmin return the first occurrence, i.e. the smallest
                # value among ties because `keys` is sorted.
                "majority": float(keys[counts.argmax()]),
                "minority": float(keys[counts.argmin()]),
                "unique": float(counts.size),
            }
        else:
            # Fully masked band: reductions over an empty array would raise.
            percentiles_values = [nan] * len(percentiles_names)
            values_stats = {
                "min": nan,
                "max": nan,
                "mean": nan,
                "count": 0.0,
                "sum": nan,
                "std": nan,
                "median": nan,
                "majority": nan,
                "minority": nan,
                "unique": 0.0,
            }

        output.append(
            {
                **values_stats,
                **dict(zip(percentiles_names, percentiles_values)),
                "histogram": histogram,
                "valid_pixels": valid_pixels,
                "masked_pixels": masked_pixels,
                "valid_percent": valid_percent,
            }
        )

    return output
|
||
|
||
# https://github.com/OSGeo/gdal/blob/b1c9c12ad373e40b955162b45d704070d4ebf7b0/gdal/frmts/ingr/IngrTypes.cpp#L191 | ||
def _div_round_up(a: int, b: int) -> int:
    """Return ``a / b`` rounded up to the next integer (ceiling division)."""
    quotient, remainder = divmod(a, b)
    if remainder == 0:
        return quotient
    return quotient + 1
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `statistics` method only computes stats for the full dataset (using `preview`), but we can set `max_size` to control the resolution the user wants.