Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT: add validation in python for NormalAccessor #328

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ static
.pnp.loader.mjs
node_modules
*.h5
.env
data.json
8 changes: 8 additions & 0 deletions DEVELOP.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ Anywidget and its dependency ipywidgets handles the serialization from Python in

Push a new tag to the main branch of the format `v*`. A new version will be published to PyPI automatically.

## Contribute
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole doc is geared towards contributors; maybe put this section above ## Publishing and name it ## Linting?


We run linting and formating on all pull requests. You can perform that operation locally with `pre-commit`.
```
pip install -U pre-commit
pre-commit run --all-files
```

## Documentation website

The documentation website is generated with `mkdocs` and [`mkdocs-material`](https://squidfunk.github.io/mkdocs-material). After `poetry install`, you can serve the docs website locally with
Expand Down
12 changes: 12 additions & 0 deletions lonboard/_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ def serialize_float_accessor(data: Union[int, float, NDArray[np.floating]], obj)
return serialize_pyarrow_column(data, max_chunksize=obj._rows_per_chunk)


def serialize_normal_accessor(data: Union[List[Union[int, float]], np.ndarray], obj):
if data is None:
return None

if isinstance(data, list):
return data

assert isinstance(data, (pa.ChunkedArray, pa.Array))
return serialize_pyarrow_column(data, max_chunksize=obj._rows_per_chunk)


def serialize_table(data, obj):
assert isinstance(data, pa.Table), "expected pyarrow table"
return serialize_table_to_parquet(data, max_chunksize=obj._rows_per_chunk)
Expand All @@ -78,3 +89,4 @@ def infer_rows_per_chunk(table: pa.Table) -> int:
# TODO: rename as it's used for text as well
FLOAT_SERIALIZATION = {"to_json": serialize_float_accessor}
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
TABLE_SERIALIZATION = {"to_json": serialize_table}
NORMAL_SERIALIZATION = {"to_json": serialize_normal_accessor}
104 changes: 99 additions & 5 deletions lonboard/experimental/traits.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from __future__ import annotations

import warnings
from typing import Any, List, Tuple, Union

import matplotlib as mpl
import numpy as np
import pyarrow as pa
from traitlets.traitlets import TraitType

from lonboard._serialization import (
COLOR_SERIALIZATION,
)
from lonboard._serialization import COLOR_SERIALIZATION, NORMAL_SERIALIZATION
from lonboard.traits import FixedErrorTraitType


Expand Down Expand Up @@ -86,7 +85,7 @@ def validate(
if isinstance(value, str):
try:
c = mpl.colors.to_rgba(value) # type: ignore
except ValueError:
except Exception:
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
self.error(
obj,
value,
Expand All @@ -95,9 +94,104 @@ def validate(
"matplotlib.colors.to_rgba."
),
)
return

return tuple(map(int, (np.array(c) * 255).astype(np.uint8)))

self.error(obj, value)
assert False


class NormalAccessor(FixedErrorTraitType):
"""
A representation of a deck.gl normal accessor

Acceptable inputs:
- A numpy ndarray with two dimensions: The size of the
second dimension must be 3 i.e. `(N,3)`
- a pyarrow `FixedSizeListArray` or
`ChunkedArray` containing `FixedSizeListArray`s
where the size of the inner fixed size list 3.
"""

default_value = [0, 0, 1]
info_text = (
"List representing normal of each object, in [nx, ny, nz]. or numpy ndarray or "
"pyarrow FixedSizeList representing the normal of each object, in [nx, ny, nz]"
)

def __init__(
self: TraitType,
*args,
**kwargs: Any,
) -> None:
super().__init__(*args, **kwargs)
self.tag(sync=True, **NORMAL_SERIALIZATION)

def validate(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend writing some tests first, which will make it easier to sort out the logic in this function. You can find some related tests here.

The purpose of this validation function is to take whatever user input and both validate it and conform to a standard expectation of how we want to store it internally.

To align with deck.gl and other traits, we should allow a list with three scalar values, or a numpy array, or a pyarrow FixedSizeListArray.

But here the first thing you check is value.shape. Which won't exist for a list type. So it'll crash

In [1]: l = [1, 23, 50]

In [2]: l.shape
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[2], line 1
----> 1 l.shape

AttributeError: 'list' object has no attribute 'shape'

Hence why adding tests first might help

self, obj, value
) -> Union[Tuple[int, ...], List[int], pa.ChunkedArray, pa.FixedSizeListArray]:
"""
Values in acceptable types must be contiguous
(the same length for all values) and of floating point type
"""

fixed_list_size = 3

if isinstance(value, List):
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
if len(value) != 3:
self.error("Normal array must have length 3, (x,y,z)")

if not all(isinstance(item, float) for item in value):
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
self.error("All elements of Normal array must be floating point type")

return pa.FixedSizeListArray.from_arrays(value, fixed_list_size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't work...

The conceptual difference to understand is between a scalar which will be used as the value for all rows and a column, where a different value per row can be passed.

It's a bit confusing because a normal value has three elements, but an input value looking like [0, 0, 1] is still a scalar because that value applies to all rows.

To pass a scalar into @geoarrow/deck.gl-layers, we want to pass a JSON-like object, like [0, 0, 1], which it will interpret as a single scalar. Passing a pyarrow Array will cause @geoarrow/deck.gl-layers to interpret the input as a column instead. But then it will error because this "column" has only one row, whereas the table has multiple rows.

Instead you should be returning the input list or tuple

return value


if isinstance(value, np.ndarray):
if value.ndim != 2 or value.shape[1] != 3:
self.error(obj, value, info="Normal array must be 2D with shape (N,3)")

if not np.any(value == 1):
self.error(
obj, value, info="One of the normal vector elements must be 1."
)
Comment on lines +153 to +156
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a great idea for a validation step, but I don't think it's correct. There's no good documentation in deck.gl of the exact format expected. Most likely, these normal vectors are expected to be "normalized" which means that their length is expected to be 1. I.e. x^2 + y^2 + z^2 = 1. This means that almost never will one of the elements themselves be 1.

In any case I'd tend to think this is something we shouldn't validate, because deck.gl might be lenient and allow non-normalized vectors.


if not np.issubdtype(value.dtype, np.float32):
warnings.warn(
"""Warning: Numpy array should be floating point type.
Converting to float32 point pyarrow array"""
)
value = value.cast(pa.list_(pa.float32()))
naomatheus marked this conversation as resolved.
Show resolved Hide resolved

return pa.FixedSizeListArray.from_arrays(value, fixed_list_size)

if isinstance(value, (pa.ChunkedArray, pa.Array)):
if not pa.types.is_fixed_size_list(value.type):
self.error(
obj, value, info="Point pyarrow array must be a FixedSizeList."
)

if value.type.list_size != 3:
self.error(
obj,
value,
info=(
"""Normal pyarrow array must have a
FixedSizeList inner size of 3."""
),
)

if not pa.types.is_floating(value.type.value_type):
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
try:
value = value.cast(pa.list_(pa.float32()))
naomatheus marked this conversation as resolved.
Show resolved Hide resolved
except Exception:
self.error(
obj,
value,
info="""Failed to convert array
values to floating point type""",
)

return pa.FixedSizeListArray.from_arrays(value, fixed_list_size)
naomatheus marked this conversation as resolved.
Show resolved Hide resolved

self.error(obj, value)
assert False