Skip to content

Commit

Permalink
tests/test_shape.py [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jul 9, 2024
1 parent 6d1293a commit 3e6789d
Show file tree
Hide file tree
Showing 18 changed files with 290 additions and 73 deletions.
8 changes: 8 additions & 0 deletions apis/python/src/tiledbsoma/_common_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ def shape(self) -> Tuple[int, ...]:
"""
return cast(Tuple[int, ...], tuple(self._handle.shape))

@property
def maxshape(self) -> Tuple[int, ...]:
    """Returns the maximum shape of this array as reported by its handle.

    The underlying handle's ``maxshape`` is converted to a tuple before
    being returned.

    NOTE(review): presumably this is the upper bound that ``shape`` may be
    grown to, one entry per dimension -- confirm and expand once the
    resize semantics are finalized.

    Lifecycle:
        Experimental.
    """
    handle_maxshape = self._handle.maxshape
    return cast(Tuple[int, ...], tuple(handle_maxshape))

def reshape(self, shape: Tuple[int, ...]) -> None:
"""Unsupported operation for this object type.
Expand Down
16 changes: 9 additions & 7 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def create(
*,
type: pa.DataType,
shape: Sequence[Union[int, None]], # XXX TOUCH
max_shape: Optional[Sequence[Union[int, None]]] = None, # XXX TOUCH
maxshape: Optional[Sequence[Union[int, None]]] = None, # XXX TOUCH
# XXX maxshape ...
platform_config: Optional[options.PlatformConfig] = None,
context: Optional[SOMATileDBContext] = None,
Expand All @@ -130,19 +130,23 @@ def create(
index_column_data = {}

shape_map = {}
if max_shape is None:
max_shape = [None] * len(shape)
if maxshape is None:
maxshape = [None] * len(shape)
else:
if len(shape) != len(maxshape):
raise ValueError(
f"shape and maxshape must have the same lengths; got {len(shape)} != {len(maxshape)}"
)

# XXX COMMENT
print("SNDA::CREATE shape", shape)
for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"
if dim_shape is None:
shape_map[dim_name] = 1 # XXX COMMENT
else:
shape_map[dim_name] = shape[dim_idx]

dim_max_shape = max_shape[dim_idx]
dim_max_shape = maxshape[dim_idx]

pa_field = pa.field(dim_name, pa.int64())
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
Expand All @@ -153,8 +157,6 @@ def create(
index_column_schema.append(pa_field)
index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]

print("SNDA::CREATE shape_map", shape_map)

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
)
Expand Down
9 changes: 9 additions & 0 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ def dim_names(self) -> Tuple[str, ...]:
def shape(self) -> Tuple[int, ...]:
return tuple(self._handle.shape)

@property
def maxshape(self) -> Tuple[int, ...]:
    """Return the wrapped handle's ``maxshape`` as a tuple."""
    raw_maxshape = self._handle.maxshape
    return tuple(raw_maxshape)


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
"""Wrapper around a Pybind11 SOMADataFrame handle."""
Expand All @@ -416,6 +420,11 @@ def shape(self) -> Tuple[int, ...]:
# Shape is not implemented for DataFrames
raise NotImplementedError

@property
def maxshape(self) -> Tuple[int, ...]:
    """Always raises ``NotImplementedError``.

    NOTE(review): the original author flagged this as an open question --
    confirm whether maxshape should stay unsupported for DataFrames.
    """
    raise NotImplementedError()


class DenseNDArrayWrapper(SOMAArrayWrapper[clib.SOMADenseNDArray]):
"""Wrapper around a Pybind11 DenseNDArrayWrapper handle."""
Expand Down
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,12 +1307,12 @@ def _create_from_matrix(
# A SparseNDArray must be appendable in soma.io.
# XXX TOUCH
shape = matrix.shape
max_shape = [None for _ in matrix.shape]
maxshape = [None for _ in matrix.shape]
soma_ndarray = cls.create(
uri,
type=pa.from_numpy_dtype(matrix.dtype),
shape=shape,
max_shape=max_shape,
maxshape=maxshape,
platform_config=platform_config,
context=context,
)
Expand Down
2 changes: 2 additions & 0 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,8 @@ void load_soma_array(py::module& m) {

.def_property_readonly("shape", &SOMAArray::shape)

.def_property_readonly("maxshape", &SOMAArray::maxshape)

.def_property_readonly("uri", &SOMAArray::uri)

.def_property_readonly("column_names", &SOMAArray::column_names)
Expand Down
189 changes: 189 additions & 0 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from __future__ import annotations

import pyarrow as pa
import pytest

import tiledbsoma as soma

from tests._util import maybe_raises

# ================================================================
# TODO:

# ----------------------------------------------------------------
# * snda create w/ shape & maxshape
# o UT shape <, ==, > maxshape
# * dnda ditto
# o UT shape <, ==, > maxshape
# * sdf ditto
# o UT shape <, ==, > maxshape
# o UT partials w/ extra dims

# ----------------------------------------------------------------
# * all 3:
# o UT OOB writes
# o UT OOB reads

# ----------------------------------------------------------------
# * used_shape accessor:
# o as-is
# o deprecation notice ...
# * shape accessor:
# o ret new if avail
# o else ret old w/ deprecation notice ... ?
# * ned accessor
# o as is
# * maxshape accessor
# o new
# ! spell it maxshape

# ----------------------------------------------------------------
# * resize mutator
# o NotImplementedError for old arrays
# o ValueError if shrinking CD
# o ValueError if bigger than domain

# ----------------------------------------------------------------
# * tiledbsoma_upgrade_shape for snda/dnda
# o array.schema.version to see if needed
# o use core storage-version-update logic ...
# o fail if outside domain
# * tiledbsoma_upgrade_shape for sdf
# o arg name is domain not shape

# ----------------------------------------------------------------
# * tiledbsoma.io.resize ...
# o per array
# o do-it-all w/ new nobs/nvar -- ?
# ================================================================


def _assert_maxshape_matches(actual, shape, maxshape):
    """Check a reported maxshape against the one requested at create time.

    If maxshape is None, or None in any slot, we expect that slot to be set
    to a big signed int32. (There are details on the exact value of that
    number, involving R compatibility, and leaving room for a single tile
    capacity, etc ... we could check for some magic value but it suffices
    to check that it's over 2 billion.)

    Args:
        actual: The ``maxshape`` read back from the opened array.
        shape: The ``shape`` the array was created with.
        maxshape: The ``maxshape`` passed to ``create``; may be ``None`` or
            contain ``None`` in individual slots.
    """
    requested = (None,) * len(shape) if maxshape is None else maxshape
    # zip() would silently truncate on a rank mismatch, so check it first.
    assert len(actual) == len(shape)
    for got, want in zip(actual, requested):
        if want is None:
            assert got > 2_000_000_000
        else:
            assert got == want


@pytest.mark.parametrize(
    "shape_maxshape_exc",
    [
        # maxshape omitted, or unspecified in every slot
        [(100, 200), None, None],
        [(100, 200), (None, None), None],
        # shape == maxshape
        [(100, 200), (100, 200), None],
        # shape < maxshape, one dimension at a time
        [(100, 200), (1000, 200), None],
        [(100, 200), (100, 2000), None],
        # shape > maxshape, one dimension at a time
        [(100, 200), (10, 200), soma.SOMAError],
        [(100, 200), (100, 20), soma.SOMAError],
        # shape and maxshape of different lengths
        [(100, 200), (100,), ValueError],
        [(100, 200), (100, 200, 300), ValueError],
    ],
)
def test_sparse_nd_array_create(
    tmp_path,
    shape_maxshape_exc,
):
    """Create a SparseNDArray with shape/maxshape and exercise its accessors.

    For the cases expected to fail, only the exception from ``create`` is
    checked. For the others, ``shape``, ``maxshape`` and
    ``non_empty_domain`` are checked before and after a small write, and
    out-of-bounds reads and writes are expected to raise ``SOMAError``.
    """
    shape, maxshape, exc = shape_maxshape_exc
    uri = tmp_path.as_posix()
    element_type = pa.float32()

    # Create the array, closing the returned handle right away so it does not
    # stay open while the array is re-opened below.
    with maybe_raises(exc):
        soma.SparseNDArray.create(
            uri, type=element_type, shape=shape, maxshape=maxshape
        ).close()
    if exc is not None:
        return

    assert soma.SparseNDArray.exists(uri)

    # Test the various accessors
    with soma.SparseNDArray.open(uri) as snda:
        assert snda.shape == shape

        # TODO: need a saved-off array in UT-data land

        _assert_maxshape_matches(snda.maxshape, shape, maxshape)

        # TODO: used_shape
        #   o as-is
        #   o deprecation notice ...

        # No data have been written for this test case
        assert snda.non_empty_domain() == ((0, 0), (0, 0))

    # Write some data
    with soma.SparseNDArray.open(uri, "w") as snda:
        table = pa.Table.from_pydict(
            {
                "soma_dim_0": [0, 1],
                "soma_dim_1": [2, 3],
                "soma_data": [4, 5],
            }
        )
        snda.write(table)

    # Test the various accessors
    with soma.SparseNDArray.open(uri) as snda:
        assert snda.shape == shape
        _assert_maxshape_matches(snda.maxshape, shape, maxshape)
        assert snda.non_empty_domain() == ((0, 1), (2, 3))

    # Test reads out of bounds
    with soma.SparseNDArray.open(uri) as snda:
        coords = ((shape[0] + 10,), (shape[1] + 20,))
        # Only the call expected to fail lives inside pytest.raises.
        with pytest.raises(soma.SOMAError):
            snda.read(coords)

    # Test writes out of bounds
    with soma.SparseNDArray.open(uri, "w") as snda:
        table = pa.Table.from_pydict(
            {
                "soma_dim_0": [shape[0] + 10],
                "soma_dim_1": [shape[1] + 20],
                "soma_data": [30],
            }
        )
        with pytest.raises(soma.SOMAError):
            snda.write(table)


# ----------------------------------------------------------------
# XXX DNDA all

# ----------------------------------------------------------------
# XXX SDF all
# XXX partials w/ extra dims

# ----------------------------------------------------------------
# * resize mutator
# o NotImplementedError for old arrays
# o ValueError if shrinking CD
# o ValueError if bigger than domain

# ----------------------------------------------------------------
# * tiledbsoma_upgrade_shape for snda/dnda
# o array.schema.version to see if needed
# o use core storage-version-update logic ...
# o fail if outside domain
# * tiledbsoma_upgrade_shape for sdf
# o arg name is domain not shape

# ----------------------------------------------------------------
# * tiledbsoma.io.resize ...
# o per array
# o do-it-all w/ new nobs/nvar -- ?
2 changes: 1 addition & 1 deletion libtiledbsoma/src/reindexer/reindexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class IntIndexer {

lookup(keys.data(), results.data(), keys.size());
}
IntIndexer(){};
IntIndexer() {};
IntIndexer(std::shared_ptr<tiledbsoma::SOMAContext> context)
: context_(context) {
}
Expand Down
53 changes: 28 additions & 25 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ using namespace tiledb;
//= public static
//===================================================================

std::unique_ptr<SOMAArray> SOMAArray::create( // XXX TOUCH
std::unique_ptr<SOMAArray> SOMAArray::create( // XXX TOUCH
std::shared_ptr<SOMAContext> ctx,
std::string_view uri,
ArraySchema schema,
Expand Down Expand Up @@ -805,35 +805,38 @@ uint64_t SOMAArray::nnz_slow() {
return total_cell_num;
}

// XXX comment more
std::vector<int64_t> SOMAArray::shape() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();

printf("SOMAArray::shape ENTER\n");

// ----------------------------------------------------------------
auto current_domain = tiledb::ArraySchemaExperimental::current_domain(*ctx_->tiledb_ctx(), arr_->schema());
auto current_domain = tiledb::ArraySchemaExperimental::current_domain(
*ctx_->tiledb_ctx(), arr_->schema());
if (current_domain.is_empty()) {
printf("CD IS EMPTY\n");
} else {
printf("CD IS NON-EMPTY\n");
auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
printf("CD TYPE IS NOT NDRECTANGLE, BAD!\n");
} else {
printf("CD TYPE IS NDRECTANGLE, GOOD!\n");

NDRectangle ndrect = current_domain.ndrectangle();

for (auto dimension_name : dimension_names()) {
// TODO: non-int64 types for SOMADataFrame extra dims.
// This simply needs to be integrated with switch statements as in the legacy code below.
auto range = ndrect.range<int64_t>(dimension_name);
printf("CD DIM %s %ld : %ld\n", dimension_name.c_str(), range[0], range[1]);
}
}
// XXX comment
return maxshape();
}

auto t = current_domain.type();
if (t != TILEDB_NDRECTANGLE) {
throw TileDBSOMAError("current_domain type is not NDRECTANGLE");
}

NDRectangle ndrect = current_domain.ndrectangle();

for (auto dimension_name : dimension_names()) {
// TODO: non-int64 types for SOMADataFrame extra dims.
// This simply needs to be integrated with switch statements as in the
// legacy code below.
auto range = ndrect.range<int64_t>(dimension_name);
result.push_back(range[1] + 1);
}
// ----------------------------------------------------------------
return result;
}

// XXX comment more
std::vector<int64_t> SOMAArray::maxshape() {
std::vector<int64_t> result;
auto dimensions = mq_->schema()->domain().dimensions();

for (const auto& dim : dimensions) {
switch (dim.type()) {
Expand Down
7 changes: 7 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,13 @@ class SOMAArray : public SOMAObject {
*/
std::vector<int64_t> shape();

/**
 * @brief Get the maximum shape of the array.
 *
 * shape() returns this value when the array schema has no current domain
 * set. NOTE(review): appears to be computed from the dimensions of the
 * schema's domain -- confirm against the implementation.
 *
 * @return A vector of int64_t; presumably one entry per dimension -- TODO
 * confirm.
 */
std::vector<int64_t> maxshape();

/**
* @brief Get the number of dimensions.
*
Expand Down
Loading

0 comments on commit 3e6789d

Please sign in to comment.