Skip to content

Commit

Permalink
Temp stacking for dataframe shape RFC
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnkerl committed Nov 5, 2024
1 parent c0f1b44 commit 086cf3e
Show file tree
Hide file tree
Showing 9 changed files with 28 additions and 46 deletions.
2 changes: 1 addition & 1 deletion apis/python/notebooks/tutorial_soma_objects.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 27,
"id": "ab458224-5353-4e15-baa9-46689729e071",
"metadata": {
"tags": []
Expand Down
15 changes: 6 additions & 9 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,22 +399,19 @@ def count(self) -> int:
return cast(DataFrameWrapper, self._handle).count

@property
def _maybe_soma_joinid_shape(self) -> Optional[int]:
"""An internal helper method that returns the shape
value along the ``soma_joinid`` index column, if the ``DataFrame``
has one, else ``None``.
def shape(self) -> Optional[int]:
"""Returns the shape for the ``soma_joinid`` index column, if it
is an index column in the dataframe; otherwise, returns ``None``.
Lifecycle:
Experimental.
"""
return self._handle.maybe_soma_joinid_shape

@property
def _maybe_soma_joinid_maxshape(self) -> Optional[int]:
"""An internal helper method that returns the maxshape
value along the ``soma_joinid`` index column, if the ``DataFrame``
has one, else ``None``.
def maxshape(self) -> Optional[int]:
"""Returns the maxshape for the ``soma_joinid`` index column, if it
is an index column in the dataframe; otherwise, returns ``None``.
Lifecycle:
Experimental.
Expand Down
6 changes: 3 additions & 3 deletions apis/python/src/tiledbsoma/io/shaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def resize_experiment(
with tiledbsoma.Experiment.open(uri) as exp:
for ms_key in exp.ms.keys():
if ms_key not in nvars.keys():
nvars[ms_key] = exp.ms[ms_key].var._maybe_soma_joinid_shape or 1
nvars[ms_key] = exp.ms[ms_key].var.shape or 1

ok = _treewalk(
uri,
Expand Down Expand Up @@ -326,13 +326,14 @@ def _leaf_visitor_show_shapes(
if isinstance(item, tiledbsoma.DataFrame):
_print_leaf_node_banner("DataFrame", node_name, item.uri, args)
_bannerize(args, "count", item.count)
_bannerize(args, "shape", item.shape)
_bannerize(args, "maxshape", item.maxshape)
_bannerize(args, "domain", item.domain)
_bannerize(args, "maxdomain", item.maxdomain)
_bannerize(args, "upgraded", item.tiledbsoma_has_upgraded_domain)

elif isinstance(item, tiledbsoma.SparseNDArray):
_print_leaf_node_banner("SparseNDArray", node_name, item.uri, args)
####_bannerize(args, "used_shape", item.used_shape())
_bannerize(args, "used_shape", _find_old_sparse_ndarray_bounds(item))
_bannerize(args, "shape", item.shape)
_bannerize(args, "maxshape", item.maxshape)
Expand Down Expand Up @@ -382,7 +383,6 @@ def _leaf_visitor_upgrade(
print(" Already upgraded", file=args["output_handle"])

elif isinstance(item, tiledbsoma.SparseNDArray):
#### used_shape = item.used_shape()
used_shape = _find_old_sparse_ndarray_bounds(item)
new_shape = tuple(e[1] + 1 for e in used_shape)

Expand Down
7 changes: 2 additions & 5 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,9 @@ def test_dataframe(tmp_path, arrow_schema):
== soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED
)

with pytest.raises(AttributeError):
assert sdf.shape is None

# soma_joinid is not a dim here
assert sdf._maybe_soma_joinid_shape is None
assert sdf._maybe_soma_joinid_maxshape is None
assert sdf.shape is None
assert sdf.maxshape is None

# Read all
table = sdf.read().concat()
Expand Down
12 changes: 6 additions & 6 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,18 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names):
with tiledbsoma.DataFrame.open(uri) as sdf:
has_sjid_dim = "soma_joinid" in index_column_names
if has_sjid_dim:
assert sdf._maybe_soma_joinid_shape == 1 + soma_joinid_domain[1]
assert sdf.shape == 1 + soma_joinid_domain[1]
if not tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
assert sdf._maybe_soma_joinid_maxshape == 1 + soma_joinid_domain[1]
assert sdf.maxshape == 1 + soma_joinid_domain[1]
else:
assert sdf._maybe_soma_joinid_shape is None
assert sdf.shape is None
if not tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
assert sdf._maybe_soma_joinid_maxshape is None
assert sdf.maxshape is None

assert len(sdf.non_empty_domain()) == len(index_column_names)

# This may be None if soma_joinid is not an index column
shape_at_create = sdf._maybe_soma_joinid_shape
shape_at_create = sdf.shape

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:

Expand All @@ -351,7 +351,7 @@ def test_dataframe_basics(tmp_path, soma_joinid_domain, index_column_names):
sdf.tiledbsoma_resize_soma_joinid_shape(new_shape)

with tiledbsoma.DataFrame.open(uri) as sdf:
assert sdf._maybe_soma_joinid_shape == shape_at_create
assert sdf.shape == shape_at_create

# Test writes out of bounds, before resize
offset = shape_at_create if has_soma_joinid_dim else 100
Expand Down
24 changes: 6 additions & 18 deletions apis/r/R/SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,28 +360,16 @@ SOMADataFrame <- R6::R6Class(
self$write(values)
},

#' @description Retrieve the shape; as \code{SOMADataFrames} are shapeless,
#' simply raises an error
#'
#' @return None, instead a \code{\link{.NotYetImplemented}()} error is raised
#'
#' @description Returns the shape for the `soma_joinid` index column, if it
#' is an index column in the dataframe; otherwise, returns `NULL`.
shape = function() {
stop(errorCondition(
"'SOMADataFrame$shape()' is not implemented yet",
class = "notYetImplementedError"
))
return(maybe_soma_joinid_shape(self$uri, private$.soma_context))
},

#' @description Retrieve the maxshape; as \code{SOMADataFrames} are shapeless,
#' simply raises an error
#'
#' @return None, instead a \code{\link{.NotYetImplemented}()} error is raised
#'
#' @description Returns the maxshape for the `soma_joinid` index column, if it
#' is an index column in the dataframe; otherwise, returns `NULL`.
maxshape = function() {
stop(errorCondition(
"'SOMADataFrame$maxshape()' is not implemented",
class = "notYetImplementedError"
))
return(maybe_soma_joinid_maxshape(self$uri, private$.soma_context))
},

#' @description Returns a named list of minimum/maximum pairs, one per index
Expand Down
2 changes: 1 addition & 1 deletion apis/r/tests/testthat/test-SOMADataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ test_that("Basic mechanics", {
sdf <- SOMADataFrameOpen(uri)
expect_match(sdf$soma_type, "SOMADataFrame")

expect_error(sdf$shape(), class = "notYetImplementedError")
### TODO: add assertions expect_error(sdf$shape(), class = "notYetImplementedError")

expect_equivalent(
tiledb::tiledb_array(sdf$uri, return_as = "asis")[],
Expand Down
4 changes: 2 additions & 2 deletions apis/r/tests/testthat/test-shape.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ test_that("SOMADataFrame shape", {
} else {
expect_true(sdf$tiledbsoma_has_upgraded_domain())
}
expect_error(sdf$shape(), class = "notYetImplementedError")
expect_error(sdf$maxshape(), class = "notYetImplementedError")
## TODO: write cases expect_error(sdf$shape(), class = "notYetImplementedError")
## TODO: write cases expect_error(sdf$maxshape(), class = "notYetImplementedError")

# Not implemented this way per
# https://github.com/single-cell-data/TileDB-SOMA/pull/2953#discussion_r1746125089
Expand Down
2 changes: 1 addition & 1 deletion apis/r/tests/testthat/test-write-soma-objects.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ test_that("write_soma.data.frame mechanics", {
expect_identical(sdf$uri, file.path(collection$uri, "co2"))
expect_identical(sdf$dimnames(), "soma_joinid")
expect_identical(sdf$attrnames(), c(names(co2), "obs_id"))
expect_error(sdf$shape(), class = "notYetImplementedError")
### TODO: add assertions expect_error(sdf$shape(), class = "notYetImplementedError")
schema <- sdf$schema()
expect_s3_class(schema, "Schema")
expect_equal(schema$num_fields - 2L, ncol(co2))
Expand Down

0 comments on commit 086cf3e

Please sign in to comment.