Skip to content

Commit

Permalink
[python/c++] Deprecate config_options_from_schema in favor of new function (#3437)
Browse files Browse the repository at this point in the history

* Create new `PlatformSchemaConfig` class with only TileDB schema properties.
* Create new method `schema_config_options` that returns the
  `PlatformSchemaConfig`.
* Deprecate `config_options_from_schema` in favor of `schema_config_options`.
  • Loading branch information
jp-dark authored Dec 17, 2024
1 parent 0356b08 commit ddea64c
Show file tree
Hide file tree
Showing 13 changed files with 232 additions and 14 deletions.
41 changes: 40 additions & 1 deletion apis/python/src/tiledbsoma/_soma_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
# Licensed under the MIT License.

import warnings
from typing import Any, Tuple

import pyarrow as pa
Expand Down Expand Up @@ -33,9 +34,41 @@ def schema(self) -> pa.Schema:
"""
return self._handle.schema

def schema_config_options(self) -> clib.PlatformSchemaConfig:
    """Returns the array-schema settings that the Arrow schema cannot
    express, packaged as a ``PlatformSchemaConfig``.

    The ``str``-typed attributes that list nested entries hold JSON
    documents; decode them with ``json.loads`` to reach the nested keys.

    Available attributes are:
        * capacity: int
        * allows_duplicates: bool
        * tile_order: str
        * cell_order: str
        * offsets_filters: str
            * name (of filter): str
            * compression_level: str
        * validity_filters: str
        * attrs: str
            * name (of attribute): str
                * filters: str
                    * name (of filter): str
                    * compression_level: str
        * dims: str
            * name (of dimension): str
                * filters: str
                    * name (of filter): str
                    * compression_level: str
                * tile: int

    Lifecycle:
        Experimental.
    """
    # Pure delegation: the handle owns the schema and builds the config.
    handle = self._handle
    return handle.schema_config_options()

def config_options_from_schema(self) -> clib.PlatformConfig:
"""Returns metadata about the array that is not encompassed within the
Arrow Schema, in the form of a PlatformConfig.
Arrow Schema, in the form of a PlatformConfig (deprecated).
Use ``schema_config_options`` instead.
Available attributes are:
* dataframe_dim_zstd_level: int
Expand Down Expand Up @@ -64,7 +97,13 @@ def config_options_from_schema(self) -> clib.PlatformConfig:
* tile_order: str
* cell_order: str
* consolidate_and_vacuum: bool
Lifecycle:
Deprecated.
"""
warnings.warn(
"Deprecated. Use schema_config_options instead.", DeprecationWarning
)
return self._handle.config_options_from_schema()

def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]:
Expand Down
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,12 @@ def _do_initial_reads(self, reader: RawHandle) -> None:
def schema(self) -> pa.Schema:
    """Return the Arrow schema reported by the wrapped handle."""
    wrapped = self._handle
    return wrapped.schema

def schema_config_options(self) -> clib.PlatformSchemaConfig:
    """Return the TileDB platform configuration options that can be read
    from the array schema, as produced by the wrapped handle.
    """
    wrapped = self._handle
    return wrapped.schema_config_options()

def config_options_from_schema(self) -> clib.PlatformConfig:
    """Return the ``PlatformConfig`` produced by the wrapped handle's
    ``config_options_from_schema`` call.
    """
    wrapped = self._handle
    return wrapped.config_options_from_schema()

Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/spatial/_xarray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def dtype(self) -> np.typing.DTypeLike:

def recommend_chunks(self) -> Tuple[int, ...]:
"""Returns recommended chunk sizes for chunking this array."""
dim_info = json.loads(self._array.config_options_from_schema().dims)
dim_info = json.loads(self._array.schema_config_options().dims)
return tuple(
_str_to_int(dim_info[f"soma_dim_{index}"]["tile"])
for index in range(self.ndim)
Expand Down
14 changes: 14 additions & 0 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ PYBIND11_MODULE(pytiledbsoma, m) {
.def_readwrite(
"consolidate_and_vacuum", &PlatformConfig::consolidate_and_vacuum);

// Expose PlatformSchemaConfig to Python under the name "PlatformSchemaConfig".
// The class is default-constructible from Python and every field listed below
// is bound as a read/write attribute.
py::class_<PlatformSchemaConfig>(m, "PlatformSchemaConfig")
.def(py::init<>())
.def_readwrite("capacity", &PlatformSchemaConfig::capacity)
.def_readwrite(
"offsets_filters", &PlatformSchemaConfig::offsets_filters)
.def_readwrite(
"validity_filters", &PlatformSchemaConfig::validity_filters)
.def_readwrite("attrs", &PlatformSchemaConfig::attrs)
.def_readwrite("dims", &PlatformSchemaConfig::dims)
.def_readwrite(
"allows_duplicates", &PlatformSchemaConfig::allows_duplicates)
.def_readwrite("tile_order", &PlatformSchemaConfig::tile_order)
.def_readwrite("cell_order", &PlatformSchemaConfig::cell_order);

m.def("_update_dataframe_schema", &SOMADataFrame::update_dataframe_schema);

load_soma_context(m);
Expand Down
1 change: 1 addition & 0 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ void load_soma_array(py::module& m) {
return pa_schema_import(
py::capsule(array.arrow_schema().get()));
})
.def("schema_config_options", &SOMAArray::schema_config_options)
.def(
"config_options_from_schema",
&SOMAArray::config_options_from_schema)
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_dataframe(tmp_path, arrow_schema):
assert [e.as_py() for e in table["mybool"]] == pydict["mybool"]

with soma.DataFrame.open(uri) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert not cfg.allows_duplicates
assert json.loads(cfg.dims)["myint"]["filters"] == [
{"COMPRESSION_LEVEL": 3, "name": "ZSTD"}
Expand Down Expand Up @@ -1189,7 +1189,7 @@ def test_create_platform_config_overrides(
).close()

with soma.DataFrame.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def test_tile_extents(tmp_path):
).close()

with soma.DenseNDArray.open(tmp_path.as_posix()) as A:
dim_info = json.loads(A.config_options_from_schema().dims)
dim_info = json.loads(A.schema_config_options().dims)
# With new shape (tiledbsoma 1.15), core current domain is (100,10000)
# but core domain is huge, and therefore dim 0 does not get its extent
# squashed down to 100.
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_platform_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_platform_config(conftest_pbmc_small):

x_arr_uri = str(Path(output_path) / "ms" / "RNA" / "X" / "data")
with tiledbsoma.SparseNDArray.open(x_arr_uri) as x_arr:
cfg = x_arr.config_options_from_schema()
cfg = x_arr.schema_config_options()
assert cfg.capacity == create_cfg["capacity"]
assert cfg.cell_order == create_cfg["cell_order"]
assert cfg.tile_order == create_cfg["tile_order"]
Expand All @@ -70,7 +70,7 @@ def test_platform_config(conftest_pbmc_small):

var_arr_uri = str(Path(output_path) / "ms" / "RNA" / "var")
with tiledbsoma.DataFrame.open(var_arr_uri) as var_arr:
cfg = var_arr.config_options_from_schema()
cfg = var_arr.schema_config_options()
assert json.loads(cfg.dims)["soma_joinid"]["filters"] == [
{"COMPRESSION_LEVEL": 1, "name": "ZSTD"}
]
Expand Down
10 changes: 5 additions & 5 deletions apis/python/tests/test_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def test_sparse_nd_array_read_write_sparse_tensor(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("shape", [(10,), (23, 4), (5, 3, 1), (8, 4, 2, 30)])
Expand All @@ -376,7 +376,7 @@ def test_sparse_nd_array_read_write_table(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64])
Expand Down Expand Up @@ -404,7 +404,7 @@ def test_sparse_nd_array_read_as_pandas(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("shape_is_nones", [True, False])
Expand Down Expand Up @@ -1114,7 +1114,7 @@ def test_tile_extents(tmp_path):
).close()

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
dim_info = json.loads(A.config_options_from_schema().dims)
dim_info = json.loads(A.schema_config_options().dims)
assert int(dim_info["soma_dim_0"]["tile"]) == 2048
assert int(dim_info["soma_dim_1"]["tile"]) == 2048

Expand Down Expand Up @@ -1157,7 +1157,7 @@ def test_create_platform_config_overrides(
).close()

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
Expand Down
11 changes: 11 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,17 @@ class SOMAArray : public SOMAObject {
ctx_->tiledb_ctx(), arr_);
}

/**
 * @brief Summarize the schema-level settings of this array as a
 * PlatformSchemaConfig.
 *
 * The result carries capacity, allows_duplicates, tile_order, cell_order,
 * offsets_filters, validity_filters, and the attribute and dimension
 * filters. The conversion itself is delegated to
 * ArrowAdapter::platform_schema_config_from_tiledb.
 *
 * @return PlatformSchemaConfig
 */
PlatformSchemaConfig schema_config_options() const {
    const auto& current_schema = *schema_;
    return ArrowAdapter::platform_schema_config_from_tiledb(current_schema);
}

/**
* @brief Get members of the schema (capacity, allows_duplicates,
* tile_order, cell_order, offsets_filters, validity_filters, attr filters,
Expand Down
29 changes: 29 additions & 0 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,35 @@ PlatformConfig ArrowAdapter::platform_config_from_tiledb_schema(
return platform_config;
}

/**
 * @brief Build a PlatformSchemaConfig from a TileDB array schema.
 *
 * Copies capacity and allows-duplicates directly, converts the tile and cell
 * layouts to their string names, and stores the offsets/validity filter
 * lists, the per-attribute filters, and the per-dimension information as
 * strings produced by calling dump() on the corresponding JSON values.
 *
 * @param tiledb_schema The array schema to read from.
 * @return PlatformSchemaConfig populated from the schema.
 */
PlatformSchemaConfig ArrowAdapter::platform_schema_config_from_tiledb(
    ArraySchema tiledb_schema) {
    // Map a layout enum to its string name. A switch avoids constructing a
    // std::map of strings on every call. Layouts without a name here yield
    // an empty string, which is what the former operator[] lookup on a
    // missing key produced.
    auto layout_as_string = [](tiledb_layout_t layout) -> std::string {
        switch (layout) {
            case TILEDB_ROW_MAJOR:
                return "row-major";
            case TILEDB_COL_MAJOR:
                return "column-major";
            case TILEDB_HILBERT:
                return "hilbert";
            case TILEDB_UNORDERED:
                return "unordered";
            default:
                return "";
        }
    };

    PlatformSchemaConfig platform_config;
    platform_config.capacity = tiledb_schema.capacity();
    platform_config.allows_duplicates = tiledb_schema.allows_dups();
    platform_config.tile_order = layout_as_string(tiledb_schema.tile_order());
    platform_config.cell_order = layout_as_string(tiledb_schema.cell_order());
    platform_config.offsets_filters = ArrowAdapter::_get_filter_list_json(
                                          tiledb_schema.offsets_filter_list())
                                          .dump();
    platform_config.validity_filters = ArrowAdapter::_get_filter_list_json(
                                           tiledb_schema.validity_filter_list())
                                           .dump();
    platform_config.attrs = ArrowAdapter::_get_attrs_filter_list_json(
                                tiledb_schema)
                                .dump();
    platform_config.dims = ArrowAdapter::_get_dims_list_json(tiledb_schema)
                               .dump();

    return platform_config;
}

json ArrowAdapter::_get_attrs_filter_list_json(
const ArraySchema& tiledb_schema) {
json attrs_filter_list_as_json;
Expand Down
Loading

0 comments on commit ddea64c

Please sign in to comment.