Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python/c++] Deprecate config_options_from_schema in favor of new function #3437

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion apis/python/src/tiledbsoma/_soma_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
# Licensed under the MIT License.

import warnings
from typing import Any, Tuple

import pyarrow as pa
Expand Down Expand Up @@ -33,9 +34,41 @@
"""
return self._handle.schema

def schema_config_options(self) -> clib.PlatformSchemaConfig:
    """Returns metadata about the array schema that is not encompassed within
    the Arrow Schema, in the form of a PlatformSchemaConfig.

    The filter, attribute, and dimension entries (``offsets_filters``,
    ``validity_filters``, ``attrs``, ``dims``) are JSON-encoded strings;
    decode them with ``json.loads`` to reach the nested entries listed
    beneath them.

    Available attributes are:
        * capacity: int
        * allows_duplicates: bool
        * tile_order: str
        * cell_order: str
        * offsets_filters: str
            * name (of filter): str
            * compression_level: str
        * validity_filters: str
        * attrs: str
            * name (of attribute): str
                * filters: str
                    * name (of filter): str
                    * compression_level: str
        * dims: str
            * name (of dimension): str
                * filters: str
                    * name (of filter): str
                    * compression_level: str
                * tile: int

    Lifecycle:
        Experimental.
    """
    # Pure forwarder: the wrapped handle produces the config object.
    return self._handle.schema_config_options()

def config_options_from_schema(self) -> clib.PlatformConfig:
"""Returns metadata about the array that is not encompassed within the
Arrow Schema, in the form of a PlatformConfig.
Arrow Schema, in the form of a PlatformConfig (deprecated).

Use ``schema_config_options`` instead.

Available attributes are:
* dataframe_dim_zstd_level: int
Expand Down Expand Up @@ -64,7 +97,13 @@
* tile_order: str
* cell_order: str
* consolidate_and_vacuum: bool

Lifecycle:
Deprecated.
"""
warnings.warn(

Check warning on line 104 in apis/python/src/tiledbsoma/_soma_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_soma_array.py#L104

Added line #L104 was not covered by tests
"Deprecated. Use schema_config_options instead.", DeprecationWarning
)
return self._handle.config_options_from_schema()

def non_empty_domain(self) -> Tuple[Tuple[Any, Any], ...]:
Expand Down
6 changes: 6 additions & 0 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,12 @@ def _do_initial_reads(self, reader: RawHandle) -> None:
def schema(self) -> pa.Schema:
return self._handle.schema

def schema_config_options(self) -> clib.PlatformSchemaConfig:
    """Returns a ``clib.PlatformSchemaConfig`` instance holding the TileDB
    platform configuration options that can be read from an array schema.

    The call is forwarded unchanged to the wrapped handle.
    """
    return self._handle.schema_config_options()

def config_options_from_schema(self) -> clib.PlatformConfig:
    """Returns the ``clib.PlatformConfig`` reported by the wrapped handle.

    NOTE(review): the ``SOMAArray`` method of the same name is documented as
    deprecated in favor of ``schema_config_options``; presumably this
    forwarder is kept only to back that deprecated method -- confirm before
    removing it.
    """
    return self._handle.config_options_from_schema()

Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsoma/io/spatial/_xarray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def dtype(self) -> np.typing.DTypeLike:

def recommend_chunks(self) -> Tuple[int, ...]:
"""Returns recommended chunk sizes for chunking this array."""
dim_info = json.loads(self._array.config_options_from_schema().dims)
dim_info = json.loads(self._array.schema_config_options().dims)
return tuple(
_str_to_int(dim_info[f"soma_dim_{index}"]["tile"])
for index in range(self.ndim)
Expand Down
14 changes: 14 additions & 0 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ PYBIND11_MODULE(pytiledbsoma, m) {
.def_readwrite(
"consolidate_and_vacuum", &PlatformConfig::consolidate_and_vacuum);

py::class_<PlatformSchemaConfig>(m, "PlatformSchemaConfig")
.def(py::init<>())
.def_readwrite("capacity", &PlatformSchemaConfig::capacity)
.def_readwrite(
"offsets_filters", &PlatformSchemaConfig::offsets_filters)
.def_readwrite(
"validity_filters", &PlatformSchemaConfig::validity_filters)
.def_readwrite("attrs", &PlatformSchemaConfig::attrs)
.def_readwrite("dims", &PlatformSchemaConfig::dims)
.def_readwrite(
"allows_duplicates", &PlatformSchemaConfig::allows_duplicates)
.def_readwrite("tile_order", &PlatformSchemaConfig::tile_order)
.def_readwrite("cell_order", &PlatformSchemaConfig::cell_order);

m.def("_update_dataframe_schema", &SOMADataFrame::update_dataframe_schema);

load_soma_context(m);
Expand Down
1 change: 1 addition & 0 deletions apis/python/src/tiledbsoma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ void load_soma_array(py::module& m) {
return pa_schema_import(
py::capsule(array.arrow_schema().get()));
})
.def("schema_config_options", &SOMAArray::schema_config_options)
.def(
"config_options_from_schema",
&SOMAArray::config_options_from_schema)
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_dataframe(tmp_path, arrow_schema):
assert [e.as_py() for e in table["mybool"]] == pydict["mybool"]

with soma.DataFrame.open(uri) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert not cfg.allows_duplicates
assert json.loads(cfg.dims)["myint"]["filters"] == [
{"COMPRESSION_LEVEL": 3, "name": "ZSTD"}
Expand Down Expand Up @@ -1189,7 +1189,7 @@ def test_create_platform_config_overrides(
).close()

with soma.DataFrame.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def test_tile_extents(tmp_path):
).close()

with soma.DenseNDArray.open(tmp_path.as_posix()) as A:
dim_info = json.loads(A.config_options_from_schema().dims)
dim_info = json.loads(A.schema_config_options().dims)
# With new shape (tiledbsoma 1.15), core current domain is (100,10000)
# but core domain is huge, and therefore dim 0 does not get its extent
# squashed down to 100.
Expand Down
4 changes: 2 additions & 2 deletions apis/python/tests/test_platform_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_platform_config(conftest_pbmc_small):

x_arr_uri = str(Path(output_path) / "ms" / "RNA" / "X" / "data")
with tiledbsoma.SparseNDArray.open(x_arr_uri) as x_arr:
cfg = x_arr.config_options_from_schema()
cfg = x_arr.schema_config_options()
assert cfg.capacity == create_cfg["capacity"]
assert cfg.cell_order == create_cfg["cell_order"]
assert cfg.tile_order == create_cfg["tile_order"]
Expand All @@ -70,7 +70,7 @@ def test_platform_config(conftest_pbmc_small):

var_arr_uri = str(Path(output_path) / "ms" / "RNA" / "var")
with tiledbsoma.DataFrame.open(var_arr_uri) as var_arr:
cfg = var_arr.config_options_from_schema()
cfg = var_arr.schema_config_options()
assert json.loads(cfg.dims)["soma_joinid"]["filters"] == [
{"COMPRESSION_LEVEL": 1, "name": "ZSTD"}
]
Expand Down
10 changes: 5 additions & 5 deletions apis/python/tests/test_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def test_sparse_nd_array_read_write_sparse_tensor(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("shape", [(10,), (23, 4), (5, 3, 1), (8, 4, 2, 30)])
Expand All @@ -376,7 +376,7 @@ def test_sparse_nd_array_read_write_table(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64])
Expand Down Expand Up @@ -404,7 +404,7 @@ def test_sparse_nd_array_read_as_pandas(

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
assert A.is_sparse
assert not A.config_options_from_schema().allows_duplicates
assert not A.schema_config_options().allows_duplicates


@pytest.mark.parametrize("shape_is_nones", [True, False])
Expand Down Expand Up @@ -1114,7 +1114,7 @@ def test_tile_extents(tmp_path):
).close()

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
dim_info = json.loads(A.config_options_from_schema().dims)
dim_info = json.loads(A.schema_config_options().dims)
assert int(dim_info["soma_dim_0"]["tile"]) == 2048
assert int(dim_info["soma_dim_1"]["tile"]) == 2048

Expand Down Expand Up @@ -1157,7 +1157,7 @@ def test_create_platform_config_overrides(
).close()

with soma.SparseNDArray.open(tmp_path.as_posix()) as A:
cfg = A.config_options_from_schema()
cfg = A.schema_config_options()
assert expected_schema_fields["validity_filters"] == json.loads(
cfg.validity_filters
)
Expand Down
11 changes: 11 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,17 @@ class SOMAArray : public SOMAObject {
ctx_->tiledb_ctx(), arr_);
}

/**
 * @brief Get members of the schema (capacity, allows_duplicates,
 * tile_order, cell_order, offsets_filters, validity_filters, attr filters,
 * and dim filters) in the form of a PlatformSchemaConfig.
 *
 * The result is built by
 * ArrowAdapter::platform_schema_config_from_tiledb from the schema this
 * array holds in `schema_`.
 *
 * @return PlatformSchemaConfig
 */
PlatformSchemaConfig schema_config_options() const {
    return ArrowAdapter::platform_schema_config_from_tiledb(*schema_);
}

/**
* @brief Get members of the schema (capacity, allows_duplicates,
* tile_order, cell_order, offsets_filters, validity_filters, attr filters,
Expand Down
29 changes: 29 additions & 0 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,35 @@ PlatformConfig ArrowAdapter::platform_config_from_tiledb_schema(
return platform_config;
}

/**
 * Build a PlatformSchemaConfig from a TileDB array schema.
 *
 * Scalar members (capacity, allows_duplicates) are copied directly; the
 * tile and cell orders are rendered as human-readable layout names; the
 * offsets/validity filter lists, per-attribute filters, and per-dimension
 * information are serialized to JSON strings via `json::dump()`.
 *
 * @param tiledb_schema The TileDB array schema to read from.
 * @return PlatformSchemaConfig populated from `tiledb_schema`.
 */
PlatformSchemaConfig ArrowAdapter::platform_schema_config_from_tiledb(
    ArraySchema tiledb_schema) {
    // Immutable lookup table, built once instead of on every call.
    static const std::map<tiledb_layout_t, std::string> layout_as_string{
        {TILEDB_ROW_MAJOR, "row-major"},
        {TILEDB_COL_MAJOR, "column-major"},
        {TILEDB_HILBERT, "hilbert"},
        {TILEDB_UNORDERED, "unordered"},
    };

    // Read-only lookup. A layout missing from the table yields an empty
    // string, which is what the previous `operator[]` access produced,
    // but without inserting a new entry into the table.
    const auto layout_name = [](tiledb_layout_t layout) -> std::string {
        const auto found = layout_as_string.find(layout);
        return found == layout_as_string.end() ? std::string{} :
                                                 found->second;
    };

    PlatformSchemaConfig platform_config;
    platform_config.capacity = tiledb_schema.capacity();
    platform_config.allows_duplicates = tiledb_schema.allows_dups();
    platform_config.tile_order = layout_name(tiledb_schema.tile_order());
    platform_config.cell_order = layout_name(tiledb_schema.cell_order());
    platform_config.offsets_filters = ArrowAdapter::_get_filter_list_json(
                                          tiledb_schema.offsets_filter_list())
                                          .dump();
    platform_config.validity_filters = ArrowAdapter::_get_filter_list_json(
                                           tiledb_schema.validity_filter_list())
                                           .dump();
    platform_config.attrs = ArrowAdapter::_get_attrs_filter_list_json(
                                tiledb_schema)
                                .dump();
    platform_config.dims = ArrowAdapter::_get_dims_list_json(tiledb_schema)
                               .dump();

    return platform_config;
}

json ArrowAdapter::_get_attrs_filter_list_json(
const ArraySchema& tiledb_schema) {
json attrs_filter_list_as_json;
Expand Down
Loading