Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python/r/c++] Bulk-sync main to release-1.15 #3411

Merged
merged 3 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/python-ci-packaging.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
run: |
mkdir -p external
# Please do not edit manually -- let scripts/update-tiledb-version.py update this
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-linux-x86_64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-linux-x86_64-2.27.0.rc5-2862c30.tar.gz
tar -C external -xzf tiledb-linux-x86_64-*.tar.gz
ls external/lib/
echo "LD_LIBRARY_PATH=$(pwd)/external/lib" >> $GITHUB_ENV
Expand Down Expand Up @@ -178,10 +178,10 @@ jobs:
mkdir -p external
# Please do not edit manually -- let scripts/update-tiledb-version.py update this
if [ `uname -m` == "arm64" ]; then
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-macos-arm64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-macos-arm64-2.27.0.rc5-2862c30.tar.gz
tar -C external -xzf tiledb-macos-arm64-*.tar.gz
else
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-macos-x86_64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-macos-x86_64-2.27.0.rc5-2862c30.tar.gz
tar -C external -xzf tiledb-macos-x86_64-*.tar.gz
fi
ls external/lib/
Expand Down Expand Up @@ -274,14 +274,14 @@ jobs:
if [ `uname -s` == "Darwin" ]; then
if [ `uname -m` == "arm64" ]; then
# Please do not edit manually -- let scripts/update-tiledb-version.py update this
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-macos-arm64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-macos-arm64-2.27.0.rc5-2862c30.tar.gz
else
# Please do not edit manually -- let scripts/update-tiledb-version.py update this
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-macos-x86_64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-macos-x86_64-2.27.0.rc5-2862c30.tar.gz
fi
else
# Please do not edit manually -- let scripts/update-tiledb-version.py update this
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0-rc3/tiledb-linux-x86_64-2.27.0-rc3-8d581f2.tar.gz
wget --quiet https://github.com/TileDB-Inc/TileDB/releases/download/2.27.0.rc5/tiledb-linux-x86_64-2.27.0.rc5-2862c30.tar.gz
fi
tar -C external -xzf tiledb-*.tar.gz
ls external/lib/
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/r-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
# uses: lukka/get-cmake@latest

#- name: MkVars
# run: mkdir ~/.R && echo "CXX17FLAGS=-Wno-deprecated-declarations -Wno-deprecated" > ~/.R/Makevars
# run: mkdir ~/.R && echo "CXX20FLAGS=-Wno-deprecated-declarations -Wno-deprecated" > ~/.R/Makevars

#- name: Build and install libtiledbsoma
# run: sudo scripts/bld --prefix=/usr/local
Expand Down
1 change: 1 addition & 0 deletions apis/python/notebooks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
If you add notebooks here, please also update the symlinks in `/doc/source/notebooks`.
4 changes: 3 additions & 1 deletion apis/python/notebooks/tutorial_soma_shape.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
"\n",
"In this notebook, we'll go through how you use shapes for the dataframes and arrays within your SOMA experiments, when and how you can resize, and options for experiments created before TileDB-SOMA 1.15.\n",
"\n",
"The dataset used is from Peripheral Blood Mononuclear Cells (PBMC), which is freely available from 10X Genomics. "
"The dataset used is from Peripheral Blood Mononuclear Cells (PBMC), which is freely available from 10X Genomics.\n",
"\n",
"(Please also see the [Academy tutorial](https://cloud.tiledb.com/academy/structure/life-sciences/single-cell/tutorials/shapes/).)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion apis/python/requirements_spatial.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ tifffile
pillow
spatialdata>=0.2.5
xarray
dask
dask<=2024.11.2
41 changes: 37 additions & 4 deletions apis/python/src/tiledbsoma/_common_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing_extensions import Self

from ._soma_array import SOMAArray
from ._types import OpenTimestamp
from ._types import OpenTimestamp, StatusAndReason
from .options._soma_tiledb_context import (
SOMATileDBContext,
)
Expand Down Expand Up @@ -51,9 +51,9 @@ def create(
None)``, as the sequence length determines the number of dimensions
N in the N-dimensional array.

For :class:`SparseNDArray` only, if a slot is None, then the maximum
possible int32 will be used. This makes a :class:`SparseNDArray`
growable.
For :class:`SparseNDArray` only, if a slot is None, then the minimum
possible range will be used. This makes a :class:`SparseNDArray`
growable using ``resize``.
platform_config:
Platform-specific options used to create this array.
This may be provided as settings in a dictionary, with options
Expand Down Expand Up @@ -84,6 +84,39 @@ def create(
"""
raise NotImplementedError("must be implemented by child class.")

def resize(
self, newshape: Sequence[Union[int, None]], check_only: bool = False
) -> StatusAndReason:
"""Increases the shape of the array as specified. Raises an error if the new
shape is less than the current shape in any dimension. Raises an error if
the new shape exceeds maxshape in any dimension. Raises an error if the
array doesn't already have a shape: in that case please call
``tiledbsoma_upgrade_shape``. If ``check_only`` is ``True``, returns
whether the operation would succeed if attempted, and a reason why it
would not.

Args:
newshape:
The requested new shape.
check_only:
If ``True``, the array is not resized; the result of the
handle's ``tiledbsoma_can_resize`` check is returned instead.

Returns:
With ``check_only`` set, the status-and-reason pair produced by the
can-resize check. Otherwise ``(True, "")`` once the handle's
``resize`` call has returned.

Lifecycle:
Maturing.
"""
if check_only:
# Dry run: only ask the handle whether the resize would be accepted.
return self._handle.tiledbsoma_can_resize(newshape)
else:
# Real resize: failures surface as errors from the handle, so reaching
# the return below means the call completed.
self._handle.resize(newshape)
return (True, "")

def tiledbsoma_upgrade_shape(
self, newshape: Sequence[Union[int, None]], check_only: bool = False
) -> StatusAndReason:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
1.15 release notes. Raises an error if the new shape exceeds maxshape in
any dimension. Raises an error if the array already has a shape.

Args:
newshape:
The shape to assign to the array.
check_only:
If ``True``, the array is not modified; the result of the
handle's ``tiledbsoma_can_upgrade_shape`` check is returned
instead.

Returns:
With ``check_only`` set, the status-and-reason pair produced by the
can-upgrade check. Otherwise ``(True, "")`` once the handle's
``tiledbsoma_upgrade_shape`` call has returned.
"""
if check_only:
# Dry run: only ask the handle whether the upgrade would be accepted.
return self._handle.tiledbsoma_can_upgrade_shape(newshape)
else:
# Real upgrade: reaching the return below means the handle call completed.
self._handle.tiledbsoma_upgrade_shape(newshape)
return (True, "")

@property
def shape(self) -> Tuple[int, ...]:
"""Returns capacity of each dimension, always a list of length ``ndim``.
Expand Down
58 changes: 25 additions & 33 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from . import pytiledbsoma as clib
from ._constants import SOMA_JOINID
from ._exception import SOMAError, map_exception_for_create
from ._query_condition import QueryCondition
from ._read_iters import TableReadIter
from ._soma_array import SOMAArray
from ._tdb_handles import DataFrameWrapper
Expand Down Expand Up @@ -172,16 +171,21 @@ def create(
index column name is required.
domain:
An optional sequence of tuples specifying the domain of each
index column. Each tuple should be a pair consisting of the minimum and
maximum values storable in the index column. For example, if there is a
single int64-valued index column, then ``domain`` might be ``[(100,
200)]`` to indicate that values between 100 and 200, inclusive, can be
stored in that column. If provided, this sequence must have the same
length as ``index_column_names``, and the index-column domain will be as
specified. If omitted entirely, or if ``None`` in a given dimension,
the corresponding index-column domain will use the minimum and maximum
possible values for the column's datatype. This makes a
:class:`DataFrame` growable.
index column. Each tuple must be a pair consisting of the
minimum and maximum values storable in the index column. For
example, if there is a single int64-valued index column, then
``domain`` might be ``[(100, 200)]`` to indicate that values
between 100 and 200, inclusive, can be stored in that column.
If provided, this sequence must have the same length as
``index_column_names``, and the index-column domain will be as
specified. If omitted entirely, or if ``None`` in a given
dimension, the corresponding index-column domain will use an
empty range, and data writes after that will fail with "A range
was set outside of the current domain". Unless you have a
particular reason not to, you should always provide the desired
``domain`` at create time: this is an optional but strongly
recommended parameter. See also ``change_domain``, which allows
you to expand the domain after creation.
platform_config:
Platform-specific options used to create this array.
This may be provided as settings in a dictionary, with options
Expand Down Expand Up @@ -621,6 +625,9 @@ def change_domain(
Lastly, it is an error to try to set the ``domain`` to be smaller than
``maxdomain`` along any index column. The ``maxdomain`` of a dataframe is
set at creation time, and cannot be extended afterward.

Lifecycle:
Maturing.
"""
frame = inspect.currentframe()
function_name_for_messages = frame.f_code.co_name if frame else "tiledbsoma"
Expand Down Expand Up @@ -714,31 +721,16 @@ def read(
_util.check_unpartitioned(partitions)
self._check_open_read()

handle = self._handle._handle

context = handle.context()
if platform_config is not None:
config = context.tiledb_config.copy()
config.update(platform_config)
context = clib.SOMAContext(config)

sr = clib.SOMADataFrame.open(
uri=handle.uri,
mode=clib.OpenMode.read,
context=context,
column_names=column_names or [],
# TODO: batch_size
return TableReadIter(
array=self,
coords=coords,
column_names=column_names,
result_order=_util.to_clib_result_order(result_order),
timestamp=handle.timestamp and (0, handle.timestamp),
value_filter=value_filter,
platform_config=platform_config,
)

if value_filter is not None:
sr.set_condition(QueryCondition(value_filter), handle.schema)

_util._set_coords(sr, coords)

# TODO: batch_size
return TableReadIter(sr)

def write(
self, values: pa.Table, platform_config: Optional[options.PlatformConfig] = None
) -> Self:
Expand Down
71 changes: 17 additions & 54 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
from ._arrow_types import pyarrow_to_carrow_type
from ._common_nd_array import NDArray
from ._exception import SOMAError, map_exception_for_create
from ._read_iters import TableReadIter
from ._tdb_handles import DenseNDArrayWrapper
from ._types import OpenTimestamp, Slice, StatusAndReason
from ._types import OpenTimestamp, Slice
from ._util import dense_indices_to_shape
from .options._soma_tiledb_context import (
SOMATileDBContext,
Expand Down Expand Up @@ -232,42 +233,20 @@
data_shape = tuple(handle.shape if use_shape else ned)
target_shape = dense_indices_to_shape(coords, data_shape, result_order)

context = handle.context()
if platform_config is not None:
config = context.tiledb_config.copy()
config.update(platform_config)
context = clib.SOMAContext(config)

sr = clib.SOMADenseNDArray.open(
uri=handle.uri,
mode=clib.OpenMode.read,
context=context,
arrow_table = TableReadIter(
array=self,
coords=coords,
column_names=[],
result_order=_util.to_clib_result_order(result_order),
timestamp=handle.timestamp and (0, handle.timestamp),
)

_util._set_coords(sr, coords)
value_filter=None,
platform_config=platform_config,
).concat()

arrow_tables = []
while True:
arrow_table_piece = sr.read_next()
if not arrow_table_piece:
break
arrow_tables.append(arrow_table_piece)

# For dense arrays there is no zero-output case: attempting to make a test case
# to do that, say by indexing a 10x20 array by positions 888 and 999, results
# in read-time errors of the form
#
# [TileDB::Subarray] Error: Cannot add range to dimension 'soma_dim_0'; Range [888, 888] is
# out of domain bounds [0, 9]
if not arrow_tables:
if arrow_table is None:
raise SOMAError(
"internal error: at least one table-piece should have been returned"
)

arrow_table = pa.concat_tables(arrow_tables)
npval = arrow_table.column("soma_data").to_numpy()
# TODO: as currently coded we're looking at the non-empty domain upper
# bound but not its lower bound. That works fine if data are written at
Expand Down Expand Up @@ -310,7 +289,7 @@
"""
_util.check_type("values", values, (pa.Tensor,))

clib_dense_array = self._handle._handle
clib_handle = self._handle._handle

# Compute the coordinates for the dense array.
new_coords: List[Union[int, Slice[int], None]] = []
Expand All @@ -331,34 +310,18 @@
if not input.flags.contiguous:
input = np.ascontiguousarray(input)
order = clib.ResultOrder.rowmajor
clib_dense_array.reset(result_order=order)
_util._set_coords(clib_dense_array, new_coords)
clib_dense_array.write(input)

mq = clib.ManagedQuery(clib_handle, clib_handle.context())
mq.set_layout(order)
_util._set_coords(mq, clib_handle, new_coords)
mq.set_soma_data(input)
mq.submit_write()

tiledb_write_options = TileDBWriteOptions.from_platform_config(platform_config)
if tiledb_write_options.consolidate_and_vacuum:
clib_dense_array.consolidate_and_vacuum()
clib_handle.consolidate_and_vacuum()

Check warning on line 322 in apis/python/src/tiledbsoma/_dense_nd_array.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_dense_nd_array.py#L322

Added line #L322 was not covered by tests
return self

def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""Supported for ``SparseNDArray``; scheduled for implementation for
``DenseNDArray`` in TileDB-SOMA 1.15.

Args:
newshape:
The requested new shape; passed unchanged to the handle's
``resize``.
"""
# Pure delegation: no checking is done here before calling the handle.
self._handle.resize(newshape)

def tiledbsoma_upgrade_shape(
self, newshape: Sequence[Union[int, None]], check_only: bool = False
) -> StatusAndReason:
"""Allows the array to have a resizeable shape as described in the TileDB-SOMA
1.15 release notes. Raises an error if the new shape exceeds maxshape in
any dimension. Raises an error if the array already has a shape.

Args:
newshape:
The shape to assign to the array.
check_only:
If ``True``, the array is not modified; the result of the
handle's ``tiledbsoma_can_upgrade_shape`` check is returned
instead.

Returns:
With ``check_only`` set, the status-and-reason pair produced by the
can-upgrade check. Otherwise ``(True, "")`` once the handle's
``tiledbsoma_upgrade_shape`` call has returned.
"""
if check_only:
# Dry run: only ask the handle whether the upgrade would be accepted.
return self._handle.tiledbsoma_can_upgrade_shape(newshape)
else:
# Real upgrade: reaching the return below means the handle call completed.
self._handle.tiledbsoma_upgrade_shape(newshape)
return (True, "")

@classmethod
def _dim_capacity_and_extent(
cls,
Expand Down
Loading