From ebc45585e1a70deb23814566d252711e37b203bb Mon Sep 17 00:00:00 2001 From: Isaac Virshup Date: Mon, 15 Jul 2024 11:11:55 -0700 Subject: [PATCH] Bump tiledbsoma version, address deprecations (#1235) --- api/python/cellxgene_census/pyproject.toml | 2 +- .../cellxgene_census/experimental/ml/pytorch.py | 2 +- api/python/cellxgene_census/tests/test_open.py | 14 +++++++------- .../cellxgene_census_builder/tests/test_builder.py | 3 +-- .../geneformer/generate-geneformer-embeddings.py | 12 +++++------- .../models/geneformer/wdl/generate_embeddings.wdl | 2 +- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml index 2884a666f..2904089ff 100644 --- a/api/python/cellxgene_census/pyproject.toml +++ b/api/python/cellxgene_census/pyproject.toml @@ -31,7 +31,7 @@ dependencies= [ # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to # ensure that the assets are readable (tiledbsoma supports backward compatible reading). # Make sure this version does not fall behind the builder's tiledbsoma version. - "tiledbsoma~=1.11.4", + "tiledbsoma~=1.12.3", "anndata", "numpy>=1.21,<2.0", "requests", diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index 765c5cea1..5bef673c9 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -541,7 +541,7 @@ def __init__( experimental """ self.exp_uri = experiment.uri - self.aws_region = experiment.context.tiledb_ctx.config().get("vfs.s3.region") + self.aws_region = experiment.context.tiledb_config.get("vfs.s3.region") self.measurement_name = measurement_name self.layer_name = X_name self.obs_query = obs_query diff --git a/api/python/cellxgene_census/tests/test_open.py b/api/python/cellxgene_census/tests/test_open.py index a14246f2a..df20b3337 100644 --- a/api/python/cellxgene_census/tests/test_open.py +++ b/api/python/cellxgene_census/tests/test_open.py @@ -37,7 +37,7 @@ def test_open_soma_stable() -> None: with cellxgene_census.open_soma() as default_census: assert default_census.uri == census.uri for k, v in DEFAULT_TILEDB_CONFIGURATION.items(): - assert census.context.tiledb_ctx.config()[k] == str(v) + assert census.context.tiledb_config[k] == str(v) @pytest.fixture(scope="module") @@ -68,7 +68,7 @@ def test_open_soma_with_customized_tiledb_config(latest_locator: CensusLocator) with cellxgene_census.open_soma(uri=latest_locator["uri"], tiledb_config=tiledb_config) as census: assert census.uri == latest_locator["uri"] # Verify that user-provided custom config is passed through correctly - assert census.context.tiledb_ctx.config()["soma.init_buffer_bytes"] == soma_init_buffer_bytes + assert census.context.tiledb_config["soma.init_buffer_bytes"] == soma_init_buffer_bytes @pytest.mark.live_corpus @@ -90,7 +90,7 @@ def test_open_soma_with_customized_plain_soma_context( context = soma.SOMATileDBContext().replace(**cfg) with cellxgene_census.open_soma(uri=latest_locator["uri"], context=context) as census: # Verify that the user-provided config settings are set correctly in the TileDB context object. - assert census.context.tiledb_ctx.config()["soma.init_buffer_bytes"] == soma_init_buffer_bytes + assert census.context.tiledb_config["soma.init_buffer_bytes"] == soma_init_buffer_bytes assert census.context.timestamp_ms == timestamp_ms @@ -108,12 +108,12 @@ def test_open_soma_with_customized_default_soma_context( with cellxgene_census.open_soma(census_version="latest", context=custom_context) as census: # Verify the non-overriden soma context defaults are set correctly in the TileDB context object. - assert census.context.tiledb_ctx.config()["vfs.s3.no_sign_request"] == "true" - assert census.context.tiledb_ctx.config()["vfs.s3.region"] == latest_locator.get("s3_region") - assert census.context.tiledb_ctx.config()["py.init_buffer_bytes"] == f"{1 * 1024 ** 3}" + assert census.context.tiledb_config["vfs.s3.no_sign_request"] == "true" + assert census.context.tiledb_config["vfs.s3.region"] == latest_locator.get("s3_region") + assert census.context.tiledb_config["py.init_buffer_bytes"] == f"{1 * 1024 ** 3}" # Verify that the user-overridden config settings are set correctly in the TileDB context object. - assert census.context.tiledb_ctx.config()["soma.init_buffer_bytes"] == soma_init_buffer_bytes + assert census.context.tiledb_config["soma.init_buffer_bytes"] == soma_init_buffer_bytes assert census.context.timestamp_ms == timestamp_ms diff --git a/tools/cellxgene_census_builder/tests/test_builder.py b/tools/cellxgene_census_builder/tests/test_builder.py index 4eb53b7d1..28aba691f 100644 --- a/tools/cellxgene_census_builder/tests/test_builder.py +++ b/tools/cellxgene_census_builder/tests/test_builder.py @@ -10,7 +10,6 @@ import psutil import pyarrow as pa import pytest -import tiledb import tiledbsoma as soma from cellxgene_census_builder.build_soma import build @@ -95,7 +94,7 @@ def proxy_psutil_virtual_memory() -> psutil._pslinux.svmem: # Query the census and do assertions with soma.Collection.open( uri=census_build_args.soma_path.as_posix(), - context=soma.options.SOMATileDBContext(tiledb_ctx=tiledb.Ctx({"vfs.s3.region": "us-west-2"})), + context=soma.options.SOMATileDBContext(tiledb_config={"vfs.s3.region": "us-west-2"}), ) as census: # There are 16 cells in total (4 in each dataset). They all belong to homo_sapiens human_obs = census[CENSUS_DATA_NAME]["homo_sapiens"]["obs"].read().concat().to_pandas() diff --git a/tools/models/geneformer/generate-geneformer-embeddings.py b/tools/models/geneformer/generate-geneformer-embeddings.py index fd909cd1b..1c7c5adbe 100755 --- a/tools/models/geneformer/generate-geneformer-embeddings.py +++ b/tools/models/geneformer/generate-geneformer-embeddings.py @@ -36,13 +36,11 @@ def main(argv): except Exception: # noqa: BLE001 pass tiledbsoma_context = tiledbsoma.options.SOMATileDBContext( - tiledb_ctx=tiledb.Ctx( - { - "py.init_buffer_bytes": 4 * 1024**3, - "soma.init_buffer_bytes": 4 * 1024**3, - "vfs.s3.region": aws_region, - } - ) + tiledb_config={ + "py.init_buffer_bytes": 4 * 1024**3, + "soma.init_buffer_bytes": 4 * 1024**3, + "vfs.s3.region": aws_region, + } ) with tiledbsoma.SparseNDArray.open(args.outfile, "r", context=tiledbsoma_context): diff --git a/tools/models/geneformer/wdl/generate_embeddings.wdl b/tools/models/geneformer/wdl/generate_embeddings.wdl index d335a2a44..2f28a41d2 100644 --- a/tools/models/geneformer/wdl/generate_embeddings.wdl +++ b/tools/models/geneformer/wdl/generate_embeddings.wdl @@ -62,7 +62,7 @@ task init_embeddings_array { type=pa.float32(), shape=(2**31-2, ~{embedding_dim}), context = tiledbsoma.options.SOMATileDBContext( - tiledb_ctx=tiledb.Ctx({"vfs.s3.region": '~{s3_region}'}) + tiledb_config={"vfs.s3.region": '~{s3_region}'} ) ).close() EOF