diff --git a/.github/workflows/r-ci.yml b/.github/workflows/r-ci.yml index f5652693ea..564de26d8d 100644 --- a/.github/workflows/r-ci.yml +++ b/.github/workflows/r-ci.yml @@ -67,6 +67,9 @@ jobs: # if: ${{ matrix.os == 'ubuntu-latest' }} # run: sudo ldconfig + - name: Update packages + run: Rscript -e 'update.packages(ask=FALSE)' + - name: Test if: ${{ matrix.covr == 'no' }} run: cd apis/r && tools/r-ci.sh run_tests @@ -75,6 +78,10 @@ jobs: run: cat $HOME/work/TileDB-SOMA/TileDB-SOMA/apis/r/tiledbsoma.Rcheck/00install.out if: failure() + - name: View Test Output + run: cat $HOME/work/TileDB-SOMA/TileDB-SOMA/apis/r/tiledbsoma.Rcheck/00check.log + if: failure() + - name: Coverage if: ${{ matrix.os == 'ubuntu-latest' && matrix.covr == 'yes' }} run: cd apis/r && tools/r-ci.sh coverage diff --git a/.github/workflows/r-python-interop-testing.yml b/.github/workflows/r-python-interop-testing.yml index 6087ce71f2..c58aacfeda 100644 --- a/.github/workflows/r-python-interop-testing.yml +++ b/.github/workflows/r-python-interop-testing.yml @@ -80,5 +80,8 @@ jobs: python -c 'import tiledbsoma; tiledbsoma.show_package_versions()' python scripts/show-versions.py + - name: Update Packages + run: Rscript -e 'update.packages(ask=FALSE)' + - name: Interop Tests run: python -m pytest apis/system/tests/ diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 0c1bf7a9b5..230e3424e8 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -233,6 +233,8 @@ def from_h5ad( *, context: Optional[SOMATileDBContext] = None, platform_config: Optional[PlatformConfig] = None, + obs_id_name: str = "obs_id", + var_id_name: str = "var_id", ingest_mode: IngestMode = "write", use_relative_uri: Optional[bool] = None, X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray, @@ -326,6 +328,8 @@ def from_h5ad( measurement_name, context=context, platform_config=platform_config, + obs_id_name=obs_id_name, + 
var_id_name=var_id_name, ingest_mode=ingest_mode, use_relative_uri=use_relative_uri, X_kind=X_kind, @@ -346,6 +350,8 @@ def from_anndata( *, context: Optional[SOMATileDBContext] = None, platform_config: Optional[PlatformConfig] = None, + obs_id_name: str = "obs_id", + var_id_name: str = "var_id", ingest_mode: IngestMode = "write", use_relative_uri: Optional[bool] = None, X_kind: Union[Type[SparseNDArray], Type[DenseNDArray]] = SparseNDArray, @@ -422,7 +428,7 @@ def from_anndata( with _write_dataframe( df_uri, conversions.decategoricalize_obs_or_var(anndata.obs), - id_column_name="obs_id", + id_column_name=obs_id_name, platform_config=platform_config, context=context, ingestion_params=ingestion_params, @@ -751,6 +757,7 @@ def append_obs( exp: Experiment, new_obs: pd.DataFrame, *, + obs_id_name: str = "obs_id", registration_mapping: ExperimentAmbientLabelMapping, context: Optional[SOMATileDBContext] = None, platform_config: Optional[PlatformConfig] = None, @@ -792,7 +799,7 @@ def append_obs( with _write_dataframe( exp.obs.uri, conversions.decategoricalize_obs_or_var(new_obs), - id_column_name="obs_id", + id_column_name=obs_id_name, platform_config=platform_config, context=context, ingestion_params=ingestion_params, @@ -809,6 +816,7 @@ def append_var( new_var: pd.DataFrame, measurement_name: str, *, + var_id_name: str = "var_id", registration_mapping: ExperimentAmbientLabelMapping, context: Optional[SOMATileDBContext] = None, platform_config: Optional[PlatformConfig] = None, diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 14a400fae1..4c1bd0b784 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -744,3 +744,77 @@ def test_obs_with_categorical_int_nan_enumeration( tiledbsoma.io.from_h5ad( output_path, h5ad_file_categorical_int_nan, measurement_name="RNA" ) + + +@pytest.mark.parametrize("obs_id_name", ["obs_id", "cells_are_great"]) 
# NOTE: the "obs_id_name" parametrization is the decorator immediately above
# this one; together the four decorators form the full test matrix.
@pytest.mark.parametrize("var_id_name", ["var_id", "genes_are_nice_too"])
@pytest.mark.parametrize("indexify_obs", [True, False])
@pytest.mark.parametrize("indexify_var", [True, False])
def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var):
    """Round-trip an AnnData through SOMA with caller-chosen ID column names.

    A small AnnData is built whose obs/var IDs live either in a regular column
    or in the dataframe index (per ``indexify_obs`` / ``indexify_var``).  It is
    ingested with ``from_anndata`` and exported with ``to_anndata``, passing
    ``obs_id_name`` / ``var_id_name`` both ways.  The test then checks that the
    named columns exist in the SOMA dataframes and that the exported AnnData
    indices equal the values stored in those columns.
    """
    obs_ids = ["AAAT", "CATG", "CTGA", "TCTG", "TGAG", "TTTG"]
    var_ids = ["AKT1", "APOE", "ESR1", "TP53", "VEGFA", "ZZZ3"]
    n_obs, n_var = len(obs_ids), len(var_ids)

    # obs: the ID column plus an ordered categorical alternating B/T cells.
    cell_labels = ["T cell" if k % 2 else "B cell" for k in range(n_obs)]
    obs = pd.DataFrame(
        data={
            obs_id_name: np.asarray(obs_ids),
            "cell_type": pd.Categorical(
                cell_labels,
                categories=["B cell", "T cell"],
                ordered=True,
            ),
        },
        index=np.arange(n_obs).astype(str),
    )
    if indexify_obs:
        obs = obs.set_index(obs_id_name)

    # var: the ID column plus a float32 running counter.
    var = pd.DataFrame(
        data={
            var_id_name: np.asarray(var_ids),
            "counter": np.arange(n_var, dtype=np.float32),
        },
        index=np.arange(n_var).astype(str),
    )
    if indexify_var:
        var = var.set_index(var_id_name)

    # Checkerboard matrix: cells where (row + col) is odd hold
    # 100 + 10 * row + col, every other cell is zero (float64 throughout).
    rows, cols = np.indices((n_obs, n_var))
    X = np.where((rows + cols) % 2 == 1, 100.0 + 10 * rows + cols, 0.0)

    adata = anndata.AnnData(X=X, obs=obs, var=var, dtype=X.dtype)

    uri = tmp_path.as_posix()

    # Ingest; completing without an exception is itself part of the check.
    tiledbsoma.io.from_anndata(
        uri,
        adata,
        measurement_name="RNA",
        obs_id_name=obs_id_name,
        var_id_name=var_id_name,
    )

    with tiledbsoma.Experiment.open(uri) as exp:
        rna_var = exp.ms["RNA"].var

        assert obs_id_name in exp.obs.keys()
        assert var_id_name in rna_var.keys()

        # Export; again, not raising is part of the check.
        bdata = tiledbsoma.io.to_anndata(
            exp,
            measurement_name="RNA",
            obs_id_name=obs_id_name,
            var_id_name=var_id_name,
        )

        soma_obs = exp.obs.read(column_names=[obs_id_name]).concat().to_pandas()
        soma_var = rna_var.read(column_names=[var_id_name]).concat().to_pandas()

        assert list(bdata.obs.index) == list(soma_obs[obs_id_name])
        assert list(bdata.var.index) == list(soma_var[var_id_name])