Skip to content

Commit

Permalink
Doc bits for splitting out little test files (#167)
Browse files Browse the repository at this point in the history
* Doc bits for splitting out little test files

* mkmd.sh was renaming off README.md et al and should not have (#169)

* Doc bits for splitting out little test files
  • Loading branch information
johnkerl authored Jun 20, 2022
1 parent e08e474 commit 30ad846
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 0 deletions.
Binary file added apis/python/anndata/subset-soma-01.h5ad
Binary file not shown.
Binary file added apis/python/anndata/subset-soma-02.h5ad
Binary file not shown.
Binary file added apis/python/anndata/subset-soma-03.h5ad
Binary file not shown.
Binary file added apis/python/anndata/subset-soma-04.h5ad
Binary file not shown.
File renamed without changes.
47 changes: 47 additions & 0 deletions apis/python/tools/splitter
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python

"""
Given one of the outputs from cartographer.py, splits it into a few small pieces for quick-iteration purposes.
"""

import tiledbsc
import tiledbsc.io

import numpy as np
import os, shutil

# ----------------------------------------------------------------
def write_subset(soma, obs_indices, output_path):
    """
    Copies the obs rows of `soma` selected by `obs_indices` into a new SOMA at `output_path`,
    then exports that new SOMA to `output_path + ".h5ad"`.

    * `soma`: the input SOMA to read obs, var, and X data from.
    * `obs_indices`: positions used to index into the input's obs dataframe index.
    * `output_path`: where the subset SOMA is written; anything already there is removed first.

    The input's full var dataframe is written to the subset. An `is_primary_data` column, True for
    every selected row, is set on the subset's obs dataframe before it is written.
    """
    # Start from a clean slate so a rerun does not mix with earlier output.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    out_soma = tiledbsc.SOMA(output_path)
    out_soma.create_unless_exists()

    # Translate positions into the obs IDs found in the input's obs index.
    selected_obs_ids = list(soma.obs.df().index[obs_indices])
    all_var_ids = list(soma.var.df().index)

    obs_df = soma.obs.df(selected_obs_ids)
    var_df = soma.var.df()
    # NOTE(review): None as the second selector presumably means "all var IDs" -- confirm.
    x_csr = soma.X.data.csr(selected_obs_ids, None)

    num_selected = len(selected_obs_ids)
    obs_df["is_primary_data"] = np.asarray([True] * num_selected)

    out_soma.obs.from_dataframe(obs_df, extent=2048)
    out_soma.var.from_dataframe(var_df, extent=2048)

    # Progress/sanity output: shapes of what was just written and of the X subset.
    print("S OBS", out_soma.obs.shape())
    print("S VAR", out_soma.var.shape())
    print("S XDA", x_csr.shape)

    out_soma.X.add_layer_from_matrix_and_dim_values(
        x_csr, selected_obs_ids, all_var_ids
    )

    h5ad_path = output_path + ".h5ad"
    tiledbsc.io.to_h5ad(out_soma, h5ad_path)


# ----------------------------------------------------------------
# Carve the first 400 obs rows of the input atlas into four 100-row subsets.
input_soma = tiledbsc.SOMA("atlas/4056cbab-2a32-4c9e-a55f-c930bc793fb6")

for first_row, subset_path in (
    (0, "subset-soma-01"),
    (100, "subset-soma-02"),
    (200, "subset-soma-03"),
    (300, "subset-soma-04"),
):
    write_subset(input_soma, range(first_row, first_row + 100), subset_path)

0 comments on commit 30ad846

Please sign in to comment.