-
Notifications
You must be signed in to change notification settings - Fork 4
/
slideseqv1_analysis_py.py
156 lines (121 loc) · 4.45 KB
/
slideseqv1_analysis_py.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# %%
# %%
# Environment setup: imports, scanpy display/logging settings and RNG seeding.
import os
import numpy as np
import warnings
# Blanket-ignore all warnings. The filter is installed before scanpy is
# imported, so it is already active while that import runs.
warnings.filterwarnings("ignore")
import scanpy as sc
sc.logging.print_header()
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.settings.verbosity = 1 # errors (0), warnings (1), info (2), hints (3)
import random
# Note that BANKSY itself is deterministic; here the seeds affect the UMAP clusters and the Leiden partition
# The same seed is applied to both NumPy's and the stdlib's global RNGs.
seed = 0
np.random.seed(seed)
random.seed(seed)
# %%
# Input locations; file_path is relative to the current working directory
file_path = os.path.join("data", "slide_seq", "v1")
# (Optional) arguments for load_adata, only needed if the AnnData object is not present
adata_filename = "slideseqv1_cerebellum_adataraw.h5ad"
locations_filename = "Cerebellum_BeadLocationsForR.csv"
# CSV handed to load_adata as `gcm_filename`
gcm_filename = "Cerebellum_MappedDGEForR.csv"
# %%
from banksy_utils.load_data import load_adata

# Keys to specify coordinate indexes in the AnnData object
coord_keys = ('xcoord', 'ycoord', 'coord_xy')
# Switch between loading the data from the .h5ad file directly and
# converting the raw data to .h5ad format
load_adata_directly = True

# load_adata hands back three values; only `adata` is used further down
# in this script.
raw_y, raw_x, adata = load_adata(
    file_path,
    load_adata_directly,
    adata_filename,
    gcm_filename,
    locations_filename,
    coord_keys,
)
# %%
adata.var_names_make_unique()
# Flag mitochondrial genes for the QC metrics computed below.
# The prefix is matched case-insensitively: gene-symbol casing differs
# between species (e.g. "MT-" for human, "mt-" for mouse), and an exact
# upper-case match would flag nothing on lower-case symbols, turning any
# later mitochondrial filter into a no-op. For symbols that are already
# upper-case the result is identical to a plain "MT-" match.
# NOTE(review): confirm the casing of the gene symbols in this dataset.
adata.var["mt"] = adata.var_names.str.upper().str.startswith("MT-")
# Calculates QC metrics and stores them in place on the adata object
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["mt"],
    log1p=True,
    inplace=True,
)
# %%
from banksy_utils.filter_utils import filter_cells

# Filter the cells with each of the thresholds below; `adata` is rebound
# to whatever filter_cells returns.
# NOTE(review): the exact meaning/units of each threshold are defined by
# filter_cells, which is not visible here - confirm before changing them.
adata = filter_cells(adata, min_count=40, max_count=1000, MT_filter=20, gene_filter=10)
# %%
from banksy_utils.filter_utils import normalize_total, filter_hvg

# Normalize the AnnData dataset (rebinding `adata` to the returned object)
adata = normalize_total(adata)
# %%
# Highly-variable-gene selection: request the top 2000 genes with the
# "seurat" flavor. filter_hvg returns two objects; presumably
# `adata_allgenes` keeps the unfiltered gene set - confirm in filter_hvg.
adata, adata_allgenes = filter_hvg(adata, n_top_genes=2000, flavor="seurat")
# %%
from banksy.main import median_dist_to_nearest_neighbour
from banksy.initialize_banksy import initialize_banksy

# set params
# ==========
nbr_weight_decay = "scaled_gaussian"  # can also be "reciprocal", "uniform" or "ranked"
max_m = 1  # highest order of the azimuthal transform
k_geom = 15  # only for fixed type
plot_graph_weights = True  # not referenced again in the visible part of this script

# Find median distance to closest neighbours, the median distance will be `sigma`
nbrs = median_dist_to_nearest_neighbour(adata, key=coord_keys[2])

# Build the BANKSY dictionary from the coordinates. The edge histogram and
# neighbour-weight plots are switched on, the AGF angle plot is switched off.
banksy_dict = initialize_banksy(
    adata,
    coord_keys,
    k_geom,
    nbr_weight_decay=nbr_weight_decay,
    max_m=max_m,
    plt_edge_hist=True,
    plt_nbr_weights=True,
    plt_agf_angles=False,
)
from banksy.main import concatenate_all
from banksy.embed_banksy import generate_banksy_matrix

# The following are the main hyperparameters for BANKSY
lambda_list = [0.2]  # list of lambda parameters
pca_dims = [20]  # dimensionality that PCA reduces to
resolutions = [0.7]  # clustering resolution(s), later passed to run_Leiden_partition

banksy_dict, banksy_matrix = generate_banksy_matrix(
    adata, banksy_dict, lambda_list, max_m
)

# Add a "nonspatial" entry built from the expression matrix (adata.X) alone,
# so that the nonspatial clustering results are obtained alongside BANKSY's
banksy_dict["nonspatial"] = {
    0.0: {"adata": concatenate_all([adata.X], 0, adata=adata)},
}
print(banksy_dict['nonspatial'][0.0]['adata'])
from banksy_utils.umap_pca import pca_umap

# Run PCA (to the dimensions in pca_dims) plus UMAP on every entry of
# banksy_dict; the return value is not used.
pca_umap(banksy_dict, pca_dims=pca_dims, add_umap=True)

from banksy.cluster_methods import run_Leiden_partition

# Leiden clustering at the requested resolutions, using a fixed
# partition seed of 1234.
results_df, max_num_labels = run_Leiden_partition(
    banksy_dict,
    resolutions,
    num_nn=50,
    num_iterations=-1,
    partition_seed=1234,
    match_labels=True,
)
from banksy.plot_banksy import plot_results

c_map = 'tab20'  # specify color map
# Look the weights up under the decay type that was passed to
# initialize_banksy rather than repeating the literal 'scaled_gaussian':
# with the literal, switching nbr_weight_decay to another supported value
# would leave this lookup pointing at a stale key.
# NOTE(review): assumes banksy_dict is keyed by the decay-type string, as
# the original literal key implies - confirm against initialize_banksy.
weights_graph = banksy_dict[nbr_weight_decay]['weights'][1]

# Plot the clustering results; figures are written under <file_path>/tmp_png
# according to the two save flags below.
plot_results(
    results_df,
    weights_graph,
    c_map,
    match_labels=True,
    coord_keys=coord_keys,
    max_num_labels=max_num_labels,
    save_path=os.path.join(file_path, 'tmp_png'),
    save_fig=False,  # Save Spatial Plot Only
    save_fullfig=True,  # Save Full Plot
)