Skip to content

Commit

Permalink
fix duplicate rownames introduced in cellphonedb v5 (#48)
Browse files Browse the repository at this point in the history
* related to ktplots#89

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update support.py

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update plot_cpdb_chord.py

* Update pyproject.toml

* add a new option to toggle whether to keep the id_cp_interaction value when plotting

* update notebooks
  • Loading branch information
zktuong authored Dec 6, 2023
1 parent 797548d commit dac474c
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 83 deletions.
131 changes: 95 additions & 36 deletions docs/notebooks/tutorial.ipynb

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions docs/notebooks/tutorial_v5.ipynb

Large diffs are not rendered by default.

10 changes: 9 additions & 1 deletion ktplotspy/plot/plot_cpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def plot_cpdb(
scale_alpha_by_interaction_scores: bool = False,
scale_alpha_by_cellsign: bool = False,
filter_by_cellsign: bool = False,
keep_id_cp_interaction: bool = False,
) -> Union[ggplot, pd.DataFrame]:
"""Plotting CellPhoneDB results as a dot plot.
Expand Down Expand Up @@ -157,6 +158,8 @@ def plot_cpdb(
Whether or not to scale the transparency of interactions by the cellsign.
filter_by_cellsign: bool, optional
Filter out interactions with a 0 value cellsign.
keep_id_cp_interaction: bool, optional
Whether to keep the original `id_cp_interaction` value when plotting.
Returns
-------
Union[ggplot, pd.DataFrame]
Expand Down Expand Up @@ -192,7 +195,7 @@ def plot_cpdb(
tmp = means_mat.melt(id_vars=means_mat.columns[:col_start])
direc, classif, is_int = {}, {}, {}
for _, r in tmp.iterrows():
key = r.interacting_pair.replace("_", "-") + DEFAULT_SEP * 3 + r.variable
key = r.id_cp_interaction + DEFAULT_SEP * 3 + r.interacting_pair.replace("_", "-") + DEFAULT_SEP * 3 + r.variable
direc[key] = r.directionality
classif[key] = r.classification
is_int[key] = r.is_integrin
Expand Down Expand Up @@ -384,6 +387,11 @@ def plot_cpdb(
if return_table:
return df
else:
# change the labelling of interaction_group
if keep_id_cp_interaction:
df.interaction_group = [re.sub(DEFAULT_SEP * 3, "_", c) for c in df.interaction_group]
else:
df.interaction_group = [c.split(DEFAULT_SEP * 3)[1] for c in df.interaction_group]
# set global figure size
options.figure_size = figsize

Expand Down
16 changes: 11 additions & 5 deletions ktplotspy/plot/plot_cpdb_chord.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,18 @@ def plot_cpdb_chord(
# do some name wrangling
subset_clusters = list(set(flatten([x.split("-") for x in lr_interactions.celltype_group])))
adata_subset = adata[adata.obs[celltype_key].isin(subset_clusters)].copy()
interactions = means[["interacting_pair", "gene_a", "gene_b", "partner_a", "partner_b", "receptor_a", "receptor_b"]].copy()
interactions["converted"] = [re.sub("-", " ", x) for x in interactions.interacting_pair]
interactions["converted"] = [re.sub("_", "-", x) for x in interactions.interacting_pair]
interactions = means[
["id_cp_interaction", "interacting_pair", "gene_a", "gene_b", "partner_a", "partner_b", "receptor_a", "receptor_b"]
].copy()
interactions["use_interaction_name"] = [
x + DEFAULT_SEP * 3 + y for x, y in zip(interactions.id_cp_interaction, interactions.interacting_pair)
]
# interactions["converted"] = [re.sub("-", " ", x) for x in interactions.use_interaction_name]
interactions["converted"] = [re.sub("_", "-", x) for x in interactions.use_interaction_name]
lr_interactions["barcode"] = [a + DEFAULT_SEP + b for a, b in zip(lr_interactions.celltype_group, lr_interactions.interaction_group)]
interactions_subset = interactions[interactions["converted"].isin(list(lr_interactions.interaction_group))].copy()
# handle complexes gently
tm0 = {kx: rx.split("_") for kx, rx in interactions_subset.interacting_pair.items()}
tm0 = {kx: rx.split("_") for kx, rx in interactions_subset.use_interaction_name.items()}
if any([len(x) > 2 for x in tm0.values()]):
complex_id, simple_id = [], []
for i, j in tm0.items():
Expand Down Expand Up @@ -156,6 +161,7 @@ def plot_cpdb_chord(
else:
tm0 = pd.DataFrame(tm0).T
tm0.columns = ["id_a", "id_b"]
tm0.id_a = [x.split(DEFAULT_SEP * 3)[1] for x in tm0.id_a]
interactions_subset = pd.concat([interactions_subset, tm0], axis=1)

# keep only useful genes
Expand Down Expand Up @@ -275,7 +281,7 @@ def plot_cpdb_chord(
end_size = 1 if end_size < 1 else end_size
source = (j["producer"], j["start"] - 1, start_size, raxis_range[0] - size)
destination = (j["receiver"], j["end"] - 1, end_size, raxis_range[0] - size)
circle.chord_plot(source, destination, edge_col_dict[lr])
circle.chord_plot(source, destination, edge_col_dict[lr] if lr in edge_col_dict else "#f7f7f700")

custom_lines = [Line2D([0], [0], color=val, lw=4) for val in edge_col_dict.values()]
circle.figure.legend(custom_lines, edge_col_dict.keys(), **legend_params)
Expand Down
68 changes: 40 additions & 28 deletions ktplotspy/utils/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,40 +233,40 @@ def prep_table(data: pd.DataFrame) -> pd.DataFrame:
Table ready for further analysis.
"""
dat = data.copy()
dat.index = make_unique(dat.interacting_pair)
dat.index = [x + DEFAULT_SEP * 3 + y for x, y in zip(dat.id_cp_interaction, dat.interacting_pair)]
dat.columns = [re.sub("\\|", DEFAULT_SEP, col) for col in dat.columns]
dat.index = [re.sub("_", "-", row) for row in dat.index]
dat.index = [re.sub("[.]", " ", row) for row in dat.index]

return dat


def make_unique(seq: pd.Series) -> List:
    """Make unique names.

    Every name that occurs more than once receives a numeric suffix
    (``name_1``, ``name_2``, ...) in order of appearance; names that occur
    exactly once are returned untouched. A suffix is skipped when the
    resulting name is already present in the input or has already been
    handed out, so the returned names never collide with each other.

    Parameters
    ----------
    seq : pd.Series
        Series to convert to unique.

    Returns
    -------
    List
        List of unique names, in the same order as the input.
    """
    names = list(seq)
    # every name currently in use; a generated name must not land on one of these
    taken = set(names)
    # one independent counter per duplicated name, e.g. {'name': count(1), 'zip': count(1)}
    counters = {name: count(1) for name, n_seen in Counter(names).items() if n_seen > 1}
    for idx, name in enumerate(names):
        counter = counters.get(name)
        if counter is None:
            # name was unique
            continue
        candidate = name + "_" + str(next(counter))
        while candidate in taken:
            # e.g. ['a', 'a', 'a_1']: 'a_1' already exists, so move on to 'a_2'
            candidate = name + "_" + str(next(counter))
        taken.add(candidate)
        names[idx] = candidate
    return names
# def make_unique(seq: pd.Series) -> List:
# """Make unique names.

# Parameters
# ----------
# seq : pd.Series
# Series to convert to unique.

# Returns
# -------
# List
# List of unique names.
# """
# seq = list(seq)
# not_unique = [k for k, v in Counter(seq).items() if v > 1] # so we have: ['name', 'zip']
# # suffix generator dict - e.g., {'name': <my_gen>, 'zip': <my_gen>}
# suff_gens = dict(zip(not_unique, tee(count(1), len(not_unique))))
# for idx, s in enumerate(seq):
# try:
# suffix = "_" + str(next(suff_gens[s]))
# except KeyError:
# # s was unique
# continue
# else:
# seq[idx] += suffix
# return seq


def sub_pattern(cell_type: str, pattern: str) -> str:
Expand Down Expand Up @@ -663,20 +663,23 @@ def generate_df(
out = []
for _, (px, rx) in cell_type_grid.iterrows():
for _, (
ici,
ip,
ga,
gb,
pa,
pb,
ra,
rb,
ui,
cp,
ia,
ib,
) in interactions_subset.iterrows():
if ra:
if rb:
_out = [
ici,
ia,
ib,
ra,
Expand All @@ -692,6 +695,7 @@ def generate_df(
]
else:
_out = [
ici,
ia,
ib,
ra,
Expand All @@ -708,6 +712,7 @@ def generate_df(
else:
if rb:
_out = [
ici,
ia,
ib,
ra,
Expand All @@ -723,6 +728,7 @@ def generate_df(
]
else: # pragma: no cover
_out = [
ici,
ia,
ib,
ra,
Expand All @@ -740,6 +746,7 @@ def generate_df(
pd.DataFrame(
_out,
index=[
"id_cp_interaction",
"ligand",
"receptor",
"receptor_a",
Expand All @@ -763,13 +770,14 @@ def generate_df(
_df = _df.reset_index(drop=True)
for i, j in _df.iterrows():
if (j["receptor_b"]) and not (j["receptor_a"]):
lg, rc = j["receptor"], j["ligand"]
ici, lg, rc = j["id_cp_interaction"], j["receptor"], j["ligand"]
con_pair = lg + "-" + rc
ra, rb = j["receptor_b"], j["receptor_a"]
px, rx = j["receiver"], j["producer"]
pre, prf = j["receiver_expression"], j["receiver_fraction"]
rce, rcf = j["producer_expression"], j["producer_fraction"]
tos, frs = j["from"], j["to"]
_df.at[i, "id_cp_interaction"] = ici
_df.at[i, "ligand"] = lg
_df.at[i, "receptor"] = rc
_df.at[i, "converted_pair"] = con_pair
Expand All @@ -783,4 +791,8 @@ def generate_df(
_df.at[i, "receiver_fraction"] = rcf
_df.at[i, "from"] = frs
_df.at[i, "to"] = tos
else:
ici, lg, rc = j["id_cp_interaction"], j["ligand"], j["receptor"]
con_pair = rc + "-" + lg
_df.at[i, "converted_pair"] = con_pair
return _df
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ktplotspy"
version = "0.2.0"
version = "0.2.1"
description = "Python library for plotting Cellphonedb results. Ported from ktplots R package."
authors = ["Kelvin Tuong <[email protected]>"]
license = "MIT"
Expand Down
16 changes: 16 additions & 0 deletions tests/test_plot_cpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ def test_plot_cpdb(mock_show, adata, means, pvals):
g


@patch("matplotlib.pyplot.show")
@pytest.mark.usefixtures("adata", "means", "pvals")
def test_plot_cpdb_keep_id(mock_show, adata, means, pvals):
    """Smoke-test `plot_cpdb` with `keep_id_cp_interaction` switched on."""
    # Same arguments as the call being exercised, collected up front so the
    # flag under test is easy to spot.
    plot_kwargs = {
        "adata": adata,
        "cell_type1": "B cell",
        "cell_type2": "CD4T cell",
        "means": means,
        "pvals": pvals,
        "celltype_key": "celltype",
        "genes": ["CXCL13", "CD274", "CXCR5"],
        "keep_id_cp_interaction": True,
    }
    g = plot_cpdb(**plot_kwargs)
    g


@patch("matplotlib.pyplot.show")
@pytest.mark.usefixtures("adata", "means", "pvals")
def test_plot_cpdb_title(mock_show, adata, means, pvals):
Expand Down

0 comments on commit dac474c

Please sign in to comment.