Skip to content

Commit

Permalink
Copy label as default value for CL_Label when missing
Browse files Browse the repository at this point in the history
  • Loading branch information
axdanbol committed Feb 14, 2024
1 parent bda8bdc commit 86e1317
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions containers/crosswalking/context/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,24 @@ def crosswalk(
table_organ_level_column: str,
table_label_column: str,
table_clid_column: str,
table_clid_label_column: str,
table_match_column: str,
) -> anndata.AnnData:
"""Crosswalks the data adding CLIDs and match types using a crosswalk table.
Args:
matrix (anndata.AnnData): Data to crosswalk
organ_id (str): Organ id
organ_label (str): Organ level
data_label_column (str): Column used to match against the table
data_clid_column (str): Column to store CLIDs in
data_match_column (str): Column to store match type in
table (pd.DataFrame): Crosswalk table
table_organ_id_column (str): Column storing organ uberon ids
table_organ_level_column (str): Column storing organ levels
table_label_column (str): Column used to match against the data
table_clid_column (str): Column storing CLIDs
table_clid_label_column (str): Column storing CL labels
table_match_column (str): Column storing match type
Returns:
Expand Down Expand Up @@ -101,6 +107,7 @@ def crosswalk(

_set_default_clid(merged_obs, data_clid_column, data_label_column)
_set_default_match(merged_obs, data_match_column)
_set_default_clid(merged_obs, table_clid_label_column, data_label_column)

result = matrix.copy()
result.obs = merged_obs
Expand All @@ -121,6 +128,17 @@ def _set_default_clid(obs: pd.DataFrame, clid_column: str, label_column: str) ->
obs.loc[obs[clid_column].isna(), clid_column] = defaults


def _set_default_clid(obs: pd.DataFrame, clid_label_column: str, label_column: str) -> None:
    """Adds default CL labels to rows that did not match against the crosswalk table.

    Rows whose CL label is missing receive the value of their data label
    instead. The frame is modified in place and nothing is returned.

    NOTE(review): this reuses the name of the `_set_default_clid` helper that
    is defined just before it in this module, so this definition replaces the
    earlier one once the module is loaded. Presumably the intended name is
    `_set_default_clid_label`; renaming requires updating the matching call in
    `crosswalk` in the same change.

    Args:
        obs (pd.DataFrame): Data rows, updated in place
        clid_label_column (str): Column to check and update with default CL labels
        label_column (str): Column with defaults
    """
    missing_label = obs[clid_label_column].isna()
    # pandas aligns the right-hand side on the index, so each row receives its own label.
    obs.loc[missing_label, clid_label_column] = obs[label_column]


def _set_default_match(obs: pd.DataFrame, column: str) -> None:
"""Adds default match type to rows that did not match against the crosswalk table.
Expand Down Expand Up @@ -158,6 +176,7 @@ def _get_empty_table(args: argparse.Namespace) -> pd.DataFrame:
args.crosswalk_table_organ_level_column,
args.crosswalk_table_label_column,
args.crosswalk_table_clid_column,
args.crosswalk_table_clid_label_column,
args.crosswalk_table_match_column,
]
)
Expand Down Expand Up @@ -208,6 +227,7 @@ def main(args: argparse.Namespace):
args.crosswalk_table_organ_level_column,
args.crosswalk_table_label_column,
args.crosswalk_table_clid_column,
args.crosswalk_table_clid_label_column,
args.crosswalk_table_match_column,
)
matrix.write_h5ad(args.output_matrix)
Expand Down Expand Up @@ -250,6 +270,11 @@ def _get_arg_parser() -> argparse.ArgumentParser:
default="CL_ID",
help="Column with CL ID in crosswalking table",
)
parser.add_argument(
"--crosswalk-table-clid-label-column",
default="CL_Label",
help="Column with CL label in crosswalking table",
)
parser.add_argument(
"--crosswalk-table-match-column",
default="CL_Match",
Expand Down

0 comments on commit 86e1317

Please sign in to comment.