Skip to content

Commit

Permalink
Fixes #240: convert alt case/control labels to Unknown
Browse files Browse the repository at this point in the history
  • Loading branch information
jaamarks committed Feb 29, 2024
1 parent 8364c04 commit 0428f25
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions src/cgr_gwas_qc/reporting/qc_exclusions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ def sample_exclusion_counts(data: DataFrameOrFile) -> pd.DataFrame:

return _create_sample_exclusion_counts_table(df)

def _replace_unknown_labels(df):
known_cc_labels = ["Unknown", "Case", "Control", "QC"] # must be one of these labels

# Use boolean indexing to select rows where "case_control" isn't in known_cc_labels
is_unknown = ~df["case_control"].isin(known_cc_labels)
# Update the "case_control" column with "Unknown" for these rows
df.loc[is_unknown, "case_control"] = "Unknown"
return df


def _create_sample_exclusion_counts_table(df: pd.DataFrame) -> pd.DataFrame:
row_names = {
Expand All @@ -62,6 +71,8 @@ def _create_sample_exclusion_counts_table(df: pd.DataFrame) -> pd.DataFrame:
"total": "All Samples", # added below
}

df = _replace_unknown_labels(df.copy())

return (
df.assign(is_array_processing_failure=lambda x: x.is_missing_idats | x.is_missing_gtc)
.assign(samples_remaining=lambda x: ~x.analytic_exclusion & ~x.is_internal_control)
Expand All @@ -77,6 +88,7 @@ def _create_sample_exclusion_counts_table(df: pd.DataFrame) -> pd.DataFrame:
.groupby("case_control")
.sum() # Count the number True
.pipe(_clean_up_counts_table, row_names=row_names, col_names=col_names)

)


Expand Down Expand Up @@ -113,6 +125,8 @@ def _create_subject_exclusion_counts_table(df: pd.DataFrame) -> pd.DataFrame:
"total": "All Subjects",
}

df = _replace_unknown_labels(df.copy())

return (
df.reindex(["case_control", *row_names.keys()], axis=1)
.groupby("case_control")
Expand Down

0 comments on commit 0428f25

Please sign in to comment.