Skip to content

Commit

Permalink
Debug clustering.merge_labels to be able to correctly revert minor …
Browse files Browse the repository at this point in the history
…labels back to parent labels.
  • Loading branch information
akikuno committed Jan 13, 2024
1 parent 5899543 commit 8127a94
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
7 changes: 7 additions & 0 deletions docs/ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,15 @@
+ ReferenceのアレルをFASTA/HTMLディレクトリに保存する
-->

# v0.3.6

## 📝 Documentation
## 🚀 Features
+ To make Insertion alleles easier to distinguish, the reference sequences for Insertion alleles are now saved in the FASTA/HTML directories. [Commit Detail](https://github.com/akikuno/DAJIN2/commit/5899543077f0398863b6316d8c3e953b5f125f55)

## 🐛 Bug Fixes
## 🔧 Maintenance
## ⛔️ Deprecated

-------------

Expand Down
22 changes: 18 additions & 4 deletions src/DAJIN2/core/clustering/label_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,21 @@ def map_clusters_to_previous(labels_sample: list[int], labels_previous: list[int
return correspondence


def get_update_required_labels(correspondence: dict[int, int]) -> dict[int, set[int]]:
    """Group the keys of ``correspondence`` by the value they map to.

    Args:
        correspondence: Mapping from a label to its corresponding label
            (presumably the output of ``map_clusters_to_previous``).

    Returns:
        A plain ``dict`` whose keys are the distinct values of
        ``correspondence`` and whose values are the sets of keys that map
        to them. An empty mapping yields an empty dict.
    """
    update_required_labels: dict[int, set[int]] = {}
    for key, value in correspondence.items():
        # setdefault keeps the result a plain dict (no defaultdict side effects
        # on later lookups) while avoiding the explicit membership test.
        update_required_labels.setdefault(value, set()).add(key)
    return update_required_labels


def merge_minor_cluster(
labels_sample: list[int],
labels_previous: list[int],
threshold_percentage: float = 0.5,
threshold_readnumber: int = 10,
threshold_readnumber: int = 5,
) -> list[int]:
"""Merge labels in sample if they appear less than 'threshold' percentage."""

Expand All @@ -55,8 +65,12 @@ def merge_minor_cluster(
minor_labels = minor_labels_percentage | minor_labels_readnumber

correspondence = map_clusters_to_previous(labels_sample, labels_previous)
update_required_labels = get_update_required_labels(correspondence)

labels_merged = [correspondence[label] if label in minor_labels else label for label in labels_sample]
labels_merged = labels_sample.copy()
for m in minor_labels:
new_label = max(labels_merged) + 1
labels_merged = [new_label if label in update_required_labels[correspondence[m]] else label for label in labels_merged]

return labels_merged

Expand All @@ -67,8 +81,8 @@ def merge_minor_cluster(


def merge_labels(labels_control: list[int], labels_sample: list[int], labels_previous: list[int]) -> list[int]:
labels_merged = merge_mixed_cluster(labels_control, labels_sample)
labels_merged = merge_minor_cluster(
labels_merged, labels_previous, threshold_percentage=0.5, threshold_readnumber=10
labels_sample, labels_previous, threshold_percentage=0.5, threshold_readnumber=10
)
labels_merged = merge_mixed_cluster(labels_control, labels_merged)
return labels_merged

0 comments on commit 8127a94

Please sign in to comment.