Apply formatting with Ruff
akikuno committed Jun 15, 2024
1 parent 1f3a981 commit aec9b69
Showing 55 changed files with 291 additions and 230 deletions.
6 changes: 5 additions & 1 deletion docs/RELEASE.md
@@ -17,14 +17,18 @@

## 💥 Breaking

+ Accept FASTA files as input #37 [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ee6d392cd51649c928bd604acafbab4b9d28feb1)]
+ Accept additional file formats as input #37
  + FASTA [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ee6d392cd51649c928bd604acafbab4b9d28feb1)]
  + BAM [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/xxx)]

## 🔧 Maintenance

+ Specify the Python version to be between 3.8 and 3.10. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/5fae947eff7da0f7e1ed5e4ff3f95c911fd9f646)]

+ Change `mutation_exporter.report_mutations` to return `list[list[str]]`. Update the tests accordingly. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/7153cb143d621e136ca94bfe6b391f1d7b61d438)]

+ Apply formatting with Ruff [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/xxx)]

## 🐛 Bug Fixes

+ Add `reallocate_insertion_within_deletion` to `report.mutation_exporter` and reflect it in the mutation info. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ed6a96e01bb40c77df9cd3a17a4c29524684b6f1)]
38 changes: 38 additions & 0 deletions pyproject.toml
@@ -51,3 +51,41 @@ ruptures = ">=1.1.8"
[tool.poetry.scripts]
DAJIN2 = "DAJIN2.main:execute"


[tool.ruff]
line-length = 119

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"UP", # pyupgrade
]
ignore = [
"E501", # line too long, handled by black
"B008", # do not perform function calls in argument defaults
"C901", # too complex
"W191", # indentation contains tabs
"B904", # ignore errors for raise ... from ... not being used
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["musubi_restapi"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
split-on-trailing-comma = true

[tool.ruff.format]
quote-style = "double"


[tool.ruff.lint.pyupgrade]
# Settings for Python 3.8 compatibility
keep-runtime-typing = true
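
To make the effect of this configuration concrete, here is a small before/after sketch on an invented toy module (not a file from this commit; the function and variable names are illustrative):

```python
# Hypothetical module before running `ruff check --fix` and `ruff format`.
from __future__ import annotations

from typing import List
import uuid
from collections import Counter  # unused: flagged by pyflakes (F401)


def label_reads(names: List[str]) -> dict:
    labels = dict()  # flagged by flake8-comprehensions (C408)
    for name in names:
        labels[name] = str(uuid.uuid4())
    return labels
```

```python
# The same module after applying the rules selected above.
from __future__ import annotations

import uuid
from typing import List


def label_reads(names: List[str]) -> dict:
    labels = {}  # dict() rewritten to a literal
    for name in names:
        labels[name] = str(uuid.uuid4())
    return labels
```

The unused import is removed (`F`), the remaining imports are sorted into isort's section order (`I`), and `dict()` becomes a literal (`C4`). `List[str]` is deliberately untouched: with `keep-runtime-typing = true`, pyupgrade (`UP`) keeps the `typing` forms for Python 3.8 compatibility, as the comment in the configuration notes.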
3 changes: 1 addition & 2 deletions src/DAJIN2/core/classification/allele_merger.py
@@ -1,8 +1,7 @@
from __future__ import annotations

from itertools import groupby
from collections import defaultdict

from itertools import groupby

##########################################################
# merge minor alleles
4 changes: 2 additions & 2 deletions src/DAJIN2/core/classification/classifier.py
@@ -1,10 +1,10 @@
from __future__ import annotations

from pathlib import Path
from itertools import groupby
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.core.classification.allele_merger import merge_minor_alleles
from DAJIN2.utils import io


def calc_match(cssplit: str) -> float:
2 changes: 1 addition & 1 deletion src/DAJIN2/core/clustering/__init__.py
@@ -1,3 +1,3 @@
from DAJIN2.core.clustering.appender import add_labels, add_percent, add_readnum
from DAJIN2.core.clustering.label_extractor import extract_labels
from DAJIN2.core.clustering.appender import add_labels, add_readnum, add_percent
from DAJIN2.core.clustering.label_updator import update_labels
8 changes: 4 additions & 4 deletions src/DAJIN2/core/clustering/clustering.py
@@ -1,18 +1,18 @@
from __future__ import annotations

from pathlib import Path
from itertools import chain
from collections import Counter
from itertools import chain
from pathlib import Path

import numpy as np
from scipy.sparse import csr_matrix, spmatrix
from sklearn import metrics
from sklearn.cluster import BisectingKMeans
from scipy.sparse import csr_matrix, spmatrix

from DAJIN2.utils import io, config
from DAJIN2.core.clustering.label_merger import merge_labels
from DAJIN2.core.clustering.score_handler import subset_scores
from DAJIN2.core.clustering.strand_bias_handler import remove_biased_clusters
from DAJIN2.utils import config, io

config.set_warnings_ignore()

9 changes: 4 additions & 5 deletions src/DAJIN2/core/clustering/label_extractor.py
@@ -1,15 +1,14 @@
from __future__ import annotations

import uuid

from pathlib import Path
from itertools import groupby
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.core.clustering.score_handler import make_score, annotate_score
from DAJIN2.core.clustering.clustering import return_labels
from DAJIN2.core.clustering.label_updator import relabel_with_consective_order
from DAJIN2.core.clustering.score_handler import annotate_score, make_score
from DAJIN2.core.clustering.strand_bias_handler import is_strand_bias
from DAJIN2.core.clustering.clustering import return_labels
from DAJIN2.utils import io


def extract_labels(classif_sample, TEMPDIR, SAMPLE_NAME, CONTROL_NAME) -> list[dict[str]]:
1 change: 1 addition & 0 deletions src/DAJIN2/core/clustering/label_merger.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections import Counter

import numpy as np


4 changes: 2 additions & 2 deletions src/DAJIN2/core/clustering/score_handler.py
@@ -1,8 +1,8 @@
from __future__ import annotations

from typing import Generator
from itertools import groupby
from collections import Counter
from itertools import groupby
from typing import Generator

from DAJIN2.core.clustering.kmer_generator import generate_mutation_kmers

15 changes: 8 additions & 7 deletions src/DAJIN2/core/clustering/strand_bias_handler.py
@@ -1,5 +1,13 @@
from __future__ import annotations

from collections import defaultdict
from pathlib import Path
from typing import Generator

from sklearn.tree import DecisionTreeClassifier

from DAJIN2.utils import io

"""
Nanopore sequencing often produces strand-specific mutations even when the underlying mutation itself is not strand-specific; these are treated as sequencing errors and should be removed.
@@ -8,13 +16,6 @@
Re-allocates reads belonging to clusters with strand bias to clusters without strand bias.
"""

from pathlib import Path
from typing import Generator
from collections import defaultdict
from sklearn.tree import DecisionTreeClassifier

from DAJIN2.utils import io

# Constants
STRAND_BIAS_LOWER_LIMIT = 0.1
STRAND_BIAS_UPPER_LIMIT = 0.9
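
The docstring above describes the strategy; below is a minimal sketch of how such a reallocation could look, pieced together from the constants and the `DecisionTreeClassifier` import. It is an assumption, not the module's actual implementation; the feature matrix `X` and the label array are illustrative.

```python
from __future__ import annotations

from collections import defaultdict

import numpy as np
from sklearn.tree import DecisionTreeClassifier

STRAND_BIAS_LOWER_LIMIT = 0.1
STRAND_BIAS_UPPER_LIMIT = 0.9


def is_strand_biased(strands: list[str]) -> bool:
    # A cluster is biased when its forward-strand fraction is extreme.
    fraction_forward = strands.count("+") / len(strands)
    return not (STRAND_BIAS_LOWER_LIMIT < fraction_forward < STRAND_BIAS_UPPER_LIMIT)


def reallocate_biased_clusters(labels: np.ndarray, strands: list[str], X: np.ndarray) -> np.ndarray:
    strands_by_label = defaultdict(list)
    for label, strand in zip(labels, strands):
        strands_by_label[label].append(strand)
    biased = {label for label, s in strands_by_label.items() if is_strand_biased(s)}
    unbiased = np.array([label not in biased for label in labels])
    if biased and unbiased.any():
        # Learn cluster assignments from unbiased reads, then reassign the rest.
        tree = DecisionTreeClassifier().fit(X[unbiased], labels[unbiased])
        labels = labels.copy()
        labels[~unbiased] = tree.predict(X[~unbiased])
    return labels
```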
6 changes: 2 additions & 4 deletions src/DAJIN2/core/consensus/__init__.py
@@ -1,6 +1,4 @@
from DAJIN2.core.consensus.clust_formatter import downsample_by_label, remove_minor_alleles
from DAJIN2.core.consensus.consensus import call_consensus
from DAJIN2.core.consensus.name_handler import call_allele_name
from DAJIN2.core.consensus.name_handler import update_key_by_allele_name
from DAJIN2.core.consensus.name_handler import add_key_by_allele_name
from DAJIN2.core.consensus.clust_formatter import remove_minor_alleles, downsample_by_label
from DAJIN2.core.consensus.mutation_extractor import cache_mutation_loci
from DAJIN2.core.consensus.name_handler import add_key_by_allele_name, call_allele_name, update_key_by_allele_name
2 changes: 1 addition & 1 deletion src/DAJIN2/core/consensus/clust_formatter.py
@@ -1,8 +1,8 @@
from __future__ import annotations

import random
from itertools import groupby
from collections import defaultdict
from itertools import groupby


def remove_minor_alleles(clust_sample: list[dict]) -> list[dict]:
10 changes: 4 additions & 6 deletions src/DAJIN2/core/consensus/consensus.py
@@ -1,14 +1,13 @@
from __future__ import annotations

from pathlib import Path
from collections import defaultdict
from dataclasses import dataclass
from itertools import groupby
from collections import defaultdict
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.utils.cssplits_handler import call_sequence


###########################################################
# call position weight matrix (cons_percentage)
###########################################################
@@ -98,11 +97,10 @@ class ConsensusKey:


def call_consensus(tempdir: Path, sample_name: str, clust_sample: list[dict]) -> tuple[dict[list], dict[str]]:

clust_sample.sort(key=lambda x: [x["ALLELE"], x["LABEL"]])

cons_percentages = dict()
cons_sequences = dict()
cons_percentages = {}
cons_sequences = {}

for (allele, label), group in groupby(clust_sample, key=lambda x: [x["ALLELE"], x["LABEL"]]):
clust = list(group)
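
The `call_consensus` fragment above relies on the classic sort-then-groupby idiom. A small self-contained illustration, with made-up records, of why the sort is required:

```python
from itertools import groupby

# itertools.groupby only merges *adjacent* items, so the records must first
# be sorted by the same key that is used for grouping.
records = [
    {"ALLELE": "control", "LABEL": 1},
    {"ALLELE": "insertion", "LABEL": 2},
    {"ALLELE": "control", "LABEL": 1},
]
records.sort(key=lambda x: [x["ALLELE"], x["LABEL"]])
for (allele, label), group in groupby(records, key=lambda x: [x["ALLELE"], x["LABEL"]]):
    print(allele, label, len(list(group)))
# control 1 2
# insertion 2 1
```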
8 changes: 4 additions & 4 deletions src/DAJIN2/core/consensus/mutation_extractor.py
@@ -1,14 +1,14 @@
from __future__ import annotations

from pathlib import Path
from itertools import groupby
from pathlib import Path

import numpy as np
from sklearn.cluster import MiniBatchKMeans

from DAJIN2.utils import io
from DAJIN2.core.preprocess.mutation_extractor import summarize_indels, extract_mutation_loci, minimize_mutation_counts
from DAJIN2.core.consensus.similarity_searcher import cache_selected_control_by_similarity
from DAJIN2.core.preprocess.mutation_extractor import extract_mutation_loci, minimize_mutation_counts, summarize_indels
from DAJIN2.utils import io

"""
Most of the code reuses `preprocess.cache_mutation_loci`.
@@ -25,7 +25,7 @@ def get_thresholds(path_indels_normalized_sample, path_indels_normalized_control
indels_normalized_sample = io.load_pickle(path_indels_normalized_sample)
indels_normalized_control = io.load_pickle(path_indels_normalized_control)
indels_normalized_minimize_control = minimize_mutation_counts(indels_normalized_control, indels_normalized_sample)
thresholds = dict()
thresholds = {}
for mut in {"+", "-", "*"}:
values_sample = indels_normalized_sample[mut]
values_control = indels_normalized_minimize_control[mut]
3 changes: 2 additions & 1 deletion src/DAJIN2/core/consensus/name_handler.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import re

from DAJIN2.core.consensus.consensus import ConsensusKey


@@ -69,7 +70,7 @@ def call_allele_name(


def update_key_by_allele_name(cons: dict, allele_names: dict[int, str]) -> dict:
cons_update = dict()
cons_update = {}
for key in cons:
old_allele = cons[key]
new_allele = allele_names[key.label]
11 changes: 5 additions & 6 deletions src/DAJIN2/core/consensus/similarity_searcher.py
@@ -1,10 +1,9 @@
from __future__ import annotations

from pathlib import Path
from collections import defaultdict
from pathlib import Path

import numpy as np

from sklearn.neighbors import LocalOutlierFactor

from DAJIN2.utils import io
@@ -28,22 +27,22 @@ def onehot_by_mutations(midsv_sample: list[dict]) -> dict[str, np.ndarray]:
def calculate_percentage(
mut_onehot_sample: dict[str, np.ndarray], coverage_match: np.ndarray[int]
) -> dict[str, np.ndarray]:
mut_percentage = dict()
mut_percentage = {}
for mut, onehot in mut_onehot_sample.items():
x = np.sum(onehot, axis=0) / coverage_match
mut_percentage[mut] = np.where(np.isnan(x), 0, x)
return mut_percentage


def get_values_to_mask(mut_percentage_sample: dict[str, np.ndarray], threshold=0.5) -> dict[str, np.ndarray[float]]:
mask = dict()
mask = {}
for mut, percentage in mut_percentage_sample.items():
mask[mut] = np.where(percentage > threshold, 0, percentage)
return mask


def apply_mask(mut_onehot: dict[str, np.ndarray], mask_sample: dict[str, np.ndarray[float]]):
mut_onehot_masked = dict()
mut_onehot_masked = {}
for mut, onehot in mut_onehot.items():
mut_onehot_masked[mut] = onehot * mask_sample[mut]
return mut_onehot_masked
@@ -52,7 +51,7 @@ def apply_mask(mut_onehot: dict[str, np.ndarray], mask_sample: dict[str, np.ndar
def identify_normal_reads(
mut_onehot_sample_masked: dict[str, np.ndarray], mut_onehot_control_masked: dict[str, np.ndarray]
) -> list[bool]:
mutation_comparisons = dict()
mutation_comparisons = {}
for mut in {"+", "-", "*"}:
values_sample = mut_onehot_sample_masked[mut]
values_control = mut_onehot_control_masked[mut]
12 changes: 6 additions & 6 deletions src/DAJIN2/core/preprocess/__init__.py
@@ -1,9 +1,9 @@
from DAJIN2.core.preprocess.cache_checker import exists_cached_hash, exists_cached_genome
from DAJIN2.core.preprocess.genome_fetcher import fetch_coordinates, fetch_chromosome_size
from DAJIN2.core.preprocess.mapping import generate_sam
from DAJIN2.core.preprocess.directory_manager import create_temporal_directories, create_report_directories
from DAJIN2.core.preprocess.cache_checker import exists_cached_genome, exists_cached_hash
from DAJIN2.core.preprocess.directory_manager import create_report_directories, create_temporal_directories
from DAJIN2.core.preprocess.genome_fetcher import fetch_chromosome_size, fetch_coordinates
from DAJIN2.core.preprocess.input_formatter import format_inputs
from DAJIN2.core.preprocess.midsv_caller import generate_midsv
from DAJIN2.core.preprocess.insertions_to_fasta import generate_insertion_fasta
from DAJIN2.core.preprocess.knockin_handler import extract_knockin_loci
from DAJIN2.core.preprocess.mapping import generate_sam
from DAJIN2.core.preprocess.midsv_caller import generate_midsv
from DAJIN2.core.preprocess.mutation_extractor import cache_mutation_loci
from DAJIN2.core.preprocess.insertions_to_fasta import generate_insertion_fasta
1 change: 1 addition & 0 deletions src/DAJIN2/core/preprocess/cache_checker.py
@@ -2,6 +2,7 @@

import hashlib
from pathlib import Path

from DAJIN2.utils import io


2 changes: 1 addition & 1 deletion src/DAJIN2/core/preprocess/homopolymer_handler.py
@@ -37,7 +37,7 @@ def extract_sequence_errors_in_homopolymer_loci(
indels_normalized_control: dict[str, np.array],
anomal_loci: dict[set],
) -> dict[str, set[int]]:
sequence_errors_in_homopolymer = dict()
sequence_errors_in_homopolymer = {}
for mut in ["+", "-", "*"]:
repeat_regions = get_repeat_regions(sequence, anomal_loci[mut])
if len(repeat_regions) == 0:
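
For context, here is a minimal sketch of the kind of homopolymer detection `get_repeat_regions` appears to perform — an assumption based on the fragment above, not the module's actual code; the 4-base minimum run length is illustrative:

```python
from __future__ import annotations

import re


def get_repeat_regions(sequence: str, loci: set[int], min_length: int = 4) -> list[tuple[int, int]]:
    """Return (start, end) spans of homopolymer runs that overlap candidate mutation loci."""
    pattern = rf"A{{{min_length},}}|C{{{min_length},}}|G{{{min_length},}}|T{{{min_length},}}"
    regions = []
    for match in re.finditer(pattern, sequence):
        if any(match.start() <= locus < match.end() for locus in loci):
            regions.append((match.start(), match.end()))
    return regions


print(get_repeat_regions("ACGTAAAAAGT", {6}))  # [(4, 9)]
```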
12 changes: 6 additions & 6 deletions src/DAJIN2/core/preprocess/input_formatter.py
@@ -1,14 +1,12 @@
from __future__ import annotations

import uuid

from pathlib import Path
from dataclasses import dataclass
from collections import defaultdict

from DAJIN2.utils import io, config, fastx_handler
from dataclasses import dataclass
from pathlib import Path

from DAJIN2.core import preprocess
from DAJIN2.utils import config, fastx_handler, io


def parse_arguments(arguments: dict) -> tuple:
@@ -64,7 +62,9 @@ def get_genome_coordinates(genome_urls: dict, fasta_alleles: dict, is_cache_geno
if is_cache_genome:
genome_coordinates = next(io.read_jsonl(Path(tempdir, "cache", "genome_coordinates.jsonl")))
else:
genome_coordinates = preprocess.fetch_coordinates(genome_coordinates, genome_urls, fasta_alleles["control"])
genome_coordinates = preprocess.fetch_coordinates(
genome_coordinates, genome_urls, fasta_alleles["control"]
)
genome_coordinates["chrom_size"] = preprocess.fetch_chromosome_size(genome_coordinates, genome_urls)
io.write_jsonl([genome_coordinates], Path(tempdir, "cache", "genome_coordinates.jsonl"))
