Apply formatting with Ruff
akikuno committed Jun 15, 2024
1 parent 1f3a981 commit aec9b69
Showing 55 changed files with 291 additions and 230 deletions.
6 changes: 5 additions & 1 deletion docs/RELEASE.md
@@ -17,14 +17,18 @@

## 💥 Breaking

+ Accept FASTA files as input #37 [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ee6d392cd51649c928bd604acafbab4b9d28feb1)]
+ Accept additional file formats as input #37
  + FASTA [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ee6d392cd51649c928bd604acafbab4b9d28feb1)]
  + BAM [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/xxx)]

## 🔧 Maintenance

+ Specify the Python version to be between 3.8 and 3.10. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/5fae947eff7da0f7e1ed5e4ff3f95c911fd9f646)]

+ Change `mutation_exporter.report_mutations` to return `list[list[str]]`. Update the tests accordingly. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/7153cb143d621e136ca94bfe6b391f1d7b61d438)]

+ Apply formatting with Ruff [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/xxx)]

## 🐛 Bug Fixes

+ Add `reallocate_insertion_within_deletion` to `report.mutation_exporter` and reflect it in the mutation info. [[Commit Detail](https://github.com/akikuno/DAJIN2/commit/ed6a96e01bb40c77df9cd3a17a4c29524684b6f1)]
38 changes: 38 additions & 0 deletions pyproject.toml
@@ -51,3 +51,41 @@ ruptures = ">=1.1.8"
[tool.poetry.scripts]
DAJIN2 = "DAJIN2.main:execute"


[tool.ruff]
line-length = 119

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"UP", # pyupgrade
]
ignore = [
"E501", # line too long, handled by black
"B008", # do not perform function calls in argument defaults
"C901", # too complex
"W191", # indentation contains tabs
"B904", # ignore errors for raise ... from ... not being used
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]

[tool.ruff.lint.isort]
combine-as-imports = true
known-first-party = ["musubi_restapi"]
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
split-on-trailing-comma = true

[tool.ruff.format]
quote-style = "double"


[tool.ruff.lint.pyupgrade]
# Settings for Python 3.8 compatibility
keep-runtime-typing = true
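
To make the effect of this configuration concrete, here is a small before/after sketch on an invented toy module (not a file from this commit; the function and variable names are illustrative):

```python
# Hypothetical module before running `ruff check --fix` and `ruff format`.
from __future__ import annotations

from typing import List
import uuid
from collections import Counter  # unused: flagged by pyflakes (F401)


def label_reads(names: List[str]) -> dict:
    labels = dict()  # flagged by flake8-comprehensions (C408)
    for name in names:
        labels[name] = str(uuid.uuid4())
    return labels
```

```python
# The same module after applying the rules selected above.
from __future__ import annotations

import uuid
from typing import List


def label_reads(names: List[str]) -> dict:
    labels = {}  # dict() rewritten to a literal
    for name in names:
        labels[name] = str(uuid.uuid4())
    return labels
```

The unused import is removed (`F`), the remaining imports are sorted into isort's section order (`I`), and `dict()` becomes a literal (`C4`). `List[str]` is deliberately untouched: with `keep-runtime-typing = true`, pyupgrade (`UP`) keeps the `typing` forms for Python 3.8 compatibility, as the comment in the configuration notes.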
3 changes: 1 addition & 2 deletions src/DAJIN2/core/classification/allele_merger.py
@@ -1,8 +1,7 @@
from __future__ import annotations

from itertools import groupby
from collections import defaultdict

from itertools import groupby

##########################################################
# merge minor alleles
4 changes: 2 additions & 2 deletions src/DAJIN2/core/classification/classifier.py
@@ -1,10 +1,10 @@
from __future__ import annotations

from pathlib import Path
from itertools import groupby
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.core.classification.allele_merger import merge_minor_alleles
from DAJIN2.utils import io


def calc_match(cssplit: str) -> float:
2 changes: 1 addition & 1 deletion src/DAJIN2/core/clustering/__init__.py
@@ -1,3 +1,3 @@
from DAJIN2.core.clustering.appender import add_labels, add_percent, add_readnum
from DAJIN2.core.clustering.label_extractor import extract_labels
from DAJIN2.core.clustering.appender import add_labels, add_readnum, add_percent
from DAJIN2.core.clustering.label_updator import update_labels
8 changes: 4 additions & 4 deletions src/DAJIN2/core/clustering/clustering.py
@@ -1,18 +1,18 @@
from __future__ import annotations

from pathlib import Path
from itertools import chain
from collections import Counter
from itertools import chain
from pathlib import Path

import numpy as np
from scipy.sparse import csr_matrix, spmatrix
from sklearn import metrics
from sklearn.cluster import BisectingKMeans
from scipy.sparse import csr_matrix, spmatrix

from DAJIN2.utils import io, config
from DAJIN2.core.clustering.label_merger import merge_labels
from DAJIN2.core.clustering.score_handler import subset_scores
from DAJIN2.core.clustering.strand_bias_handler import remove_biased_clusters
from DAJIN2.utils import config, io

config.set_warnings_ignore()

9 changes: 4 additions & 5 deletions src/DAJIN2/core/clustering/label_extractor.py
@@ -1,15 +1,14 @@
from __future__ import annotations

import uuid

from pathlib import Path
from itertools import groupby
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.core.clustering.score_handler import make_score, annotate_score
from DAJIN2.core.clustering.clustering import return_labels
from DAJIN2.core.clustering.label_updator import relabel_with_consective_order
from DAJIN2.core.clustering.score_handler import annotate_score, make_score
from DAJIN2.core.clustering.strand_bias_handler import is_strand_bias
from DAJIN2.core.clustering.clustering import return_labels
from DAJIN2.utils import io


def extract_labels(classif_sample, TEMPDIR, SAMPLE_NAME, CONTROL_NAME) -> list[dict[str]]:
1 change: 1 addition & 0 deletions src/DAJIN2/core/clustering/label_merger.py
@@ -1,6 +1,7 @@
from __future__ import annotations

from collections import Counter

import numpy as np


4 changes: 2 additions & 2 deletions src/DAJIN2/core/clustering/score_handler.py
@@ -1,8 +1,8 @@
from __future__ import annotations

from typing import Generator
from itertools import groupby
from collections import Counter
from itertools import groupby
from typing import Generator

from DAJIN2.core.clustering.kmer_generator import generate_mutation_kmers

15 changes: 8 additions & 7 deletions src/DAJIN2/core/clustering/strand_bias_handler.py
@@ -1,5 +1,13 @@
from __future__ import annotations

from collections import defaultdict
from pathlib import Path
from typing import Generator

from sklearn.tree import DecisionTreeClassifier

from DAJIN2.utils import io

"""
Nanopore sequencing often produces strand-specific mutations even when the underlying mutation itself is not strand-specific; these are treated as sequencing errors and should be removed.
@@ -8,13 +16,6 @@
Re-allocates reads belonging to clusters with strand bias to clusters without strand bias.
"""

from pathlib import Path
from typing import Generator
from collections import defaultdict
from sklearn.tree import DecisionTreeClassifier

from DAJIN2.utils import io

# Constants
STRAND_BIAS_LOWER_LIMIT = 0.1
STRAND_BIAS_UPPER_LIMIT = 0.9
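
The docstring above describes the strategy; below is a minimal sketch of how such a reallocation could look, pieced together from the constants and the `DecisionTreeClassifier` import. It is an assumption, not the module's actual implementation; the feature matrix `X` and the label array are illustrative.

```python
from __future__ import annotations

from collections import defaultdict

import numpy as np
from sklearn.tree import DecisionTreeClassifier

STRAND_BIAS_LOWER_LIMIT = 0.1
STRAND_BIAS_UPPER_LIMIT = 0.9


def is_strand_biased(strands: list[str]) -> bool:
    # A cluster is biased when its forward-strand fraction is extreme.
    fraction_forward = strands.count("+") / len(strands)
    return not (STRAND_BIAS_LOWER_LIMIT < fraction_forward < STRAND_BIAS_UPPER_LIMIT)


def reallocate_biased_clusters(labels: np.ndarray, strands: list[str], X: np.ndarray) -> np.ndarray:
    strands_by_label = defaultdict(list)
    for label, strand in zip(labels, strands):
        strands_by_label[label].append(strand)
    biased = {label for label, s in strands_by_label.items() if is_strand_biased(s)}
    unbiased = np.array([label not in biased for label in labels])
    if biased and unbiased.any():
        # Learn cluster assignments from unbiased reads, then reassign the rest.
        tree = DecisionTreeClassifier().fit(X[unbiased], labels[unbiased])
        labels = labels.copy()
        labels[~unbiased] = tree.predict(X[~unbiased])
    return labels
```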
6 changes: 2 additions & 4 deletions src/DAJIN2/core/consensus/__init__.py
@@ -1,6 +1,4 @@
from DAJIN2.core.consensus.clust_formatter import downsample_by_label, remove_minor_alleles
from DAJIN2.core.consensus.consensus import call_consensus
from DAJIN2.core.consensus.name_handler import call_allele_name
from DAJIN2.core.consensus.name_handler import update_key_by_allele_name
from DAJIN2.core.consensus.name_handler import add_key_by_allele_name
from DAJIN2.core.consensus.clust_formatter import remove_minor_alleles, downsample_by_label
from DAJIN2.core.consensus.mutation_extractor import cache_mutation_loci
from DAJIN2.core.consensus.name_handler import add_key_by_allele_name, call_allele_name, update_key_by_allele_name
2 changes: 1 addition & 1 deletion src/DAJIN2/core/consensus/clust_formatter.py
@@ -1,8 +1,8 @@
from __future__ import annotations

import random
from itertools import groupby
from collections import defaultdict
from itertools import groupby


def remove_minor_alleles(clust_sample: list[dict]) -> list[dict]:
10 changes: 4 additions & 6 deletions src/DAJIN2/core/consensus/consensus.py
@@ -1,14 +1,13 @@
from __future__ import annotations

from pathlib import Path
from collections import defaultdict
from dataclasses import dataclass
from itertools import groupby
from collections import defaultdict
from pathlib import Path

from DAJIN2.utils import io
from DAJIN2.utils.cssplits_handler import call_sequence


###########################################################
# call position weight matrix (cons_percentage)
###########################################################
@@ -98,11 +97,10 @@ class ConsensusKey:


def call_consensus(tempdir: Path, sample_name: str, clust_sample: list[dict]) -> tuple[dict[list], dict[str]]:

clust_sample.sort(key=lambda x: [x["ALLELE"], x["LABEL"]])

cons_percentages = dict()
cons_sequences = dict()
cons_percentages = {}
cons_sequences = {}

for (allele, label), group in groupby(clust_sample, key=lambda x: [x["ALLELE"], x["LABEL"]]):
clust = list(group)
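
The `call_consensus` fragment above relies on the classic sort-then-groupby idiom. A small self-contained illustration, with made-up records, of why the sort is required:

```python
from itertools import groupby

# itertools.groupby only merges *adjacent* items, so the records must first
# be sorted by the same key that is used for grouping.
records = [
    {"ALLELE": "control", "LABEL": 1},
    {"ALLELE": "insertion", "LABEL": 2},
    {"ALLELE": "control", "LABEL": 1},
]
records.sort(key=lambda x: [x["ALLELE"], x["LABEL"]])
for (allele, label), group in groupby(records, key=lambda x: [x["ALLELE"], x["LABEL"]]):
    print(allele, label, len(list(group)))
# control 1 2
# insertion 2 1
```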
8 changes: 4 additions & 4 deletions src/DAJIN2/core/consensus/mutation_extractor.py
@@ -1,14 +1,14 @@
from __future__ import annotations

from pathlib import Path
from itertools import groupby
from pathlib import Path

import numpy as np
from sklearn.cluster import MiniBatchKMeans

from DAJIN2.utils import io
from DAJIN2.core.preprocess.mutation_extractor import summarize_indels, extract_mutation_loci, minimize_mutation_counts
from DAJIN2.core.consensus.similarity_searcher import cache_selected_control_by_similarity
from DAJIN2.core.preprocess.mutation_extractor import extract_mutation_loci, minimize_mutation_counts, summarize_indels
from DAJIN2.utils import io

"""
Most of the code reuses `preprocess.cache_mutation_loci`.
@@ -25,7 +25,7 @@ def get_thresholds(path_indels_normalized_sample, path_indels_normalized_control
indels_normalized_sample = io.load_pickle(path_indels_normalized_sample)
indels_normalized_control = io.load_pickle(path_indels_normalized_control)
indels_normalized_minimize_control = minimize_mutation_counts(indels_normalized_control, indels_normalized_sample)
thresholds = dict()
thresholds = {}
for mut in {"+", "-", "*"}:
values_sample = indels_normalized_sample[mut]
values_control = indels_normalized_minimize_control[mut]
3 changes: 2 additions & 1 deletion src/DAJIN2/core/consensus/name_handler.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import re

from DAJIN2.core.consensus.consensus import ConsensusKey


@@ -69,7 +70,7 @@ def call_allele_name(


def update_key_by_allele_name(cons: dict, allele_names: dict[int, str]) -> dict:
cons_update = dict()
cons_update = {}
for key in cons:
old_allele = cons[key]
new_allele = allele_names[key.label]
11 changes: 5 additions & 6 deletions src/DAJIN2/core/consensus/similarity_searcher.py
@@ -1,10 +1,9 @@
from __future__ import annotations

from pathlib import Path
from collections import defaultdict
from pathlib import Path

import numpy as np

from sklearn.neighbors import LocalOutlierFactor

from DAJIN2.utils import io
@@ -28,22 +27,22 @@ def onehot_by_mutations(midsv_sample: list[dict]) -> dict[str, np.ndarray]:
def calculate_percentage(
mut_onehot_sample: dict[str, np.ndarray], coverage_match: np.ndarray[int]
) -> dict[str, np.ndarray]:
mut_percentage = dict()
mut_percentage = {}
for mut, onehot in mut_onehot_sample.items():
x = np.sum(onehot, axis=0) / coverage_match
mut_percentage[mut] = np.where(np.isnan(x), 0, x)
return mut_percentage


def get_values_to_mask(mut_percentage_sample: dict[str, np.ndarray], threshold=0.5) -> dict[str, np.ndarray[float]]:
mask = dict()
mask = {}
for mut, percentage in mut_percentage_sample.items():
mask[mut] = np.where(percentage > threshold, 0, percentage)
return mask


def apply_mask(mut_onehot: dict[str, np.ndarray], mask_sample: dict[str, np.ndarray[float]]):
mut_onehot_masked = dict()
mut_onehot_masked = {}
for mut, onehot in mut_onehot.items():
mut_onehot_masked[mut] = onehot * mask_sample[mut]
return mut_onehot_masked
@@ -52,7 +51,7 @@ def apply_mask(mut_onehot: dict[str, np.ndarray], mask_sample: dict[str, np.ndar
def identify_normal_reads(
mut_onehot_sample_masked: dict[str, np.ndarray], mut_onehot_control_masked: dict[str, np.ndarray]
) -> list[bool]:
mutation_comparisons = dict()
mutation_comparisons = {}
for mut in {"+", "-", "*"}:
values_sample = mut_onehot_sample_masked[mut]
values_control = mut_onehot_control_masked[mut]
12 changes: 6 additions & 6 deletions src/DAJIN2/core/preprocess/__init__.py
@@ -1,9 +1,9 @@
from DAJIN2.core.preprocess.cache_checker import exists_cached_hash, exists_cached_genome
from DAJIN2.core.preprocess.genome_fetcher import fetch_coordinates, fetch_chromosome_size
from DAJIN2.core.preprocess.mapping import generate_sam
from DAJIN2.core.preprocess.directory_manager import create_temporal_directories, create_report_directories
from DAJIN2.core.preprocess.cache_checker import exists_cached_genome, exists_cached_hash
from DAJIN2.core.preprocess.directory_manager import create_report_directories, create_temporal_directories
from DAJIN2.core.preprocess.genome_fetcher import fetch_chromosome_size, fetch_coordinates
from DAJIN2.core.preprocess.input_formatter import format_inputs
from DAJIN2.core.preprocess.midsv_caller import generate_midsv
from DAJIN2.core.preprocess.insertions_to_fasta import generate_insertion_fasta
from DAJIN2.core.preprocess.knockin_handler import extract_knockin_loci
from DAJIN2.core.preprocess.mapping import generate_sam
from DAJIN2.core.preprocess.midsv_caller import generate_midsv
from DAJIN2.core.preprocess.mutation_extractor import cache_mutation_loci
from DAJIN2.core.preprocess.insertions_to_fasta import generate_insertion_fasta
1 change: 1 addition & 0 deletions src/DAJIN2/core/preprocess/cache_checker.py
@@ -2,6 +2,7 @@

import hashlib
from pathlib import Path

from DAJIN2.utils import io


2 changes: 1 addition & 1 deletion src/DAJIN2/core/preprocess/homopolymer_handler.py
@@ -37,7 +37,7 @@ def extract_sequence_errors_in_homopolymer_loci(
indels_normalized_control: dict[str, np.array],
anomal_loci: dict[set],
) -> dict[str, set[int]]:
sequence_errors_in_homopolymer = dict()
sequence_errors_in_homopolymer = {}
for mut in ["+", "-", "*"]:
repeat_regions = get_repeat_regions(sequence, anomal_loci[mut])
if len(repeat_regions) == 0:
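
For context, here is a minimal sketch of the kind of homopolymer detection `get_repeat_regions` appears to perform — an assumption based on the fragment above, not the module's actual code; the 4-base minimum run length is illustrative:

```python
from __future__ import annotations

import re


def get_repeat_regions(sequence: str, loci: set[int], min_length: int = 4) -> list[tuple[int, int]]:
    """Return (start, end) spans of homopolymer runs that overlap candidate mutation loci."""
    pattern = rf"A{{{min_length},}}|C{{{min_length},}}|G{{{min_length},}}|T{{{min_length},}}"
    regions = []
    for match in re.finditer(pattern, sequence):
        if any(match.start() <= locus < match.end() for locus in loci):
            regions.append((match.start(), match.end()))
    return regions


print(get_repeat_regions("ACGTAAAAAGT", {6}))  # [(4, 9)]
```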
12 changes: 6 additions & 6 deletions src/DAJIN2/core/preprocess/input_formatter.py
@@ -1,14 +1,12 @@
from __future__ import annotations

import uuid

from pathlib import Path
from dataclasses import dataclass
from collections import defaultdict

from DAJIN2.utils import io, config, fastx_handler
from dataclasses import dataclass
from pathlib import Path

from DAJIN2.core import preprocess
from DAJIN2.utils import config, fastx_handler, io


def parse_arguments(arguments: dict) -> tuple:
@@ -64,7 +62,9 @@ def get_genome_coordinates(genome_urls: dict, fasta_alleles: dict, is_cache_geno
if is_cache_genome:
genome_coordinates = next(io.read_jsonl(Path(tempdir, "cache", "genome_coordinates.jsonl")))
else:
genome_coordinates = preprocess.fetch_coordinates(genome_coordinates, genome_urls, fasta_alleles["control"])
genome_coordinates = preprocess.fetch_coordinates(
genome_coordinates, genome_urls, fasta_alleles["control"]
)
genome_coordinates["chrom_size"] = preprocess.fetch_chromosome_size(genome_coordinates, genome_urls)
io.write_jsonl([genome_coordinates], Path(tempdir, "cache", "genome_coordinates.jsonl"))
