Skip to content

Commit

Permalink
Merge pull request #17 from ylab-hi/dev
Browse files Browse the repository at this point in the history
refactor: Update CLI options in chop function
  • Loading branch information
cauliyang authored Oct 13, 2024
2 parents e8ea4d3 + 9b73043 commit 2a67947
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 44 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -593,4 +593,5 @@ scripts/*.json
*stats_*.json
!build.rs
.ruff_cache
hg_deepchopper
hg_deepchopper
analysis_data
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# <img src="./documentation/logo.webp" alt="logo" height="100"/> **DeepChopper** [![social](https://img.shields.io/github/stars/ylab-hi/DeepChopper?style=social)](https://github.com/ylab-hi/DeepChopper/stargazers)

[![pypi](https://img.shields.io/pypi/v/deepchopper.svg)](https://pypi.python.org/pypi/deepchopper)
[![PyPI - Wheel](https://img.shields.io/pypi/wheel/deepchopper)](https://pypi.org/project/deepchopper/#files)
[![license](https://img.shields.io/pypi/l/deepchopper.svg)](https://github.com/ylab-hi/DeepChopper/blob/main/LICENSE)
[![pypi version](https://img.shields.io/pypi/pyversions/deepchopper.svg)](https://pypi.python.org/pypi/deepchopper)
[![Actions status](https://github.com/ylab-hi/DeepChopper/actions/workflows/release-python.yml/badge.svg)](https://github.com/ylab-hi/DeepChopper/actions)
[![platform](https://img.shields.io/badge/platform-linux%20%7C%20osx%20%7C%20win-blue)](https://pypi.org/project/deepchopper/#files)
[![Actions status](https://github.com/ylab-hi/DeepChopper/actions/workflows/release-python.yml/badge.svg)](https://github.com/ylab-hi/DeepChopper/actions)
[![Space](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.co/spaces/yangliz5/deepchopper)

<!--toc:start-->
Expand Down
70 changes: 28 additions & 42 deletions deepchopper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def encode(
if verbose:
set_logging_level(logging.INFO)

if isinstance(fastq_path, str):
fastq_path = Path(fastq_path)

if not fastq_path.exists():
msg = f"Folder {fastq_path} does not exist."
logging.error(msg)
Expand Down Expand Up @@ -109,6 +112,9 @@ def predict(
if verbose:
set_logging_level(logging.INFO)

if isinstance(data_path, str):
data_path = Path(data_path)

tokenizer = deepchopper.models.llm.load_tokenizer_from_hyena_model(model_name="hyenadna-small-32k-seqlen")
datamodule: LightningDataModule = deepchopper.data.fq_datamodule.FqDataModule(
train_data_path="dummy.parquet",
Expand All @@ -129,7 +135,7 @@ def predict(

trainer = lightning.pytorch.trainer.Trainer(
accelerator=accelerator,
devices=gpus,
devices=devices,
callbacks=callbacks,
deterministic=False,
logger=False,
Expand All @@ -141,19 +147,17 @@ def predict(
def chop(
predicts: list[Path] = typer.Argument(..., help="Paths to prediction files"),
fq: Path = typer.Argument(..., help="Path to FASTQ file"),
smooth_window_size: int = typer.Option(21, "--smooth-window", "-s", help="Smooth window size"),
min_interval_size: int = typer.Option(13, "--min-interval", "-i", help="Minimum interval size"),
approved_interval_number: int = typer.Option(20, "--approved-intervals", "-a", help="Number of approved intervals"),
max_process_intervals: int = typer.Option(4, "--max-process", "-p", help="Maximum process intervals"),
min_read_length_after_chop: int = typer.Option(
20, "--min-read-length", "-l", help="Minimum read length after chop"
),
output_chopped_seqs: bool = typer.Option(False, "--output-chopped", "-o", help="Output chopped sequences"),
chop_type: str = typer.Option("all", "--chop-type", "-t", help="Chop type"),
threads: int = typer.Option(2, "--threads", "-n", help="Number of threads"),
output_prefix: str | None = typer.Option(None, "--prefix", "-x", help="Output prefix"),
max_batch_size: int | None = typer.Option(None, "--max-batch", "-b", help="Maximum batch size"),
smooth_window_size: int = typer.Option(21, "--smooth-window", help="Smooth window size"),
min_interval_size: int = typer.Option(13, "--min-interval-size", help="Minimum interval size"),
approved_interval_number: int = typer.Option(20, "--approved-intervals", help="Number of approved intervals"),
max_process_intervals: int = typer.Option(4, "--max-process-intervals", help="Maximum process intervals"),
min_read_length_after_chop: int = typer.Option(20, "--min-read-length", help="Minimum read length after chop"),
output_chopped_seqs: bool = typer.Option(False, "--output-chopped", help="Output chopped sequences"),
chop_type: str = typer.Option("all", "--chop-type", help="Chop type"),
threads: int = typer.Option(2, "--threads", help="Number of threads"),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output"),
output_prefix: str | None = typer.Option(None, "--prefix", "-o", help="Output prefix"),
max_batch_size: int | None = typer.Option(None, "--max-batch", help="Maximum batch size"),
):
"""Chop sequences based on predictions."""
if verbose:
Expand All @@ -168,37 +172,19 @@ def chop(

predict_files = " ".join([f"--pdt {predict}" for predict in predicts])

commands = [
[
"deepchopper-chop",
predict_files,
"--fq",
fq,
"-t",
threads,
"-s",
smooth_window_size,
"--mis",
min_interval_size,
"-a",
approved_interval_number,
"--mpi",
max_process_intervals,
"--mcr",
min_read_length_after_chop,
"--ocq",
output_chopped_seqs,
"--ct",
chop_type,
"-o",
output_prefix,
"-m",
max_batch_size,
],
]
command = f"deepchopper-chop {predict_files} --fq {fq} -t {threads} -s {smooth_window_size} --mis {min_interval_size} -a {approved_interval_number} --mpi {max_process_intervals} --mcr {min_read_length_after_chop} --ct {chop_type} "

if output_chopped_seqs:
command += "--ocq "

if output_prefix is not None:
command += f"-o {output_prefix} "

if max_batch_size is not None:
command += f"-m {max_batch_size} "

try:
subprocess.run(commands, check=True)
subprocess.run(command.split(), check=True)
except subprocess.CalledProcessError as e:
logging.error(f"Error: Chopping failed with exit code {e.returncode}")
raise e
Expand Down

0 comments on commit 2a67947

Please sign in to comment.