From 1fd3615c2cb4e8a1e56d37ed896bf09b351a2de8 Mon Sep 17 00:00:00 2001 From: LaraFuhrmann <55209716+LaraFuhrmann@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:35:28 +0100 Subject: [PATCH] add predicthaplo --- .../predicthaplo_local_haplo.py | 77 +++++++++++-------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py b/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py index db9a0bcf..5e7293eb 100644 --- a/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py +++ b/resources/auxiliary_workflows/benchmark/resources/method_definitions/predicthaplo_local_haplo.py @@ -2,11 +2,24 @@ # CONDA: cxx-compiler = 1.4.1 # CONDA: make = 4.3 # CONDA: cmake = 3.22.1 +# CONDA: rhash = 1.4.4 # CONDA: liblapack = 3.9.0 # CONDA: gtest = 1.11.0 # CONDA: samtools = 1.15.1 # CONDA: biopython = 1.79 +""" +To execute PredictHaplo on Euler, I have to load the following: +source /cluster/apps/local/env2lmod.sh # scp doesn't know env2lmod +export MY_MODULEPATH_ROOT=/cluster/work/bewi/nss/modules/ +module use $MY_MODULEPATH_ROOT/Core +module load gcc/5.4.0 openblas/0.2.19 +. /cluster/work/bewi/members/lfuhrmann/miniconda3/bin/activate +cd /cluster/work/bewi/members/lfuhrmann/ +conda activate snakemake +""" + + import subprocess from pathlib import Path @@ -15,6 +28,23 @@ from Bio.SeqRecord import SeqRecord +def write_config_file(fname_ref, fname_sam,fname_config, ph_prefix): + with open('original_file.txt', 'r') as file: + content = file.readlines() + + modified_content = content + modified_content[2] = str(ph_prefix) + modified_content[4] = str(fname_ref) + modified_content[6] = "0" + modified_content[8] = str(fname_sam) + modified_content[10] = "0" + modified_content[12] = str(fname_ref) + modified_content[16] = "10000" + + with open(fname_config, 'w') as file: + file.writelines(modified_content) + + def main(fname_bam, fname_reference, fname_results_snv, @@ -25,52 +55,33 @@ def main(fname_bam, ): dname_work.mkdir(parents=True, exist_ok=True) - # compile tool - subprocess.run( - ["git", "clone", "https://github.com/cbg-ethz/PredictHaplo.git"], - cwd=dname_work, - check=True, - ) + predicthaplo_exe = "/cluster/work/bewi/members/lfuhrmann/PredictHaplo-1.1/PredictHaplo" - repo_dir = dname_work / "PredictHaplo" + # prepare environment subprocess.run( - ["cmake", "-DCMAKE_BUILD_TYPE=Release", "-B", "build", "-S", "."], - cwd=repo_dir, + ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam], check=True, ) - subprocess.run(["cmake", "--build", "build"], cwd=repo_dir, check=True) - exec_path = repo_dir / "build" / "predicthaplo" + fname_sam = (dname_work / "reads.sam").resolve() - # prepare environment + # copy example config file subprocess.run( - ["samtools", "view", "-h", "-o", dname_work / "reads.sam", fname_bam], + ["wget", "https://github.com/bmda-unibas/PredictHaplo/blob/master/config_5V"], + cwd=dname_work, check=True, ) + # modify config file for predicthaplo + fname_config = (dname_work / "predicthaplo_config").resolve() + ph_prefix = (dname_work / "predicthaplo_output").resolve() + write_config_file(Path(fname_reference).resolve(), fname_sam, fname_config, ph_prefix) + # execute tool - ph_prefix = dname_work / "predicthaplo_output" subprocess.run( [ - exec_path, - "--sam", - dname_work / "reads.sam", - "--reference", - fname_reference, - "--prefix", - ph_prefix, - "--reconstructed_haplotypes", - fname_result, - "--have_true_haplotypes", - "0", - "--min_align_score_fraction", - "-1", - "--min_overlap_factor", - "0.1", - "--entropy_threshold", - "1e-4", - "--min_length", - "1", + predicthaplo_exe, + str(fname_config), ], check=True, )