Skip to content

Commit

Permalink
Added some logging to dev version
Browse files Browse the repository at this point in the history
  • Loading branch information
rachelicr committed Jul 23, 2024
1 parent 6b616d7 commit 70db26f
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 36 deletions.
1 change: 1 addition & 0 deletions src/SOPRANO/R/parse_vcf.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(magrittr)
library(dplyr)

paste("parse_vcf.R")

Expand Down
6 changes: 5 additions & 1 deletion src/SOPRANO/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ def with_tab_annotator(tab: DeltaGenerator):
vcf_definition_method_selection = st.radio(
"Method for defining VCF files to annoatate:",
options=AnnotatorUIOptions.vcf_definition_method(),
index=1
)

if vcf_definition_method_selection == "File uploader":
Expand Down Expand Up @@ -320,7 +321,10 @@ def with_tab_annotator(tab: DeltaGenerator):
output_name=name_processed,
assembly=assembly_processed,
)
st.text(f"Processed sources @ {vcf_dir_processed}")

output = Directories.app_annotated_inputs()
st.text(f"Completed. Processed sources @ {vcf_dir_processed}")
st.write(f"Output annotated file to {output}{name_processed}")
anno_cache.clean_up()


Expand Down
63 changes: 33 additions & 30 deletions src/SOPRANO/core/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def __init__(

def log(self, msg: str) -> None:
self.logger.info(msg)


class GlobalParameters:
def __init__(
Expand All @@ -299,6 +299,7 @@ def __init__(
genomes: GenomePaths,
n_samples: int,
):
print("Global params")
# Sanitized
self.job_cache = check_cache_path(job_cache, analysis_name)
self.seed = GlobalParameters.check_seed(seed)
Expand Down Expand Up @@ -389,7 +390,7 @@ def gather(self):
sample_results_paths = [
self.get_sample(idx).results_path for idx in range(self.n_samples)
]

for expected_results_path in sample_results_paths:
if not expected_results_path.exists():
warnings.warn(
Expand All @@ -401,35 +402,37 @@ def gather(self):
sample_results_paths.remove(expected_results_path)

if len(sample_results_paths) == 0:
raise ValueError(f"No sample results found for {self.job_cache}.")

joined_df: pd.DataFrame | None = None

with open(self.samples_meta_path, "w") as f:
for path in sample_results_paths:
if joined_df is None:
joined_df = pd.read_csv(path, sep="\t")
else:
joined_df = pd.concat(
[joined_df, pd.read_csv(path, sep="\t")],
ignore_index=True,
)

f.write(f"{path.as_posix()}\n")

# Dropped estimated statistics... don't mean much in this context
joined_df.drop(
columns=[
"ON_Low_CI",
"ON_High_CI",
"OFF_Low_CI",
"OFF_High_CI",
"Pvalue",
]
)
print("Expected results paths:",sample_results_paths)
#raise ValueError(f"No sample results found for {self.job_cache}.")
print(f"No sample results found for {self.job_cache}.")
else:
joined_df: pd.DataFrame | None = None

with open(self.samples_meta_path, "w") as f:
for path in sample_results_paths:
if joined_df is None:
joined_df = pd.read_csv(path, sep="\t")
else:
joined_df = pd.concat(
[joined_df, pd.read_csv(path, sep="\t")],
ignore_index=True,
)

f.write(f"{path.as_posix()}\n")

# Dropped estimated statistics... don't mean much in this context
joined_df.drop(
columns=[
"ON_Low_CI",
"ON_High_CI",
"OFF_Low_CI",
"OFF_High_CI",
"Pvalue",
]
)

joined_df.to_csv(self.samples_path)
self.plot_hist()
joined_df.to_csv(self.samples_path)
self.plot_hist()

@staticmethod
def split_joined_df(
Expand Down
6 changes: 4 additions & 2 deletions src/SOPRANO/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,8 @@ def _apply(self, params: Parameters):
tar_and_compress(params)


def run_pipeline(params: Parameters):
def run_pipeline(params: Parameters, tidy=True):
print("Running pipeline")
jobs: List[_PipelineComponent] = [FilterTranscripts()]

if params.use_random_regions:
Expand Down Expand Up @@ -594,7 +595,8 @@ def run_pipeline(params: Parameters):
jobs.append(CheckTargetMutations())
jobs.append(ComputeIntronRate())
jobs.append(ComputeStatistics())
#jobs.append(TidyUp())
if tidy:
jobs.append(TidyUp())

for job in jobs:
job.apply(params)
6 changes: 6 additions & 0 deletions src/SOPRANO/utils/anno_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ def annotate_source(
replace_existing: str = "N",
):
print("------------- Annotating source -------------")
print(f"Source path: {source_path}")
print(f"Assembly: {assembly}")
print(f"Output name: {output_name}")
print(f"Cache directory: {cache_directory}")
print(f"Skip missing: {skip_missing}")
print(f"Replace existing: {replace_existing}")
vcf_paths = find_vcf_files(source_path)

bskip_missing = skip_missing.upper() == "Y"
Expand Down
7 changes: 4 additions & 3 deletions src/SOPRANO/utils/app_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,11 +636,11 @@ def pipeline(params: Parameters):

params.cache_dir.mkdir(exist_ok=True)
output = st.empty()
with st_capture(output.code):
with st_capture(output.code):
t_start = time()
output = st.empty()
with st_capture(output.code):
run_pipeline(params)
run_pipeline(params,tidy=False)
print("Tidying files...")
TidyUp()
t_end = time()
Expand Down Expand Up @@ -705,7 +705,7 @@ def annotate(
"Annotation in progress ... please wait until this "
"process has finished."
)

output = st.empty()
with st_capture(output.code):
all_annotated_paths = anno_utils.annotate_source(
Expand All @@ -714,6 +714,7 @@ def annotate(
cache_directory=Directories.app_annotated_inputs(),
assembly=assembly,
skip_missing="Y",
replace_existing="Y",
)

running_msg.empty()
Expand Down
1 change: 1 addition & 0 deletions src/SOPRANO/utils/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Repo and source roots
_SOPRANO_SRC = pathlib.Path(__file__).parent.parent
_SOPRANO_REPO = _SOPRANO_SRC.parent.parent
print("SOPRANO repo =",_SOPRANO_SRC)

# Source directories
_SOPRANO_SCRIPTS = _SOPRANO_SRC / "scripts"
Expand Down

0 comments on commit 70db26f

Please sign in to comment.