diff --git a/src/SOPRANO/R/parse_vcf.R b/src/SOPRANO/R/parse_vcf.R index 164b754..89b7104 100755 --- a/src/SOPRANO/R/parse_vcf.R +++ b/src/SOPRANO/R/parse_vcf.R @@ -1,4 +1,5 @@ library(magrittr) +library(dplyr) paste("parse_vcf.R") diff --git a/src/SOPRANO/app.py b/src/SOPRANO/app.py index b204601..dbbed42 100755 --- a/src/SOPRANO/app.py +++ b/src/SOPRANO/app.py @@ -261,6 +261,7 @@ def with_tab_annotator(tab: DeltaGenerator): vcf_definition_method_selection = st.radio( "Method for defining VCF files to annoatate:", options=AnnotatorUIOptions.vcf_definition_method(), + index=1 ) if vcf_definition_method_selection == "File uploader": @@ -320,7 +321,10 @@ def with_tab_annotator(tab: DeltaGenerator): output_name=name_processed, assembly=assembly_processed, ) - st.text(f"Processed sources @ {vcf_dir_processed}") + + output = Directories.app_annotated_inputs() + st.text(f"Completed. Processed sources @ {vcf_dir_processed}") + st.write(f"Output annotated file to {output}{name_processed}") anno_cache.clean_up() diff --git a/src/SOPRANO/core/objects.py b/src/SOPRANO/core/objects.py index 02c19da..f895a56 100755 --- a/src/SOPRANO/core/objects.py +++ b/src/SOPRANO/core/objects.py @@ -282,7 +282,7 @@ def __init__( def log(self, msg: str) -> None: self.logger.info(msg) - + class GlobalParameters: def __init__( @@ -299,6 +299,7 @@ def __init__( genomes: GenomePaths, n_samples: int, ): + print("Global params") # Sanitized self.job_cache = check_cache_path(job_cache, analysis_name) self.seed = GlobalParameters.check_seed(seed) @@ -389,7 +390,7 @@ def gather(self): sample_results_paths = [ self.get_sample(idx).results_path for idx in range(self.n_samples) ] - + for expected_results_path in sample_results_paths: if not expected_results_path.exists(): warnings.warn( @@ -401,35 +402,37 @@ def gather(self): sample_results_paths.remove(expected_results_path) if len(sample_results_paths) == 0: - raise ValueError(f"No sample results found for {self.job_cache}.") - - joined_df: pd.DataFrame | None = None - - with open(self.samples_meta_path, "w") as f: - for path in sample_results_paths: - if joined_df is None: - joined_df = pd.read_csv(path, sep="\t") - else: - joined_df = pd.concat( - [joined_df, pd.read_csv(path, sep="\t")], - ignore_index=True, - ) - - f.write(f"{path.as_posix()}\n") - - # Dropped estimateed statistics... don't mean much in this context - joined_df.drop( - columns=[ - "ON_Low_CI", - "ON_High_CI", - "OFF_Low_CI", - "OFF_High_CI", - "Pvalue", - ] - ) + print("Expected results paths:",sample_results_paths) + #raise ValueError(f"No sample results found for {self.job_cache}.") + print(f"No sample results found for {self.job_cache}.") + else: + joined_df: pd.DataFrame | None = None + + with open(self.samples_meta_path, "w") as f: + for path in sample_results_paths: + if joined_df is None: + joined_df = pd.read_csv(path, sep="\t") + else: + joined_df = pd.concat( + [joined_df, pd.read_csv(path, sep="\t")], + ignore_index=True, + ) + + f.write(f"{path.as_posix()}\n") + + # Dropped estimateed statistics... don't mean much in this context + joined_df.drop( + columns=[ + "ON_Low_CI", + "ON_High_CI", + "OFF_Low_CI", + "OFF_High_CI", + "Pvalue", + ] + ) - joined_df.to_csv(self.samples_path) - self.plot_hist() + joined_df.to_csv(self.samples_path) + self.plot_hist() @staticmethod def split_joined_df( diff --git a/src/SOPRANO/pipeline.py b/src/SOPRANO/pipeline.py index b27f025..97f60c5 100755 --- a/src/SOPRANO/pipeline.py +++ b/src/SOPRANO/pipeline.py @@ -556,7 +556,8 @@ def _apply(self, params: Parameters): tar_and_compress(params) -def run_pipeline(params: Parameters): +def run_pipeline(params: Parameters, tidy=True): + print("Running pipeline") jobs: List[_PipelineComponent] = [FilterTranscripts()] if params.use_random_regions: @@ -594,7 +595,8 @@ def run_pipeline(params: Parameters): jobs.append(CheckTargetMutations()) jobs.append(ComputeIntronRate()) jobs.append(ComputeStatistics()) - #jobs.append(TidyUp()) + if tidy: + jobs.append(TidyUp()) for job in jobs: job.apply(params) diff --git a/src/SOPRANO/utils/anno_utils.py b/src/SOPRANO/utils/anno_utils.py index 2700b8d..b808df3 100755 --- a/src/SOPRANO/utils/anno_utils.py +++ b/src/SOPRANO/utils/anno_utils.py @@ -76,6 +76,12 @@ def annotate_source( replace_existing: str = "N", ): print("------------- Annotating source -------------") + print(f"Source path: {source_path}") + print(f"Assembly: {assembly}") + print(f"Output name: {output_name}") + print(f"Cache directory: {cache_directory}") + print(f"Skip missing: {skip_missing}") + print(f"Replace existing: {replace_existing}") vcf_paths = find_vcf_files(source_path) bskip_missing = skip_missing.upper() == "Y" diff --git a/src/SOPRANO/utils/app_utils.py b/src/SOPRANO/utils/app_utils.py index ff11bd8..fe73e58 100755 --- a/src/SOPRANO/utils/app_utils.py +++ b/src/SOPRANO/utils/app_utils.py @@ -636,11 +636,11 @@ def pipeline(params: Parameters): params.cache_dir.mkdir(exist_ok=True) output = st.empty() - with st_capture(output.code): + with st_capture(output.code): t_start = time() output = st.empty() with st_capture(output.code): - run_pipeline(params) + run_pipeline(params,tidy=False) print("Tidying files...") TidyUp() t_end = time() @@ -705,7 +705,7 @@ def annotate( "Annotation in progress ... please wait until this " "process has finished." ) - + output = st.empty() with st_capture(output.code): all_annotated_paths = anno_utils.annotate_source( @@ -714,6 +714,7 @@ def annotate( cache_directory=Directories.app_annotated_inputs(), assembly=assembly, skip_missing="Y", + replace_existing="Y", ) running_msg.empty() diff --git a/src/SOPRANO/utils/path_utils.py b/src/SOPRANO/utils/path_utils.py index 4ff68fc..1dc6bf3 100755 --- a/src/SOPRANO/utils/path_utils.py +++ b/src/SOPRANO/utils/path_utils.py @@ -4,6 +4,7 @@ # Repo and source roots _SOPRANO_SRC = pathlib.Path(__file__).parent.parent _SOPRANO_REPO = _SOPRANO_SRC.parent.parent +print("SOPRANO repo =",_SOPRANO_SRC) # Source directories _SOPRANO_SCRIPTS = _SOPRANO_SRC / "scripts"