Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add conditional parameters for tomte #3112

Merged
merged 44 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
5064fac
added gene_panel generation for tomte and clean up duplicated methods
fevac Apr 10, 2024
1a5862a
added parameters to tomte
fevac Apr 10, 2024
8d7e285
make constant for all nf-analysis
fevac Apr 10, 2024
080730a
started to fixpytests
fevac Apr 12, 2024
984ee6f
add conditional parameters for tomte
fevac Apr 12, 2024
0573e53
fix pytests
fevac Apr 12, 2024
2e8b05d
Merge branch 'master' into tomte_update_params
fevac Apr 12, 2024
f9cfa84
replace string by constant
fevac Apr 12, 2024
0a689f3
black
fevac Apr 12, 2024
48ade42
code smell
fevac Apr 12, 2024
9e6d77a
revert changes in report api
fevac Apr 12, 2024
eb83422
fix syntax
fevac Apr 12, 2024
e888dac
Merge branch 'master' into tomte_update_params
fevac Apr 15, 2024
eccf7ea
Merge branch 'master' into tomte_update_params
fevac Apr 22, 2024
14cb9f8
reviews
fevac Apr 22, 2024
89d1e77
clean up get_case_sources_type
fevac Apr 22, 2024
9a4b8c9
partial fix pytests
fevac Apr 22, 2024
9205ac0
move method to validator
fevac Apr 22, 2024
7508c51
Merge branch 'master' into tomte_update_params
fevac Apr 23, 2024
96e3e22
update reference genome and remove default
fevac Apr 23, 2024
d2185e5
Merge branch 'master' into tomte_update_params
fevac Apr 23, 2024
76a010a
added get gene panel to analysis api
fevac Apr 23, 2024
2a553cf
move get source to lims api
fevac Apr 23, 2024
762aea4
fix genome reference version for raredisease
fevac Apr 23, 2024
15bede4
remove duplicated method
fevac Apr 23, 2024
c27eb4d
remove comments
fevac Apr 23, 2024
a49d9fe
Merge branch 'master' into tomte_update_params
fevac Apr 24, 2024
4dc46e1
reverting pytests wokflwo param
fevac Apr 24, 2024
dac4bce
allow empty tissue
fevac Apr 24, 2024
fd00ade
return tissue unknown when none
fevac Apr 24, 2024
f7afd71
add validator as pre
fevac Apr 24, 2024
284f131
restric values in model for tissue and genome
fevac Apr 24, 2024
a708519
Merge branch 'master' into tomte_update_params
fevac Apr 24, 2024
e4f1181
reviews
fevac Apr 24, 2024
3e98de3
Merge branch 'master' into tomte_update_params
fevac Apr 24, 2024
c6c519c
handle source exception
fevac Apr 24, 2024
db01db1
update get_source
fevac Apr 24, 2024
98733ee
reviews
fevac Apr 24, 2024
6c9bdda
docstring
fevac Apr 25, 2024
72aac23
reviews
fevac Apr 25, 2024
51c6b66
add dry run for child mehthods
fevac Apr 25, 2024
c8b0683
unused import
fevac Apr 25, 2024
25ffe7e
Merge branch 'master' into tomte_update_params
fevac Apr 25, 2024
e851cc3
Merge branch 'master' into tomte_update_params
fevac Apr 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions cg/apps/lims/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,10 @@
from genologics.lims import Lims
from requests.exceptions import HTTPError

from cg.constants.lims import (
MASTER_STEPS_UDFS,
PROP2UDF,
DocumentationMethod,
LimsArtifactTypes,
)
from cg.constants import Priority
from cg.constants.lims import MASTER_STEPS_UDFS, PROP2UDF, DocumentationMethod, LimsArtifactTypes
from cg.exc import LimsDataError

from ...constants import Priority
from .order import OrderHandler

SEX_MAP = {"F": "female", "M": "male", "Unknown": "unknown", "unknown": "unknown"}
Expand Down Expand Up @@ -68,6 +63,13 @@ def samples_in_pools(self, pool_name, projectname):
"""Fetch all samples from a pool"""
return self.get_samples(udf={"pool name": str(pool_name)}, projectname=projectname)

def get_source(self, lims_id: str) -> str | None:
"""Return the source from LIMS for a given sample ID.
Return 'None' if no source information is set or
if sample is not found or cannot be fetched from LIMS."""
lims_sample: dict[str, Any] = self.sample(lims_id=lims_id)
return lims_sample.get("source")

@staticmethod
def _export_project(lims_project) -> dict:
"""Fetch relevant information from a lims project object"""
Expand Down
6 changes: 4 additions & 2 deletions cg/apps/scout/scoutapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ def upload(self, scout_load_config: Path, force: bool = False) -> None:
self.process.run_command(load_command)
LOG.debug("Case loaded successfully to Scout")

def export_panels(self, panels: list[str], build: str = GENOME_BUILD_37) -> list[str]:
def export_panels(
self, panels: list[str], build: str = GENOME_BUILD_37, dry_run: bool = False
ivadym marked this conversation as resolved.
Show resolved Hide resolved
) -> list[str]:
"""Pass through to export of a list of gene panels.

Return list of lines in bed format
Expand All @@ -61,7 +63,7 @@ def export_panels(self, panels: list[str], build: str = GENOME_BUILD_37) -> list
export_panels_command.extend(["--build", build])

try:
self.process.run_command(export_panels_command)
self.process.run_command(export_panels_command, dry_run=dry_run)
if not self.process.stdout:
return []
except CalledProcessError:
Expand Down
5 changes: 5 additions & 0 deletions cg/constants/gene_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
GENOME_BUILD_38: str = "GRCh38"


class GenePanelGenomeBuild(StrEnum):
    """Genome build names used for gene panels.

    Each member is named after a reference genome (hg19, hg38) and holds the
    matching genome build constant.
    """

    hg19: str = GENOME_BUILD_37
    hg38: str = GENOME_BUILD_38
ivadym marked this conversation as resolved.
Show resolved Hide resolved


class GenePanelMasterList(StrEnum):
BRAIN: str = "BRAIN"
CARDIOLOGY: str = "Cardiology"
Expand Down
6 changes: 6 additions & 0 deletions cg/constants/sample_sources.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Constants that specify sample sources"""

from enum import StrEnum

METAGENOME_SOURCES = (
"blood",
"skin",
Expand Down Expand Up @@ -27,3 +29,7 @@
"bone marrow",
"other",
)


class SourceType(StrEnum):
UNKNOWN: str = "unknown"
ivadym marked this conversation as resolved.
Show resolved Hide resolved
21 changes: 3 additions & 18 deletions cg/meta/report/report_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
from cg.exc import DeliveryReportError
from cg.io.controller import ReadFile, WriteStream
from cg.meta.meta import MetaAPI
from cg.meta.report.field_validators import (
get_empty_report_data,
get_missing_report_data,
)
from cg.meta.report.field_validators import get_empty_report_data, get_missing_report_data
from cg.meta.workflow.analysis import AnalysisAPI
from cg.models.analysis import AnalysisModel
from cg.models.cg_config import CGConfig
Expand All @@ -30,20 +27,8 @@
ReportModel,
ScoutReportFiles,
)
from cg.models.report.sample import (
ApplicationModel,
MethodsModel,
SampleModel,
TimestampModel,
)
from cg.store.models import (
Analysis,
Application,
ApplicationLimitations,
Case,
CaseSample,
Sample,
)
from cg.models.report.sample import ApplicationModel, MethodsModel, SampleModel, TimestampModel
from cg.store.models import Analysis, Application, ApplicationLimitations, Case, CaseSample, Sample

LOG = logging.getLogger(__name__)

Expand Down
55 changes: 44 additions & 11 deletions cg/meta/workflow/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,16 @@
import shutil
from pathlib import Path
from subprocess import CalledProcessError
from typing import Any
from typing import Any, Iterator

import click
import requests
from housekeeper.store.models import Bundle, Version

from cg.apps.environ import environ_email
from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Priority, SequencingFileTag, Workflow
from cg.constants.constants import (
AnalysisType,
CaseActions,
FileFormat,
WorkflowManager,
)
from cg.constants.gene_panel import GenePanelCombo
from cg.constants.constants import AnalysisType, CaseActions, FileFormat, WorkflowManager
from cg.constants.gene_panel import GenePanelCombo, GenePanelMasterList
from cg.constants.scout import ScoutExportFileName
from cg.constants.tb import AnalysisStatus
from cg.exc import AnalysisNotReadyError, BundleAlreadyAddedError, CgDataError, CgError
Expand Down Expand Up @@ -189,6 +185,20 @@ def get_case_application_type(self, case_id: str) -> str:

return application_types.pop()

def get_case_source_type(self, case_id: str) -> str | None:
"""Returns the source type for samples in a case.
Raises:
CgError: If different sources are set for the samples linked to a case."""
sample_ids: Iterator[str] = self.status_db.get_sample_ids_by_case_id(case_id=case_id)
fevac marked this conversation as resolved.
Show resolved Hide resolved
source_types: set[str | None] = {
self.lims_api.get_source(lims_id=sample_id) for sample_id in sample_ids
}

if len(source_types) > 1:
raise CgError(f"Different source types found for case: {case_id} ({source_types})")
diitaz93 marked this conversation as resolved.
Show resolved Hide resolved

return source_types.pop()

def has_case_only_exome_samples(self, case_id: str) -> bool:
"""Returns True if the application type for all samples in a case is WES."""
application_type: str = self.get_case_application_type(case_id)
Expand Down Expand Up @@ -631,26 +641,49 @@ def _write_managed_variants(out_dir: Path, content: list[str]) -> None:

@staticmethod
def _write_panel(out_dir: Path, content: list[str]) -> None:
    """Write the gene panel lines to the panels file inside `out_dir`.

    The directory is created (parents included) when it does not exist yet.
    The lines are joined with newlines and written as a text file named by
    ScoutExportFileName.PANELS.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    panel_text: str = "\n".join(content)
    panel_file: Path = Path(out_dir, ScoutExportFileName.PANELS)
    WriteFile.write_file_from_content(
        content=panel_text,
        file_format=FileFormat.TXT,
        file_path=panel_file,
    )

def _get_gene_panel(self, case_id: str, genome_build: str) -> list[str]:
def _get_gene_panel(self, case_id: str, genome_build: str, dry_run: bool = False) -> list[str]:
"""Create and return the aggregated gene panel file."""
case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
all_panels: list[str] = self.get_aggregated_panels(
customer_id=case.customer.internal_id, default_panels=set(case.panels)
)
return self.scout_api.export_panels(build=genome_build, panels=all_panels)
return self.scout_api.export_panels(build=genome_build, panels=all_panels, dry_run=dry_run)

def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
    """Create and return the aggregated gene panel lines for a case.

    Not implemented on this class: calling it always raises NotImplementedError.
    """
    raise NotImplementedError

def _get_managed_variants(self, genome_build: str) -> list[str]:
"""Create and return the managed variants."""
return self.scout_api.export_managed_variants(genome_build=genome_build)

def write_panel(self, case_id: str, content: list[str]) -> None:
    """Write the gene panel content into the directory of the case under `self.root`."""
    case_dir: Path = Path(self.root, case_id)
    self._write_panel(out_dir=case_dir, content=content)

@staticmethod
def get_aggregated_panels(customer_id: str, default_panels: set[str]) -> list[str]:
    """Return the aggregated gene panels for a customer.

    The gene panel master list is returned when the customer is one of the
    master list collaborators and every default panel is part of that list.
    Otherwise the default panels are extended with their gene panel combo,
    OMIM-AUTO and PANELAPP-GREEN.
    """
    master_list: list[str] = GenePanelMasterList.get_panel_names()
    is_collaborator: bool = customer_id in GenePanelMasterList.collaborators()
    if is_collaborator and default_panels.issubset(master_list):
        return master_list
    extended_panels: set[str] = add_gene_panel_combo(default_panels=default_panels)
    extended_panels |= {GenePanelMasterList.OMIM_AUTO, GenePanelMasterList.PANELAPP_GREEN}
    return list(extended_panels)

def run_analysis(self, *args, **kwargs):
    """Run the analysis; not implemented on this class, so it always raises."""
    raise NotImplementedError

Expand Down
19 changes: 0 additions & 19 deletions cg/meta/workflow/mip.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,25 +149,6 @@ def link_fastq_files(self, case_id: str, dry_run: bool = False) -> None:
for link in case.links:
self.link_fastq_files_for_sample(case=case, sample=link.sample)

def write_panel(self, case_id: str, content: list[str]) -> None:
    """Write the gene panel lines to the case directory located under `self.root`."""
    target_dir: Path = Path(self.root, case_id)
    self._write_panel(out_dir=target_dir, content=content)

@staticmethod
def get_aggregated_panels(customer_id: str, default_panels: set[str]) -> list[str]:
    """Aggregate the gene panels to use for a customer.

    Collaborators of the gene panel master list whose default panels are all
    in that list get the master list itself. Everyone else gets the default
    panels plus their gene panel combo, OMIM-AUTO and PANELAPP-GREEN.
    """
    master_list: list[str] = GenePanelMasterList.get_panel_names()
    uses_master_list: bool = (
        customer_id in GenePanelMasterList.collaborators()
        and default_panels.issubset(master_list)
    )
    if uses_master_list:
        return master_list
    combined_panels: set[str] = add_gene_panel_combo(default_panels=default_panels)
    combined_panels |= {GenePanelMasterList.OMIM_AUTO, GenePanelMasterList.PANELAPP_GREEN}
    return list(combined_panels)

def _get_latest_raw_file(self, family_id: str, tags: list[str]) -> Any:
"""Get a python object file for a tag and a family ."""

Expand Down
4 changes: 2 additions & 2 deletions cg/meta/workflow/mip_dna.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def config_sample(
sample_data[Pedigree.FATHER.value]: str = link_obj.father.internal_id
return sample_data

def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
    """Return the aggregated gene panel lines of a case, exported with GENOME_BUILD_37.

    `dry_run` is forwarded to `_get_gene_panel`.
    """
    panel_lines: list[str] = self._get_gene_panel(
        case_id=case_id, genome_build=GENOME_BUILD_37, dry_run=dry_run
    )
    return panel_lines

def get_managed_variants(self) -> list[str]:
"""Create and return the managed variants."""
Expand Down
4 changes: 2 additions & 2 deletions cg/meta/workflow/mip_rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def config_sample(self, link_obj, panel_bed: str | None = None) -> dict[str, str
sample_data[Pedigree.FATHER.value]: str = link_obj.father.internal_id
return sample_data

def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
    """Return the aggregated gene panel lines of a case, exported with GENOME_BUILD_38.

    `dry_run` is forwarded to `_get_gene_panel`.
    """
    panel_lines: list[str] = self._get_gene_panel(
        case_id=case_id, genome_build=GENOME_BUILD_38, dry_run=dry_run
    )
    return panel_lines

def get_managed_variants(self) -> list[str]:
"""Create and return the managed variants."""
Expand Down
64 changes: 59 additions & 5 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MultiQC,
WorkflowManager,
)
from cg.constants.gene_panel import GenePanelGenomeBuild
from cg.constants.nextflow import NFX_WORK_DIR
from cg.constants.nf_analysis import NfTowerStatus
from cg.constants.tb import AnalysisStatus
Expand All @@ -25,7 +26,7 @@
from cg.io.yaml import write_yaml_nextflow_style
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.nf_handlers import NextflowHandler, NfTowerHandler
from cg.models.analysis import NextflowAnalysis, AnalysisModel
from cg.models.analysis import NextflowAnalysis
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import (
MetricsBase,
Expand Down Expand Up @@ -111,6 +112,11 @@ def is_multiqc_pattern_search_exact(self) -> bool:
If false, pattern must be present but does not need to be exact."""
return False

@property
def is_gene_panel_required(self) -> bool:
    """Return True if a gene panel needs to be created for the workflow.

    Such a panel is built from the information in StatusDB and exported from
    Scout. This implementation always answers False.
    """
    return False
ivadym marked this conversation as resolved.
Show resolved Hide resolved

def get_profile(self, profile: str | None = None) -> str:
"""Get NF profiles."""
return profile or self.profile
Expand Down Expand Up @@ -215,6 +221,12 @@ def get_workdir_path(self, case_id: str, work_dir: Path | None = None) -> Path:
return work_dir.absolute()
return Path(self.get_case_path(case_id), NFX_WORK_DIR)

def get_gene_panels_path(self, case_id: str) -> Path:
    """Return the path of the gene panels bed file exported from Scout.

    The file is named "gene_panels" with the BED file extension and lives in
    the case path.
    """
    case_path = self.get_case_path(case_id=case_id)
    gene_panels_stem: Path = Path(case_path, "gene_panels")
    return gene_panels_stem.with_suffix(FileExtensions.BED)
ivadym marked this conversation as resolved.
Show resolved Hide resolved

def set_cluster_options(self, case_id: str) -> str:
    """Return the Nextflow `process.clusterOptions` config line for a case.

    The line carries the account of this API and the SLURM QOS resolved for
    the case, and ends with a newline.
    """
    account = self.account
    qos = self.get_slurm_qos_for_case(case_id=case_id)
    return f'process.clusterOptions = "-A {account} --qos={qos}"\n'

Expand Down Expand Up @@ -352,6 +364,16 @@ def create_nextflow_config(self, case_id: str, dry_run: bool = False) -> None:
file_path=self.get_nextflow_config_path(case_id=case_id),
)

def create_gene_panel(self, case_id: str, dry_run: bool) -> None:
    """Create the aggregated gene panel exported from Scout and write it for a case.

    In a dry run nothing is written: the panel lines are only logged at debug
    level, joined by newlines.
    """
    LOG.info("Creating gene panel file")
    panel_lines: list[str] = self.get_gene_panel(case_id=case_id, dry_run=dry_run)
    if not dry_run:
        self.write_panel(case_id=case_id, content=panel_lines)
        return
    LOG.debug("\n".join(panel_lines))

def config_case(self, case_id: str, dry_run: bool):
"""Create directory and config files required by a workflow for a case."""
if dry_run:
Expand All @@ -361,6 +383,8 @@ def config_case(self, case_id: str, dry_run: bool):
self.create_sample_sheet(case_id=case_id, dry_run=dry_run)
self.create_params_file(case_id=case_id, dry_run=dry_run)
self.create_nextflow_config(case_id=case_id, dry_run=dry_run)
if self.is_gene_panel_required:
self.create_gene_panel(case_id=case_id, dry_run=dry_run)

def _run_analysis_with_nextflow(
self, case_id: str, command_args: NfCommandArgs, dry_run: bool
Expand Down Expand Up @@ -812,6 +836,40 @@ def get_cases_to_store(self) -> list[Case]:
or self.trailblazer_api.is_latest_analysis_qc(case_id=case.internal_id)
]

def get_genome_build(self, case_id: str) -> GenomeVersion:
    """Return the reference genome version shared by the samples of a case.

    Raises:
        CgError: If the samples linked to the case have different reference
            genome versions set, or if no reference genome is specified
            (the case has no samples, or the only value found is unset).
    """
    reference_genomes: set[str | None] = {
        sample.reference_genome
        for sample in self.status_db.get_samples_by_case_id(case_id=case_id)
    }
    if len(reference_genomes) > 1:
        raise CgError(
            f"Samples linked to case {case_id} have different reference genome versions set"
        )
    # At most one distinct value is left. An unset value (None or empty) is treated as
    # missing information instead of being handed back to the caller.
    reference_genome: str | None = next(iter(reference_genomes), None)
    if not reference_genome:
        raise CgError(f"No reference genome specified for case {case_id}")
    return reference_genome

def get_gene_panel_genome_build(self, case_id: str) -> GenePanelGenomeBuild:
    """Return build version of the gene panel for a case.

    The reference genome of the case is used as the member name to look up in
    GenePanelGenomeBuild.

    Raises:
        CgError: If GenePanelGenomeBuild has no member named after the reference genome.
    """
    reference_genome: GenomeVersion = self.get_genome_build(case_id=case_id)
    try:
        # Look up by member name. getattr() on the enum class would also resolve
        # non-member class attributes (methods, dunders) and raise an uncaught
        # TypeError for a non-string reference.
        return GenePanelGenomeBuild[reference_genome]
    except KeyError as error:
        raise CgError(
            f"Reference {reference_genome} has no associated genome build for panels: {error}"
        ) from error

def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
    """Create and return the aggregated gene panel lines for a case.

    The genome build of the panel is resolved for the case first and then
    passed, together with `dry_run`, to `_get_gene_panel`.
    """
    panel_build = self.get_gene_panel_genome_build(case_id=case_id)
    return self._get_gene_panel(case_id=case_id, genome_build=panel_build, dry_run=dry_run)

def parse_analysis(self, qc_metrics_raw: list[MetricsBase], **kwargs) -> NextflowAnalysis:
"""Parse Nextflow output analysis files and return an analysis model."""
sample_metrics: dict[str, dict] = {}
Expand All @@ -826,7 +884,3 @@ def get_latest_metadata(self, case_id: str) -> NextflowAnalysis:
"""Return analysis output of a Nextflow case."""
qc_metrics: list[MetricsBase] = self.get_multiqc_json_metrics(case_id)
return self.parse_analysis(qc_metrics_raw=qc_metrics)

def get_genome_build(self, case_id: str) -> str:
    """Return the reference genome build version of a Nextflow analysis.

    The value of GenomeVersion.hg38 is returned regardless of the case.
    """
    hg38_build: str = GenomeVersion.hg38.value
    return hg38_build
Loading
Loading