Skip to content

Commit

Permalink
Changes for #478
Browse files Browse the repository at this point in the history
Update GeneScoreRanker to return GeneScores when no variant prioritisation steps were run.
Update AbstractAnalysisRunner to slightly reduce logging verbosity.
Add new AbstractAnalysisRunner.genesToScore() abstract method.
Update SimpleAnalysisRunner and PassOnlyAnalysisRunner with new genesToScore() method
Update ExomiserTest to use new exomiser-test.vcf
  • Loading branch information
julesjacobsen committed Feb 28, 2023
1 parent 034aae0 commit e873ef9
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 128 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,36 +69,39 @@ public AnalysisResults run(Sample sample, Analysis analysis) {
// This is a critical step. It will validate that all the relevant information is present for the specified steps.
AnalysisSampleValidator.validate(sample, analysis);

logger.info("Starting analysis");
logger.info("Using genome assembly {}", sample.getGenomeAssembly());
logger.info("Validating sample input data");
// all the sample-related bits, might be worth encapsulating
Path vcfPath = sample.getVcfPath();
VcfReader vcfReader = vcfPath == null ? new NoOpVcfReader() : new VcfFileReader(vcfPath);
// n.b. this next block will safely handle a null VCF file
logger.info("Checking proband and pedigree for VCF {}", vcfPath);
List<String> sampleNames = vcfReader.readSampleIdentifiers();
VariantFactory variantFactory = new VariantFactoryImpl(genomeAnalysisService.getVariantAnnotator(), vcfReader);

List<String> sampleNames = vcfReader.readSampleIdentifiers();
String probandIdentifier = SampleIdentifiers.checkProbandIdentifier(sample.getProbandSampleName(), sampleNames);
Pedigree validatedPedigree = PedigreeSampleValidator.validate(sample.getPedigree(), probandIdentifier, sampleNames);
InheritanceModeOptions inheritanceModeOptions = analysis.getInheritanceModeOptions();

InheritanceModeAnnotator inheritanceModeAnnotator = new InheritanceModeAnnotator(validatedPedigree, inheritanceModeOptions);

//now run the analysis on the sample
int vcfGenotypePosition = SampleIdentifiers.samplePosition(probandIdentifier, sampleNames);
logger.info("Running analysis for proband {} (sample {} in VCF) from samples: {}", probandIdentifier, vcfGenotypePosition, sampleNames);
// now run the analysis on the sample
if (sample.hasVcf()) {
int vcfGenotypePosition = SampleIdentifiers.samplePosition(probandIdentifier, sampleNames);
logger.info("Running analysis for proband {} (sample {} in VCF) from samples: {}. Using coordinates for genome assembly {}.", probandIdentifier, vcfGenotypePosition, sampleNames, sample.getGenomeAssembly());
} else {
logger.info("Running analysis for proband {} without VCF", probandIdentifier);
}
Instant timeStart = Instant.now();
//soo many comments - this is a bad sign that this is too complicated.
Map<String, Gene> allGenes = makeKnownGenes();
List<VariantEvaluation> variantEvaluations = new ArrayList<>();
FilterStats filterStats = new FilterStats();

// TODO: there needs to be some logic to distinguish samples with (vfc only || hpo + vcf || hpo only) alternatively,
// just expose the hpo-only analysis in the cli
// TODO: might also be easier to work with returning a list of Genes from this which may/may not contain variants.
// How Exomiser uses the input sample data will depend on the analysis steps provided. These are grouped by
// function (variant filter, gene filter, prioritiser) as an AnalysisGroup. Only a variant filter step/group
// will trigger the VCF to be loaded and analysed.
boolean variantsLoaded = false;
List<AnalysisGroup> analysisStepGroups = AnalysisGroup.groupAnalysisSteps(analysis.getAnalysisSteps());
logWarningIfSubOptimalAnalysisSumbitted(analysisStepGroups);
for (AnalysisGroup analysisGroup : analysisStepGroups) {
// This is admittedly pretty confusing code and I'm sorry. It's easiest to follow if you turn on debugging.
// The analysis steps are run in groups of VARIANT_FILTER, GENE_ONLY_DEPENDENT or INHERITANCE_MODE_DEPENDENT
Expand All @@ -123,21 +126,8 @@ public AnalysisResults run(Sample sample, Analysis analysis) {
filterStat.getFilterType(), filterStat.getPassCount(), filterStat.getFailCount()));
}

//maybe only the non-variant dependent steps have been run in which case we need to load the variants although
//the results might be a bit meaningless.
//See issue #129 This is an excellent place to put the output of a gene phenotype score only run.
//i.e. stream in the variants, annotate them (assign a gene symbol) then write out that variant with the calculated GENE_PHENO_SCORE (prioritiser scores).
//this would fit well with a lot of people's pipelines where they only want the phenotype score as they are using VEP or ANNOVAR for variant analysis.

if (!variantsLoaded && sample.hasVcf()) {
try (Stream<VariantEvaluation> variantStream = variantFactory.createVariantEvaluations()) {
variantEvaluations = variantStream.collect(Collectors.toUnmodifiableList());
}
assignVariantsToGenes(variantEvaluations, allGenes);
variantsLoaded = true;
}

List<Gene> genesToScore = variantsLoaded ? getGenesWithVariants(allGenes) : List.copyOf(allGenes.values());
// If no variant steps have been run and there is a VCF present, don't load it here - See issues #129, #478
List<Gene> genesToScore = variantsLoaded ? getGenesWithVariants(allGenes) : allGenes.values().stream().filter(genesToScore()).collect(Collectors.toUnmodifiableList());
// Temporarily add a new PValueGeneScorer so as not to break semver will revert to RawScoreGeneScorer in 14.0.0
CombinedScorePvalueCalculator combinedScorePvalueCalculator = buildCombinedScorePvalueCalculator(sample, analysis, genesToScore.size());
GeneScorer geneScorer = new PvalueGeneScorer(probandIdentifier, sample.getSex(), inheritanceModeAnnotator, combinedScorePvalueCalculator);
Expand All @@ -146,9 +136,7 @@ public AnalysisResults run(Sample sample, Analysis analysis) {
List<Gene> genes = geneScorer.scoreGenes(genesToScore);
List<VariantEvaluation> variants = variantsLoaded ? getFinalVariantList(variantEvaluations) : List.of();

logger.info("Analysed {} genes containing {} filtered variants", genes.size(), variants.size());

logger.info("Creating analysis results from VCF {}", sample.getVcfPath());
logger.info("Analysed sample {} with {} genes containing {} filtered variants", probandIdentifier, genes.size(), variants.size());
AnalysisResults analysisResults = AnalysisResults.builder()
// TODO: add FilterStats? - would make HTML output more meaningful
.sample(sample)
Expand All @@ -164,6 +152,25 @@ public AnalysisResults run(Sample sample, Analysis analysis) {
return analysisResults;
}

private void logWarningIfSubOptimalAnalysisSumbitted(List<AnalysisGroup> analysisStepGroups) {
boolean hasPrioritiserStep = false;
boolean hasVariantFilterStep = false;
for (AnalysisGroup analysisGroup : analysisStepGroups) {
if (analysisGroup.isVariantFilterGroup()) {
hasVariantFilterStep = true;
}
if (analysisGroup.hasPrioritiserStep()) {
hasPrioritiserStep = true;
}
}
if (!hasPrioritiserStep) {
logger.warn("RUNNING AN ANALYSIS WITHOUT ANY PHENOTYPE PRIORITISATION WILL LEAD TO SUB-OPTIMAL RESULTS!");
}
if (!hasVariantFilterStep) {
logger.warn("RUNNING AN ANALYSIS WITHOUT ANY VARIANT FILTERING WILL LEAD TO SUB-OPTIMAL RESULTS!");
}
}

private CombinedScorePvalueCalculator buildCombinedScorePvalueCalculator(Sample sample, Analysis analysis, int numFilteredGenes) {
var prioritiser = analysis.getMainPrioritiser();
List<Gene> knownGenes = genomeAnalysisService.getKnownGenes();
Expand Down Expand Up @@ -276,6 +283,8 @@ private UnaryOperator<VariantEvaluation> flagWhiteListedVariants() {
*/
abstract Predicate<VariantEvaluation> runVariantFilters(List<VariantFilter> variantFilters, FilterStats filterStats);

protected abstract Predicate<Gene> genesToScore();

private void assignVariantsToGenes(List<VariantEvaluation> variantEvaluations, Map<String, Gene> allGenes) {
for (VariantEvaluation variantEvaluation : variantEvaluations) {
Gene gene = allGenes.get(variantEvaluation.getGeneSymbol());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,9 @@ protected List<VariantEvaluation> getFinalVariantList(List<VariantEvaluation> va
.filter(VariantEvaluation::passedFilters)
.collect(Collectors.toUnmodifiableList());
}

@Override
protected Predicate<Gene> genesToScore() {
return Gene::passedFilters;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,9 @@ protected Predicate<VariantEvaluation> runVariantFilters(List<VariantFilter> var
protected List<VariantEvaluation> getFinalVariantList(List<VariantEvaluation> variants) {
return List.copyOf(variants);
}

@Override
protected Predicate<Gene> genesToScore() {
return gene -> true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ private List<GeneScore> calculateRankedGeneScores() {
.variantScore(0)
.build();
compatibleGeneScores.add(geneScore);
} else if (!gene.hasVariants() && compatibleGeneScores.isEmpty()) {
// in the case of a phenotype-only analysis there wil be no variants loaded which will result in
// an empty genes.tsv file. To avoid this, we want to add the ANY MOI score. The combined score
// will only be zero for genes where both the phenotypeScore and variantScore are zero.
compatibleGeneScores.add(gene.getGeneScoreForMode(ModeOfInheritance.ANY));
}
return compatibleGeneScores.stream();
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,23 @@
import org.junit.jupiter.api.Test;
import org.monarchinitiative.exomiser.core.analysis.*;
import org.monarchinitiative.exomiser.core.analysis.sample.Sample;
import org.monarchinitiative.exomiser.core.analysis.util.InheritanceModeOptions;
import org.monarchinitiative.exomiser.core.genome.GenomeAnalysisService;
import org.monarchinitiative.exomiser.core.genome.GenomeAnalysisServiceProvider;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
import org.monarchinitiative.exomiser.core.genome.TestFactory;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource;
import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource;
import org.monarchinitiative.exomiser.core.phenotype.service.OntologyService;
import org.monarchinitiative.exomiser.core.phenotype.service.TestOntologyService;
import org.monarchinitiative.exomiser.core.prioritisers.PriorityFactory;
import org.monarchinitiative.exomiser.core.prioritisers.PriorityFactoryImpl;
import org.monarchinitiative.exomiser.core.prioritisers.service.TestPriorityServiceFactory;
import org.monarchinitiative.exomiser.core.prioritisers.util.DataMatrix;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.EnumSet;
import java.util.List;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
Expand All @@ -51,37 +56,48 @@
*/
public class ExomiserTest {

private static final Path VCF_PATH = Paths.get("src/test/resources/smallTest.vcf");

private final GenomeAnalysisServiceProvider genomeAnalysisServiceProvider = new GenomeAnalysisServiceProvider(TestFactory
.buildDefaultHg19GenomeAnalysisService());
private final PriorityFactory priorityFactory = new PriorityFactoryImpl(TestPriorityServiceFactory.testPriorityService(), null, null);
private final PriorityFactory priorityFactory = new PriorityFactoryImpl(TestPriorityServiceFactory.testPriorityService(), DataMatrix.empty(), null);
private final OntologyService ontologyService = TestOntologyService.builder().build();

private final AnalysisFactory analysisFactory = new AnalysisFactory(genomeAnalysisServiceProvider, priorityFactory, ontologyService);
//AnalysisFactory is only ever used here, but it provides a clean interface to the Analysis module
private final Exomiser instance = new Exomiser(analysisFactory);

private final Sample sample = Sample.builder().vcfPath(VCF_PATH).build();
private final Sample sample = Sample.builder()
.probandSampleName("manuel")
.hpoIds(List.of("HP:0001156", "HP:0001363", "HP:0011304", "HP:0010055"))
.vcfPath(Paths.get("src/test/resources/exomiser-test.vcf"))
.genomeAssembly(GenomeAssembly.HG19)
.build();

private Analysis makeAnalysisWithMode(AnalysisMode analysisMode) {
return instance.getAnalysisBuilder()
.analysisMode(analysisMode)
.inheritanceModes(InheritanceModeOptions.defaults())
.frequencySources(FrequencySource.ALL_EXTERNAL_FREQ_SOURCES)
.pathogenicitySources(EnumSet.of(PathogenicitySource.REVEL, PathogenicitySource.MVP))
.addFrequencyFilter(0.01f)
.addPathogenicityFilter(true)
.addInheritanceFilter()
.addOmimPrioritiser()
.addHiPhivePrioritiser()
.build();
}

@Test
public void canRunAnalysisFull() {
Analysis analysis = makeAnalysisWithMode(AnalysisMode.FULL);
AnalysisResults analysisResults = instance.run(sample, analysis);
assertThat(analysisResults.getGenes().size(), equalTo(2));
assertThat(analysisResults.getGenes().size(), equalTo(3));
}

@Test
public void canRunAnalysisPassOnly() {
Analysis analysis = makeAnalysisWithMode(AnalysisMode.PASS_ONLY);
AnalysisResults analysisResults = instance.run(sample, analysis);
assertThat(analysisResults.getGenes().size(), equalTo(2));
assertThat(analysisResults.getGenes().size(), equalTo(3));
}

@Test
Expand All @@ -93,29 +109,28 @@ public void canRunAnalysisUsingAlternateGenomeAssemblyPassOnly() {
AnalysisFactory analysisFactory = new AnalysisFactory(twoAssemblyProvider, priorityFactory, ontologyService);

Exomiser twoAssembliesSupportedExomiser = new Exomiser(analysisFactory);
Analysis analysis = makeAnalysisWithMode(AnalysisMode.PASS_ONLY);

Sample hg37Sample = Sample.builder()
.vcfPath(VCF_PATH)
.from(sample)
.genomeAssembly(GenomeAssembly.HG19)
.build();

Analysis hg37Analysis = twoAssembliesSupportedExomiser.getAnalysisBuilder()
.analysisMode(AnalysisMode.PASS_ONLY)
.build();
AnalysisResults hg37AnalysisResults = twoAssembliesSupportedExomiser.run(hg37Sample, hg37Analysis);
assertThat(hg37AnalysisResults.getGenes().size(), equalTo(2));
AnalysisResults hg37AnalysisResults = twoAssembliesSupportedExomiser.run(hg37Sample, analysis);
assertThat(hg37AnalysisResults.getSample().getGenomeAssembly(), equalTo(GenomeAssembly.HG19));
assertThat(hg37AnalysisResults.getGenes().size(), equalTo(3));
assertThat(hg37AnalysisResults.getVariantEvaluations().size(), equalTo(4));


Sample hg38Sample = Sample.builder()
.vcfPath(VCF_PATH)
.from(sample)
.genomeAssembly(GenomeAssembly.HG38)
.build();

Analysis hg38Analysis = twoAssembliesSupportedExomiser.getAnalysisBuilder()
.analysisMode(AnalysisMode.PASS_ONLY)
.build();
AnalysisResults hg38AnalysisResults = twoAssembliesSupportedExomiser.run(hg38Sample, hg38Analysis);
assertThat(hg38AnalysisResults.getGenes().size(), equalTo(2));
AnalysisResults hg38AnalysisResults = twoAssembliesSupportedExomiser.run(hg38Sample, analysis);
assertThat(hg38AnalysisResults.getSample().getGenomeAssembly(), equalTo(GenomeAssembly.HG38));
assertThat(hg38AnalysisResults.getGenes().size(), equalTo(3));
assertThat(hg38AnalysisResults.getVariantEvaluations().size(), equalTo(4));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.monarchinitiative.exomiser.core.model.*;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource;
import org.monarchinitiative.exomiser.core.prioritisers.MockPrioritiser;
import org.monarchinitiative.exomiser.core.prioritisers.NoneTypePrioritiser;
import org.monarchinitiative.exomiser.core.prioritisers.Prioritiser;
import org.monarchinitiative.exomiser.core.prioritisers.PriorityType;

Expand All @@ -57,20 +58,12 @@ public class PassOnlyAnalysisRunnerTest extends AnalysisRunnerTestBase {
private final PassOnlyAnalysisRunner instance = new PassOnlyAnalysisRunner(genomeAnalysisService);

@Test
public void testRunAnalysisNoFiltersNoPrioritisers() {
public void testRunAnalysisNoFiltersNoPrioritisersThrowsException() {
Sample sample = vcfOnlySample;
Analysis analysis = makeAnalysis();

AnalysisResults analysisResults = instance.run(sample, analysis);

printResults(analysisResults);
assertThat(analysisResults.getGenes().size(), equalTo(2));
for (Gene gene : analysisResults.getGenes()) {
assertThat(gene.passedFilters(), is(true));
for (VariantEvaluation variantEvaluation : gene.getVariantEvaluations()) {
assertThat(variantEvaluation.getFilterStatus(), equalTo(FilterStatus.UNFILTERED));
}
}
var exception = assertThrows(IllegalStateException.class, () -> instance.run(sample, analysis));
assertThat(exception.getMessage(), equalTo("No analysis steps specified!"));
}

@Test
Expand Down Expand Up @@ -192,19 +185,10 @@ public void testRunAnalysisWhenProbandSampleNameIsNotInSingleSampleVcf() {
.vcfPath(vcfPath)
.probandSampleName("mickyMouse")
.build();
Analysis analysis = Analysis.builder()
.build();
assertThrows(IllegalStateException.class, () -> instance.run(sample, analysis));
}
Analysis analysis = makeAnalysis(new QualityFilter(120), new NoneTypePrioritiser());

@Test
public void testRunAnalysisWhenProbandSampleNameIsNotSpecifiedAndHaveSingleSampleVcf() {
Sample sample = Sample.builder()
.vcfPath(vcfPath)
.build();
Analysis analysis = Analysis.builder()
.build();
instance.run(sample, analysis);
var exception = assertThrows(IllegalStateException.class, () -> instance.run(sample, analysis));
assertThat(exception.getMessage(), equalTo("Proband sample name 'mickyMouse' is not found in the VCF sample. Expected one of [manuel]. Please check your sample and analysis files match."));
}

@Test
Expand All @@ -213,9 +197,10 @@ public void testRunAnalysisWhenProbandSampleNameIsNotInMultiSampleVcf() {
.vcfPath(TestPedigrees.trioVcfPath())
.probandSampleName("mickyMouse")
.build();
Analysis analysis = Analysis.builder()
.build();
assertThrows(IllegalStateException.class, () -> instance.run(sample, analysis));
Analysis analysis = makeAnalysis(new QualityFilter(120), new NoneTypePrioritiser());

var exception = assertThrows(IllegalStateException.class, () -> instance.run(sample, analysis));
assertThat(exception.getMessage(), equalTo("Proband sample name 'mickyMouse' is not found in the VCF sample. Expected one of [Seth, Adam, Eva]. Please check your sample and analysis files match."));
}

@Test
Expand Down Expand Up @@ -277,6 +262,7 @@ public void testRunAnalysisPrioritiserAndPriorityScoreFilterOnly() {
assertThat(passedGene.passedFilters(), is(true));
assertThat(passedGene.getEntrezGeneID(), equalTo(9939));
assertThat(passedGene.getPriorityScore(), equalTo(desiredPrioritiserScore));
assertThat(passedGene.hasVariants(), equalTo(false));
System.out.println(passedGene.getGeneScores()); }

@Test
Expand Down
Loading

0 comments on commit e873ef9

Please sign in to comment.