From c043bfee313e1483280e754b33f1cfa82cd8b2e4 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Wed, 13 Dec 2023 17:32:22 +0100 Subject: [PATCH] feat: implement leftover variant query parts #498 --- .../model/variantqueryparser/VariantQuery.g4 | 6 +- .../genspectrum/lapis/model/CovidSpecifics.kt | 12 +++ .../lapis/model/VariantQueryCustomListener.kt | 73 ++++++++++---- .../lapis/request/AminoAcidInsertion.kt | 5 +- .../lapis/request/NucleotideInsertion.kt | 8 +- .../lapis/model/VariantQueryFacadeTest.kt | 97 ++++++++++++------- siloLapisTests/test/details.spec.ts | 10 +- .../testData/small_metadata_set.tsv | 2 +- .../testData/testDatabaseConfig.yaml | 4 +- 9 files changed, 149 insertions(+), 68 deletions(-) create mode 100644 lapis2/src/main/kotlin/org/genspectrum/lapis/model/CovidSpecifics.kt diff --git a/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 b/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 index fa792f782..bc33c423f 100644 --- a/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 +++ b/lapis2/src/main/antlr/org/genspectrum/lapis/model/variantqueryparser/VariantQuery.g4 @@ -45,7 +45,8 @@ nOfMatchExactly: 'EXACTLY-' | 'exactly-'; nOfNumberOfMatchers: NUMBER+; nOfExprs: expr (',' expr)*; -nucleotideInsertionQuery: insertionKeyword position ':' (possibleAmbiguousNucleotideSymbol | '?')+; +nucleotideInsertionQuery: insertionKeyword position ':' nucleotideInsertionSymbol+; +nucleotideInsertionSymbol: possibleAmbiguousNucleotideSymbol | '?'; insertionKeyword: 'ins_' | 'INS_'; aaMutationQuery: gene ':' aaSymbol? 
position possiblyAmbiguousAaSymbol?; @@ -55,7 +56,8 @@ possiblyAmbiguousAaSymbol: aaSymbol | ambiguousAaSymbol; gene: covidGene; covidGene : E | M | N | S | ORF; -aaInsertionQuery: insertionKeyword gene ':' position ':' (possiblyAmbiguousAaSymbol | '?')+; +aaInsertionQuery: insertionKeyword gene ':' position ':' aaInsertionSymbol+; +aaInsertionSymbol: possiblyAmbiguousAaSymbol | '?'; nextcladePangolineageQuery: nextcladePangoLineagePrefix pangolineageQuery; nextcladePangoLineagePrefix: 'nextcladePangoLineage:' | 'NEXTCLADEPANGOLINEAGE:'; diff --git a/lapis2/src/main/kotlin/org/genspectrum/lapis/model/CovidSpecifics.kt b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/CovidSpecifics.kt new file mode 100644 index 000000000..523bc6e91 --- /dev/null +++ b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/CovidSpecifics.kt @@ -0,0 +1,12 @@ +package org.genspectrum.lapis.model + +/** + * These are special values used for the COVID instance that supports advanced "variant queries". + */ + +const val PANGO_LINEAGE_COLUMN = "pangoLineage" +const val NEXTCLADE_PANGO_LINEAGE_COLUMN = "nextcladePangoLineage" +const val NEXTSTRAIN_CLADE_COLUMN = "nextstrainClade" +const val GISAID_CLADE_COLUMN = "gisaidClade" + +const val NEXTSTRAIN_CLADE_RECOMBINANT = "RECOMBINANT" diff --git a/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt index 05a2d12a0..2a24e8f31 100644 --- a/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt +++ b/lapis2/src/main/kotlin/org/genspectrum/lapis/model/VariantQueryCustomListener.kt @@ -1,10 +1,10 @@ package org.genspectrum.lapis.model import VariantQueryBaseListener +import VariantQueryParser import VariantQueryParser.AaInsertionQueryContext import VariantQueryParser.AaMutationQueryContext import VariantQueryParser.AndContext -import VariantQueryParser.GisaidCladeLineageQueryContext import 
VariantQueryParser.MaybeContext import VariantQueryParser.NOfQueryContext import VariantQueryParser.NextcladePangolineageQueryContext @@ -14,7 +14,11 @@ import VariantQueryParser.NucleotideInsertionQueryContext import VariantQueryParser.NucleotideMutationQueryContext import VariantQueryParser.OrContext import VariantQueryParser.PangolineageQueryContext +import org.antlr.v4.runtime.RuleContext import org.antlr.v4.runtime.tree.ParseTreeListener +import org.genspectrum.lapis.request.LAPIS_INSERTION_AMBIGUITY_SYMBOL +import org.genspectrum.lapis.request.SILO_INSERTION_AMBIGUITY_SYMBOL +import org.genspectrum.lapis.silo.AminoAcidInsertionContains import org.genspectrum.lapis.silo.AminoAcidSymbolEquals import org.genspectrum.lapis.silo.And import org.genspectrum.lapis.silo.HasAminoAcidMutation @@ -22,10 +26,12 @@ import org.genspectrum.lapis.silo.HasNucleotideMutation import org.genspectrum.lapis.silo.Maybe import org.genspectrum.lapis.silo.NOf import org.genspectrum.lapis.silo.Not +import org.genspectrum.lapis.silo.NucleotideInsertionContains import org.genspectrum.lapis.silo.NucleotideSymbolEquals import org.genspectrum.lapis.silo.Or import org.genspectrum.lapis.silo.PangoLineageEquals import org.genspectrum.lapis.silo.SiloFilterExpression +import org.genspectrum.lapis.silo.StringEquals class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener { private val expressionStack = ArrayDeque() @@ -48,15 +54,8 @@ class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener expressionStack.addLast(expression) } - override fun enterPangolineageQuery(ctx: PangolineageQueryContext?) 
{ - if (ctx == null) { - return - } - val pangolineage = ctx.pangolineage().text - val includeSublineages = ctx.pangolineageIncludeSublineages() != null - - val expr = PangoLineageEquals("pango_lineage", pangolineage, includeSublineages) - expressionStack.addLast(expr) + override fun enterPangolineageQuery(ctx: PangolineageQueryContext) { + addPangoLineage(ctx, PANGO_LINEAGE_COLUMN) } override fun exitAnd(ctx: AndContext?) { @@ -95,8 +94,14 @@ class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener expressionStack.addLast(NOf(n, matchExactly, children.reversed())) } - override fun enterNucleotideInsertionQuery(ctx: NucleotideInsertionQueryContext?) { - throw SiloNotImplementedError("Nucleotide insertions are not supported yet.", NotImplementedError()) + override fun enterNucleotideInsertionQuery(ctx: NucleotideInsertionQueryContext) { + val value = ctx.nucleotideInsertionSymbol().joinToString("", transform = ::mapInsertionSymbol) + expressionStack.addLast( + NucleotideInsertionContains( + ctx.position().text.toInt(), + value, + ), + ) } override fun enterAaMutationQuery(ctx: AaMutationQueryContext?) { @@ -113,21 +118,49 @@ class VariantQueryCustomListener : VariantQueryBaseListener(), ParseTreeListener expressionStack.addLast(expression) } - override fun enterAaInsertionQuery(ctx: AaInsertionQueryContext?) 
{ - throw SiloNotImplementedError("Amino acid insertions are not supported yet.", NotImplementedError()) + override fun enterAaInsertionQuery(ctx: AaInsertionQueryContext) { + val value = ctx.aaInsertionSymbol().joinToString("", transform = ::mapInsertionSymbol) + expressionStack.addLast( + AminoAcidInsertionContains( + ctx.position().text.toInt(), + value, + ctx.gene().text, + ), + ) + } + + override fun enterNextcladePangolineageQuery(ctx: NextcladePangolineageQueryContext) { + addPangoLineage(ctx.pangolineageQuery(), NEXTCLADE_PANGO_LINEAGE_COLUMN) } - override fun enterNextcladePangolineageQuery(ctx: NextcladePangolineageQueryContext?) { - throw SiloNotImplementedError("Nextclade pango lineages are not supported yet.", NotImplementedError()) + override fun enterNextstrainCladeQuery(ctx: NextstrainCladeQueryContext) { + val value = when (ctx.text) { + NEXTSTRAIN_CLADE_RECOMBINANT -> ctx.text.lowercase() + else -> ctx.text + } + expressionStack.addLast(StringEquals(NEXTSTRAIN_CLADE_COLUMN, value)) } - override fun enterNextstrainCladeQuery(ctx: NextstrainCladeQueryContext?) { - throw SiloNotImplementedError("Nextstrain clade lineages are not supported yet.", NotImplementedError()) + override fun enterGisaidCladeNomenclature(ctx: VariantQueryParser.GisaidCladeNomenclatureContext) { + expressionStack.addLast(StringEquals(GISAID_CLADE_COLUMN, ctx.text)) } - override fun enterGisaidCladeLineageQuery(ctx: GisaidCladeLineageQueryContext?) 
{ - throw SiloNotImplementedError("Gisaid clade lineages are not supported yet.", NotImplementedError()) + private fun addPangoLineage( + ctx: PangolineageQueryContext, + pangoLineageColumnName: String, + ) { + val pangolineage = ctx.pangolineage().text + val includeSublineages = ctx.pangolineageIncludeSublineages() != null + + val expr = PangoLineageEquals(pangoLineageColumnName, pangolineage, includeSublineages) + expressionStack.addLast(expr) } } +fun mapInsertionSymbol(ctx: RuleContext): String = + when (ctx.text) { + LAPIS_INSERTION_AMBIGUITY_SYMBOL -> SILO_INSERTION_AMBIGUITY_SYMBOL + else -> ctx.text + } + class SiloNotImplementedError(message: String?, cause: Throwable?) : Exception(message, cause) diff --git a/lapis2/src/main/kotlin/org/genspectrum/lapis/request/AminoAcidInsertion.kt b/lapis2/src/main/kotlin/org/genspectrum/lapis/request/AminoAcidInsertion.kt index ae9ae6308..8c14c85ac 100644 --- a/lapis2/src/main/kotlin/org/genspectrum/lapis/request/AminoAcidInsertion.kt +++ b/lapis2/src/main/kotlin/org/genspectrum/lapis/request/AminoAcidInsertion.kt @@ -26,7 +26,10 @@ data class AminoAcidInsertion(val position: Int, val gene: String, val insertion "Invalid amino acid insertion: $aminoAcidInsertion: Did not find gene", ) - val insertions = matchGroups["insertions"]?.value?.replace("?", ".*") + val insertions = matchGroups["insertions"]?.value?.replace( + LAPIS_INSERTION_AMBIGUITY_SYMBOL, + SILO_INSERTION_AMBIGUITY_SYMBOL, + ) ?: throw BadRequestException( "Invalid amino acid insertion: $aminoAcidInsertion: Did not find insertions", ) diff --git a/lapis2/src/main/kotlin/org/genspectrum/lapis/request/NucleotideInsertion.kt b/lapis2/src/main/kotlin/org/genspectrum/lapis/request/NucleotideInsertion.kt index 687a5ddd2..a4eb01377 100644 --- a/lapis2/src/main/kotlin/org/genspectrum/lapis/request/NucleotideInsertion.kt +++ b/lapis2/src/main/kotlin/org/genspectrum/lapis/request/NucleotideInsertion.kt @@ -8,6 +8,9 @@ import 
org.springframework.boot.jackson.JsonComponent import org.springframework.core.convert.converter.Converter import org.springframework.stereotype.Component +const val LAPIS_INSERTION_AMBIGUITY_SYMBOL = "?" +const val SILO_INSERTION_AMBIGUITY_SYMBOL = ".*" + data class NucleotideInsertion(val position: Int, val insertions: String, val segment: String?) { companion object { fun fromString(nucleotideInsertion: String): NucleotideInsertion { @@ -21,7 +24,10 @@ data class NucleotideInsertion(val position: Int, val insertions: String, val se "Invalid nucleotide insertion: $nucleotideInsertion: Did not find position", ) - val insertions = matchGroups["insertions"]?.value?.replace("?", ".*") + val insertions = matchGroups["insertions"]?.value?.replace( + LAPIS_INSERTION_AMBIGUITY_SYMBOL, + SILO_INSERTION_AMBIGUITY_SYMBOL, + ) ?: throw BadRequestException( "Invalid nucleotide insertion: $nucleotideInsertion: Did not find insertions", ) diff --git a/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt b/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt index 36977d81d..75a294ed9 100644 --- a/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt +++ b/lapis2/src/test/kotlin/org/genspectrum/lapis/model/VariantQueryFacadeTest.kt @@ -1,5 +1,6 @@ package org.genspectrum.lapis.model +import org.genspectrum.lapis.silo.AminoAcidInsertionContains import org.genspectrum.lapis.silo.AminoAcidSymbolEquals import org.genspectrum.lapis.silo.And import org.genspectrum.lapis.silo.HasAminoAcidMutation @@ -7,14 +8,15 @@ import org.genspectrum.lapis.silo.HasNucleotideMutation import org.genspectrum.lapis.silo.Maybe import org.genspectrum.lapis.silo.NOf import org.genspectrum.lapis.silo.Not +import org.genspectrum.lapis.silo.NucleotideInsertionContains import org.genspectrum.lapis.silo.NucleotideSymbolEquals import org.genspectrum.lapis.silo.Or import org.genspectrum.lapis.silo.PangoLineageEquals +import 
org.genspectrum.lapis.silo.StringEquals import org.hamcrest.MatcherAssert.assertThat import org.hamcrest.Matchers.equalTo import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test -import org.junit.jupiter.api.assertThrows class VariantQueryFacadeTest { private lateinit var underTest: VariantQueryFacade @@ -74,7 +76,7 @@ class VariantQueryFacadeTest { ), ), ), - PangoLineageEquals("pango_lineage", "A.1.2.3", true), + PangoLineageEquals(PANGO_LINEAGE_COLUMN, "A.1.2.3", true), ), ) @@ -196,7 +198,7 @@ class VariantQueryFacadeTest { val result = underTest.map(variantQuery) - val expectedResult = PangoLineageEquals("pango_lineage", "A.1.2.3", false) + val expectedResult = PangoLineageEquals(PANGO_LINEAGE_COLUMN, "A.1.2.3", false) assertThat(result, equalTo(expectedResult)) } @@ -207,7 +209,18 @@ class VariantQueryFacadeTest { val result = underTest.map(variantQuery) - val expectedResult = PangoLineageEquals("pango_lineage", "A.1.2.3", true) + val expectedResult = PangoLineageEquals(PANGO_LINEAGE_COLUMN, "A.1.2.3", true) + assertThat(result, equalTo(expectedResult)) + } + + @Test + @Suppress("ktlint:standard:max-line-length") + fun `given a variantQuery with a 'NextcladePangolineage' expression then map should return the corresponding SiloQuery`() { + val variantQuery = "nextcladePangoLineage:A.1.2.3*" + + val result = underTest.map(variantQuery) + + val expectedResult = PangoLineageEquals(NEXTCLADE_PANGO_LINEAGE_COLUMN, "A.1.2.3", true) assertThat(result, equalTo(expectedResult)) } @@ -248,19 +261,25 @@ class VariantQueryFacadeTest { } @Test - fun `given a variantQuery with a 'Insertion' expression then map should throw an error`() { + fun `given a variantQuery with a 'Insertion' expression then returns SILO query`() { val variantQuery = "ins_1234:GAG" - val exception = assertThrows { underTest.map(variantQuery) } + val result = underTest.map(variantQuery) + + assertThat(result, equalTo(NucleotideInsertionContains(1234, "GAG"))) + } - assertThat( - 
exception.message, - equalTo("Nucleotide insertions are not supported yet."), - ) + @Test + fun `given a variantQuery with a 'Insertion' with wildcard expression then returns SILO query`() { + val variantQuery = "ins_1234:G?A?G" + + val result = underTest.map(variantQuery) + + assertThat(result, equalTo(NucleotideInsertionContains(1234, "G.*A.*G"))) } @Test - fun `given amino acidAA mutation expression then should map to AminoAcidSymbolEquals`() { + fun `given amino acid mutation expression then should map to AminoAcidSymbolEquals`() { val variantQuery = "S:N501Y" val result = underTest.map(variantQuery) @@ -296,50 +315,56 @@ class VariantQueryFacadeTest { } @Test - fun `given a valid variantQuery with a 'AA insertion' expression then map should throw an error`() { + fun `given a valid variantQuery with a 'AA insertion' expression then returns SILO query`() { val variantQuery = "ins_S:501:EPE" - val exception = assertThrows { underTest.map(variantQuery) } + val result = underTest.map(variantQuery) - assertThat( - exception.message, - equalTo("Amino acid insertions are not supported yet."), - ) + assertThat(result, equalTo(AminoAcidInsertionContains(501, "EPE", "S"))) } @Test - fun `given a valid variantQuery with a 'nextclade pango lineage' expression then map should throw an error`() { - val variantQuery = "nextcladePangoLineage:BA.5*" + fun `given a valid variantQuery with a 'AA insertion' with wildcard then returns SILO query`() { + val variantQuery = "ins_S:501:E?E?" 
- val exception = assertThrows { underTest.map(variantQuery) } + val result = underTest.map(variantQuery) - assertThat( - exception.message, - equalTo("Nextclade pango lineages are not supported yet."), - ) + assertThat(result, equalTo(AminoAcidInsertionContains(501, "E.*E.*", "S"))) } @Test - fun `given a valid variantQuery with a 'Nextstrain clade lineage' expression then map should throw an error`() { + fun `given a valid variantQuery with a 'NextstrainCladeLineage' expression then returns SILO query`() { val variantQuery = "nextstrainClade:22B" - val exception = assertThrows { underTest.map(variantQuery) } + val result = underTest.map(variantQuery) - assertThat( - exception.message, - equalTo("Nextstrain clade lineages are not supported yet."), - ) + assertThat(result, equalTo(StringEquals(NEXTSTRAIN_CLADE_COLUMN, "22B"))) + } + + @Test + fun `given a valid variantQuery with a 'NextstrainCladeLineage' recombinant expression then returns SILO query`() { + val variantQuery = "nextstrainClade:RECOMBINANT" + + val result = underTest.map(variantQuery) + + assertThat(result, equalTo(StringEquals(NEXTSTRAIN_CLADE_COLUMN, "recombinant"))) } @Test - fun `given a valid variantQuery with a 'Gisaid clade lineage' expression then map should throw an error`() { + fun `given a valid variantQuery with a single letter 'GisaidCladeLineage' expression then returns SILO query`() { + val variantQuery = "gisaid:X" + + val result = underTest.map(variantQuery) + + assertThat(result, equalTo(StringEquals(GISAID_CLADE_COLUMN, "X"))) + } + + @Test + fun `given a valid variantQuery with a 'GisaidCladeLineage' expression then returns SILO query`() { val variantQuery = "gisaid:AB" - val exception = assertThrows { underTest.map(variantQuery) } + val result = underTest.map(variantQuery) - assertThat( - exception.message, - equalTo("Gisaid clade lineages are not supported yet."), - ) + assertThat(result, equalTo(StringEquals(GISAID_CLADE_COLUMN, "AB"))) } } diff --git 
a/siloLapisTests/test/details.spec.ts b/siloLapisTests/test/details.spec.ts index ad896fa56..7abf7c575 100644 --- a/siloLapisTests/test/details.spec.ts +++ b/siloLapisTests/test/details.spec.ts @@ -8,7 +8,7 @@ describe('The /details endpoint', () => { const result = await lapisClient.postDetails1({ detailsPostRequest: { pangoLineage: 'B.1.617.2', - fields: ['pango_lineage', 'division'], + fields: ['pangoLineage', 'division'], }, }); @@ -98,7 +98,7 @@ describe('The /details endpoint', () => { it('should return the data as CSV', async () => { const urlParams = new URLSearchParams({ - fields: 'gisaid_epi_isl,pango_lineage,division', + fields: 'gisaid_epi_isl,pangoLineage,division', orderBy: 'gisaid_epi_isl', limit: '3', dataFormat: 'csv', @@ -108,7 +108,7 @@ describe('The /details endpoint', () => { expect(await result.text()).to.be.equal( String.raw` -division,gisaid_epi_isl,pango_lineage +division,gisaid_epi_isl,pangoLineage Vaud,EPI_ISL_1001493,B.1.177.44 Bern,EPI_ISL_1001920,B.1.177 Solothurn,EPI_ISL_1002052,B.1 @@ -118,7 +118,7 @@ Solothurn,EPI_ISL_1002052,B.1 it('should return the data as TSV', async () => { const urlParams = new URLSearchParams({ - fields: 'gisaid_epi_isl,pango_lineage,division', + fields: 'gisaid_epi_isl,pangoLineage,division', orderBy: 'gisaid_epi_isl', limit: '3', dataFormat: 'tsv', @@ -128,7 +128,7 @@ Solothurn,EPI_ISL_1002052,B.1 expect(await result.text()).to.be.equal( String.raw` -division gisaid_epi_isl pango_lineage +division gisaid_epi_isl pangoLineage Vaud EPI_ISL_1001493 B.1.177.44 Bern EPI_ISL_1001920 B.1.177 Solothurn EPI_ISL_1002052 B.1 diff --git a/siloLapisTests/testData/small_metadata_set.tsv b/siloLapisTests/testData/small_metadata_set.tsv index 7b2bcc753..510847594 100644 --- a/siloLapisTests/testData/small_metadata_set.tsv +++ b/siloLapisTests/testData/small_metadata_set.tsv @@ -1,4 +1,4 @@ -gisaid_epi_isl pango_lineage date region country division unsorted_date age qc_value insertions aaInsertions +gisaid_epi_isl 
pangoLineage date region country division unsorted_date age qc_value insertions aaInsertions EPI_ISL_1408408 B.1.1.7 2021-03-18 Europe Switzerland Basel-Land 4 0.98 S:214:EPE EPI_ISL_1749899 B.1.1.7 2021-04-13 Europe Switzerland Bern 2020-03-08 5 0.97 EPI_ISL_2016901 B.1.1.7 2021-04-25 Europe Switzerland Aargau 2021-01-29 6 0.96 diff --git a/siloLapisTests/testData/testDatabaseConfig.yaml b/siloLapisTests/testData/testDatabaseConfig.yaml index dd23ce92e..3bd20d6d4 100644 --- a/siloLapisTests/testData/testDatabaseConfig.yaml +++ b/siloLapisTests/testData/testDatabaseConfig.yaml @@ -12,7 +12,7 @@ schema: - name: country type: string generateIndex: true - - name: pango_lineage + - name: pangoLineage type: pango_lineage - name: division type: string @@ -29,4 +29,4 @@ schema: - name: sarsCoV2VariantQuery primaryKey: gisaid_epi_isl dateToSortBy: date - partitionBy: pango_lineage + partitionBy: pangoLineage