Skip to content

Commit

Permalink
feat: implement maybe mutation filters #551
Browse files Browse the repository at this point in the history
  • Loading branch information
fengelniederhammer committed Jan 11, 2024
1 parent f7bddbc commit 1860d4f
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const val AMINO_ACID_INSERTIONS_ENDPOINT_DESCRIPTION =
considered."""
/** Human-readable description text for the info endpoint. */
const val INFO_ENDPOINT_DESCRIPTION = "Returns information about LAPIS"
const val ALIGNED_AMINO_ACID_SEQUENCE_ENDPOINT_DESCRIPTION =
"""Returns a string of fasta formated aligned amino acid sequences. Only sequences matching the specified
"""Returns a string of fasta formatted aligned amino acid sequences. Only sequences matching the specified
sequence filters are considered."""
const val ALIGNED_SINGLE_SEGMENTED_NUCLEOTIDE_SEQUENCE_ENDPOINT_DESCRIPTION =
"""Returns a string of fasta formatted aligned nucleotide sequences. Only sequences matching the
Expand All @@ -44,3 +44,22 @@ const val OFFSET_DESCRIPTION =
/**
 * Human-readable description of the parameter that selects the response data format.
 *
 * The text states the precedence rule: when both the parameter and the "Accept" header are given,
 * the parameter wins.
 */
// NOTE(review): this is a raw string, so the backslashes in front of the quotes are kept literally in the
// resulting text; presumably they are meant as Markdown escapes - confirm against the rendered docs.
const val FORMAT_DESCRIPTION =
"""The data format of the response. Alternatively, the data format can be specified by setting the
\"Accept\"-header. When both are specified, this parameter takes precedence."""

/**
 * Shared paragraph explaining the "MAYBE(<mutation>)" wrapper.
 *
 * It is interpolated into [NUCLEOTIDE_MUTATION_DESCRIPTION] and [AMINO_ACID_MUTATION_DESCRIPTION] so that
 * both descriptions document the wrapper with identical wording.
 */
// The backslashes before '<' and '>' are literal characters of the raw string, not Kotlin escapes.
private const val MAYBE_DESCRIPTION = """
A mutation can be wrapped in a maybe expression "MAYBE(\<mutation\>)"
to include sequences with ambiguous symbols at the given position.
"""

/**
 * Human-readable description of the nucleotide mutation filter syntax.
 *
 * Ends with the shared [MAYBE_DESCRIPTION] paragraph, which is interpolated at compile time.
 */
// The backslashes before '<' and '>' are literal characters of the raw string, not Kotlin escapes.
const val NUCLEOTIDE_MUTATION_DESCRIPTION = """
A nucleotide mutation in the format "\<sequenceName\>?:\<fromSymbol\>?\<position\>\<toSymbol\>?".
If the sequenceName is not provided, LAPIS will use the default sequence name.
The fromSymbol is optional.
If the toSymbol is not provided, the statement means "has any mutation at the given position".
$MAYBE_DESCRIPTION
"""

/**
 * Human-readable description of the amino acid mutation filter syntax.
 *
 * Ends with the shared [MAYBE_DESCRIPTION] paragraph, which is interpolated at compile time.
 */
// The backslashes before '<' and '>' are literal characters of the raw string, not Kotlin escapes.
const val AMINO_ACID_MUTATION_DESCRIPTION = """
An amino acid mutation in the format "\<gene\>:\<position\>\<toSymbol\>?".
If the toSymbol is not provided, the statement means "has any mutation at the given position".
$MAYBE_DESCRIPTION
"""
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import org.genspectrum.lapis.controller.BadRequestException
import org.genspectrum.lapis.request.AminoAcidInsertion
import org.genspectrum.lapis.request.AminoAcidMutation
import org.genspectrum.lapis.request.CommonSequenceFilters
import org.genspectrum.lapis.request.MaybeMutation
import org.genspectrum.lapis.request.NucleotideInsertion
import org.genspectrum.lapis.request.NucleotideMutation
import org.genspectrum.lapis.silo.AminoAcidInsertionContains
Expand All @@ -19,6 +20,7 @@ import org.genspectrum.lapis.silo.HasAminoAcidMutation
import org.genspectrum.lapis.silo.HasNucleotideMutation
import org.genspectrum.lapis.silo.IntBetween
import org.genspectrum.lapis.silo.IntEquals
import org.genspectrum.lapis.silo.Maybe
import org.genspectrum.lapis.silo.NucleotideInsertionContains
import org.genspectrum.lapis.silo.NucleotideSymbolEquals
import org.genspectrum.lapis.silo.Or
Expand Down Expand Up @@ -326,23 +328,35 @@ class SiloFilterExpressionMapper(
}

private fun toNucleotideMutationFilter(nucleotideMutation: NucleotideMutation) =
when (nucleotideMutation.symbol) {
null -> HasNucleotideMutation(nucleotideMutation.sequenceName, nucleotideMutation.position)
else -> NucleotideSymbolEquals(
nucleotideMutation.sequenceName,
nucleotideMutation.position,
nucleotideMutation.symbol,
)
}
wrapInMaybe(
nucleotideMutation,
when (nucleotideMutation.symbol) {
null -> HasNucleotideMutation(nucleotideMutation.sequenceName, nucleotideMutation.position)
else -> NucleotideSymbolEquals(
nucleotideMutation.sequenceName,
nucleotideMutation.position,
nucleotideMutation.symbol,
)
},
)

private fun toAminoAcidMutationFilter(aaMutation: AminoAcidMutation) =
when (aaMutation.symbol) {
null -> HasAminoAcidMutation(aaMutation.gene, aaMutation.position)
else -> AminoAcidSymbolEquals(
aaMutation.gene,
aaMutation.position,
aaMutation.symbol,
)
wrapInMaybe(
aaMutation,
when (aaMutation.symbol) {
null -> HasAminoAcidMutation(aaMutation.gene, aaMutation.position)
else -> AminoAcidSymbolEquals(
aaMutation.gene,
aaMutation.position,
aaMutation.symbol,
)
},
)

/**
 * Wraps [expression] in a [Maybe] when [maybeMutation] is flagged as "maybe";
 * otherwise returns [expression] unchanged.
 */
private fun wrapInMaybe(maybeMutation: MaybeMutation<*>, expression: SiloFilterExpression) =
    if (maybeMutation.maybe) Maybe(expression) else expression

private fun toNucleotideInsertionFilter(nucleotideInsertion: NucleotideInsertion): NucleotideInsertionContains {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import org.genspectrum.lapis.config.SequenceFilterFields
import org.genspectrum.lapis.controller.AGGREGATED_GROUP_BY_FIELDS_DESCRIPTION
import org.genspectrum.lapis.controller.AMINO_ACID_INSERTIONS_PROPERTY
import org.genspectrum.lapis.controller.AMINO_ACID_MUTATIONS_PROPERTY
import org.genspectrum.lapis.controller.AMINO_ACID_MUTATION_DESCRIPTION
import org.genspectrum.lapis.controller.DETAILS_FIELDS_DESCRIPTION
import org.genspectrum.lapis.controller.FIELDS_PROPERTY
import org.genspectrum.lapis.controller.FORMAT_DESCRIPTION
Expand All @@ -23,6 +24,7 @@ import org.genspectrum.lapis.controller.LIMIT_PROPERTY
import org.genspectrum.lapis.controller.MIN_PROPORTION_PROPERTY
import org.genspectrum.lapis.controller.NUCLEOTIDE_INSERTIONS_PROPERTY
import org.genspectrum.lapis.controller.NUCLEOTIDE_MUTATIONS_PROPERTY
import org.genspectrum.lapis.controller.NUCLEOTIDE_MUTATION_DESCRIPTION
import org.genspectrum.lapis.controller.OFFSET_DESCRIPTION
import org.genspectrum.lapis.controller.OFFSET_PROPERTY
import org.genspectrum.lapis.controller.ORDER_BY_PROPERTY
Expand Down Expand Up @@ -384,18 +386,12 @@ private fun aminoAcidInsertionSchema() =
private fun nucleotideMutations() =
Schema<List<NucleotideMutation>>()
.type("array")
.description(NUCLEOTIDE_MUTATION_DESCRIPTION)
.items(
Schema<String>()
.type("string")
.example("sequence1:A123T")
.description(
"""
|A nucleotide mutation in the format "\<sequenceName\>?:\<fromSymbol\>?\<position\>\<toSymbol\>?".
|If the sequenceName is not provided, LAPIS will use the default sequence name.
|The fromSymbol is optional.
|If the toSymbol is not provided, the statement means "has any mutation at the given position".
""".trimMargin(),
),
.description(NUCLEOTIDE_MUTATION_DESCRIPTION),
)

private fun aminoAcidMutations() =
Expand All @@ -405,12 +401,7 @@ private fun aminoAcidMutations() =
Schema<String>()
.type("string")
.example("S:123T")
.description(
"""
|A amino acid mutation in the format "\<gene\>:\<position\>\<toSymbol\>?".
|If the toSymbol is not provided, the statement means "has any mutation at the given position".
""".trimMargin(),
),
.description(AMINO_ACID_MUTATION_DESCRIPTION),
)

private fun nucleotideInsertions() =
Expand Down Expand Up @@ -442,12 +433,6 @@ private fun aminoAcidInsertions() =
),
)

private fun orderByGetSchema(orderByFieldsSchema: Schema<Any>) =
Schema<List<String>>()
.type("array")
.items(orderByFieldsSchema)
.description("The fields by which the result is ordered in ascending order.")

private fun orderByPostSchema(orderByFieldsSchema: Schema<Any>) =
Schema<List<String>>()
.type("array")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,22 @@ import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
import org.springframework.stereotype.Component

data class AminoAcidMutation(val gene: String, val position: Int, val symbol: String?) {
data class AminoAcidMutation(
val gene: String,
val position: Int,
val symbol: String?,
override val maybe: Boolean = false,
) :
MaybeMutation<AminoAcidMutation> {
companion object {
fun fromString(
aminoAcidMutation: String,
referenceGenome: ReferenceGenome,
) = wrapWithMaybeMutationParser(aminoAcidMutation) { parseMutation(it, referenceGenome) }

private fun parseMutation(
aminoAcidMutation: String,
referenceGenome: ReferenceGenome,
): AminoAcidMutation {
val match = AMINO_ACID_MUTATION_REGEX.find(aminoAcidMutation)
?: throw BadRequestException("Invalid amino acid mutation: $aminoAcidMutation")
Expand All @@ -36,6 +47,8 @@ data class AminoAcidMutation(val gene: String, val position: Int, val symbol: St
)
}
}

override fun asMaybe() = copy(maybe = true)
}

private val AMINO_ACID_MUTATION_REGEX =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package org.genspectrum.lapis.request


/**
 * A mutation filter that can additionally be flagged as a "maybe" mutation.
 *
 * [Self] is the implementing type, so that [asMaybe] returns the concrete mutation type
 * rather than the interface.
 */
interface MaybeMutation<Self : MaybeMutation<Self>> {
    /** `true` when this mutation is flagged as "maybe". */
    val maybe: Boolean

    /** Returns a version of this mutation that is flagged as "maybe". */
    fun asMaybe(): Self
}

/**
 * Recognises input of the form `MAYBE(<mutationCandidate>)`.
 *
 * The pattern is anchored with `^` and `$`, and the non-empty text between the outermost parentheses is
 * captured in the named group `mutationCandidate`. The capture is greedy, so nested parentheses end up
 * inside the group and are left for the mutation parser to accept or reject.
 */
val MAYBE_REGEX = Regex("""^MAYBE\((?<mutationCandidate>.+)\)$""")

/**
 * Parses [mutationCandidate] with [mutationParser], transparently handling the `MAYBE(...)` wrapper.
 *
 * - If the input does not match [MAYBE_REGEX], it is passed to [mutationParser] unchanged and the
 *   parser's result is returned as is.
 * - If it matches, only the text inside the wrapper is passed to [mutationParser], and the parsed
 *   mutation is returned via [MaybeMutation.asMaybe].
 *
 * Anything thrown by [mutationParser] propagates to the caller.
 *
 * @param mutationCandidate the raw filter string, with or without the `MAYBE(...)` wrapper
 * @param mutationParser parser for the unwrapped mutation string
 * @return the parsed mutation, flagged as "maybe" when the wrapper was present
 */
inline fun <T : MaybeMutation<T>> wrapWithMaybeMutationParser(
    mutationCandidate: String,
    mutationParser: (String) -> T,
): T {
    val match = MAYBE_REGEX.find(mutationCandidate) ?: return mutationParser(mutationCandidate)

    // The group is mandatory in MAYBE_REGEX, so it is always present once the regex has matched;
    // checkNotNull documents that invariant instead of relying on a bare `!!`.
    val wrappedCandidate = checkNotNull(match.groups["mutationCandidate"]) {
        "MAYBE_REGEX matched but did not capture the wrapped mutation"
    }.value

    return mutationParser(wrappedCandidate).asMaybe()
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@ import org.springframework.boot.jackson.JsonComponent
import org.springframework.core.convert.converter.Converter
import org.springframework.stereotype.Component

data class NucleotideMutation(val sequenceName: String?, val position: Int, val symbol: String?) {
data class NucleotideMutation(
val sequenceName: String?,
val position: Int,
val symbol: String?,
override val maybe: Boolean = false,
) : MaybeMutation<NucleotideMutation> {
companion object {
fun fromString(
nucleotideMutation: String,
referenceGenome: ReferenceGenome,
): NucleotideMutation {
) = wrapWithMaybeMutationParser(nucleotideMutation) { parseMutation(it, referenceGenome) }

private fun parseMutation(nucleotideMutation: String, referenceGenome: ReferenceGenome): NucleotideMutation {
val match = NUCLEOTIDE_MUTATION_REGEX.find(nucleotideMutation)
?: throw BadRequestException("Invalid nucleotide mutation: $nucleotideMutation")

Expand All @@ -35,6 +42,8 @@ data class NucleotideMutation(val sequenceName: String?, val position: Int, val
)
}
}

override fun asMaybe() = copy(maybe = true)
}

private val NUCLEOTIDE_MUTATION_REGEX =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.genspectrum.lapis.silo.HasAminoAcidMutation
import org.genspectrum.lapis.silo.HasNucleotideMutation
import org.genspectrum.lapis.silo.IntBetween
import org.genspectrum.lapis.silo.IntEquals
import org.genspectrum.lapis.silo.Maybe
import org.genspectrum.lapis.silo.NucleotideInsertionContains
import org.genspectrum.lapis.silo.NucleotideSymbolEquals
import org.genspectrum.lapis.silo.Or
Expand Down Expand Up @@ -277,16 +278,21 @@ class SiloFilterExpressionMapperTest {
fun `given nucleotide mutation with symbol then is mapped to NucleotideSymbolEquals`() {
val filterParameter = DummySequenceFilters(
emptyMap(),
listOf(NucleotideMutation(null, 123, "B"), NucleotideMutation("sequenceName", 999, "A")),
listOf(
NucleotideMutation(null, 123, "B", maybe = true),
NucleotideMutation("sequenceName", 999, "A", maybe = false),
),
emptyList(),
emptyList(),
emptyList(),
)

val result = underTest.map(filterParameter)

val expected =
And(NucleotideSymbolEquals(null, 123, "B"), NucleotideSymbolEquals("sequenceName", 999, "A"))
val expected = And(
Maybe(NucleotideSymbolEquals(null, 123, "B")),
NucleotideSymbolEquals("sequenceName", 999, "A"),
)
assertThat(result, equalTo(expected))
}

Expand All @@ -302,8 +308,10 @@ class SiloFilterExpressionMapperTest {

val result = underTest.map(filterParameter)

val expected =
And(HasNucleotideMutation(null, 123), HasNucleotideMutation("sequenceName", 999))
val expected = And(
HasNucleotideMutation(null, 123),
HasNucleotideMutation("sequenceName", 999),
)
assertThat(result, equalTo(expected))
}

Expand All @@ -312,15 +320,20 @@ class SiloFilterExpressionMapperTest {
val filterParameter = DummySequenceFilters(
emptyMap(),
emptyList(),
listOf(AminoAcidMutation("geneName1", 123, "B"), AminoAcidMutation("geneName2", 999, "A")),
listOf(
AminoAcidMutation("geneName1", 123, "B", maybe = true),
AminoAcidMutation("geneName2", 999, "A", maybe = false),
),
emptyList(),
emptyList(),
)

val result = underTest.map(filterParameter)

val expected =
And(AminoAcidSymbolEquals("geneName1", 123, "B"), AminoAcidSymbolEquals("geneName2", 999, "A"))
val expected = And(
Maybe(AminoAcidSymbolEquals("geneName1", 123, "B")),
AminoAcidSymbolEquals("geneName2", 999, "A"),
)
assertThat(result, equalTo(expected))
}

Expand All @@ -336,8 +349,10 @@ class SiloFilterExpressionMapperTest {

val result = underTest.map(filterParameter)

val expected =
And(HasAminoAcidMutation("geneName1", 123), HasAminoAcidMutation("geneName2", 999))
val expected = And(
HasAminoAcidMutation("geneName1", 123),
HasAminoAcidMutation("geneName2", 999),
)
assertThat(result, equalTo(expected))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ class AminoAcidMutationTest {
"\"gENe1:123A\"",
AminoAcidMutation("gene1", 123, "A"),
),
Arguments.of(
"\"MAYBE(gene1:123A)\"",
AminoAcidMutation("gene1", 123, "A", maybe = true),
),
)

@JvmStatic
Expand All @@ -95,6 +99,10 @@ class AminoAcidMutationTest {
Arguments.of("\":123A\""),
Arguments.of("\"gene1\$name&with/invalid)chars:123A\""),
Arguments.of("\"geneNotInReferenceGenome:123A\""),
Arguments.of("\"MAYBE()\""),
Arguments.of("\"MAYBE(notAMutation)\""),
Arguments.of("\"MAYBE(gene1:G123A))\""),
Arguments.of("\"MAYBE((gene1:G123A)\""),
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ class NucleotideMutationTest {
"\"othER_SegmENt:123X\"",
NucleotideMutation("other_segment", 123, "X"),
),
Arguments.of(
"\"MAYBE(other_segment:123X)\"",
NucleotideMutation("other_segment", 123, "X", maybe = true),
),
Arguments.of(
"\"MAYBE(123X)\"",
NucleotideMutation(null, 123, "X", maybe = true),
),
)

@JvmStatic
Expand All @@ -97,6 +105,10 @@ class NucleotideMutationTest {
Arguments.of("\":123A\""),
Arguments.of("\"sequence\$name&with/invalid)chars:G123A\""),
Arguments.of("\"segmentNotInReferenceGenome:G123A\""),
Arguments.of("\"MAYBE()\""),
Arguments.of("\"MAYBE(notAMutation)\""),
Arguments.of("\"MAYBE(123A))\""),
Arguments.of("\"MAYBE((123A)\""),
)
}
}
11 changes: 11 additions & 0 deletions siloLapisTests/test/aggregatedQueries/maybeAminoAcidMutation.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"testCaseName": "maybe amino acid mutation",
"lapisRequest": {
"aminoAcidMutations": ["MAYBE(S:22)"]
},
"expected": [
{
"count": 3
}
]
}
Loading

0 comments on commit 1860d4f

Please sign in to comment.