Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: return structured mutation and insertion responses #723

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/lapis2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ jobs:

- name: Start SILO and LAPIS and Run Tests
run: |
docker compose -f lapis2/docker-compose.yml pull
docker compose -f lapis2/docker-compose.yml up -d --wait
cd siloLapisTests && npm run test
env:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,18 @@ function getFieldsThatAreAlwaysPresent(selection: Selection): ResultField[] {
{ name: 'mutation', type: 'string', nullable: false },
{ name: 'proportion', type: 'string', nullable: false },
{ name: 'count', type: 'integer', nullable: false },
{ name: 'sequenceName', type: 'string', nullable: true },
{ name: 'mutationFrom', type: 'string', nullable: false },
{ name: 'mutationTo', type: 'string', nullable: false },
{ name: 'position', type: 'integer', nullable: false },
];
case 'insertions':
return [
{ name: 'insertion', type: 'string', nullable: false },
{ name: 'count', type: 'integer', nullable: false },
{ name: 'insertedSymbols', type: 'string', nullable: false },
{ name: 'sequenceName', type: 'string', nullable: false },
{ name: 'position', type: 'integer', nullable: false },
];
case 'details':
case 'nucleotideSequences':
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,23 @@ class SiloQueryModel(
),
)
return data.map {
val sequenceName =
if (referenceGenomeSchema.isSingleSegmented()) it.mutation else "${it.sequenceName}:${it.mutation}"
val mutation = if (referenceGenomeSchema.isSingleSegmented()) {
it.mutation
} else {
"${it.sequenceName}:${it.mutation}"
}
Comment on lines +53 to +57
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Taepper should we move this logic to SILO, too?


NucleotideMutationResponse(
sequenceName,
it.count,
it.proportion,
mutation = mutation,
count = it.count,
proportion = it.proportion,
sequenceName = when (referenceGenomeSchema.isSingleSegmented()) {
true -> null
fengelniederhammer marked this conversation as resolved.
Show resolved Hide resolved
false -> it.sequenceName
},
mutationFrom = it.mutationFrom,
mutationTo = it.mutationTo,
position = it.position,
)
}
}
Expand All @@ -77,9 +87,13 @@ class SiloQueryModel(
)
return data.map {
AminoAcidMutationResponse(
"${it.sequenceName}:${it.mutation}",
it.count,
it.proportion,
mutation = "${it.sequenceName}:${it.mutation}",
count = it.count,
proportion = it.proportion,
sequenceName = it.sequenceName,
mutationFrom = it.mutationFrom,
mutationTo = it.mutationTo,
position = it.position,
)
}
}
Expand Down Expand Up @@ -110,11 +124,15 @@ class SiloQueryModel(
)

return data.map {
val sequenceName = if (referenceGenomeSchema.isSingleSegmented()) "" else "${it.sequenceName}:"

NucleotideInsertionResponse(
"ins_${sequenceName}${it.position}:${it.insertions}",
it.count,
insertion = it.insertion,
count = it.count,
insertedSymbols = it.insertedSymbols,
position = it.position,
sequenceName = when (referenceGenomeSchema.isSingleSegmented()) {
true -> null
false -> it.sequenceName
},
)
}
}
Expand All @@ -133,8 +151,11 @@ class SiloQueryModel(

return data.map {
AminoAcidInsertionResponse(
"ins_${it.sequenceName}:${it.position}:${it.insertions}",
it.count,
insertion = it.insertion,
count = it.count,
insertedSymbols = it.insertedSymbols,
position = it.position,
sequenceName = it.sequenceName,
)
}
}
Expand Down
140 changes: 113 additions & 27 deletions lapis2/src/main/kotlin/org/genspectrum/lapis/openApi/OpenApiDocs.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import io.swagger.v3.oas.models.OpenAPI
import io.swagger.v3.oas.models.media.ArraySchema
import io.swagger.v3.oas.models.media.BooleanSchema
import io.swagger.v3.oas.models.media.IntegerSchema
import io.swagger.v3.oas.models.media.NumberSchema
import io.swagger.v3.oas.models.media.Schema
import io.swagger.v3.oas.models.media.StringSchema
import org.genspectrum.lapis.config.DatabaseConfig
Expand Down Expand Up @@ -163,7 +164,11 @@ fun buildOpenApiSchema(
.description(
"The response contains the metadata of every sequence matching the sequence filters.",
)
.properties(nucleotideMutationProportionSchema()),
.properties(nucleotideMutationProportionSchema())
.required(
nucleotideMutationProportionSchema().keys
.filterNot { referenceGenomeSchema.isSingleSegmented() && it == "sequenceName" },
),
),
)
.addSchemas(NUCLEOTIDE_MUTATIONS_SCHEMA, nucleotideMutations())
Expand All @@ -175,7 +180,8 @@ fun buildOpenApiSchema(
.description(
"The response contains the metadata of every sequence matching the sequence filters.",
)
.properties(aminoAcidMutationProportionSchema()),
.properties(aminoAcidMutationProportionSchema())
.required(aminoAcidMutationProportionSchema().keys.toList()),
),
)
.addSchemas(
Expand All @@ -184,7 +190,11 @@ fun buildOpenApiSchema(
Schema<String>()
.type("object")
.description("Nucleotide Insertion data.")
.properties(nucleotideInsertionSchema()),
.properties(nucleotideInsertionSchema())
.required(
nucleotideInsertionSchema().keys
.filterNot { referenceGenomeSchema.isSingleSegmented() && it == "sequenceName" },
),
),
)
.addSchemas(
Expand All @@ -193,7 +203,8 @@ fun buildOpenApiSchema(
Schema<String>()
.type("object")
.description("Amino Acid Insertion data.")
.properties(aminoAcidInsertionSchema()),
.properties(aminoAcidInsertionSchema())
.required(aminoAcidInsertionSchema().keys.toList()),
),
)
.addSchemas(FIELDS_TO_AGGREGATE_BY_SCHEMA, fieldsArray(databaseConfig.schema.metadata))
Expand Down Expand Up @@ -372,42 +383,112 @@ private fun getAggregatedResponseProperties(filterProperties: Map<SequenceFilter
COUNT_PROPERTY to IntegerSchema().description("The number of sequences matching the filters."),
)

/**
 * Schema of the access key: a string schema whose description is [ACCESS_KEY_DESCRIPTION].
 *
 * The return type is declared explicitly so that callers see a stable `Schema<Any>` instead of
 * whatever type would be inferred from the swagger builder chain.
 */
fun accessKeySchema(): Schema<Any> = StringSchema().description(ACCESS_KEY_DESCRIPTION)

/**
 * Property schemas (field name to schema) describing one nucleotide mutation entry.
 *
 * Each key appears exactly once: with `mapOf`, a repeated key silently overrides the earlier
 * value, so stale duplicates would only be dead code. The key set is also used by
 * `buildOpenApiSchema` to derive the list of required fields, so key names must stay stable.
 */
private fun nucleotideMutationProportionSchema() =
    mapOf(
        "mutation" to StringSchema()
            .example("sequence1:G29741T")
            .description(
                // The trailing space after the period keeps the two sentences apart in the rendered docs.
                "If the genome only contains one segment then this is: " +
                    "(mutationFrom)(position)(mutationTo). " +
                    "If it has more than one segment (e.g., influenza), then the sequence is contained here: " +
                    "(sequenceName):(mutationFrom)(position)" +
                    "(mutationTo)",
            ),
        "proportion" to NumberSchema()
            .example(0.54321)
            .description(
                "Number of sequences with this mutation divided by the total number of sequences matching the " +
                    "given filter criteria with non-ambiguous reads at that position",
            ),
        "count" to IntegerSchema()
            .example(1234)
            .description("Total number of sequences with this mutation matching the given sequence filter criteria"),
        "sequenceName" to StringSchema()
            .example("sequence1")
            .description(
                "The name of the segment in which the mutation occurs. Null if the genome is single-segmented.",
            ),
        "mutationFrom" to StringSchema()
            .example("G")
            .description("The nucleotide symbol in the reference genome at the position of the mutation"),
        "mutationTo" to StringSchema()
            .example("T")
            .description("The nucleotide symbol that the mutation changes to or '-' in case of a deletion"),
        "position" to IntegerSchema()
            .example(29741)
            .description("The position in the reference genome where the mutation occurs"),
    )

/**
 * Property schemas (field name to schema) describing one amino acid mutation entry.
 *
 * Each key appears exactly once: with `mapOf`, a repeated key silently overrides the earlier
 * value. `buildOpenApiSchema` marks every key of this map as required.
 */
private fun aminoAcidMutationProportionSchema() =
    mapOf(
        "mutation" to StringSchema()
            .example("ORF1a:G29741T")
            .description("Of the format (sequenceName):(mutationFrom)(position)(mutationTo)"),
        "proportion" to NumberSchema()
            .example(0.54321)
            .description(
                "Number of sequences with this mutation divided by the total number of sequences matching the " +
                    "given filter criteria with non-ambiguous reads at that position",
            ),
        "count" to IntegerSchema()
            .example(42)
            .description("Total number of sequences with this mutation matching the given sequence filter criteria"),
        "sequenceName" to StringSchema()
            .example("ORF1a")
            .description("The name of the gene in which the mutation occurs."),
        "mutationFrom" to StringSchema()
            .example("G")
            .description("The amino acid symbol in the reference genome at the position of the mutation"),
        "mutationTo" to StringSchema()
            .example("T")
            .description("The amino acid symbol that the mutation changes to or '-' in case of a deletion"),
        "position" to IntegerSchema()
            .example(29741)
            .description("The position in the reference genome where the mutation occurs"),
    )

/**
 * Property schemas (field name to schema) describing one nucleotide insertion entry.
 *
 * Each key appears exactly once: with `mapOf`, a repeated key silently overrides the earlier
 * value. The key set is also used by `buildOpenApiSchema` to derive the list of required
 * fields, so key names must stay stable.
 */
private fun nucleotideInsertionSchema() =
    mapOf(
        "insertion" to StringSchema()
            .example("ins_segment1:22204:CAGAAG")
            .description(
                "A nucleotide insertion in the format \"ins_(segment):(position):(insertedSymbols)\". " +
                    "If the pathogen has only one segment LAPIS will omit the segment name (\"ins_22204:CAGAAG\").",
            ),
        "count" to IntegerSchema()
            .example(42)
            .description("Total number of sequences with this insertion matching the given sequence filter criteria"),
        "insertedSymbols" to StringSchema()
            .example("CAGAAG")
            .description("The nucleotide symbols that were inserted at the given position"),
        "position" to IntegerSchema()
            .example(22204)
            .description("The position in the reference genome where the insertion occurs"),
        "sequenceName" to StringSchema()
            .example("segment1")
            .description(
                "The name of the segment in which the insertion occurs. Null if the genome is single-segmented.",
            ),
    )

/**
 * Property schemas (field name to schema) describing one amino acid insertion entry.
 *
 * Each key appears exactly once: with `mapOf`, a repeated key silently overrides the earlier
 * value. `buildOpenApiSchema` marks every key of this map as required.
 */
private fun aminoAcidInsertionSchema() =
    mapOf(
        "insertion" to StringSchema()
            .example("ins_ORF1a:22204:CAGAAG")
            .description("An amino acid insertion in the format \"ins_(gene):(position):(insertedSymbols)\"."),
        "count" to IntegerSchema()
            // Example added so that "count" is documented like in the sibling mutation/insertion schemas.
            .example(42)
            .description("Total number of sequences with this insertion matching the given sequence filter criteria."),
        "insertedSymbols" to StringSchema()
            .example("CAGAAG")
            .description("The amino acid symbols that were inserted at the given position."),
        "position" to IntegerSchema()
            .example(22204)
            .description("The position in the reference genome where the insertion occurs."),
        "sequenceName" to StringSchema()
            .example("ORF1a")
            .description("The name of the gene in which the insertion occurs."),
    )

private fun nucleotideMutations() =
Expand Down Expand Up @@ -512,9 +593,14 @@ private fun fieldsArray(
/**
 * Order-by field enum for aggregated results: the configured metadata fields of
 * [databaseConfig] plus the additional field "count", as assembled by `orderByFieldsEnum`.
 */
private fun aggregatedOrderByFieldsEnum(databaseConfig: DatabaseConfig) =
    databaseConfig.schema.metadata.let { metadataFields ->
        orderByFieldsEnum(metadataFields, listOf("count"))
    }

/**
 * Order-by field enum for mutation results.
 *
 * Delegates to `orderByFieldsEnum` with no metadata fields and the fixed list of field names
 * below, which mirrors the keys of the mutation response schemas.
 */
private fun mutationsOrderByFieldsEnum() =
    orderByFieldsEnum(
        emptyList(),
        listOf("mutation", "count", "proportion", "sequenceName", "mutationFrom", "mutationTo", "position"),
    )

/**
 * Order-by field enum for insertion results.
 *
 * Delegates to `orderByFieldsEnum` with no metadata fields and the fixed list of field names
 * below, which mirrors the keys of the insertion response schemas.
 */
private fun insertionsOrderByFieldsEnum() =
    orderByFieldsEnum(emptyList(), listOf("insertion", "count", "position", "sequenceName", "insertedSymbols"))

private fun aminoAcidSequenceOrderByFieldsEnum(
referenceGenomeSchema: ReferenceGenomeSchema,
Expand Down
Loading
Loading