Skip to content

Commit

Permalink
Add validation rules to MutationsByLineage
Browse files (browse the repository at this point in the history)
  • Loading branch information
remoteeng00 committed Feb 16, 2023
1 parent 50333d6 commit cebaa30
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 36 deletions.
4 changes: 4 additions & 0 deletions config_web/genomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
r"/{pre}/{ver}/prevalence-by-location-all-lineages",
"web.handlers.v2.genomics.PrevalenceAllLineagesByLocationHandler",
),
(
r"/{pre}/{ver}/mutations-by-lineage",
"web.handlers.v2.genomics.MutationsByLineage",
),
]

APP_LIST = [
Expand Down
3 changes: 2 additions & 1 deletion web/handlers/v2/genomics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
from .lineage import LineageHandler
from .lineage_mutations import LineageMutationsHandler
from .location import LocationHandler
from .prevalence_by_location_and_time import PrevalenceByLocationAndTimeHandler
from .mutations_by_lineage import MutationsByLineage
from .prevalence_all_lineages_by_location import PrevalenceAllLineagesByLocationHandler
from .prevalence_by_location_and_time import PrevalenceByLocationAndTimeHandler
90 changes: 55 additions & 35 deletions web/handlers/v2/genomics/mutations_by_lineage.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,61 @@
import pandas as pd

from web.handlers.genomics.base import BaseHandler
from web.handlers.genomics.util import (
calculate_proportion,
create_nested_mutation_query,
parse_location_id_to_query,
)


class MutationsByLineage(BaseHandler):
@gen.coroutine
def _get(self):
query_location = self.get_argument("location_id", None)
query_mutations = self.get_argument("mutations", None)
query_pangolin_lineage = self.get_argument("pangolin_lineage", None)
query_mutations = [muts.split(",") for muts in query_mutations.split(" AND ")] if query_mutations is not None else []
query_frequency_threshold = self.get_argument("frequency", None)
query_frequency_threshold = float(query_frequency_threshold) if query_frequency_threshold is not None else 0
name = "mutations-by-lineage"
kwargs = dict(BaseHandler.kwargs)
kwargs["GET"] = {
"location_id": {"type": str, "default": None},
"mutations": {"type": str, "default": None},
"pangolin_lineage": {"type": str, "default": None},
"frequency": {"type": float, "default": 0, "min": 0, "max": 1},
}

async def _get(self):
query_location = self.args.location_id
query_mutations = self.args.mutations
query_pangolin_lineage = self.args.pangolin_lineage
query_mutations = (
[muts.split(",") for muts in query_mutations.split(" AND ")]
if query_mutations is not None
else []
)
query_frequency_threshold = self.args.frequency
results = {}
for muts in query_mutations: # For multiple sets of mutations, create multiple ES queries. Since AND queries are possible doing one ES query with aggregations is cumbersome. Must look for better solution here.
for (
muts
) in (
query_mutations
): # For multiple sets of mutations, create multiple ES queries. Since AND queries are possible doing one ES query with aggregations is cumbersome. Must look for better solution here.
query = {
"size": 0,
"aggs": {
"lineage": {
"lineage": {
"terms": {"field": "pangolin_lineage", "size": self.size},
"aggs": {
"mutations": {
"filter": {}
}
}
"aggs": {"mutations": {"filter": {}}},
}
}
},
}
if query_location is not None:
query["query"] = parse_location_id_to_query(query_location)
if query_pangolin_lineage is not None:
if "query" in query: # Only query added will be bool for location
query["query"]["bool"]["must"].append({
"term": {
"pangolin_lineage": query_pangolin_lineage
}
})
if "query" in query: # Only query added will be bool for location
query["query"]["bool"]["must"].append(
{"term": {"pangolin_lineage": query_pangolin_lineage}}
)
else:
query["query"] = {
"term": {
"pangolin_lineage": query_pangolin_lineage
}
}
query["aggs"]["lineage"]["aggs"]["mutations"]["filter"] = create_nested_mutation_query(mutations = muts)
resp = yield self.asynchronous_fetch(query)
query["query"] = {"term": {"pangolin_lineage": query_pangolin_lineage}}
query["aggs"]["lineage"]["aggs"]["mutations"]["filter"] = create_nested_mutation_query(
mutations=muts
)
resp = await self.asynchronous_fetch(query)
path_to_results = ["aggregations", "lineage", "buckets"]
buckets = resp
for i in path_to_results:
Expand All @@ -48,14 +64,18 @@ def _get(self):
for i in buckets:
if not i["mutations"]["doc_count"] > 0 or i["key"] == "none":
continue
flattened_response.append({
"pangolin_lineage": i["key"],
"lineage_count": i["doc_count"],
"mutation_count": i["mutations"]["doc_count"]
})
flattened_response.append(
{
"pangolin_lineage": i["key"],
"lineage_count": i["doc_count"],
"mutation_count": i["mutations"]["doc_count"],
}
)
df_response = pd.DataFrame(flattened_response)
if df_response.shape[0] > 0:
prop = calculate_proportion(df_response["mutation_count"], df_response["lineage_count"])
prop = calculate_proportion(
df_response["mutation_count"], df_response["lineage_count"]
)
df_response.loc[:, "proportion"] = prop[0]
df_response.loc[:, "proportion_ci_lower"] = prop[1]
df_response.loc[:, "proportion_ci_upper"] = prop[2]
Expand Down

0 comments on commit cebaa30

Please sign in to comment.