Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/fdaconstraint' #1599
Browse files Browse the repository at this point in the history
  • Loading branch information
amykglen committed Aug 15, 2021
2 parents f39e0af + b28b1b6 commit 8f7f7d6
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 29 deletions.
60 changes: 53 additions & 7 deletions code/ARAX/ARAXQuery/ARAX_expander.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/env python3
import copy
import multiprocessing
import pickle
import sys
import os
import traceback
Expand All @@ -10,6 +11,8 @@
sys.path.append(os.path.dirname(os.path.abspath(__file__))) # ARAXQuery directory
from ARAX_response import ARAXResponse
from ARAX_decorator import ARAXDecorator
sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../") # code directory
from RTXConfiguration import RTXConfiguration
sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../BiolinkHelper/")
from biolink_helper import BiolinkHelper
sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/Expand/")
Expand Down Expand Up @@ -40,6 +43,8 @@ def __init__(self):
"biolink:PhenotypicFeature"}}
self.kp_command_definitions = eu.get_kp_command_definitions()
self.biolink_helper = BiolinkHelper()
self.supported_qnode_constraints = {"biolink:highest_FDA_approval_status"}
self.supported_qedge_constraints = set()

def describe_me(self):
"""
Expand Down Expand Up @@ -110,17 +115,27 @@ def apply(self, response, input_parameters, mode="ARAX"):
# We'll use a copy of the QG because we modify it for internal use within Expand
query_graph = copy.deepcopy(message.query_graph)

# Verify we understand all constraints (right now we don't support any)
# Verify we understand all constraints and organize those we do for later handling
# TODO: Also verify we understand the value for each supported constraint?
constraints_to_apply = {"nodes": defaultdict(set), "edges": defaultdict(set)}
for qnode_key, qnode in query_graph.nodes.items():
if qnode.constraints:
constraint_ids = {constraint.id for constraint in qnode.constraints}
log.error(f"Unsupported constraint(s) detected on qnode {qnode_key} for {constraint_ids}. "
f"Don't know how to handle these!", error_code="UnsupportedConstraint")
qnode_constraints = {constraint.id for constraint in qnode.constraints}
unsupported_constraints = qnode_constraints.difference(self.supported_qnode_constraints)
if unsupported_constraints:
log.error(f"Unsupported constraint(s) detected on qnode {qnode_key}: {unsupported_constraints}. "
f"Don't know how to handle!", error_code="UnsupportedConstraint")
else:
for constraint in qnode.constraints:
constraints_to_apply["nodes"][qnode_key].add((constraint.id, constraint.value))
log.debug(f"Constraints to apply for qnode {qnode_key} are: {constraints_to_apply['nodes'][qnode_key]}")
for qedge_key, qedge in query_graph.edges.items():
if qedge.constraints:
constraint_ids = {constraint.id for constraint in qedge.constraints}
log.error(f"Unsupported constraint(s) detected on qedge {qedge_key} for {constraint_ids}. "
f"Don't know how to handle these!", error_code="UnsupportedConstraint")
qedge_constraints = {constraint.id for constraint in qedge.constraints}
unsupported_qedge_constraints = qedge_constraints.difference(self.supported_qedge_constraints)
if unsupported_qedge_constraints:
log.error(f"Unsupported constraint(s) detected on qedge {qedge_key}: {unsupported_qedge_constraints}. "
f"Don't know how to handle!", error_code="UnsupportedConstraint")

if response.status != 'OK':
return response
Expand Down Expand Up @@ -241,6 +256,23 @@ def apply(self, response, input_parameters, mode="ARAX"):
return response
log.debug(f"After merging KPs' answers, total KG counts are: {eu.get_printable_counts_by_qg_id(overarching_kg)}")

# Handle any constraints for this qedge and/or its qnodes (that require post-filtering)
qnode_keys = {qedge.subject, qedge.object}
qnode_keys_with_answers = qnode_keys.intersection(set(overarching_kg.nodes_by_qg_id))
for qnode_key in qnode_keys_with_answers:
for constraint_to_apply in constraints_to_apply["nodes"][qnode_key]:
constraint_id = constraint_to_apply[0]
constraint_value = constraint_to_apply[1]
log.debug(f"Applying qnode constraint {constraint_id} with value '{constraint_value}'")
# Handle FDA-approved drugs constraint TODO: check value later on? unclear what means..
if constraint_id == "biolink:highest_FDA_approval_status":
fda_approved_drug_ids = self._load_fda_approved_drug_ids()
answer_node_ids = set(overarching_kg.nodes_by_qg_id[qnode_key])
non_fda_approved_ids = answer_node_ids.difference(fda_approved_drug_ids)
log.info(f"Removing {len(non_fda_approved_ids)} nodes fulfilling {qnode_key} that are not "
f"FDA approved ({round((len(non_fda_approved_ids) / len(answer_node_ids)) * 100)}%)")
overarching_kg.remove_nodes(non_fda_approved_ids, qnode_key, query_graph)

# Do some pruning and apply kryptonite edges (only if we're not in KG2 mode)
if mode == "ARAX":
self._apply_any_kryptonite_edges(overarching_kg, message.query_graph,
Expand Down Expand Up @@ -857,6 +889,20 @@ def _set_and_validate_parameters(self, kp: Optional[str], input_parameters: Dict

return parameters

@staticmethod
def _load_fda_approved_drug_ids() -> Set[str]:
    """
    Load and return the contents of the locally stored FDA-approved drugs pickle.

    The pickle is expected at <path up to and including "RTX">/code/ARAX/KnowledgeSources/<file name>, where the
    "RTX" directory is found among the components of this file's resolved path and the file name is the last
    '/'-separated piece of the configured `fda_approved_drugs_path`.

    Returns whatever object the pickle holds (annotated as a set of node identifier strings).
    Raises ValueError if this file's resolved path has no "RTX" component.
    """
    separator = os.path.sep
    # Find the repo root by name within this file's own (symlink-resolved) path
    path_components = os.path.realpath(__file__).split(separator)
    root_position = path_components.index("RTX")
    config = RTXConfiguration()
    # Only the file name of the configured path is used; the local directory is fixed
    file_name = config.fda_approved_drugs_path.split('/')[-1]
    local_pickle_path = separator.join(path_components[:root_position + 1]
                                       + ['code', 'ARAX', 'KnowledgeSources', file_name])
    # NOTE(review): unpickling can execute arbitrary code; presumably this file is a trusted artifact - confirm
    with open(local_pickle_path, "rb") as pickle_file:
        return pickle.load(pickle_file)

@staticmethod
def _override_node_categories(kg: KnowledgeGraph, qg: QueryGraph):
# This method overrides KG nodes' types to match those requested in the QG, where possible (issue #987)
Expand Down
10 changes: 10 additions & 0 deletions code/ARAX/ARAXQuery/Expand/expand_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ def add_edge(self, edge_key: str, edge: Edge, qedge_key: str):
self.edges_by_qg_id[qedge_key] = dict()
self.edges_by_qg_id[qedge_key][edge_key] = edge

def remove_nodes(self, node_keys_to_delete: Set[str], qnode_key: str, qg: QueryGraph):
    """
    Delete the given nodes from the answers for one qnode, along with the edges that use them.

    :param node_keys_to_delete: Keys of the nodes to remove; each must currently be stored under qnode_key
                                (otherwise a KeyError is raised).
    :param qnode_key: Key of the query node whose answers are being trimmed.
    :param qg: Query graph, used to find the qedges that have qnode_key as their subject or object.

    Only edges stored under qedges touching qnode_key are examined; any such edge whose subject or object is
    one of the deleted nodes is removed as well.
    """
    for doomed_node_key in node_keys_to_delete:
        del self.nodes_by_qg_id[qnode_key][doomed_node_key]

    for qedge_key, qedge in qg.edges.items():
        # Skip qedges that don't touch this qnode or that have no answers recorded
        if qnode_key not in (qedge.subject, qedge.object):
            continue
        if qedge_key not in self.edges_by_qg_id:
            continue
        edges_for_qedge = self.edges_by_qg_id[qedge_key]
        # Collect first, then delete, so the dict isn't mutated while being iterated
        orphaned_edge_keys = [edge_key for edge_key, edge in edges_for_qedge.items()
                              if edge.subject in node_keys_to_delete or edge.object in node_keys_to_delete]
        for orphaned_edge_key in orphaned_edge_keys:
            del edges_for_qedge[orphaned_edge_key]

def get_all_node_keys_used_by_edges(self) -> Set[str]:
    """Return the keys of every node that is the subject or object of at least one edge in this KG."""
    used_node_keys = set()
    for edges_for_qedge in self.edges_by_qg_id.values():
        for edge in edges_for_qedge.values():
            used_node_keys.update((edge.subject, edge.object))
    return used_node_keys
Expand Down
162 changes: 140 additions & 22 deletions code/ARAX/test/test_ARAX_expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _run_query_and_do_standard_testing(actions: Optional[List[str]] = None, json
# Run the query
araxq = ARAXQuery()
assert actions or json_query # Must provide some sort of query to run
response = araxq.query({"operations": {"actions": actions}}) if actions else araxq.query(json_query)
response = araxq.query({"operations": {"actions": actions}}) if actions else araxq.query({"message": {"query_graph": json_query}})
message = araxq.message
if response.status != 'OK':
print(response.show(level=ARAXResponse.DEBUG))
Expand Down Expand Up @@ -846,27 +846,23 @@ def test_1516_single_quotes_in_ids():

def test_constraint_validation():
query = {
"message": {
"query_graph": {
"edges": {
"e00": {
"object": "n01",
"predicates": ["biolink:physically_interacts_with"],
"subject": "n00",
"constraints": [{"id": "test_edge_constraint_1", "name": "test name edge", "operator": "<", "value": 1.0},
{"id": "test_edge_constraint_2", "name": "test name edge", "operator": ">", "value": 0.5}]
}
},
"nodes": {
"n00": {
"categories": ["biolink:ChemicalEntity"],
"ids": ["CHEMBL.COMPOUND:CHEMBL112"]
},
"n01": {
"categories": ["biolink:Protein"],
"constraints": [{"id": "test_node_constraint", "name": "test name node", "operator": "<", "value": 1.0}]
}
}
"edges": {
"e00": {
"object": "n01",
"predicates": ["biolink:physically_interacts_with"],
"subject": "n00",
"constraints": [{"id": "test_edge_constraint_1", "name": "test name edge", "operator": "<", "value": 1.0},
{"id": "test_edge_constraint_2", "name": "test name edge", "operator": ">", "value": 0.5}]
}
},
"nodes": {
"n00": {
"categories": ["biolink:ChemicalEntity"],
"ids": ["CHEMBL.COMPOUND:CHEMBL112"]
},
"n01": {
"categories": ["biolink:Protein"],
"constraints": [{"id": "test_node_constraint", "name": "test name node", "operator": "<", "value": 1.0}]
}
}
}
Expand Down Expand Up @@ -1007,5 +1003,127 @@ def test_auto_pruning_two_hop():
assert len(nodes_by_qg_id["n1"]) <= 200


def test_fda_approved_query_workflow_a9_egfr_advanced_constrained():
    """
    Run the workflow A.9 EGFR 'advanced' query with the FDA-approval constraint on n0 and do standard testing.

    This test needs its own name: another test in this module is called
    `test_fda_approved_query_workflow_a9_egfr_advanced`, and sharing that name would leave only the later
    definition visible to pytest, so this one would never be collected.
    """
    query = {
        "nodes": {
            "n0": {
                "categories": [
                    "biolink:SmallMolecule"
                ],
                "constraints": [
                    {
                        "id": "biolink:highest_FDA_approval_status",
                        "name": "highest FDA approval status",
                        "operator": "==",
                        "value": "regular approval"
                    }
                ]
            },
            "n1": {
                "ids": [
                    "NCBIGene:1956"
                ]
            }
        },
        "edges": {
            "e0": {
                "subject": "n0",
                "object": "n1",
                "predicates": [
                    "biolink:decreases_abundance_of",
                    "biolink:decreases_activity_of",
                    "biolink:decreases_expression_of",
                    "biolink:decreases_synthesis_of",
                    "biolink:increases_degradation_of",
                    "biolink:disrupts",
                    "biolink:entity_negatively_regulates_entity"
                ]
            }
        }
    }
    # The helper's own checks are the test here; its returned node/edge dicts aren't inspected further
    _run_query_and_do_standard_testing(json_query=query)


def test_fda_approved_query_simple():
    """Run a one-hop 'treats' query whose unpinned qnode carries the FDA-approval constraint."""
    fda_approved_constraint = {
        "id": "biolink:highest_FDA_approval_status",
        "name": "highest FDA approval status",
        "operator": "==",
        "value": "regular approval"
    }
    # n0 is pinned to a single ID; n1 is the constrained qnode
    pinned_qnode = {"ids": ["MONDO:0000888"]}
    constrained_qnode = {
        "categories": ["biolink:ChemicalEntity"],
        "constraints": [fda_approved_constraint]
    }
    treats_qedge = {
        "subject": "n1",
        "object": "n0",
        "predicates": ["biolink:treats"]
    }
    query = {
        "nodes": {"n0": pinned_qnode, "n1": constrained_qnode},
        "edges": {"e0": treats_qedge}
    }
    nodes_by_qg_id, edges_by_qg_id = _run_query_and_do_standard_testing(json_query=query)


@pytest.mark.slow
def test_fda_approved_query_workflow_a9_egfr_advanced():
    """
    Check that adding the FDA-approval constraint to n0 shrinks the set of n0 answers.

    The same query is run twice - once without constraints and once with the
    `biolink:highest_FDA_approval_status` constraint on n0 - and the constrained run must return strictly
    fewer n0 nodes than the unconstrained run.
    """
    import copy  # Local import so this test doesn't rely on the module's top-level import list

    query_unconstrained = {
        "nodes": {
            "n0": {
                "categories": [
                    "biolink:SmallMolecule"
                ]
            },
            "n1": {
                "ids": [
                    "NCBIGene:1956"
                ]
            }
        },
        "edges": {
            "e0": {
                "subject": "n0",
                "object": "n1",
                "predicates": [
                    "biolink:decreases_abundance_of",
                    "biolink:decreases_activity_of",
                    "biolink:decreases_expression_of",
                    "biolink:decreases_synthesis_of",
                    "biolink:increases_degradation_of",
                    "biolink:disrupts",
                    "biolink:entity_negatively_regulates_entity"
                ]
            }
        }
    }
    nodes_by_qg_id_unconstrained, _ = _run_query_and_do_standard_testing(json_query=query_unconstrained)

    # Work on a copy so that adding the constraint doesn't also modify the unconstrained query
    query_constrained = copy.deepcopy(query_unconstrained)
    fda_approved_constraint = {
        "id": "biolink:highest_FDA_approval_status",
        "name": "highest FDA approval status",
        "operator": "==",
        "value": "regular approval"
    }
    query_constrained["nodes"]["n0"]["constraints"] = [fda_approved_constraint]
    nodes_by_qg_id_constrained, _ = _run_query_and_do_standard_testing(json_query=query_constrained)

    assert len(nodes_by_qg_id_constrained["n0"]) < len(nodes_by_qg_id_unconstrained["n0"])


# Allow running this test module directly as a script (verbose output)
if __name__ == "__main__":
    pytest.main(
        [
            '-v',
            'test_ARAX_expand.py',
        ]
    )

0 comments on commit 8f7f7d6

Please sign in to comment.