This repository has been archived by the owner on Nov 18, 2023. It is now read-only.

Improve clarity of diagnosis example #117

Merged
merged 3 commits on Dec 16, 2019
2 changes: 1 addition & 1 deletion kglib/kgcn/README.md
@@ -1,6 +1,6 @@
# Knowledge Graph Convolutional Networks

This project introduces a novel model: the *Knowledge Graph Convolutional Network* (KGCN). This work is in its second major iteration since inception.
This project introduces a novel model: the *Knowledge Graph Convolutional Network* (KGCN).

### Getting Started - Running the Machine Learning Pipeline

1 change: 1 addition & 0 deletions kglib/kgcn/examples/diagnosis/BUILD
@@ -26,6 +26,7 @@ py_library(
"//kglib/kgcn/plot",
"//kglib/kgcn/models",
"//kglib/utils/grakn/synthetic",
"//kglib/utils/grakn/type",
"@graknlabs_client_python//:client_python",
],
visibility=['//visibility:public']
138 changes: 68 additions & 70 deletions kglib/kgcn/examples/diagnosis/diagnosis.py
@@ -25,16 +25,58 @@

from kglib.kgcn.pipeline.pipeline import pipeline
from kglib.utils.grakn.synthetic.examples.diagnosis.generate import generate_example_graphs
from kglib.utils.grakn.type.type import get_thing_types, get_role_types
from kglib.utils.graph.iterate import multidigraph_data_iterator
from kglib.utils.graph.query.query_graph import QueryGraph
from kglib.utils.graph.thing.queries_to_graph import build_graph_from_queries

KEYSPACE = "diagnosis"
URI = "localhost:48555"

# Pre-existing elements are those already present in the graph; they should be predicted to continue to exist
PREEXISTS = dict(solution=0)

# Candidates are present neither in the input nor in the solution; they are negative samples
CANDIDATE = dict(solution=1)

# Elements to infer are the graph elements whose existence we want to predict to be true; they are positive samples
TO_INFER = dict(solution=2)

# Categorical Attribute types and the values of their categories
CATEGORICAL_ATTRIBUTES = {'name': ['Diabetes Type II', 'Multiple Sclerosis', 'Blurred vision', 'Fatigue', 'Cigarettes',
'Alcohol']}
# Continuous Attribute types and their min and max values
CONTINUOUS_ATTRIBUTES = {'severity': (0, 1), 'age': (7, 80), 'units-per-week': (3, 29)}

TYPES_TO_IGNORE = ['candidate-diagnosis', 'example-id', 'probability-exists', 'probability-non-exists', 'probability-preexists']
ROLES_TO_IGNORE = ['candidate-patient', 'candidate-diagnosed-disease']

# The learner should see candidate relations the same way as the ground truth relations, so adjust these candidates
# to look like their ground truth counterparts
TYPES_AND_ROLES_TO_OBFUSCATE = {'candidate-diagnosis': 'diagnosis',
'candidate-patient': 'patient',
'candidate-diagnosed-disease': 'diagnosed-disease'}


def diagnosis_example(num_graphs=200,
num_processing_steps_tr=5,
num_processing_steps_ge=5,
num_training_iterations=1000,
keyspace="diagnosis", uri="localhost:48555"):
keyspace=KEYSPACE, uri=URI):
"""
Run the diagnosis example from start to finish, including traceably ingesting predictions back into Grakn

Args:
num_graphs: Number of graphs to use for training and testing combined
num_processing_steps_tr: The number of message-passing steps for training
num_processing_steps_ge: The number of message-passing steps for testing
num_training_iterations: The number of training epochs
keyspace: The name of the keyspace to retrieve example subgraphs from
uri: The uri of the running Grakn instance

Returns:
Final accuracies for training and for testing
"""

tr_ge_split = int(num_graphs*0.5)

@@ -48,7 +90,10 @@ def diagnosis_example(num_graphs=200,
with session.transaction().read() as tx:
# Change the terminology here onwards from thing -> node and role -> edge
node_types = get_thing_types(tx)
[node_types.remove(el) for el in TYPES_TO_IGNORE]

edge_types = get_role_types(tx)
[edge_types.remove(el) for el in ROLES_TO_IGNORE]
print(f'Found node types: {node_types}')
print(f'Found edge types: {edge_types}')

@@ -72,12 +117,17 @@
return solveds_tr, solveds_ge


CATEGORICAL_ATTRIBUTES = {'name': ['Diabetes Type II', 'Multiple Sclerosis', 'Blurred vision', 'Fatigue', 'Cigarettes',
'Alcohol']}
CONTINUOUS_ATTRIBUTES = {'severity': (0, 1), 'age': (7, 80), 'units-per-week': (3, 29)}
def create_concept_graphs(example_indices, grakn_session):
"""
Builds an in-memory graph for each example, with an example_id as an anchor for each example subgraph.
Args:
example_indices: The values used to anchor the subgraph queries within the entire knowledge graph
grakn_session: Grakn Session

Returns:
In-memory graphs of Grakn subgraphs
"""

def create_concept_graphs(example_indices, grakn_session):
graphs = []
infer = True

@@ -90,37 +140,28 @@ def create_concept_graphs(example_indices, grakn_session):

# Remove label leakage - change type labels that indicate candidates into non-candidates
for data in multidigraph_data_iterator(graph):
typ = data['type']
if typ == 'candidate-diagnosis':
data.update(type='diagnosis')
elif typ == 'candidate-patient':
data.update(type='patient')
elif typ == 'candidate-diagnosed-disease':
data.update(type='diagnosed-disease')
for label_to_obfuscate, with_label in TYPES_AND_ROLES_TO_OBFUSCATE.items():
if data['type'] == label_to_obfuscate:
data.update(type=with_label)
break

graph.name = example_id
graphs.append(graph)

return graphs


# Existing elements in the graph are those that pre-exist in the graph, and should be predicted to continue to exist
PREEXISTS = dict(solution=0)

# Candidates are neither present in the input nor in the solution, they are negative samples
CANDIDATE = dict(solution=1)

# Elements to infer are the graph elements whose existence we want to predict to be true, they are positive samples
TO_INFER = dict(solution=2)


def get_query_handles(example_id):
"""
1. Supply a query
2. Supply a `QueryGraph` object to represent that query. That itself is a subclass of a networkx graph
3. Execute the query
4. Make a graph of the query results by taking the variables you got back and arranging the concepts as they are in the `QueryGraph`. This gives one graph for each result, for each query.
5. Combine all of these graphs into one single graph, and that’s your example subgraph
Creates an iterable, each element containing a Graql query, a function to sample the answers, and a QueryGraph
object which must be the Grakn graph representation of the query. This tuple is termed a "query_handle".

Args:
example_id: A uniquely identifiable attribute value used to anchor the results of the queries to a specific
subgraph

Returns:
query handles
"""

# === Hereditary Feature ===
@@ -165,7 +206,6 @@ def get_query_handles(example_id):
$p isa person, has example-id {example_id}, has age $a;
get;''')


vars = p, a = 'p', 'a'
g = QueryGraph()
g.add_vars(*vars, **PREEXISTS)
@@ -248,48 +288,6 @@ def get_query_handles(example_id):
]


def get_thing_types(tx):
"""
Get all schema types, excluding those for implicit attribute relations, base types, and candidate types
Args:
tx: Grakn transaction

Returns:
Grakn types
"""
schema_concepts = tx.query(
"match $x sub thing; "
"not {$x sub @has-attribute;}; "
"not {$x sub @key-attribute;}; "
"get;")
thing_types = [schema_concept.get('x').label() for schema_concept in schema_concepts]
[thing_types.remove(el) for el in
['thing', 'relation', 'entity', 'attribute', 'candidate-diagnosis', 'example-id', 'probability-exists',
'probability-non-exists', 'probability-preexists']]
return thing_types


def get_role_types(tx):
"""
Get all schema roles, excluding those for implicit attribute relations, the base role type, and candidate roles
Args:
tx: Grakn transaction

Returns:
Grakn roles
"""
schema_concepts = tx.query(
"match $x sub role; "
"not{$x sub @key-attribute-value;}; "
"not{$x sub @key-attribute-owner;}; "
"not{$x sub @has-attribute-value;}; "
"not{$x sub @has-attribute-owner;};"
"get;")
role_types = ['has'] + [role.get('x').label() for role in schema_concepts]
[role_types.remove(el) for el in ['role', 'candidate-patient', 'candidate-diagnosed-disease']]
return role_types


def write_predictions_to_grakn(graphs, tx):
"""
Take predictions from the ML model, and insert representations of those predictions back into the graph.
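For reference, a minimal usage sketch (not part of this PR) showing how the refactored `diagnosis_example` would be invoked with the module-level defaults introduced above. It assumes a Grakn server is reachable at `localhost:48555` with the synthetic `diagnosis` keyspace available.

```python
from kglib.kgcn.examples.diagnosis.diagnosis import diagnosis_example

# Argument meanings follow the docstring added in this PR.
solveds_tr, solveds_ge = diagnosis_example(
    num_graphs=200,                # graphs used for training and testing combined
    num_processing_steps_tr=5,     # message-passing steps during training
    num_processing_steps_ge=5,     # message-passing steps during testing
    num_training_iterations=1000,  # training epochs
)
print(solveds_tr, solveds_ge)      # final training and test accuracies
```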
7 changes: 5 additions & 2 deletions kglib/kgcn/pipeline/utils.py
@@ -23,8 +23,11 @@ def duplicate_edges_in_reverse(graph):
Takes in a directed multigraph, and creates duplicates of all edges, the duplicates having reversed direction to
the originals. This is useful since directed edges constrain the direction of messages passed. We want to permit
omni-directional message passing.
:param graph: The graph
:return: The graph with duplicated edges, reversed, with all original edge properties attached to the duplicates
Args:
graph: The graph

Returns:
The graph with duplicated edges, reversed, with all original edge properties attached to the duplicates
"""
for sender, receiver, keys, data in graph.edges(data=True, keys=True):
graph.add_edge(receiver, sender, keys, **data)
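To make the omni-directional message passing concrete, here is a small self-contained sketch of the same idea on a networkx `MultiDiGraph`. Unlike the function above, this sketch snapshots the edge list before mutating the graph, so it is an illustrative equivalent rather than the implementation in this file.

```python
import networkx as nx

def duplicate_edges_in_reverse_sketch(graph):
    # Snapshot the edges first so the graph is not mutated while iterating over it
    for sender, receiver, key, data in list(graph.edges(data=True, keys=True)):
        graph.add_edge(receiver, sender, key, **data)
    return graph

g = nx.MultiDiGraph()
g.add_edge('person', 'diagnosis', type='patient')
duplicate_edges_in_reverse_sketch(g)
print(list(g.edges(data=True)))
# [('person', 'diagnosis', {'type': 'patient'}),
#  ('diagnosis', 'person', {'type': 'patient'})]
```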
1 change: 1 addition & 0 deletions kglib/utils/grakn/BUILD
@@ -8,6 +8,7 @@ py_library(
'//kglib/utils/grakn/test',
'//kglib/utils/grakn/object',
'//kglib/utils/grakn/synthetic',
'//kglib/utils/grakn/type',
],
visibility=['//visibility:public']
)
11 changes: 11 additions & 0 deletions kglib/utils/grakn/type/BUILD
@@ -0,0 +1,11 @@
load("@io_bazel_rules_python//python:python.bzl", "py_library")
load("@pypi_dependencies//:requirements.bzl", "requirement")


py_library(
name = "type",
srcs = [
'type.py',
],
visibility=['//visibility:public']
)
58 changes: 58 additions & 0 deletions kglib/utils/grakn/type/type.py
@@ -0,0 +1,58 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#


def get_thing_types(tx):
"""
Get all schema types, excluding those for implicit attribute relations and base types
Args:
tx: Grakn transaction

Returns:
Grakn types
"""
schema_concepts = tx.query(
"match $x sub thing; "
"not {$x sub @has-attribute;}; "
"not {$x sub @key-attribute;}; "
"get;")
thing_types = [schema_concept.get('x').label() for schema_concept in schema_concepts]
[thing_types.remove(el) for el in ['thing', 'relation', 'entity', 'attribute']]
return thing_types


def get_role_types(tx):
"""
Get all schema roles, excluding those for implicit attribute relations and the base role type
Args:
tx: Grakn transaction

Returns:
Grakn roles
"""
schema_concepts = tx.query(
"match $x sub role; "
"not{$x sub @key-attribute-value;}; "
"not{$x sub @key-attribute-owner;}; "
"not{$x sub @has-attribute-value;}; "
"not{$x sub @has-attribute-owner;};"
"get;")
role_types = ['has'] + [role.get('x').label() for role in schema_concepts]
role_types.remove('role')
return role_types
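A hedged usage sketch of the new helpers, mirroring how `diagnosis.py` calls them elsewhere in this PR. The client connection pattern follows the Grakn Python client style already used in the example; the list-comprehension filter is a minor variation on the `remove()` calls in `diagnosis.py`, with the same effect.

```python
from grakn.client import GraknClient
from kglib.utils.grakn.type.type import get_thing_types, get_role_types

TYPES_TO_IGNORE = ['candidate-diagnosis', 'example-id', 'probability-exists',
                   'probability-non-exists', 'probability-preexists']
ROLES_TO_IGNORE = ['candidate-patient', 'candidate-diagnosed-disease']

with GraknClient(uri="localhost:48555") as client:
    with client.session(keyspace="diagnosis") as session:
        with session.transaction().read() as tx:
            # Drop the example-specific labels so the learner never sees them
            node_types = [t for t in get_thing_types(tx) if t not in TYPES_TO_IGNORE]
            edge_types = [r for r in get_role_types(tx) if r not in ROLES_TO_IGNORE]
            print(f'Found node types: {node_types}')
            print(f'Found edge types: {edge_types}')
```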
45 changes: 31 additions & 14 deletions kglib/utils/graph/thing/queries_to_graph.py
@@ -29,18 +29,26 @@ def concept_dict_from_concept_map(concept_map):
"""
Given a concept map, build a dictionary of the variables present and the concepts they refer to, locally storing any
information required about those concepts.
:param concept_map: A dict of Concepts provided by Grakn keyed by query variables
:return: A dictionary of concepts keyed by query variables

Args:
concept_map: A dict of Concepts provided by Grakn keyed by query variables

Returns:
A dictionary of concepts keyed by query variables
"""
return {variable: build_thing(grakn_concept) for variable, grakn_concept in concept_map.map().items()}


def combine_2_graphs(graph1, graph2):
"""
Combine two graphs into one. Do this by recognising common nodes between the two.
:param graph1: Graph to compare
:param graph2: Graph to compare
:return: Combined graph

Args:
graph1: Graph to compare
graph2: Graph to compare

Returns:
Combined graph
"""

for node, data in graph1.nodes(data=True):
@@ -67,8 +75,12 @@ def combine_2_graphs(graph1, graph2):
def combine_n_graphs(graphs_list):
"""
Combine N graphs into one. Do this by recognising common nodes between them.
:param graphs_list: List of graphs to combine
:return: Combined graph

Args:
graphs_list: List of graphs to combine

Returns:
Combined graph
"""
return reduce(lambda x, y: combine_2_graphs(x, y), graphs_list)

@@ -78,14 +90,19 @@ def build_graph_from_queries(query_sampler_variable_graph_tuples, grakn_transact
"""
Builds a graph of Things, interconnected by roles (and *has*), from a set of queries and graphs representing those
queries (variable graphs), over a Grakn transaction
:param infer:
:param query_sampler_variable_graph_tuples: A list of tuples, each tuple containing a query, a sampling function,
and a variable_graph
:param grakn_transaction: A Grakn transaction
:param concept_dict_converter: The function to use to convert from concept_dicts to a Grakn model. This could be
a typical model or a mathematical model
:return: A networkx graph

Args:
infer: whether to use Grakn's inference engine
query_sampler_variable_graph_tuples: A list of tuples, each tuple containing a query, a sampling function,
and a variable_graph
grakn_transaction: A Grakn transaction
concept_dict_converter: The function to use to convert from concept_dicts to a Grakn model. This could be
a typical model or a mathematical model

Returns:
A networkx graph
"""

query_concept_graphs = []

for query, sampler, variable_graph in query_sampler_variable_graph_tuples:
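To make the `query_sampler_variable_graph_tuples` argument concrete, here is a sketch of a single "query handle" in the shape `get_query_handles` in `diagnosis.py` produces: a Graql query, a function to sample its answers, and a `QueryGraph` relating the query variables. The identity sampler and the example-id value are illustrative assumptions, and the edge-building calls on the `QueryGraph` are elided.

```python
from kglib.utils.graph.query.query_graph import QueryGraph

PREEXISTS = dict(solution=0)  # pre-existing elements, as labelled in diagnosis.py
example_id = 0                # assumed anchor value for one example subgraph

age_query = f'''match
       $p isa person, has example-id {example_id}, has age $a;
       get;'''

g = QueryGraph()
g.add_vars('p', 'a', **PREEXISTS)
# Edges between the variables (e.g. the person--age ownership) are added via the
# QueryGraph API in the real example; they are omitted here for brevity.

def identity_sampler(answers):
    return answers  # keep every answer; a real sampler may sub-sample

query_sampler_variable_graph_tuples = [(age_query, identity_sampler, g)]
# Together with a Grakn read transaction, these tuples are what
# build_graph_from_queries consumes to produce one combined networkx graph.
```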