diff --git a/kglib/kgcn/README.md b/kglib/kgcn/README.md
index 6c9da782..02251cea 100644
--- a/kglib/kgcn/README.md
+++ b/kglib/kgcn/README.md
@@ -1,6 +1,6 @@
 # Knowledge Graph Convolutional Networks

-This project introduces a novel model: the *Knowledge Graph Convolutional Network* (KGCN). This work is in its second major iteration since inception.
+This project introduces a novel model: the *Knowledge Graph Convolutional Network* (KGCN).

 ### Getting Started - Running the Machine Learning Pipeline

diff --git a/kglib/kgcn/examples/diagnosis/BUILD b/kglib/kgcn/examples/diagnosis/BUILD
index 20c32227..4a075430 100644
--- a/kglib/kgcn/examples/diagnosis/BUILD
+++ b/kglib/kgcn/examples/diagnosis/BUILD
@@ -26,6 +26,7 @@ py_library(
         "//kglib/kgcn/plot",
         "//kglib/kgcn/models",
         "//kglib/utils/grakn/synthetic",
+        "//kglib/utils/grakn/type",
         "@graknlabs_client_python//:client_python",
     ],
     visibility=['//visibility:public']
diff --git a/kglib/kgcn/examples/diagnosis/diagnosis.py b/kglib/kgcn/examples/diagnosis/diagnosis.py
index 6ff8ab0c..a5b08eae 100644
--- a/kglib/kgcn/examples/diagnosis/diagnosis.py
+++ b/kglib/kgcn/examples/diagnosis/diagnosis.py
@@ -25,16 +25,58 @@ from kglib.kgcn.pipeline.pipeline import pipeline
 from kglib.utils.grakn.synthetic.examples.diagnosis.generate import generate_example_graphs
+from kglib.utils.grakn.type.type import get_thing_types, get_role_types
 from kglib.utils.graph.iterate import multidigraph_data_iterator
 from kglib.utils.graph.query.query_graph import QueryGraph
 from kglib.utils.graph.thing.queries_to_graph import build_graph_from_queries

+KEYSPACE = "diagnosis"
+URI = "localhost:48555"
+
+# Existing elements are those that pre-exist in the graph; they should be predicted to continue to exist
+PREEXISTS = dict(solution=0)
+
+# Candidates are neither present in the input nor in the solution; they are negative samples
+CANDIDATE = dict(solution=1)
+
+# Elements to infer are the graph elements whose existence we want to predict to be true; they are positive samples
+TO_INFER = dict(solution=2)
+
+# Categorical Attribute types and the values of their categories
+CATEGORICAL_ATTRIBUTES = {'name': ['Diabetes Type II', 'Multiple Sclerosis', 'Blurred vision', 'Fatigue', 'Cigarettes',
+                                   'Alcohol']}
+# Continuous Attribute types and their min and max values
+CONTINUOUS_ATTRIBUTES = {'severity': (0, 1), 'age': (7, 80), 'units-per-week': (3, 29)}
+
+TYPES_TO_IGNORE = ['candidate-diagnosis', 'example-id', 'probability-exists', 'probability-non-exists', 'probability-preexists']
+ROLES_TO_IGNORE = ['candidate-patient', 'candidate-diagnosed-disease']
+
+# The learner should treat candidate relations the same as the ground-truth relations, so adjust these candidates to
+# look like their ground-truth counterparts
+TYPES_AND_ROLES_TO_OBFUSCATE = {'candidate-diagnosis': 'diagnosis',
+                                'candidate-patient': 'patient',
+                                'candidate-diagnosed-disease': 'diagnosed-disease'}
+

 def diagnosis_example(num_graphs=200,
                       num_processing_steps_tr=5,
                       num_processing_steps_ge=5,
                       num_training_iterations=1000,
-                      keyspace="diagnosis", uri="localhost:48555"):
+                      keyspace=KEYSPACE, uri=URI):
+    """
+    Run the diagnosis example from start to finish, including traceably ingesting predictions back into Grakn
+
+    Args:
+        num_graphs: Number of graphs to use for training and testing combined
+        num_processing_steps_tr: The number of message-passing steps for training
+        num_processing_steps_ge: The number of message-passing steps for testing
+        num_training_iterations: The number of training epochs
+        keyspace: The name of the keyspace to retrieve example subgraphs from
+        uri: The URI of the running Grakn instance
+
+    Returns:
+        Final accuracies for training and for testing
+    """

     tr_ge_split = int(num_graphs*0.5)

@@ -48,7 +90,10 @@ def diagnosis_example(num_graphs=200,
     with session.transaction().read() as tx:
         # Change the terminology here onwards from thing -> node and role -> edge
         node_types = get_thing_types(tx)
+        node_types = [t for t in node_types if t not in TYPES_TO_IGNORE]
+
         edge_types = get_role_types(tx)
+        edge_types = [t for t in edge_types if t not in ROLES_TO_IGNORE]
         print(f'Found node types: {node_types}')
         print(f'Found edge types: {edge_types}')

@@ -72,12 +117,17 @@ def diagnosis_example(num_graphs=200,

     return solveds_tr, solveds_ge


-CATEGORICAL_ATTRIBUTES = {'name': ['Diabetes Type II', 'Multiple Sclerosis', 'Blurred vision', 'Fatigue', 'Cigarettes',
-                                   'Alcohol']}
-CONTINUOUS_ATTRIBUTES = {'severity': (0, 1), 'age': (7, 80), 'units-per-week': (3, 29)}
+def create_concept_graphs(example_indices, grakn_session):
+    """
+    Builds an in-memory graph for each example, with an example_id as an anchor for each example subgraph.
+    Args:
+        example_indices: The values used to anchor the subgraph queries within the entire knowledge graph
+        grakn_session: Grakn Session
+    Returns:
+        In-memory graphs of Grakn subgraphs
+    """

-def create_concept_graphs(example_indices, grakn_session):
     graphs = []
     infer = True
@@ -90,13 +140,10 @@ def create_concept_graphs(example_indices, grakn_session):

         # Remove label leakage - change type labels that indicate candidates into non-candidates
         for data in multidigraph_data_iterator(graph):
-            typ = data['type']
-            if typ == 'candidate-diagnosis':
-                data.update(type='diagnosis')
-            elif typ == 'candidate-patient':
-                data.update(type='patient')
-            elif typ == 'candidate-diagnosed-disease':
-                data.update(type='diagnosed-disease')
+            for label_to_obfuscate, with_label in TYPES_AND_ROLES_TO_OBFUSCATE.items():
+                if data['type'] == label_to_obfuscate:
+                    data.update(type=with_label)
+                    break

         graph.name = example_id
         graphs.append(graph)
@@ -104,23 +151,17 @@
     return graphs


-# Existing elements in the graph are those that pre-exist in the graph, and should be predicted to continue to exist
-PREEXISTS = dict(solution=0)
-
-# Candidates are neither present in the input nor in the solution, they are negative samples
-CANDIDATE = dict(solution=1)
-
-# Elements to infer are the graph elements whose existence we want to predict to be true, they are positive samples
-TO_INFER = dict(solution=2)
-
-
 def get_query_handles(example_id):
     """
-    1. Supply a query
-    2. Supply a `QueryGraph` object to represent that query. That itself is a subclass of a networkx graph
-    3. Execute the query
-    4. Make a graph of the query results by taking the variables you got back and arranging the concepts as they are in the `QueryGraph`. This gives one graph for each result, for each query.
-    5. Combine all of these graphs into one single graph, and that’s your example subgraph
+    Creates an iterable, each element containing a Graql query, a function to sample the answers, and a QueryGraph
+    object which must be the Grakn graph representation of the query. This tuple is termed a "query_handle"
+
+    Args:
+        example_id: A uniquely identifiable attribute value used to anchor the results of the queries to a specific
+        subgraph
+
+    Returns:
+        query handles
     """

     # === Hereditary Feature ===
@@ -165,7 +206,6 @@
            $p isa person, has example-id {example_id}, has age $a;
            get;''')
-
     vars = p, a = 'p', 'a'
     g = QueryGraph()
     g.add_vars(*vars, **PREEXISTS)
@@ -248,48 +288,6 @@
     ]


-def get_thing_types(tx):
-    """
-    Get all schema types, excluding those for implicit attribute relations, base types, and candidate types
-    Args:
-        tx: Grakn transaction
-
-    Returns:
-        Grakn types
-    """
-    schema_concepts = tx.query(
-        "match $x sub thing; "
-        "not {$x sub @has-attribute;}; "
-        "not {$x sub @key-attribute;}; "
-        "get;")
-    thing_types = [schema_concept.get('x').label() for schema_concept in schema_concepts]
-    [thing_types.remove(el) for el in
-     ['thing', 'relation', 'entity', 'attribute', 'candidate-diagnosis', 'example-id', 'probability-exists',
-      'probability-non-exists', 'probability-preexists']]
-    return thing_types
-
-
-def get_role_types(tx):
-    """
-    Get all schema roles, excluding those for implicit attribute relations, the base role type, and candidate roles
-    Args:
-        tx: Grakn transaction
-
-    Returns:
-        Grakn roles
-    """
-    schema_concepts = tx.query(
-        "match $x sub role; "
-        "not{$x sub @key-attribute-value;}; "
-        "not{$x sub @key-attribute-owner;}; "
-        "not{$x sub @has-attribute-value;}; "
-        "not{$x sub @has-attribute-owner;};"
-        "get;")
-    role_types = ['has'] + [role.get('x').label() for role in schema_concepts]
-    [role_types.remove(el) for el in ['role', 'candidate-patient', 'candidate-diagnosed-disease']]
-    return role_types
-
-
 def write_predictions_to_grakn(graphs, tx):
     """
     Take predictions from the ML model, and insert representations of those predictions back into the graph.
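The candidate-obfuscation step above now drives the relabelling from `TYPES_AND_ROLES_TO_OBFUSCATE` rather than a hard-coded if/elif chain. A minimal sketch of the same pattern on a toy networkx graph; the graph contents here are made up, and a hand-rolled chain of node and edge data stands in for kglib's `multidigraph_data_iterator`:

```python
# Minimal, self-contained sketch of the dict-driven obfuscation step, using a
# toy networkx MultiDiGraph instead of a real Grakn-derived subgraph.
from itertools import chain

import networkx as nx

TYPES_AND_ROLES_TO_OBFUSCATE = {'candidate-diagnosis': 'diagnosis',
                                'candidate-patient': 'patient',
                                'candidate-diagnosed-disease': 'diagnosed-disease'}

graph = nx.MultiDiGraph()
graph.add_node('p', type='person')
graph.add_node('d', type='disease')
graph.add_node('cd', type='candidate-diagnosis')
graph.add_edge('cd', 'p', type='candidate-patient')
graph.add_edge('cd', 'd', type='candidate-diagnosed-disease')

# Chain the node and edge attribute dicts, as multidigraph_data_iterator does in kglib
node_data = (data for _, data in graph.nodes(data=True))
edge_data = (data for _, _, data in graph.edges(data=True))

for data in chain(node_data, edge_data):
    for label_to_obfuscate, with_label in TYPES_AND_ROLES_TO_OBFUSCATE.items():
        if data['type'] == label_to_obfuscate:
            data.update(type=with_label)
            break

print([d['type'] for _, d in graph.nodes(data=True)])     # ['person', 'disease', 'diagnosis']
print([d['type'] for _, _, d in graph.edges(data=True)])  # ['patient', 'diagnosed-disease']
```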
diff --git a/kglib/kgcn/pipeline/utils.py b/kglib/kgcn/pipeline/utils.py
index cc12d61b..50bd1f71 100644
--- a/kglib/kgcn/pipeline/utils.py
+++ b/kglib/kgcn/pipeline/utils.py
@@ -23,8 +23,11 @@ def duplicate_edges_in_reverse(graph):
     Takes in a directed multi graph, and creates duplicates of all edges, the duplicates having reversed direction to
     the originals. This is useful since directed edges constrain the direction of messages passed. We want to permit
     omni-directional message passing.
-    :param graph: The graph
-    :return: The graph with duplicated edges, reversed, with all original edge properties attached to the duplicates
+
+    Args:
+        graph: The graph
+
+    Returns:
+        The graph with duplicated edges, reversed, with all original edge properties attached to the duplicates
     """
     for sender, receiver, keys, data in graph.edges(data=True, keys=True):
         graph.add_edge(receiver, sender, keys, **data)
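For reference, a self-contained sketch of what `duplicate_edges_in_reverse` achieves on a toy `networkx.MultiDiGraph`. Snapshotting the edge view with `list(...)` is a conservative choice made for this sketch only, not a claim about the library code above:

```python
# Standalone sketch of reversed-edge duplication on a toy MultiDiGraph.
# The edge view is snapshotted with list(...) so the reversed edges added
# inside the loop are not themselves iterated over.
import networkx as nx

graph = nx.MultiDiGraph()
graph.add_edge('x', 'y', type='has')
graph.add_edge('y', 'z', type='patient')

for sender, receiver, key, data in list(graph.edges(data=True, keys=True)):
    graph.add_edge(receiver, sender, key, **data)

# The graph now holds x->y and y->z plus the reversed twins y->x and z->y,
# each carrying a copy of the original edge's properties, so messages can
# flow in both directions along every edge.
print(graph.number_of_edges())  # 4
```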
diff --git a/kglib/utils/grakn/BUILD b/kglib/utils/grakn/BUILD
index c4d521fe..d6127913 100644
--- a/kglib/utils/grakn/BUILD
+++ b/kglib/utils/grakn/BUILD
@@ -8,6 +8,7 @@ py_library(
         '//kglib/utils/grakn/test',
         '//kglib/utils/grakn/object',
         '//kglib/utils/grakn/synthetic',
+        '//kglib/utils/grakn/type',
     ],
     visibility=['//visibility:public']
 )
\ No newline at end of file
diff --git a/kglib/utils/grakn/type/BUILD b/kglib/utils/grakn/type/BUILD
new file mode 100644
index 00000000..8290e905
--- /dev/null
+++ b/kglib/utils/grakn/type/BUILD
@@ -0,0 +1,11 @@
+load("@io_bazel_rules_python//python:python.bzl", "py_library")
+load("@pypi_dependencies//:requirements.bzl", "requirement")
+
+
+py_library(
+    name = "type",
+    srcs = [
+        'type.py',
+    ],
+    visibility=['//visibility:public']
+)
\ No newline at end of file
diff --git a/kglib/utils/grakn/type/type.py b/kglib/utils/grakn/type/type.py
new file mode 100644
index 00000000..0a5a6767
--- /dev/null
+++ b/kglib/utils/grakn/type/type.py
@@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+
+def get_thing_types(tx):
+    """
+    Get all schema types, excluding those for implicit attribute relations and base types
+    Args:
+        tx: Grakn transaction
+
+    Returns:
+        Grakn types
+    """
+    schema_concepts = tx.query(
+        "match $x sub thing; "
+        "not {$x sub @has-attribute;}; "
+        "not {$x sub @key-attribute;}; "
+        "get;")
+    thing_types = [schema_concept.get('x').label() for schema_concept in schema_concepts]
+    thing_types = [t for t in thing_types if t not in ('thing', 'relation', 'entity', 'attribute')]
+    return thing_types
+
+
+def get_role_types(tx):
+    """
+    Get all schema roles, excluding those for implicit attribute relations and the base role type
+    Args:
+        tx: Grakn transaction
+
+    Returns:
+        Grakn roles
+    """
+    schema_concepts = tx.query(
+        "match $x sub role; "
+        "not{$x sub @key-attribute-value;}; "
+        "not{$x sub @key-attribute-owner;}; "
+        "not{$x sub @has-attribute-value;}; "
+        "not{$x sub @has-attribute-owner;};"
+        "get;")
+    role_types = ['has'] + [role.get('x').label() for role in schema_concepts]
+    role_types.remove('role')
+    return role_types
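A possible usage sketch for the extracted helpers, assuming the same grakn-client Python API used elsewhere in this example (`GraknClient`, `session`, read transaction). The example-specific exclusions now live with the caller (see `TYPES_TO_IGNORE` and `ROLES_TO_IGNORE` in diagnosis.py) rather than inside these shared utilities:

```python
# Hypothetical usage of the new type helpers; connection details mirror the
# KEYSPACE and URI constants defined in diagnosis.py.
from grakn.client import GraknClient

from kglib.utils.grakn.type.type import get_thing_types, get_role_types

TYPES_TO_IGNORE = ['candidate-diagnosis', 'example-id', 'probability-exists',
                   'probability-non-exists', 'probability-preexists']
ROLES_TO_IGNORE = ['candidate-patient', 'candidate-diagnosed-disease']

client = GraknClient(uri="localhost:48555")
session = client.session(keyspace="diagnosis")

with session.transaction().read() as tx:
    # The helpers return every schema label; filtering out example-specific
    # labels is now the caller's responsibility, as diagnosis.py does above.
    node_types = [t for t in get_thing_types(tx) if t not in TYPES_TO_IGNORE]
    edge_types = [t for t in get_role_types(tx) if t not in ROLES_TO_IGNORE]
    print(f'Found node types: {node_types}')
    print(f'Found edge types: {edge_types}')

session.close()
client.close()
```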
diff --git a/kglib/utils/graph/thing/queries_to_graph.py b/kglib/utils/graph/thing/queries_to_graph.py
index 3ecca17e..ff848329 100644
--- a/kglib/utils/graph/thing/queries_to_graph.py
+++ b/kglib/utils/graph/thing/queries_to_graph.py
@@ -29,8 +29,12 @@ def concept_dict_from_concept_map(concept_map):
     """
     Given a concept map, build a dictionary of the variables present and the concepts they refer to, locally storing
     any information required about those concepts.
-    :param concept_map: A dict of Concepts provided by Grakn keyed by query variables
-    :return: A dictionary of concepts keyed by query variables
+
+    Args:
+        concept_map: A dict of Concepts provided by Grakn keyed by query variables
+
+    Returns:
+        A dictionary of concepts keyed by query variables
     """
     return {variable: build_thing(grakn_concept) for variable, grakn_concept in concept_map.map().items()}

@@ -38,9 +42,13 @@ def combine_2_graphs(graph1, graph2):
     """
     Combine two graphs into one. Do this by recognising common nodes between the two.
-    :param graph1: Graph to compare
-    :param graph2: Graph to compare
-    :return: Combined graph
+
+    Args:
+        graph1: Graph to compare
+        graph2: Graph to compare
+
+    Returns:
+        Combined graph
     """

     for node, data in graph1.nodes(data=True):
@@ -67,8 +75,12 @@ def combine_n_graphs(graphs_list):
     """
     Combine N graphs into one. Do this by recognising common nodes between the two.
-    :param graphs_list: List of graphs to combine
-    :return: Combined graph
+
+    Args:
+        graphs_list: List of graphs to combine
+
+    Returns:
+        Combined graph
     """

     return reduce(lambda x, y: combine_2_graphs(x, y), graphs_list)

@@ -78,14 +90,19 @@ def build_graph_from_queries(query_sampler_variable_graph_tuples, grakn_transact
     """
     Builds a graph of Things, interconnected by roles (and *has*), from a set of queries and graphs representing
     those queries (variable graphs), over a Grakn transaction
-    :param infer:
-    :param query_sampler_variable_graph_tuples: A list of tuples, each tuple containing a query, a sampling function,
-    and a variable_graph
-    :param grakn_transaction: A Grakn transaction
-    :param concept_dict_converter: The function to use to convert from concept_dicts to a Grakn model. This could be
-    a typical model or a mathematical model
-    :return: A networkx graph
+
+    Args:
+        infer: Whether to use Grakn's inference engine
+        query_sampler_variable_graph_tuples: A list of tuples, each tuple containing a query, a sampling function,
+        and a variable_graph
+        grakn_transaction: A Grakn transaction
+        concept_dict_converter: The function to use to convert from concept_dicts to a Grakn model. This could be
+        a typical model or a mathematical model
+
+    Returns:
+        A networkx graph
     """
+
     query_concept_graphs = []

     for query, sampler, variable_graph in query_sampler_variable_graph_tuples:
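`combine_n_graphs` folds a list of graphs pairwise with `reduce`. A simplified, self-contained illustration of that fold, with `networkx.compose` standing in for `combine_2_graphs` (which, per its docstring above, recognises nodes common to both graphs); the node names are made up:

```python
# Simplified illustration of the reduce-based fold used by combine_n_graphs.
from functools import reduce

import networkx as nx

g1 = nx.MultiDiGraph()
g1.add_edge('person-1', 'age-32', type='has')

g2 = nx.MultiDiGraph()
g2.add_edge('diagnosis-1', 'person-1', type='patient')

g3 = nx.MultiDiGraph()
g3.add_edge('diagnosis-1', 'disease-1', type='diagnosed-disease')

combined = reduce(lambda x, y: nx.compose(x, y), [g1, g2, g3])
print(combined.number_of_nodes(), combined.number_of_edges())  # 4 3: shared nodes are merged
```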