From 593bf23d6a9fe44f3135e59dd8f37663ef34bf88 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Mon, 28 Jan 2019 12:42:39 +0000 Subject: [PATCH 01/18] Updates VERSION to match reality, corrects real Pypi destination url --- VERSION | 2 +- deployment.properties | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index ceab6e11..b7161198 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1 \ No newline at end of file +0.1a1 \ No newline at end of file diff --git a/deployment.properties b/deployment.properties index f708ee79..43d2e19c 100644 --- a/deployment.properties +++ b/deployment.properties @@ -19,7 +19,7 @@ github.repository=grakn maven.repository-url.snapshot=http://maven.grakn.ai/nexus/content/repositories/snapshots/ maven.repository-url.release=http://maven.grakn.ai/nexus/content/repositories/releases/ -pip.repository-url.pypi=https://pypi.org/legacy/ +pip.repository-url.pypi=https://upload.pypi.org/legacy/ pip.repository-url.test=https://test.pypi.org/legacy/ npm.repository-url=https://registry.npmjs.org/ maven.packages=common,server,console,protocol,client-java \ No newline at end of file From 151291807f16e4661b61b40a6f609f9e2a7cd4d1 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Mon, 28 Jan 2019 13:31:37 +0000 Subject: [PATCH 02/18] Corrects typos and links in READMEs, and adds link to London Meetup KGCN talk --- README.md | 2 +- kglib/kgcn/README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3305a6c6..34ad55ad 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Research This repository is the centre of all research projects conducted at Grakn Labs. In particular, it's focus is on the integration of machine learning with the Grakn knowledge graph. -At present this repo contains one project: [*Knowledge Graph Convolutional Networks* (KGCNs)](/kglib/kgcn). 
+At present this repo contains one project: [*Knowledge Graph Convolutional Networks* (KGCNs)](https://github.com/graknlabs/kglib/tree/master/kglib/kgcn). diff --git a/kglib/kgcn/README.md b/kglib/kgcn/README.md index fa542c22..e58d75db 100644 --- a/kglib/kgcn/README.md +++ b/kglib/kgcn/README.md @@ -6,7 +6,7 @@ This project introduces a novel model: the *Knowledge Graph Convolutional Networ ## Methodology -The ideology behind this project is described [here](https://blog.grakn.ai/knowledge-graph-convolutional-networks-machine-learning-over-reasoned-knowledge-9eb5ce5e0f68). The principles of the implementation are based on [GraphSAGE](http://snap.stanford.edu/graphsage/), from the Stanford SNAP group, made to work over a **knowledge graph**. Instead of working on a typical property graph, a KGCN learns from the context of a *typed hypergraph*, **Grakn**. Additionally, it learns from facts deduced by Grakn's *automated logical reasoner*. From this point onwards some understanding of [Grakn's docs](http://dev.grakn.ai) is assumed. +The ideology behind this project is described [here](https://blog.grakn.ai/knowledge-graph-convolutional-networks-machine-learning-over-reasoned-knowledge-9eb5ce5e0f68), and a [video of the presentation](https://youtu.be/Jx_Twc75ka0?t=368). The principles of the implementation are based on [GraphSAGE](http://snap.stanford.edu/graphsage/), from the Stanford SNAP group, made to work over a **knowledge graph**. Instead of working on a typical property graph, a KGCN learns from the context of a *typed hypergraph*, **Grakn**. Additionally, it learns from facts deduced by Grakn's *automated logical reasoner*. From this point onwards some understanding of [Grakn's docs](http://dev.grakn.ai) is assumed. #### How do KGCNs work? 
@@ -34,7 +34,7 @@ To create embeddings, we build a network in TensorFlow that successively aggrega - Prepare the data: - - If you already have an insatnce of Grakn running, make sure to stop it using `./grakn server stop` + - If you already have an instance of Grakn running, make sure to stop it using `./grakn server stop` - Download the pre-loaded Grakn distribution from the [latest release](https://github.com/graknlabs/kglib/releases/latest) @@ -82,4 +82,4 @@ The [main](examples/animal_trade/main.py) function will: - Build the TensorFlow computation graph using `model.KGCN`, including a multi-class classification step and learning procedure defined by `downstream.SupervisedKGCNClassifier` -- Feed the `raw_arrays` to the TensorFlow graph, and performs learning \ No newline at end of file +- Feed the `raw_arrays` to the TensorFlow graph, and perform learning \ No newline at end of file From d1957f2dba6469c69c4b0262595b99efc412b303 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Wed, 30 Jan 2019 12:15:56 +0100 Subject: [PATCH 03/18] Corrects prediction_schema example for data re-ingestion --- .../examples/animal_trade/prediction_schema.gql | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kglib/kgcn/examples/animal_trade/prediction_schema.gql b/kglib/kgcn/examples/animal_trade/prediction_schema.gql index 787f283b..5548397a 100644 --- a/kglib/kgcn/examples/animal_trade/prediction_schema.gql +++ b/kglib/kgcn/examples/animal_trade/prediction_schema.gql @@ -18,9 +18,9 @@ # define -endangerment-level sub attribute datatype long; -kgcn-model-version sub attribute datatype double; -prediction-score sub attribute datatype double; +endangerment-level sub attribute, datatype long; +kgcn-model-version sub attribute, datatype double; +prediction-score sub attribute, datatype double; traded-item has endangerment-level; @@ -36,11 +36,13 @@ kgcn-model sub entity, define @has-endangerment-level plays predicted-value; + + insert -$kgcn isa 
kgcn-model has kgcn-model-version 0.1; +$kgcn isa kgcn-model, has kgcn-model-version 0.1; -$t1 id V630904, has endangerment-level $el1 via $r1; $el1 1; (predicted-value: $r1, predicting-kgcn-model: $kgcn) isa value-prediction, has prediction-score 0.87; -$t2 id V704688, has endangerment-level $el2 via $r2; $el2 1; (predicted-value: $r2, predicting-kgcn-model: $kgcn) isa value-prediction, has prediction-score 0.71; +$t1 id V1282192, has endangerment-level $el1 via $r1; $el1 1; (predicted-value: $r1, predicting-kgcn-model: $kgcn) isa value-prediction, has prediction-score 0.87; +$t2 id V1364112, has endangerment-level $el2 via $r2; $el2 1; (predicted-value: $r2, predicting-kgcn-model: $kgcn) isa value-prediction, has prediction-score 0.71; match $t1 isa traded-item, has endangerment-level $el1 via $r1; $el1 1; $vp1(predicted-value: $r1, predicting-kgcn-model: $kgcn) isa value-prediction, has prediction-score $s1; get; @@ -60,6 +62,6 @@ when { $ti isa traded-item, has endangerment-level $el; $el 1; $ti has item-type $type; $type "meat"; $e(exchanged-item: $ti) isa exchange; -} then { +}, then { (suspicious-activity: $e, cause-of-suspicion: $type, cause-of-suspicion: $el) isa suspicious-activity-detection; }; From f8dabbf49cc3f4d36ead68b8188452b1a0e958e3 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Wed, 30 Jan 2019 15:07:45 +0100 Subject: [PATCH 04/18] Moves examples out of main source, updates READMEs to reflect this --- examples/kgcn/animal_trade/README.md | 64 +++++++++++ .../kgcn}/animal_trade/main.py | 0 .../kgcn}/animal_trade/prediction_schema.gql | 0 .../kgcn}/animal_trade/schema.gql | 0 kglib/kgcn/README.md | 94 +++++++-------- kglib/kgcn/examples/toy/main.py | 107 ------------------ kglib/kgcn/examples/toy/schema.gql | 39 ------- kglib/kgcn/refactor.md | 104 ----------------- 8 files changed, 103 insertions(+), 305 deletions(-) create mode 100644 examples/kgcn/animal_trade/README.md rename {kglib/kgcn/examples => examples/kgcn}/animal_trade/main.py 
(100%) rename {kglib/kgcn/examples => examples/kgcn}/animal_trade/prediction_schema.gql (100%) rename {kglib/kgcn/examples => examples/kgcn}/animal_trade/schema.gql (100%) delete mode 100644 kglib/kgcn/examples/toy/main.py delete mode 100644 kglib/kgcn/examples/toy/schema.gql delete mode 100644 kglib/kgcn/refactor.md diff --git a/examples/kgcn/animal_trade/README.md b/examples/kgcn/animal_trade/README.md new file mode 100644 index 00000000..906c0612 --- /dev/null +++ b/examples/kgcn/animal_trade/README.md @@ -0,0 +1,64 @@ +# KGCN Example - CITES Animal Trade Data + +### Quickstart + +**Requirements:** + +- Python 3.6.3 < version < 3.7 ([tensorflow doesn't yet support Python 3.7](https://github.com/tensorflow/tensorflow/issues/17022)) +- kglib installed from pip: `pip install --extra-index-url https://test.pypi.org/simple/ grakn-kglib` +- The source code in order to access the example `git clone https://github.com/graknlabs/kglib.git` +- The `grakn-animaltrade.zip` dataset from the [latest release](https://github.com/graknlabs/kglib/releases/latest). 
This is a dataset that has been pre-loaded into Grakn v1.5 (so you don't have to run the data import yourself), with two keyspaces: `animaltrade_train` and `animaltrade_test` + +**To use:** + +- Prepare the data: + + - If you already have an instance of Grakn running, make sure to stop it using `./grakn server stop` + + - Download the pre-loaded Grakn distribution from the [latest release](https://github.com/graknlabs/kglib/releases/latest) + + - Unzip the distribution `unzip grakn-animaltrade.zip `, where you store this doesn't matter + + - cd into the distribution `cd grakn-animaltrade` + + - start Grakn `./grakn server start` + + - Confirm that the training keyspace is present and contains data + + `./grakn console -k animaltrade_train` + + `match $t isa traded-item; limit 1; get;` + + and then `exit` + +- Run the `main` function of the example: + + Navigate to the root of the `kglib` repo: `cd kglib` + + Run the example: `python3 -m examples.kgcn.animal_trade.main` + + This will run the full pipeline: retrieving data, building and training a KGCN classifier + +#### Details + +The CITES dataset details exchanges of animal-based products between countries. In this example we aim to predict the value of `appendix` for a set of samples. This `appendix` can be thought of as the level of endangerment that a `traded-item` is subject to, where `1` represents the highest level of endangerment, and `3` the lowest. 
+ +The [main](main.py) function will: + +- Search Grakn for 30 concepts (with attributes as labels) to use as the training set, 30 for the evaluation set, and 30 for the prediction set using queries such as (limiting the returned stream): + + ``` + match $e(exchanged-item: $traded-item) isa exchange, has appendix $appendix; $appendix 1; get; + ``` + + This searches for an `exchange` between countries that has an `appendix` (endangerment level) of `1`, and finds the `traded-item` that was exchanged + +- Save those labelled samples to file + +- Delete all `appendix` attributes from both `animaltrade_train` and `animaltrade_test` keyspaces. This is the label we will predict in this example, so it should not be present in Grakn otherwise the network can cheat + +- Search Grakn for the k-hop neighbours of the selected examples, and store information about them as arrays, denoted in the code as `raw_arrays`. This data is saved to file so that subsequent steps can be re-run without recomputing these data + +- Build the TensorFlow computation graph using `model.KGCN`, including a multi-class classification step and learning procedure defined by `downstream.SupervisedKGCNClassifier` + +- Feed the `raw_arrays` to the TensorFlow graph, and perform learning \ No newline at end of file diff --git a/kglib/kgcn/examples/animal_trade/main.py b/examples/kgcn/animal_trade/main.py similarity index 100% rename from kglib/kgcn/examples/animal_trade/main.py rename to examples/kgcn/animal_trade/main.py diff --git a/kglib/kgcn/examples/animal_trade/prediction_schema.gql b/examples/kgcn/animal_trade/prediction_schema.gql similarity index 100% rename from kglib/kgcn/examples/animal_trade/prediction_schema.gql rename to examples/kgcn/animal_trade/prediction_schema.gql diff --git a/kglib/kgcn/examples/animal_trade/schema.gql b/examples/kgcn/animal_trade/schema.gql similarity index 100% rename from kglib/kgcn/examples/animal_trade/schema.gql rename to 
examples/kgcn/animal_trade/schema.gql diff --git a/kglib/kgcn/README.md b/kglib/kgcn/README.md index e58d75db..da28333c 100644 --- a/kglib/kgcn/README.md +++ b/kglib/kgcn/README.md @@ -2,84 +2,68 @@ This project introduces a novel model: the *Knowledge Graph Convolutional Network* (KGCN). The principal idea of this work is to forge a bridge between knowledge graphs and machine learning, using [Grakn](https://github.com/graknlabs/grakn) as the knowledge graph. A KGCN can be used to create vector representations, *embeddings*, of any labelled set of Grakn Concepts via supervised learning. As a result, a KGCN can be trained directly for the classification or regression of Concepts stored in Grakn. Future work will include building embeddings via unsupervised learning.![KGCN Process](readme_images/KGCN_process.png) - - -## Methodology - -The ideology behind this project is described [here](https://blog.grakn.ai/knowledge-graph-convolutional-networks-machine-learning-over-reasoned-knowledge-9eb5ce5e0f68), and a [video of the presentation](https://youtu.be/Jx_Twc75ka0?t=368). The principles of the implementation are based on [GraphSAGE](http://snap.stanford.edu/graphsage/), from the Stanford SNAP group, made to work over a **knowledge graph**. Instead of working on a typical property graph, a KGCN learns from the context of a *typed hypergraph*, **Grakn**. Additionally, it learns from facts deduced by Grakn's *automated logical reasoner*. From this point onwards some understanding of [Grakn's docs](http://dev.grakn.ai) is assumed. - -#### How do KGCNs work? - -The purpose of this method is to derive embeddings for a set of Concepts (and thereby directly learn to classify them). We start by querying Grakn to find a set of labelled examples. Following that, we gather data about the neighbourhood of each example Concept. We do this by considering their *k-hop* neighbours. 
- -![k-hop neighbours](readme_images/k-hop_neighbours.png)We retrieve the data concerning this neighbourhood from Grakn. This information includes the *type hierarchy*, *roles*, and *attribute* values of each neighbouring Concept encountered. - -To create embeddings, we build a network in TensorFlow that successively aggregates and combines features from the K hops until a 'summary' representation remains - an embedding. In our example these embeddings are directly optimised to perform multi-class classification. This is achieved by passing the embeddings to a single subsequent dense layer and determining loss via softmax cross entropy with the labels retrieved. - -![Aggregation and Combination process](readme_images/aggregate_and_combine.png) - - - -## Usage by example - CITES Animal Trade Data - -### Quickstart +## Quickstart **Requirements:** -- Python 3.6.3 or higher -- kglib installed from pip: `pip install --extra-index-url https://test.pypi.org/simple/ grakn-kglib` -- The `grakn-animaltrade.zip` dataset from the [latest release](https://github.com/graknlabs/kglib/releases/latest). This is a dataset that has been pre-loaded into Grakn v1.5 (so you don't have to run the data import yourself), with two keyspaces: `animaltrade_train` and `animaltrade_test`. 
- -**To use:** +- Python 3.6.3 < version < 3.7 ([tensorflow doesn't yet support Python 3.7](https://github.com/tensorflow/tensorflow/issues/17022)) -- Prepare the data: +- kglib installed from pip: `pip install --extra-index-url https://test.pypi.org/simple/ grakn-kglib` - - If you already have an instance of Grakn running, make sure to stop it using `./grakn server stop` - - - Download the pre-loaded Grakn distribution from the [latest release](https://github.com/graknlabs/kglib/releases/latest) +### Usage - - Unzip the distribution `unzip grakn-animaltrade.zip `, where you store this doesn't matter +The following is a template of what must be defined in order to instantiate a KGCN, optimised for a downstream learning task of multi-class classification: - - cd into the distribution `cd grakn-animaltrade` - - - start Grakn `./grakn server start` +```python +import kglib.kgcn.models as models +import tensorflow as tf +import grakn - - Confirm that the training keyspace is present and contains data +URI = "localhost:48555" - `./grakn console -k animaltrade_train` +client = grakn.Grakn(uri=URI) +session = client.session(keyspace=training_keyspace) +transaction = session.transaction(grakn.TxType.WRITE) - `match $t isa traded-item; limit 1; get;` +kgcn = models.model.KGCN(NEIGHBOUR_SAMPLE_SIZES, + features_length, + starting_concepts_features_length, + aggregated_length, + output_length, + transaction, + batch_size, + buffer_size + ) - and then `exit` +optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) +classifier = models.downstream.SupervisedKGCNClassifier(kgcn, optimizer, num_classes, log_dir, + max_training_steps=max_training_steps) -- Run the `main` function of the example: +training_feed_dict = classifier.get_feed_dict(session, training_concepts, labels=training_labels) - `cd kglib` +classifier.train(training_feed_dict) - `python3 -m kglib.kgcn.examples.animal_trade.main` +transaction.close() +session.close() +``` - This will run the full 
pipeline: retrieving data, building and training a KGCN classifier +There is also a [full example](https://github.com/graknlabs/kglib/tree/master/examples/kgcn/animal_trade) which outlines retrieving sample concepts with labels and working with separate keyspaces for training and testing. -#### Details +## Methodology -The CITES dataset details exchanges of animal-based products between countries. In this example we aim to predict the value of `appendix` for a set of samples. This `appendix` can be thought of as the level of endangerment that a `traded-item` is subject to, where `1` represents the highest level of endangerment, and `3` the lowest. +The ideology behind this project is described [here](https://blog.grakn.ai/knowledge-graph-convolutional-networks-machine-learning-over-reasoned-knowledge-9eb5ce5e0f68), and a [video of the presentation](https://youtu.be/Jx_Twc75ka0?t=368). The principles of the implementation are based on [GraphSAGE](http://snap.stanford.edu/graphsage/), from the Stanford SNAP group, made to work over a **knowledge graph**. Instead of working on a typical property graph, a KGCN learns from the context of a *typed hypergraph*, **Grakn**. Additionally, it learns from facts deduced by Grakn's *automated logical reasoner*. From this point onwards some understanding of [Grakn's docs](http://dev.grakn.ai) is assumed. -The [main](examples/animal_trade/main.py) function will: +#### How do KGCNs work? -- Search Grakn for 30 concepts (with attributes as labels) to use as the training set, 30 for the evaluation set, and 30 for the prediction set using queries such as (limiting the returned stream): +The purpose of this method is to derive embeddings for a set of Concepts (and thereby directly learn to classify them). We start by querying Grakn to find a set of labelled examples. Following that, we gather data about the neighbourhood of each example Concept. We do this by considering their *k-hop* neighbours. 
- ``` - match $e(exchanged-item: $traded-item) isa exchange, has appendix $appendix; $appendix 1; get; - ``` +![k-hop neighbours](readme_images/k-hop_neighbours.png)We retrieve the data concerning this neighbourhood from Grakn. This information includes the *type hierarchy*, *roles*, and *attribute* values of each neighbouring Concept encountered. - This searches for an `exchange` between countries that has an `appendix` (endangerment level) of `1`, and finds the `traded-item` that was exchanged +To create embeddings, we build a network in TensorFlow that successively aggregates and combines features from the K hops until a 'summary' representation remains - an embedding. In our example these embeddings are directly optimised to perform multi-class classification. This is achieved by passing the embeddings to a single subsequent dense layer and determining loss via softmax cross entropy with the labels retrieved. -- Save those labelled samples to file +![Aggregation and Combination process](readme_images/aggregate_and_combine.png) -- Delete all `appendix` attributes from both `animaltrade_train` and `animaltrade_test` keyspaces. This is the label we will predict in this example, so it should not be present in Grakn otherwise the network can cheat -- Search Grakn for the k-hop neighbours of the selected examples, and store information about them as arrays, demoted in the code as `raw_arrays`. 
This data is saved to file so that subsequent steps can be re-run without recomputing these data -- Build the TensorFlow computation graph using `model.KGCN`, including a multi-class classification step and learning procedure defined by `downstream.SupervisedKGCNClassifier` + -- Feed the `raw_arrays` to the TensorFlow graph, and perform learning \ No newline at end of file diff --git a/kglib/kgcn/examples/toy/main.py b/kglib/kgcn/examples/toy/main.py deleted file mode 100644 index ebad6dff..00000000 --- a/kglib/kgcn/examples/toy/main.py +++ /dev/null @@ -1,107 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -import time - -import grakn -import numpy as np -import tensorflow as tf - -import kglib.kgcn.models.model as model -import kglib.kgcn.neighbourhood.data.sampling.ordered as ordered -import kglib.kgcn.neighbourhood.data.sampling.sampler as samp -import kgcn.neighbourhood.schema.strategy as schema_strat - -flags = tf.app.flags -FLAGS = flags.FLAGS - -flags.DEFINE_boolean('debug', False, 'Enable debugging') -flags.DEFINE_float('learning_rate', 0.05, 'Learning rate') -flags.DEFINE_integer('classes_length', 3, 'Number of classes') -flags.DEFINE_integer('features_length', 192, 'Number of features after encoding') -flags.DEFINE_integer('starting_concepts_features_length', 4, ## 143, - 'Number of features after encoding for the nodes of interest, which excludes the features for ' - 'role_type and role_direction') -flags.DEFINE_integer('aggregated_length', 4, 'Length of aggregated representation of neighbours, a hidden dimension') -flags.DEFINE_integer('output_length', 4, 'Length of the output of "combine" operation, taking place at each depth, ' - 'and the final length of the embeddings') - -flags.DEFINE_integer('max_training_steps', 2500, 'Max number of gradient steps to take during gradient descent') - -TIMESTAMP = time.strftime("%Y-%m-%d_%H-%M-%S") -flags.DEFINE_string('log_dir', './out/out_' + TIMESTAMP, 'directory to use to store data from training') - - -def main(): - keyspace = 'toy' - uri = "localhost:48555" - - client = grakn.Grakn(uri=uri) - train_session = client.session(keyspace=keyspace) - tx = train_session.transaction(grakn.TxType.WRITE) - - label_types = ['A', 'B', 'C'] - - concepts = [] - labels = [] - - for label_type in label_types: - - target_concept_query = f"match $x($label) isa example; $label isa {label_type}; get;" - - answers = tx.query(target_concept_query) - new_concepts = [ans.get('x') for ans in answers] - - base_label = [0, 0, 0] - base_label[label_types.index(label_type)] = 1 - - concepts += new_concepts - labels += [base_label for _ 
in new_concepts] - - labels = np.array(labels, dtype=np.float32) - - neighbour_sample_sizes = (1,) - - sampling_method = ordered.ordered_sample - - samplers = [] - for sample_size in neighbour_sample_sizes: - samplers.append(samp.Sampler(sample_size, sampling_method, limit=sample_size + 1)) - - # Strategies - role_schema_strategy = schema_strat.SchemaRoleTraversalStrategy(include_implicit=False, include_metatypes=False) - thing_schema_strategy = schema_strat.SchemaThingTraversalStrategy(include_implicit=False, include_metatypes=False) - - traversal_strategies = {'role': role_schema_strategy, - 'thing': thing_schema_strategy} - - kgcn = model.KGCN(tx, traversal_strategies, samplers, features_to_exclude=['neighbour_data_type', - 'neighbour_value_long', - 'neighbour_value_double', - 'neighbour_value_boolean', - 'neighbour_value_date', - 'neighbour_value_string']) - - kgcn.train(tx, concepts, labels) - kgcn.evaluate(tx, concepts, labels) - # kgcn.predict(tx, concepts) - - -if __name__ == "__main__": - main() diff --git a/kglib/kgcn/examples/toy/schema.gql b/kglib/kgcn/examples/toy/schema.gql deleted file mode 100644 index 6f5d581b..00000000 --- a/kglib/kgcn/examples/toy/schema.gql +++ /dev/null @@ -1,39 +0,0 @@ -define -example sub relationship, - relates label-a, - relates label-b, - relates label-c; - -A sub entity, - plays label-a; - -B sub entity, - plays label-b; - -C sub entity, - plays label-c; - -insert -$e1(label-a: $a) isa example; $a isa A; -$e2(label-b: $b) isa example; $b isa B; -$e3(label-c: $c) isa example; $c isa C; - -insert -$e1(label-a: $a) isa example; $a isa A; -$e2(label-b: $b) isa example; $b isa B; -$e3(label-c: $c) isa example; $c isa C; - -insert -$e1(label-a: $a) isa example; $a isa A; -$e2(label-b: $b) isa example; $b isa B; -$e3(label-c: $c) isa example; $c isa C; - -insert -$e1(label-a: $a) isa example; $a isa A; -$e2(label-b: $b) isa example; $b isa B; -$e3(label-c: $c) isa example; $c isa C; - -insert -$e1(label-a: $a) isa example; $a 
isa A; -$e2(label-b: $b) isa example; $b isa B; -$e3(label-c: $c) isa example; $c isa C; diff --git a/kglib/kgcn/refactor.md b/kglib/kgcn/refactor.md deleted file mode 100644 index 9abb4cfa..00000000 --- a/kglib/kgcn/refactor.md +++ /dev/null @@ -1,104 +0,0 @@ - -1. Customise the learning parameters, in some clear way, including: - - numerical parameters - - strategies - - encoders - -*Pre-TensorFlow model* -These components are used repeatedly for each neighbour hop, but could require different parameters each -neighbour sampling params: - query limits, - sampling nature e.g. ordered, pseudo-random, random - -*Within Tensorflow model* -Name scoping? - -encoding parameters - encoder to user per-type - -normalisation parameters(?) - -These components are used repeatedly for each neighbour hop, but could require different parameters each - Aggregator parameters - Weight initialiser - Bias - Weight regulariser - Activation - Dropout - Layer Type (currently dense) - Reduction method - Combination parameters - Weight initialiser - Weight regularisers - - Normaliser parameters - -Loss method - -kgcn = KGCN(traversal_params={}, aggregation_params={'bias': False}, combination_params={}) -Any arguments provided here should override the default dict params - - - - -2. Direct supervised learning for: - - Unknown downstream learning (arbitrary user pipeline), with ready-made components: - - Attribute prediction - - Link prediction - -3. Generate unsupervised embeddings, subsequently perform either: - - Unknown downstream learning (arbitrary user pipeline), with ready-made components: - - Attribute prediction - - Link prediction - -4. Visualise the model/learning in TensorBoard - -5. Save the traversal output arrays to file in order to quickly iterate on the learning model - -6. Advanced: Customise the neural net design - -7. Low priority: Support learning outside TensorFlow, using other libraries etc - - - - -2. + 3. 
- -traverser = Traverser(params) - -traversals = traverser.traverse(concepts) - -# Do any saving/loading of traversals and labels to/from file - -embedder = Embedder(params) # This is agnostic to training, evaluation, prediction etc - -# Get the output tensors, e.g. embeddings, summary writers, initialisers etc -output_tensors = embedder.build() - -classifier = SimpleMultiClassClassifier() - -kgcn = KGCN(params) - -# Create embeddings tensor -embeddings = kgcn.get_embeddings() - -udl = UnknownDownstreamLearning(embeddings) -udl.train(concepts, labels, grakn_connection) - - -kgcn = SupervisedKGCNClassifier(params) -kgcn = KGCNMultiClassClassifier(Embedder(params), classifier_params) -kgcn = KGCN(params) -train_results = kgcn.train(concepts, labels) -eval_results = kgcn.eval(concepts, labels) -predictions = kgcn.predict(concepts, labels) - - - - - - - - - - From 4e788048d1cb2ae2d2e4305e4e0c1c6d803ba4ea Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 31 Jan 2019 14:03:56 +0000 Subject: [PATCH 05/18] Adds a test for importing from the test pypi server, and an end-to-end test, used via bazel to test using local source and to test using the test pypi server --- BUILD | 72 ++++++++++ examples/kgcn/animal_trade/schema.gql | 4 +- .../kgcn/animal_trade/test/end_to_end_test.py | 136 ++++++++++++++++++ kglib/kgcn/models/downstream.py | 9 +- requirements.txt | 2 + test/import_test.py | 31 ++++ 6 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 examples/kgcn/animal_trade/test/end_to_end_test.py create mode 100644 test/import_test.py diff --git a/BUILD b/BUILD index 42eaee36..5b290153 100644 --- a/BUILD +++ b/BUILD @@ -185,6 +185,78 @@ py_test( ] ) +py_test( + name = "import_test", + srcs = [ + "test/import_test.py" + ], + deps = [ + requirement('grakn-kglib'), + + # Grakn deps + requirement('grakn'), + requirement('grpcio'), + + # TensorFlow deps + requirement('tensorflow'), + requirement('numpy'), + requirement('protobuf'), + requirement('six'), + 
requirement('absl-py'), + requirement('keras_applications'), + requirement('keras_preprocessing'), + requirement('gast'), + requirement('astor'), + requirement('termcolor'), + + requirement('tensorflow-hub'), + requirement('scikit-learn'), + requirement('scipy') + ] +) + +py_test( + name = "test_pypi_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "examples/kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + requirement('grakn-kglib'), + + # Grakn deps + requirement('grakn'), + requirement('grpcio'), + + # TensorFlow deps + requirement('tensorflow'), + requirement('numpy'), + requirement('protobuf'), + requirement('six'), + requirement('absl-py'), + requirement('keras_applications'), + requirement('keras_preprocessing'), + requirement('gast'), + requirement('astor'), + requirement('termcolor'), + + requirement('tensorflow-hub'), + requirement('scikit-learn'), + requirement('scipy') + ] +) + +py_test( + name = "local_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "examples/kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + "kglib", + ] +) + py_library( name = "kglib", srcs = glob(['kglib/__init__.py', 'kglib/kgcn/**/*.py']), diff --git a/examples/kgcn/animal_trade/schema.gql b/examples/kgcn/animal_trade/schema.gql index e02c0b84..d89ba9ec 100644 --- a/examples/kgcn/animal_trade/schema.gql +++ b/examples/kgcn/animal_trade/schema.gql @@ -136,8 +136,8 @@ define relates originated-species; taxon-membership sub relationship, - relates member-item, - relates taxonomic-group; + relates member-item, + relates taxonomic-group; taxonomic-ranking when { diff --git a/examples/kgcn/animal_trade/test/end_to_end_test.py b/examples/kgcn/animal_trade/test/end_to_end_test.py new file mode 100644 index 00000000..e2e8b49a --- /dev/null +++ b/examples/kgcn/animal_trade/test/end_to_end_test.py @@ -0,0 +1,136 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +import os +import time +import unittest + +import grakn +import tensorflow as tf + +import kglib.kgcn.management.grakn as grakn_mgmt +import kglib.kgcn.management.samples as samp_mgmt +import kglib.kgcn.models.downstream as downstream +import kglib.kgcn.models.model as model +import kglib.kgcn.neighbourhood.data.sampling.random_sampling as random_sampling + +flags = tf.app.flags +FLAGS = flags.FLAGS + +# Learning params +flags.DEFINE_float('learning_rate', 0.01, 'Learning rate') +flags.DEFINE_integer('num_classes', 3, 'Number of classes') +flags.DEFINE_integer('features_length', 198, 'Number of features after encoding') +flags.DEFINE_integer('starting_concepts_features_length', 173, + 'Number of features after encoding for the nodes of interest, which excludes the features for ' + 'role_type and role_direction') +flags.DEFINE_integer('aggregated_length', 20, 'Length of aggregated representation of neighbours, a hidden dimension') +flags.DEFINE_integer('output_length', 32, 'Length of the output of "combine" operation, taking place at each depth, ' + 'and the final length of the embeddings') +flags.DEFINE_integer('max_training_steps', 50, 'Max number of gradient steps to take during gradient descent') + +# Sample selection params +EXAMPLES_QUERY = 'match $e(exchanged-item: 
$traded-item) isa exchange, has appendix $appendix; $appendix {}; get;' +LABEL_ATTRIBUTE_TYPE = 'appendix' +ATTRIBUTE_VALUES = [1, 2, 3] +EXAMPLE_CONCEPT_TYPE = 'traded-item' + +NUM_PER_CLASS = 5 +POPULATION_SIZE_PER_CLASS = 100 + +# Params for persisting to files +DIR = os.path.dirname(os.path.realpath(__file__)) +TIMESTAMP = time.strftime("%Y-%m-%d_%H-%M-%S") +# BASE_PATH = f'{DIR}/dataset/{NUM_PER_CLASS}_concepts/' +# flags.DEFINE_string('log_dir', BASE_PATH + 'out/out_' + TIMESTAMP, 'directory to use to store data from training') + +# SAVED_LABELS_PATH = BASE_PATH + 'labels/labels_{}.p' + +TRAIN = 'train' +EVAL = 'eval' +PREDICT = 'predict' + +KEYSPACES = { + TRAIN: "animaltrade_train", + EVAL: "animaltrade_train", + PREDICT: "animaltrade_train", +} + +URI = "localhost:48555" + +NEIGHBOUR_SAMPLE_SIZES = (2, 1) + + +class TestEndToEnd(unittest.TestCase): + def test_end_to_end(self): + modes = (TRAIN, EVAL) + + client = grakn.Grakn(uri=URI) + sessions = grakn_mgmt.get_sessions(client, KEYSPACES) + transactions = grakn_mgmt.get_transactions(sessions) + + batch_size = buffer_size = NUM_PER_CLASS * FLAGS.num_classes + kgcn = model.KGCN(NEIGHBOUR_SAMPLE_SIZES, + FLAGS.features_length, + FLAGS.starting_concepts_features_length, + FLAGS.aggregated_length, + FLAGS.output_length, + transactions[TRAIN], + batch_size, + buffer_size, + sampling_method=random_sampling.random_sample, + sampling_limit_factor=4 + ) + + optimizer = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + classifier = downstream.SupervisedKGCNClassifier(kgcn, optimizer, FLAGS.num_classes, None, + max_training_steps=FLAGS.max_training_steps) + + feed_dicts = {} + + sampling_params = { + TRAIN: {'sample_size': NUM_PER_CLASS, 'population_size': POPULATION_SIZE_PER_CLASS}, + EVAL: {'sample_size': NUM_PER_CLASS, 'population_size': POPULATION_SIZE_PER_CLASS}, + PREDICT: {'sample_size': NUM_PER_CLASS, 'population_size': POPULATION_SIZE_PER_CLASS}, + } + concepts, labels = 
samp_mgmt.compile_labelled_concepts(EXAMPLES_QUERY, EXAMPLE_CONCEPT_TYPE, + LABEL_ATTRIBUTE_TYPE, ATTRIBUTE_VALUES, + transactions[TRAIN], transactions[PREDICT], + sampling_params) + + for mode in modes: + feed_dicts[mode] = classifier.get_feed_dict(sessions[mode], concepts[mode], labels=labels[mode]) + + # Train + if TRAIN in modes: + print("\n\n********** TRAIN Keyspace **********") + classifier.train(feed_dicts[TRAIN]) + + # Eval + if EVAL in modes: + print("\n\n********** EVAL Keyspace **********") + # Presently, eval keyspace is the same as the TRAIN keyspace + classifier.eval(feed_dicts[EVAL]) + + grakn_mgmt.close(sessions) + grakn_mgmt.close(transactions) + + +if __name__ == "__main__": + unittest.main() diff --git a/kglib/kgcn/models/downstream.py b/kglib/kgcn/models/downstream.py index d2595381..cb2f83a4 100644 --- a/kglib/kgcn/models/downstream.py +++ b/kglib/kgcn/models/downstream.py @@ -33,6 +33,7 @@ def __init__(self, kgcn: kglib.kgcn.models.model.KGCN, optimizer, num_classes, l classification_kernel_initializer=tf.contrib.layers.xavier_initializer()): self._log_dir = log_dir + self._write_summary = self._log_dir is not None self._kgcn = kgcn self._optimizer = optimizer self._num_classes = num_classes @@ -92,7 +93,8 @@ def __init__(self, kgcn: kglib.kgcn.models.model.KGCN, optimizer, num_classes, l init_tables = tf.tables_initializer() # Instantiate a SummaryWriter to output summaries and the Graph. - self.summary_writer = tf.summary.FileWriter(self._log_dir, self.tf_session.graph) + if self._write_summary: + self.summary_writer = tf.summary.FileWriter(self._log_dir, self.tf_session.graph) # Run the Op to initialize the variables. 
self.tf_session.run(init_global) @@ -136,8 +138,9 @@ def train(self, feed_dict): self._predictions_class_winners, self._labels_winners]) summary_str = self.tf_session.run(self.summary, feed_dict=feed_dict) - self.summary_writer.add_summary(summary_str, step) - self.summary_writer.flush() + if self._write_summary: + self.summary_writer.add_summary(summary_str, step) + self.summary_writer.flush() if step % int(self._max_training_steps / 20) == 0: print(f'\n-----') print(f'Step {step}') diff --git a/requirements.txt b/requirements.txt index 8055a0a7..d6609fdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +--extra-index-url https://testpypi.python.org/pypi +grakn-kglib absl-py==0.5.0 astor==0.7.1 decorator==4.3.0 diff --git a/test/import_test.py b/test/import_test.py new file mode 100644 index 00000000..7e1f19c1 --- /dev/null +++ b/test/import_test.py @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +import unittest + + +class TestImport(unittest.TestCase): + def test_import_kgcn(self): + import kglib.kgcn.models.model as model + cls = model.KGCN + print(cls) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From aa01428a80fd7d782f49bed10e5b31c885be2831 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 31 Jan 2019 14:09:06 +0000 Subject: [PATCH 06/18] Creates a new py_library that depends on the pypi test server rather than local source code --- BUILD | 58 ++++++++++++++++++++++++---------------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/BUILD b/BUILD index 5b290153..a739bd62 100644 --- a/BUILD +++ b/BUILD @@ -191,27 +191,7 @@ py_test( "test/import_test.py" ], deps = [ - requirement('grakn-kglib'), - - # Grakn deps - requirement('grakn'), - requirement('grpcio'), - - # TensorFlow deps - requirement('tensorflow'), - requirement('numpy'), - requirement('protobuf'), - requirement('six'), - requirement('absl-py'), - requirement('keras_applications'), - requirement('keras_preprocessing'), - requirement('gast'), - requirement('astor'), - requirement('termcolor'), - - requirement('tensorflow-hub'), - requirement('scikit-learn'), - requirement('scipy') + "test-pypi-kglib" ] ) @@ -222,8 +202,25 @@ py_test( "examples/kgcn/animal_trade/test/end_to_end_test.py" ], deps = [ - requirement('grakn-kglib'), + "test-pypi-kglib" + ] +) + +py_test( + name = "local_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "examples/kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + "kglib", + ] +) +py_library( + name = "kglib", + srcs = glob(['kglib/__init__.py', 'kglib/kgcn/**/*.py']), + deps = [ # Grakn deps requirement('grakn'), requirement('grpcio'), @@ -242,25 +239,18 @@ py_test( requirement('tensorflow-hub'), requirement('scikit-learn'), - requirement('scipy') + requirement('scipy'), ] ) -py_test( - name = "local_end_to_end_test", - main = "end_to_end_test.py", +py_library( + name = 
"test-pypi-kglib", srcs = [ "examples/kgcn/animal_trade/test/end_to_end_test.py" ], deps = [ - "kglib", - ] -) - -py_library( - name = "kglib", - srcs = glob(['kglib/__init__.py', 'kglib/kgcn/**/*.py']), - deps = [ + requirement('grakn-kglib'), + # Grakn deps requirement('grakn'), requirement('grpcio'), From 72325510d0e68bfe4ebf3fd8b5fcca413d63f19c Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 31 Jan 2019 16:26:49 +0000 Subject: [PATCH 07/18] Separates bazel commands into more BUILD files such that kglib tests and example end-to-end tests can be fun separately --- BUILD | 211 ------------------------------------------------- examples/BUILD | 64 +++++++++++++++ kglib/BUILD | 153 +++++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+), 211 deletions(-) create mode 100644 examples/BUILD create mode 100644 kglib/BUILD diff --git a/BUILD b/BUILD index a739bd62..728c5eb6 100644 --- a/BUILD +++ b/BUILD @@ -61,214 +61,3 @@ deploy_pip( ], target = ":kglib" ) - -py_test( - name = "ordered_test", - srcs = [ - "kglib/kgcn/neighbourhood/data/sampling/ordered_test.py" - ], - deps = [ - "kglib" - ], -) - -py_test( - name = "random_sampling_test", - srcs = [ - "kglib/kgcn/neighbourhood/data/sampling/random_sampling_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "label_extraction_test", - srcs = [ - "kglib/kgcn/use_cases/attribute_prediction/label_extraction_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "metrics_test", - srcs = [ - "kglib/kgcn/models/metrics_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "tf_hub_test", - srcs = [ - "kglib/kgcn/encoder/tf_hub_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "schema_test", - srcs = [ - "kglib/kgcn/encoder/schema_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "encode_test", - srcs = [ - "kglib/kgcn/encoder/encode_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "boolean_test", - srcs = [ - 
"kglib/kgcn/encoder/boolean_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "data_traversal_test", - main = "traversal_test.py", - srcs = [ - "kglib/kgcn/neighbourhood/data/traversal_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "data_executor_test", - main = "executor_test.py", - srcs = [ - "kglib/kgcn/neighbourhood/data/executor_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "schema_traversal_test", - main = "traversal_test.py", - srcs = [ - "kglib/kgcn/neighbourhood/schema/traversal_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "raw_array_builder_test", - srcs = [ - "kglib/kgcn/preprocess/raw_array_builder_test.py" - ], - deps = [ - "kglib", - ] -) - -py_test( - name = "import_test", - srcs = [ - "test/import_test.py" - ], - deps = [ - "test-pypi-kglib" - ] -) - -py_test( - name = "test_pypi_end_to_end_test", - main = "end_to_end_test.py", - srcs = [ - "examples/kgcn/animal_trade/test/end_to_end_test.py" - ], - deps = [ - "test-pypi-kglib" - ] -) - -py_test( - name = "local_end_to_end_test", - main = "end_to_end_test.py", - srcs = [ - "examples/kgcn/animal_trade/test/end_to_end_test.py" - ], - deps = [ - "kglib", - ] -) - -py_library( - name = "kglib", - srcs = glob(['kglib/__init__.py', 'kglib/kgcn/**/*.py']), - deps = [ - # Grakn deps - requirement('grakn'), - requirement('grpcio'), - - # TensorFlow deps - requirement('tensorflow'), - requirement('numpy'), - requirement('protobuf'), - requirement('six'), - requirement('absl-py'), - requirement('keras_applications'), - requirement('keras_preprocessing'), - requirement('gast'), - requirement('astor'), - requirement('termcolor'), - - requirement('tensorflow-hub'), - requirement('scikit-learn'), - requirement('scipy'), - ] -) - -py_library( - name = "test-pypi-kglib", - srcs = [ - "examples/kgcn/animal_trade/test/end_to_end_test.py" - ], - deps = [ - requirement('grakn-kglib'), - - # Grakn deps - requirement('grakn'), - requirement('grpcio'), - - 
# TensorFlow deps - requirement('tensorflow'), - requirement('numpy'), - requirement('protobuf'), - requirement('six'), - requirement('absl-py'), - requirement('keras_applications'), - requirement('keras_preprocessing'), - requirement('gast'), - requirement('astor'), - requirement('termcolor'), - - requirement('tensorflow-hub'), - requirement('scikit-learn'), - requirement('scipy'), - ] -) \ No newline at end of file diff --git a/examples/BUILD b/examples/BUILD new file mode 100644 index 00000000..9da21e84 --- /dev/null +++ b/examples/BUILD @@ -0,0 +1,64 @@ +load("@io_bazel_rules_python//python:python.bzl", "py_library", "py_test") +load("@pypi_dependencies//:requirements.bzl", "requirement") + +py_test( + name = "import_test", + srcs = [ + "test/import_test.py" + ], + deps = [ + "test-pypi-kglib" + ] +) + +py_test( + name = "test_pypi_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + "test-pypi-kglib" + ] +) + +py_test( + name = "local_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + "//kglib:kglib", + ] +) + +py_library( + name = "test-pypi-kglib", + srcs = [ + "kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + requirement('grakn-kglib'), + + # Grakn deps + requirement('grakn'), + requirement('grpcio'), + + # TensorFlow deps + requirement('tensorflow'), + requirement('numpy'), + requirement('protobuf'), + requirement('six'), + requirement('absl-py'), + requirement('keras_applications'), + requirement('keras_preprocessing'), + requirement('gast'), + requirement('astor'), + requirement('termcolor'), + + requirement('tensorflow-hub'), + requirement('scikit-learn'), + requirement('scipy'), + ] +) \ No newline at end of file diff --git a/kglib/BUILD b/kglib/BUILD new file mode 100644 index 00000000..3ed6ce19 --- /dev/null +++ b/kglib/BUILD @@ -0,0 +1,153 @@ +load("@io_bazel_rules_python//python:python.bzl", 
"py_library", "py_test") +load("@pypi_dependencies//:requirements.bzl", "requirement") + + +py_test( + name = "ordered_test", + srcs = [ + "kgcn/neighbourhood/data/sampling/ordered_test.py" + ], + deps = [ + "kglib" + ], +) + +py_test( + name = "random_sampling_test", + srcs = [ + "kgcn/neighbourhood/data/sampling/random_sampling_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "label_extraction_test", + srcs = [ + "kgcn/use_cases/attribute_prediction/label_extraction_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "metrics_test", + srcs = [ + "kgcn/models/metrics_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "tf_hub_test", + srcs = [ + "kgcn/encoder/tf_hub_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "schema_test", + srcs = [ + "kgcn/encoder/schema_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "encode_test", + srcs = [ + "kgcn/encoder/encode_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "boolean_test", + srcs = [ + "kgcn/encoder/boolean_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "data_traversal_test", + main = "traversal_test.py", + srcs = [ + "kgcn/neighbourhood/data/traversal_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "data_executor_test", + main = "executor_test.py", + srcs = [ + "kgcn/neighbourhood/data/executor_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "schema_traversal_test", + main = "traversal_test.py", + srcs = [ + "kgcn/neighbourhood/schema/traversal_test.py" + ], + deps = [ + "kglib", + ] +) + +py_test( + name = "raw_array_builder_test", + srcs = [ + "kgcn/preprocess/raw_array_builder_test.py" + ], + deps = [ + "kglib", + ] +) + +py_library( + name = "kglib", + srcs = glob(['__init__.py', 'kgcn/**/*.py']), + deps = [ + # Grakn deps + requirement('grakn'), + requirement('grpcio'), + + # TensorFlow deps + requirement('tensorflow'), + requirement('numpy'), + requirement('protobuf'), + 
requirement('six'), + requirement('absl-py'), + requirement('keras_applications'), + requirement('keras_preprocessing'), + requirement('gast'), + requirement('astor'), + requirement('termcolor'), + + requirement('tensorflow-hub'), + requirement('scikit-learn'), + requirement('scipy'), + ], + visibility=['//visibility:public'] +) \ No newline at end of file From d35bfcf8deb46d1238991ce3691ce6958e424c13 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 31 Jan 2019 16:29:19 +0000 Subject: [PATCH 08/18] Removes loacl end-to-end test on the grounds that it has an undesirable dependency for little gain --- examples/BUILD | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/examples/BUILD b/examples/BUILD index 9da21e84..35175b04 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -22,17 +22,6 @@ py_test( ] ) -py_test( - name = "local_end_to_end_test", - main = "end_to_end_test.py", - srcs = [ - "kgcn/animal_trade/test/end_to_end_test.py" - ], - deps = [ - "//kglib:kglib", - ] -) - py_library( name = "test-pypi-kglib", srcs = [ From a25136ca2f23978a43f36ded7825dbe3a5a3353b Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Thu, 31 Jan 2019 16:30:16 +0000 Subject: [PATCH 09/18] Removes import_test as redundant given the end-to-end test --- examples/BUILD | 10 ---------- test/import_test.py | 31 ------------------------------- 2 files changed, 41 deletions(-) delete mode 100644 test/import_test.py diff --git a/examples/BUILD b/examples/BUILD index 35175b04..12b6e099 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -1,16 +1,6 @@ load("@io_bazel_rules_python//python:python.bzl", "py_library", "py_test") load("@pypi_dependencies//:requirements.bzl", "requirement") -py_test( - name = "import_test", - srcs = [ - "test/import_test.py" - ], - deps = [ - "test-pypi-kglib" - ] -) - py_test( name = "test_pypi_end_to_end_test", main = "end_to_end_test.py", diff --git a/test/import_test.py b/test/import_test.py deleted file mode 100644 index 7e1f19c1..00000000 --- 
a/test/import_test.py +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -import unittest - - -class TestImport(unittest.TestCase): - def test_import_kgcn(self): - import kglib.kgcn.models.model as model - cls = model.KGCN - print(cls) - - -if __name__ == "__main__": - unittest.main() \ No newline at end of file From e7722d29a64019bc60ee42d0a767dec513426587 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 13:29:08 +0000 Subject: [PATCH 10/18] Adds the dependency of end-to-end test upon tha animaltrade grakn dist. Setup needs to be added to the test itself. 
--- BUILD | 2 +- WORKSPACE | 11 ++++++++++- examples/BUILD | 5 ++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/BUILD b/BUILD index 728c5eb6..98f0d203 100644 --- a/BUILD +++ b/BUILD @@ -59,5 +59,5 @@ deploy_pip( deployment_requirement("webencodings"), deployment_requirement("six"), ], - target = ":kglib" + target = "//kglib:kglib" ) diff --git a/WORKSPACE b/WORKSPACE index 49e7ef3f..928afa61 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -35,4 +35,13 @@ pip3_import( requirements = "//:deployment/requirements.txt", ) load("@pypi_deployment_dependencies//:requirements.bzl", "pip_install") -pip_install() \ No newline at end of file +pip_install() + + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") + +http_file( + name = "animaltrade_dist", + urls = ["https://github.com/graknlabs/kglib/releases/download/v0.1a1/grakn-animaltrade.zip", # TODO How to update to the latest relase each time? + ] +) \ No newline at end of file diff --git a/examples/BUILD b/examples/BUILD index 12b6e099..0665efa5 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -8,7 +8,10 @@ py_test( "kgcn/animal_trade/test/end_to_end_test.py" ], deps = [ - "test-pypi-kglib" + "test-pypi-kglib", + ], + data = [ + "@animaltrade_dist//file", ] ) From 0f0f6bdfdd27a321131ad8d5587b318609550add Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 15:09:44 +0000 Subject: [PATCH 11/18] Unzips Grakn dist from a release and starts grakn during end-to-end test --- examples/kgcn/animal_trade/test/end_to_end_test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/kgcn/animal_trade/test/end_to_end_test.py b/examples/kgcn/animal_trade/test/end_to_end_test.py index e2e8b49a..4b2d9d74 100644 --- a/examples/kgcn/animal_trade/test/end_to_end_test.py +++ b/examples/kgcn/animal_trade/test/end_to_end_test.py @@ -18,6 +18,7 @@ # import os +import subprocess as sub import time import unittest @@ -78,7 +79,15 @@ class TestEndToEnd(unittest.TestCase): + def 
test_end_to_end(self): + # Unzip the Grakn distribution containing our data + sub.run(['unzip', 'external/animaltrade_dist/file/downloaded', '-d', + 'external/animaltrade_dist/file/downloaded-unzipped']) + + # Start Grakn + sub.run(['external/animaltrade_dist/file/downloaded-unzipped/grakn-animaltrade/grakn', 'server', 'start']) + modes = (TRAIN, EVAL) client = grakn.Grakn(uri=URI) From ce9e8d7df4e1784255cb5decd8e38a2cfca25d5d Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 15:16:30 +0000 Subject: [PATCH 12/18] Ignores .ijwb directory --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 8670dbf5..7a370252 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Bazel files bazel-* +# IntelliJ Bazel project +.ijwb/ + # Compiled class file *.class From f5270231ef6a76c6dcf4081628d690dd8e9f551d Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 15:26:23 +0000 Subject: [PATCH 13/18] Add end-to-end test to CI --- .circleci/config.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f5008ccb..3bc8f598 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,7 @@ jobs: - run: unzip grakn-core-1.4.3.zip - run: nohup grakn-core-1.4.3/grakn server start - run: grakn-core-1.4.3/graql console -k test_schema -f kglib/kgcn/test_data/schema.gql - - run: bazel test //... --test_output=streamed --force_python PY3 --python_path $(which python) + - run: bazel test //kglib/... 
--test_output=streamed --force_python PY3 --python_path $(which python) test-deploy-pip: machine: true @@ -37,7 +37,17 @@ jobs: - run: date +%s > VERSION - run: cat VERSION - run: bazel run //:deploy-pip -- test $PYPI_TEST_SERVER_USERNAME $PYPI_TEST_SERVER_PASSWORD - - run: pip install --extra-index-url https://test.pypi.org/simple/ grakn-kglib + + end-to-end-test: + machine: true + working_directory: ~/kglib + steps: + - checkout + - bazel_install + - run: sudo apt-get update + - run: pyenv install 3.6.3 + - run: pyenv global 3.6.3 + - run: bazel test //examples:test_pypi_end_to_end_test --test_output=streamed --force_python PY3 --python_path $(which python) --spawn_strategy=standalone deploy-git: machine: true @@ -71,10 +81,12 @@ workflows: - test-deploy-pip: requires: - test + - end-to-end-test: + - requires: test-deploy-pip - approve-deploy-git: type: approval requires: - - test-deploy-pip + - end-to-end-test - deploy-git: requires: - approve-deploy-git From 17369bdcdae5a8511641257366e9c158326d506d Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 15:36:10 +0000 Subject: [PATCH 14/18] Fixes CI config file --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3bc8f598..49c8fab3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,7 +82,8 @@ workflows: requires: - test - end-to-end-test: - - requires: test-deploy-pip + requires: + - test-deploy-pip - approve-deploy-git: type: approval requires: From 7a229b5eaa0ccedea74219fd7ae104738e8faf9d Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 16:04:12 +0000 Subject: [PATCH 15/18] Improvements to READMEs --- examples/kgcn/animal_trade/README.md | 17 ++++++++++++++--- kglib/kgcn/README.md | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/examples/kgcn/animal_trade/README.md b/examples/kgcn/animal_trade/README.md index 906c0612..887d06bf 100644 --- 
a/examples/kgcn/animal_trade/README.md +++ b/examples/kgcn/animal_trade/README.md @@ -15,9 +15,9 @@ - If you already have an instance of Grakn running, make sure to stop it using `./grakn server stop` - - Download the pre-loaded Grakn distribution from the [latest release](https://github.com/graknlabs/kglib/releases/latest) + - Download `grakn-animaltrade.zip` from the [latest release](https://github.com/graknlabs/kglib/releases/latest). This is a Grakn distribution, pre-loaded with the CITES dataset - - Unzip the distribution `unzip grakn-animaltrade.zip `, where you store this doesn't matter + - Unzip the distribution `unzip grakn-animaltrade.zip`, where you store this doesn't matter - cd into the distribution `cd grakn-animaltrade` @@ -61,4 +61,15 @@ The [main](../../examples/kgcn/animal_trade/main.py) function will: - Build the TensorFlow computation graph using `model.KGCN`, including a multi-class classification step and learning procedure defined by `downstream.SupervisedKGCNClassifier` -- Feed the `raw_arrays` to the TensorFlow graph, and perform learning \ No newline at end of file +- Feed the `raw_arrays` to the TensorFlow graph, and perform learning + +##### Re-training the model +Re-running the `main` function will make use of the `feed_dicts` previously saved to file (at `dataset/10_concepts/input`), and so will repeat `classifier.train(feed_dicts[TRAIN])`, `classifier.eval(feed_dicts[EVAL])` and `classifier.eval(feed_dicts[PREDICT])` over the exact same data as previously retrieved. Therefore, to play with the learning parameters, do so and then simply re-run `main`. + +##### Re-generating the `feed_dicts` +To re-generate the `feed_dicts`, delete the saved files in `dataset/10_concepts/input`. 
+ +##### Picking new samples +To pick different sample concepts to use for training/evaluation/prediction you need to: +- Force the `feed-dict`s to re-generate by deleting the saved files (as above) +- Use a fresh version of `grakn-animaltrade`, since the present one has had the supervised labels deleted! \ No newline at end of file diff --git a/kglib/kgcn/README.md b/kglib/kgcn/README.md index da28333c..185b20fd 100644 --- a/kglib/kgcn/README.md +++ b/kglib/kgcn/README.md @@ -25,7 +25,7 @@ client = grakn.Grakn(uri=URI) session = client.session(keyspace=training_keyspace) transaction = session.transaction(grakn.TxType.WRITE) -kgcn = models.model.KGCN(NEIGHBOUR_SAMPLE_SIZES, +kgcn = models.model.KGCN(neighbour_sample_sizes, features_length, starting_concepts_features_length, aggregated_length, From 50520f896eaabb8116d3564625c35ad840c61065 Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 17:25:11 +0000 Subject: [PATCH 16/18] Re-adds local end-to-end test --- examples/BUILD | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/examples/BUILD b/examples/BUILD index 0665efa5..cee59bfa 100644 --- a/examples/BUILD +++ b/examples/BUILD @@ -15,6 +15,21 @@ py_test( ] ) +py_test( + name = "local_end_to_end_test", + main = "end_to_end_test.py", + srcs = [ + "kgcn/animal_trade/test/end_to_end_test.py" + ], + deps = [ + "//kglib:kglib", + ], + data = [ + "@animaltrade_dist//file", + ] +) + + py_library( name = "test-pypi-kglib", srcs = [ From 1e819f2bc7dc58ca6edc81d7219fdbda9b9e4a3f Mon Sep 17 00:00:00 2001 From: James Fletcher Date: Fri, 1 Feb 2019 17:27:01 +0000 Subject: [PATCH 17/18] Edit to deployment/requirements.txt to detail transitive dependency issue --- deployment/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/requirements.txt b/deployment/requirements.txt index 039eb9b2..284ce1e7 100644 --- a/deployment/requirements.txt +++ b/deployment/requirements.txt @@ -8,9 +8,9 @@ pkginfo==1.5.0.1 
readme-renderer==24.0 requests-toolbelt==0.8.0 requests==2.21.0 -setuptools>=38.6.0 tqdm==4.29.1 twine==1.12.1 +setuptools==40.7.2 # Setuptools 40.7.2 is brought in automatically by twine regardless, so insluding it here is misleading. Including a different version than twine brings proves to be fatal, as there are two versions, one from twine and one from us. urllib3==1.24.1 webencodings==0.5.1 -wheel==0.32.3 \ No newline at end of file +wheel==0.32.3 From 3598d66765a32b316f3763763528e8157a3cf646 Mon Sep 17 00:00:00 2001 From: Max Vorobev Date: Mon, 4 Feb 2019 14:57:14 +0300 Subject: [PATCH 18/18] Fixes broken PyPI deployment --- WORKSPACE | 4 ++-- deployment/requirements.txt | 16 ---------------- 2 files changed, 2 insertions(+), 18 deletions(-) delete mode 100644 deployment/requirements.txt diff --git a/WORKSPACE b/WORKSPACE index 928afa61..2c8c13d4 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -14,7 +14,7 @@ git_repository( git_repository( name="graknlabs_bazel_distribution", remote="https://github.com/graknlabs/bazel-distribution", - commit="2e932a2555d1e43f75c8ee676c926399bd12f240" + commit="ebc9ae9e6d4ef0086d1c6731bf6f5f8a8f40b509" ) ## Only needed for PIP support: @@ -32,7 +32,7 @@ pip_install() pip3_import( name = "pypi_deployment_dependencies", - requirements = "//:deployment/requirements.txt", + requirements = "@graknlabs_bazel_distribution//pip:requirements.txt" ) load("@pypi_deployment_dependencies//:requirements.bzl", "pip_install") pip_install() diff --git a/deployment/requirements.txt b/deployment/requirements.txt deleted file mode 100644 index 284ce1e7..00000000 --- a/deployment/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -Pygments==2.3.1 -bleach==3.1.0 -certifi==2018.11.29 -chardet==3.0.4 -docutils==0.14 -idna==2.8 -pkginfo==1.5.0.1 -readme-renderer==24.0 -requests-toolbelt==0.8.0 -requests==2.21.0 -tqdm==4.29.1 -twine==1.12.1 -setuptools==40.7.2 # Setuptools 40.7.2 is brought in automatically by twine regardless, so insluding it here is 
misleading. Including a different version than twine brings proves to be fatal, as there are two versions, one from twine and one from us. -urllib3==1.24.1 -webencodings==0.5.1 -wheel==0.32.3