From d1ac7dd7e5c833084e319ee815ba985a505b30e2 Mon Sep 17 00:00:00 2001 From: TNO-Knowlege-Based-Systems <59829021+TNO-Knowlege-Based-Systems@users.noreply.github.com> Date: Tue, 21 Jan 2020 10:07:30 +0100 Subject: [PATCH 1/6] Add model saver and inference function to learn.py Save model after training and be able to reload it for inference in the infer function. Enables using the model on a test set. --- kglib/kgcn/learn/learn.py | 57 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/kglib/kgcn/learn/learn.py b/kglib/kgcn/learn/learn.py index f53757fb..618e8c40 100644 --- a/kglib/kgcn/learn/learn.py +++ b/kglib/kgcn/learn/learn.py @@ -25,15 +25,24 @@ from kglib.kgcn.learn.loss import loss_ops_preexisting_no_penalty from kglib.kgcn.learn.metrics import existence_accuracy +from graph_nets import utils_np +from graph_nets.graphs import GraphsTuple + class KGCNLearner: """ Responsible for running a KGCN model """ - def __init__(self, model, num_processing_steps_tr=10, num_processing_steps_ge=10): + def __init__(self, model, num_processing_steps_tr=10, num_processing_steps_ge=10, save_fle="save_model.txt", reload_fle=''): + """Args: + save_fle: Name to save the trained model to. + reload_fle: Name to load saved model from, when doing inference. + """ self._model = model self._num_processing_steps_tr = num_processing_steps_tr self._num_processing_steps_ge = num_processing_steps_ge + self.save_fle = save_fle + self.reload_fle = reload_fle def __call__(self, tr_input_graphs, @@ -102,6 +111,7 @@ def __call__(self, train_writer = tf.summary.FileWriter(log_dir, sess.graph) sess.run(tf.global_variables_initializer()) + model_saver = tf.train.Saver() logged_iterations = [] losses_tr = [] @@ -171,6 +181,51 @@ def __call__(self, "outputs": output_ops_tr }, feed_dict=feed_dict) + + # Train the model and save it in the end + if not self.save_fle.is_dir(): + model_saver.save(sess, self.save_fle.as_posix()) + tf.train.write_graph(sess.graph.as_graph_def(), logdir=self.save_fle.parent.as_posix(), name=self.save_fle.with_suffix('.pbtxt').as_posix(), as_text=True) training_info = logged_iterations, losses_tr, losses_ge, corrects_tr, corrects_ge, solveds_tr, solveds_ge return train_values, test_values, training_info + + # New function to infer / apply without training + # Inspired from: https://medium.com/@prasadpal107/saving-freezing-optimizing-for-inference-restoring-of-tensorflow-models-b4146deb21b5 + def infer(self, + input_graphs, + target_graphs, log_dir): + + input_ph, target_ph = create_placeholders(input_graphs, target_graphs) + input_ph, target_ph = make_all_runnable_in_session(input_ph, target_ph) + output_ops_ge = self._model(input_ph, self._num_processing_steps_ge) + saver = tf.train.import_meta_graph(self.reload_fle.as_posix() + '.meta') + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + tf.reset_default_graph() + with sess.as_default(): + if not self.reload_fle.is_dir(): + saver.restore(sess, self.reload_fle.as_posix()) + else: + print("no file found, restoring failed") + + input_graphs_tuple = utils_np.networkxs_to_graphs_tuple(input_graphs) + target_graphs_tuple = utils_np.networkxs_to_graphs_tuple(target_graphs) + feed_dict = { + input_ph: input_graphs_tuple, + target_ph: target_graphs_tuple, + } + test_values = sess.run( + { + "target": target_ph, + "outputs": output_ops_ge, + }, + feed_dict=feed_dict) + + correct_ge, solved_ge = existence_accuracy( + test_values["target"], test_values["outputs"][-1], use_edges=False) + + testing_info = 0, 0, 0, 0, [correct_ge], 0, [solved_ge] + + return test_values, testing_info From 7720b757ae9846cba05059d2fe17d52f45d36d1b Mon Sep 17 00:00:00 2001 From: TNO-Knowlege-Based-Systems <59829021+TNO-Knowlege-Based-Systems@users.noreply.github.com> Date: Tue, 21 Jan 2020 10:14:05 +0100 Subject: [PATCH 2/6] Saving & reloading models in pipeline.py Adjusted pipeline.py to be able to save and restore trained models. --- kglib/kgcn/pipeline/pipeline.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index 8d73f372..db324f58 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -44,7 +44,10 @@ def pipeline(graphs, attr_embedding_dim=6, edge_output_size=3, node_output_size=3, - output_dir=None): + output_dir=None, + do_test=False, + save_fle="test_model.ckpt", + reload_fle=""): ############################################################ # Manipulate the graph data @@ -82,18 +85,28 @@ def pipeline(graphs, node_output_size=node_output_size) learner = KGCNLearner(kgcn, - num_processing_steps_tr=num_processing_steps_tr, - num_processing_steps_ge=num_processing_steps_ge) - - train_values, test_values, tr_info = learner(tr_input_graphs, + num_processing_steps_tr=num_processing_steps_tr, # These processing steps indicate how many message-passing iterations to do for every training / testing step + num_processing_steps_ge=num_processing_steps_ge, + save_fle=output_dir / save_fle, + reload_fle=output_dir / reload_fle) + + # only test + if not (output_dir / reload_fle).is_dir() and do_test == True: + test_values, tr_info = learner.test(ge_input_graphs, + ge_target_graphs, + log_dir=output_dir) + # train + else: + train_values, test_values, tr_info = learner(input_graphs, tr_target_graphs, ge_input_graphs, ge_target_graphs, num_training_iterations=num_training_iterations, log_dir=output_dir) - plot_across_training(*tr_info, output_file=f'{output_dir}learning.png') - plot_predictions(graphs[tr_ge_split:], test_values, num_processing_steps_ge, output_file=f'{output_dir}graph.png') + + plot_across_training(*tr_info, output_file=f'{output_dir}/learning.png') + plot_predictions(graphs[tr_ge_split:], test_values, num_processing_steps_ge, output_file=f'{output_dir}/graph.png') logit_graphs = graphs_tuple_to_networkxs(test_values["outputs"][-1]) From f50aa669e8cff5b278da33db83a6f45bfc6240ca Mon Sep 17 00:00:00 2001 From: Fieke Hillerstrom Date: Wed, 4 Mar 2020 15:01:42 +0100 Subject: [PATCH 3/6] test --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6267b321..c3bd09e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ tensorflow-probability==0.7.0 termcolor==1.1.0 Werkzeug==0.15.6 wrapt==1.11.2 + From 1c96330bef5054176e2a24d300a8585ba738c876 Mon Sep 17 00:00:00 2001 From: Fieke Hillerstrom Date: Thu, 5 Mar 2020 15:34:55 +0100 Subject: [PATCH 4/6] Bug fix for using strings instead of pathlib --- kglib/kgcn/pipeline/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index db324f58..fa7e33b4 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -87,8 +87,8 @@ def pipeline(graphs, learner = KGCNLearner(kgcn, num_processing_steps_tr=num_processing_steps_tr, # These processing steps indicate how many message-passing iterations to do for every training / testing step num_processing_steps_ge=num_processing_steps_ge, - save_fle=output_dir / save_fle, - reload_fle=output_dir / reload_fle) + save_fle=f'{output_dir}/{save_fle}', + reload_fle=f'{output_dir}/{reload_fle}') # only test if not (output_dir / reload_fle).is_dir() and do_test == True: From e50c55dfb939e9c609a898c3cb90ddf513f60e25 Mon Sep 17 00:00:00 2001 From: Fieke Hillerstrom Date: Thu, 5 Mar 2020 16:09:45 +0100 Subject: [PATCH 5/6] Bug fix due to Pathlib --- kglib/kgcn/pipeline/pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index fa7e33b4..f5d3264f 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -19,6 +19,7 @@ import networkx as nx import numpy as np +from pathlib import Path from graph_nets.utils_np import graphs_tuple_to_networkxs from kglib.kgcn.learn.learn import KGCNLearner @@ -91,7 +92,7 @@ def pipeline(graphs, reload_fle=f'{output_dir}/{reload_fle}') # only test - if not (output_dir / reload_fle).is_dir() and do_test == True: + if not Path(output_dir / reload_fle).is_dir() and do_test == True: test_values, tr_info = learner.test(ge_input_graphs, ge_target_graphs, log_dir=output_dir) From b58a4fc17789093526935ee53a009ef8d03a3438 Mon Sep 17 00:00:00 2001 From: Fieke Hillerstrom Date: Thu, 26 Mar 2020 16:15:09 +0100 Subject: [PATCH 6/6] Adjustments to resolve issues from James --- kglib/kgcn/learn/learn.py | 31 ++++++++++++++++--------------- kglib/kgcn/pipeline/pipeline.py | 6 +++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/kglib/kgcn/learn/learn.py b/kglib/kgcn/learn/learn.py index 618e8c40..dd9d25b4 100644 --- a/kglib/kgcn/learn/learn.py +++ b/kglib/kgcn/learn/learn.py @@ -21,6 +21,8 @@ import tensorflow as tf +from pathlib import Path + from kglib.kgcn.learn.feed import create_placeholders, create_feed_dict, make_all_runnable_in_session from kglib.kgcn.learn.loss import loss_ops_preexisting_no_penalty from kglib.kgcn.learn.metrics import existence_accuracy @@ -33,7 +35,7 @@ class KGCNLearner: """ Responsible for running a KGCN model """ - def __init__(self, model, num_processing_steps_tr=10, num_processing_steps_ge=10, save_fle="save_model.txt", reload_fle=''): + def __init__(self, model, num_processing_steps_tr=10, num_processing_steps_ge=10): """Args: save_fle: Name to save the trained model to. reload_fle: Name to load saved model from, when doing inference. @@ -41,10 +43,8 @@ def __init__(self, model, num_processing_steps_tr=10, num_processing_steps_ge=10 self._model = model self._num_processing_steps_tr = num_processing_steps_tr self._num_processing_steps_ge = num_processing_steps_ge - self.save_fle = save_fle - self.reload_fle = reload_fle - def __call__(self, + def train(self, tr_input_graphs, tr_target_graphs, ge_input_graphs, @@ -52,7 +52,8 @@ def __call__(self, num_training_iterations=1000, learning_rate=1e-3, log_every_epochs=20, - log_dir=None): + log_dir=None, + save_file='save_model.txt'): """ Args: tr_graphs: In-memory graphs of Grakn concepts for training @@ -66,7 +67,7 @@ def __call__(self, Returns: """ - + save_fle = Path(save_file) tf.set_random_seed(1) input_ph, target_ph = create_placeholders(tr_input_graphs, tr_target_graphs) @@ -183,30 +184,30 @@ def __call__(self, feed_dict=feed_dict) # Train the model and save it in the end - if not self.save_fle.is_dir(): - model_saver.save(sess, self.save_fle.as_posix()) - tf.train.write_graph(sess.graph.as_graph_def(), logdir=self.save_fle.parent.as_posix(), name=self.save_fle.with_suffix('.pbtxt').as_posix(), as_text=True) + if not save_fle.is_dir(): + model_saver.save(sess, save_fle.as_posix()) + tf.train.write_graph(sess.graph.as_graph_def(), logdir=save_fle.parent.as_posix(), name=save_fle.with_suffix('.pbtxt').as_posix(), as_text=True) training_info = logged_iterations, losses_tr, losses_ge, corrects_tr, corrects_ge, solveds_tr, solveds_ge return train_values, test_values, training_info # New function to infer / apply without training # Inspired from: https://medium.com/@prasadpal107/saving-freezing-optimizing-for-inference-restoring-of-tensorflow-models-b4146deb21b5 - def infer(self, - input_graphs, - target_graphs, log_dir): + def infer(self, input_graphs, target_graphs, log_dir, load_file): + + reload_file = Path(load_file) input_ph, target_ph = create_placeholders(input_graphs, target_graphs) input_ph, target_ph = make_all_runnable_in_session(input_ph, target_ph) output_ops_ge = self._model(input_ph, self._num_processing_steps_ge) - saver = tf.train.import_meta_graph(self.reload_fle.as_posix() + '.meta') + saver = tf.train.import_meta_graph(reload_file.as_posix() + '.meta') sess = tf.Session() sess.run(tf.global_variables_initializer()) tf.reset_default_graph() with sess.as_default(): - if not self.reload_fle.is_dir(): - saver.restore(sess, self.reload_fle.as_posix()) + if not reload_file.is_dir(): + saver.restore(sess, reload_file.as_posix()) else: print("no file found, restoring failed") diff --git a/kglib/kgcn/pipeline/pipeline.py b/kglib/kgcn/pipeline/pipeline.py index f5d3264f..122bc067 100644 --- a/kglib/kgcn/pipeline/pipeline.py +++ b/kglib/kgcn/pipeline/pipeline.py @@ -92,13 +92,13 @@ def pipeline(graphs, reload_fle=f'{output_dir}/{reload_fle}') # only test - if not Path(output_dir / reload_fle).is_dir() and do_test == True: - test_values, tr_info = learner.test(ge_input_graphs, + if not (Path(output_dir) / reload_fle).is_dir() and do_test is True: + test_values, tr_info = learner.infer(ge_input_graphs, ge_target_graphs, log_dir=output_dir) # train else: - train_values, test_values, tr_info = learner(input_graphs, + train_values, test_values, tr_info = learner.train(input_graphs, tr_target_graphs, ge_input_graphs, ge_target_graphs,