From 49427edac0585d29e426e6ee1d40317f94e589bf Mon Sep 17 00:00:00 2001
From: Jean-KOUAGOU
Date: Wed, 16 Oct 2024 18:19:44 +0200
Subject: [PATCH] Set NCES to train from scratch when no pretrained model is available

---
 examples/concept_learning_cv_evaluation.py |   2 +-
 examples/train_nces.py                     |  23 +--
 main.py                                    |   6 +-
 ontolearn/base_nces.py                     |   4 +-
 ontolearn/concept_learner.py               | 148 ++++++++++++-----
 ontolearn/nces_trainer.py                  | 180 ++++++++++-----------
 ontolearn/scripts/run.py                   |   2 +-
 tests/test_nces.py                         |   4 +-
 8 files changed, 217 insertions(+), 152 deletions(-)

diff --git a/examples/concept_learning_cv_evaluation.py b/examples/concept_learning_cv_evaluation.py
index c9f3b11a..e84a7044 100644
--- a/examples/concept_learning_cv_evaluation.py
+++ b/examples/concept_learning_cv_evaluation.py
@@ -89,7 +89,7 @@ def dl_concept_learning(args):
     nces = NCES(knowledge_base_path=args.kb, quality_func=F1(),
                 path_of_embeddings=get_embedding_path("https://files.dice-research.org/projects/NCES/NCES_Ontolearn_Data/NCESData.zip",args.path_of_nces_embeddings, args.kb),
-                pretrained_model_name=["LSTM", "GRU", "SetTransformer"],
+                learner_names=["LSTM", "GRU", "SetTransformer"],
                 num_predictions=100, verbose=0)
 
     args.path_of_clip_embeddings = get_embedding_path(
diff --git a/examples/train_nces.py b/examples/train_nces.py
index 35bac29c..a7054991 100644
--- a/examples/train_nces.py
+++ b/examples/train_nces.py
@@ -21,7 +21,8 @@ def str2bool(v):
 parser = argparse.ArgumentParser()
-parser.add_argument('--kbs', type=str, nargs='+', default=['carcinogenesis'], help='Knowledge base name(s)')
+parser.add_argument('--kbs', type=str, nargs='+', default=['family'], help='Knowledge base name(s)')
+parser.add_argument('--path_train_data', type=str, default="./NCES-Experiment-17:42:45/", help='Path to training data')
 parser.add_argument('--models', type=str, nargs='+', default=['SetTransformer', 'LSTM', 'GRU'], help='Neural models')
 parser.add_argument('--load_pretrained', type=str2bool, default=False, help='Whether to load the pretrained model')
 parser.add_argument('--learning_rate', type=float, default=0.001, help='The learning rate')
@@ -31,15 +32,15 @@ def str2bool(v):
 for kb in args.kbs:
     knowledge_base_path = f"./NCESData/{kb}/{kb}.owl"
     path_of_embeddings = f"./NCESData/{kb}/embeddings/ConEx_entity_embeddings.csv"
-    with open(f"./NCESData/{kb}/training_data/Data.json") as file:
-        training_data = list(json.load(file).items())
-
-    nces = NCES(knowledge_base_path=knowledge_base_path, learner_name="SetTransformer",
+    try:
+        with open(args.path_train_data+"/LPs.json") as file:
+            training_data = list(json.load(file).items())
+    except FileNotFoundError:
+        print("Could not find training data. Will generate some data and train.")
+        training_data = None
+
+    nces = NCES(knowledge_base_path=knowledge_base_path, learner_names=args.models,
                 path_of_embeddings=path_of_embeddings, max_length=48, proj_dim=128, rnn_n_layers=2, drop_prob=0.1,
-                num_heads=4, num_seeds=1, num_inds=32, load_pretrained=args.load_pretrained)
+                num_heads=4, num_seeds=1, num_inds=32, verbose=True, load_pretrained=args.load_pretrained)
-    for model in args.models:
-        nces.learner_name = model
-        nces.pretrained_model_name = model
-        nces.refresh()
-        nces.train(training_data, epochs=args.epochs, learning_rate=args.learning_rate, save_model=True)
+    nces.train(training_data, epochs=args.epochs, learning_rate=args.learning_rate, num_workers=2, save_model=True)
diff --git a/main.py b/main.py
index 6fbdb580..27296c41 100644
--- a/main.py
+++ b/main.py
@@ -105,7 +105,7 @@ def get_default_arguments(description=None):
                         help="Random initialization method.", choices=["GROW", "FULL", "RAMPED_HALF_HALF"])
     # NCES only
-    parser.add_argument("--learner_name", type=str, default="SetTransformer", help="Learner name.",
+    parser.add_argument("--learner_names", type=str, nargs="+", default=["SetTransformer"], help="Learner name.",
                         choices=["SetTransformer", "GRU", "LSTM"])
     parser.add_argument("--proj_dim", type=int, default=128, help="Number of projection dimensions.")
     parser.add_argument("--rnn_n_layers", type=int, default=2, help="Number of RNN layers (only for LSTM and GRU).")
@@ -122,8 +122,8 @@
     parser.add_argument("--max_length", type=int, default=48, help="Maximum length")
     parser.add_argument("--load_pretrained", type=bool, default=True, help="Load pretrained.")
     parser.add_argument("--sorted_examples", type=bool, default=True, help="Sorted examples.")
-    parser.add_argument("--pretrained_model_name", type=str, default="SetTransformer", help="Pretrained model name",
-                        choices=["SetTransformer", "GRU", "LSTM"])
+#    parser.add_argument("--pretrained_model_name", type=str, default="SetTransformer", help="Pretrained model name",
+#                        choices=["SetTransformer", "GRU", "LSTM"])
 
     if description is None:
         return parser.parse_args()
diff --git a/ontolearn/base_nces.py b/ontolearn/base_nces.py
index 226aedd8..ca0730c9 100644
--- a/ontolearn/base_nces.py
+++ b/ontolearn/base_nces.py
@@ -35,7 +35,7 @@
 class BaseNCES:
 
-    def __init__(self, knowledge_base_path, learner_name, path_of_embeddings, batch_size=256, learning_rate=1e-4,
+    def __init__(self, knowledge_base_path, learner_names, path_of_embeddings, batch_size=256, learning_rate=1e-4,
                  decay_rate=0.0, clip_value=5.0, num_workers=4):
         self.name = "NCES"
         kb = KnowledgeBase(path=knowledge_base_path)
@@ -52,7 +52,7 @@ def __init__(self, knowledge_base_path, learner_names, path_of_embeddings, batch_
         self.all_individuals = set([ind.str.split("/")[-1] for ind in kb.individuals()])
         self.inv_vocab = np.array(vocab, dtype='object')
         self.vocab = {vocab[i]: i for i in range(len(vocab))}
-        self.learner_name = learner_name
+        self.learner_names = learner_names
         self.num_examples = self.find_optimal_number_of_examples(kb)
         self.batch_size = batch_size
         self.learning_rate = learning_rate
diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py
index 375ab6d5..a4db409b 100644
--- a/ontolearn/concept_learner.py
+++ b/ontolearn/concept_learner.py
@@ -28,6 +28,7 @@
 import operator
 import random
 import time
+from datetime import datetime
 from contextlib import contextmanager
 from itertools import islice, chain
 from typing import Any, Callable, Dict, FrozenSet, Set, List, Tuple, Iterable, Optional, Union
@@ -78,6 +79,9 @@
 from owlapy.utils import OrderedOWLObject
 from sortedcontainers import SortedSet
 import os
+import json
+import glob
+from ontolearn.lp_generator import LPGen
 
 logger = logging.getLogger(__name__)
@@ -1226,7 +1230,7 @@ def load_model(predictor_name, load_pretrained):
             pretrained_model_path = self.path_of_embeddings.split("embeddings")[
                 0] + "trained_models/trained_" + predictor_name + ".pt"
             if load_pretrained and os.path.isfile(pretrained_model_path):
-                model.load_state_dict(torch.load(pretrained_model_path, map_location=self.device))
+                model.load_state_dict(torch.load(pretrained_model_path, map_location=self.device, weights_only=True))
                 model.eval()
                 print("\n Loaded length predictor!")
             return model
@@ -1399,11 +1403,10 @@ class NCES(BaseNCES):
     def __init__(self, knowledge_base_path,
                  quality_func: Optional[AbstractScorer] = None, num_predictions=5,
-                 learner_name="SetTransformer", path_of_embeddings="", proj_dim=128, rnn_n_layers=2, drop_prob=0.1,
+                 learner_names=["SetTransformer"], path_of_embeddings="", proj_dim=128, rnn_n_layers=2, drop_prob=0.1,
                  num_heads=4, num_seeds=1, num_inds=32, ln=False, learning_rate=1e-4, decay_rate=0.0, clip_value=5.0,
-                 batch_size=256, num_workers=4, max_length=48, load_pretrained=True, sorted_examples=False,
-                 pretrained_model_name=None, verbose: int = 0):
-        super().__init__(knowledge_base_path, learner_name, path_of_embeddings, batch_size, learning_rate, decay_rate,
+                 batch_size=256, num_workers=4, max_length=48, load_pretrained=True, sorted_examples=False, verbose: int = 0):
+        super().__init__(knowledge_base_path, learner_names, path_of_embeddings, batch_size, learning_rate, decay_rate,
                          clip_value, num_workers)
         self.quality_func = quality_func
         self.num_predictions = num_predictions
@@ -1419,42 +1422,84 @@ def __init__(self, knowledge_base_path,
         self.ln = ln
         self.load_pretrained = load_pretrained
         self.sorted_examples = sorted_examples
-        self.pretrained_model_name = pretrained_model_name
         self.verbose = verbose
         self.model = self.get_synthesizer()
         self.dl_parser = DLSyntaxParser(namespace=self.kb_namespace)
         self.best_predictions = None
 
-    def get_synthesizer(self):
-        def load_model(learner_name, load_pretrained):
-            if learner_name == 'SetTransformer':
-                model = SetTransformer(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length,
-                                       self.input_size, self.proj_dim, self.num_heads, self.num_seeds, self.num_inds,
-                                       self.ln)
-            elif learner_name == 'GRU':
-                model = GRU(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size,
-                            self.proj_dim, self.rnn_n_layers, self.drop_prob)
-            elif learner_name == 'LSTM':
-                model = LSTM(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size,
-                             self.proj_dim, self.rnn_n_layers, self.drop_prob)
-            if load_pretrained:
-                model_path = self.path_of_embeddings.split("embeddings")[
-                    0] + "trained_models/trained_" + learner_name + ".pt"
-                model.load_state_dict(torch.load(model_path, map_location=self.device))
-                model.eval()
-                if self.verbose > 0:
-                    print("\n Loaded synthesizer model!")
-            return model
+    def get_synthesizer(self, path=None):
+        m1 = SetTransformer(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length,
+                            self.input_size, self.proj_dim, self.num_heads, self.num_seeds, self.num_inds,
+                            self.ln)
+        m2 = GRU(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size,
+                 self.proj_dim, self.rnn_n_layers, self.drop_prob)
+
+        m3 = LSTM(self.knowledge_base_path, self.vocab, self.inv_vocab, self.max_length, self.input_size,
+                  self.proj_dim, self.rnn_n_layers, self.drop_prob)
+        Untrained = []
+        for name in self.learner_names:
+            for m in [m1,m2,m3]:
+                if m.name == name:
+                    Untrained.append(m)
+
+        Models = []
+
+        if self.load_pretrained:
+            if path is None:
+                try:
+                    if len(glob.glob(self.path_of_embeddings.split("embeddings")[0] + "trained_models/*.pt")) == 0:
+                        raise FileNotFoundError
+                    else:
+                        for file_name in glob.glob(self.path_of_embeddings.split("embeddings")[0] + "trained_models/*.pt"):
+                            for m in Untrained:
+                                if m.name in file_name:
+                                    try:
+                                        m.load_state_dict(torch.load(file_name, map_location=self.device, weights_only=True))
+                                        Models.append(m.eval())
+                                    except Exception as e:
+                                        print(e)
+                                        pass
+                except Exception as e:
+                    print(e)
+                    raise RuntimeError
+
+                if Models:
+                    if self.verbose:
+                        print("\n Loaded synthesizer model!")
+                    return Models
+                else:
+                    print("!!!Returning untrained models, could not load pretrained")
+                    return Untrained
 
-        if not self.load_pretrained:
-            return [load_model(self.learner_name, self.load_pretrained)]
-        elif self.load_pretrained and isinstance(self.pretrained_model_name, str):
-            return [load_model(self.pretrained_model_name, self.load_pretrained)]
-        elif self.load_pretrained and isinstance(self.pretrained_model_name, list):
-            return [load_model(name, self.load_pretrained) for name in self.pretrained_model_name]
+            elif len(glob.glob(path+"/*.pt")) == 0:
+                print("No pretrained model found!")
+                raise FileNotFoundError
+            else:
+                for file_name in glob.glob(path+"/*.pt"):
+                    for m in Untrained:
+                        if m.name in file_name:
+                            try:
+                                m.load_state_dict(torch.load(file_name, map_location=self.device, weights_only=True))
+                                Models.append(m.eval())
+                            except Exception as e:
+                                print(e)
+                                pass
+                if Models:
+                    if self.verbose:
+                        print("\n Loaded synthesizer model!")
+                    return Models
+                else:
+                    print("!!!Returning untrained models, could not load pretrained")
+                    return Untrained
+        else:
+            print("!!!Returning untrained models, could not load pretrained. Check the `load_pretrained parameter` or train the models using NCES.train(data).")
+            return Untrained
 
-    def refresh(self):
-        self.model = self.get_synthesizer()
+
+    def refresh(self, path=None):
+        if path is not None:
+            self.load_pretrained = True
+        self.model = self.get_synthesizer(path)
 
     def sample_examples(self, pos, neg):  # pragma: no cover
         assert type(pos[0]) == type(neg[0]), "The two iterables pos and neg must be of same type"
@@ -1507,7 +1552,7 @@ def fit_one(self, pos: Union[Set[OWLNamedIndividual], Set[str]], neg: Union[Set[
         Pos = np.random.choice(pos_str, size=(self.num_predictions, len(pos_str)), replace=True)
         Neg = np.random.choice(neg_str, size=(self.num_predictions, len(neg_str)), replace=True)
 
-        assert self.load_pretrained and self.pretrained_model_name, \
+        assert self.load_pretrained and self.learner_names, \
             "No pretrained model found. Please first train NCES, see the <> method below"
 
         dataset = NCESDataLoaderInference([("", Pos_str, Neg_str) for (Pos_str, Neg_str) in zip(Pos, Neg)],
@@ -1597,7 +1642,7 @@ def fit_from_iterable(self, dataset: Union[List[Tuple[str, Set[OWLNamedIndividua
         - This function returns predictions as owl class expressions, not nodes as in fit
         """
-        assert self.load_pretrained and self.pretrained_model_name, \
+        assert self.load_pretrained and self.learner_names, \
             "No pretrained model found. Please first train NCES, refer to the <> method"
         dataset = [self.convert_to_list_str_from_iterable(datapoint) for datapoint in dataset]
         dataset = NCESDataLoaderInference(dataset, self.instance_embeddings, self.vocab, self.inv_vocab,
@@ -1623,20 +1668,39 @@ def fit_from_iterable(self, dataset: Union[List[Tuple[str, Set[OWLNamedIndividua
             print("Predictions: ", predictions_str)
         return predictions_as_owl_class_expressions
 
-    def train(self, data: Iterable[List[Tuple]], epochs=300, batch_size=None, learning_rate=1e-4, decay_rate=0.0,
+    @staticmethod
+    def generate_training_data(kb_path, num_lps=1000, storage_dir="./NCES_Training_Data"):
+        lp_gen = LPGen(kb_path=kb_path, max_num_lps=num_lps, storage_dir=storage_dir)
+        lp_gen.generate()
+        print("Loading generated data...")
+        with open(f"{storage_dir}/LPs.json") as file:
+            lps = list(json.load(file).items())
+        print("Number of learning problems:", len(lps))
+        return lps
+
+
+
+    def train(self, data: Iterable[List[Tuple]]=None, epochs=50, batch_size=64, num_lps=1000, learning_rate=1e-4, decay_rate=0.0,
               clip_value=5.0, num_workers=8, save_model=True, storage_path=None, optimizer='Adam',
               record_runtime=True, example_sizes=None, shuffle_examples=False):
+        if os.cpu_count() <= num_workers:
+            num_workers = max(0,os.cpu_count()-1)
+        if storage_path is None:
+            currentDateAndTime = datetime.now()
+            storage_path = f'NCES-Experiment-{currentDateAndTime.strftime("%H:%M:%S")}'
+        if not os.path.exists(storage_path):
+            os.mkdir(storage_path)
         if batch_size is None:
             batch_size = self.batch_size
+        if data is None:
+            data = self.generate_training_data(self.knowledge_base_path, num_lps=num_lps, storage_dir=storage_path)
         train_dataset = NCESDataLoader(data, self.instance_embeddings, self.vocab, self.inv_vocab,
                                        shuffle_examples=shuffle_examples, max_length=self.max_length,
                                        example_sizes=example_sizes)
-        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=self.num_workers,
+        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers,
                                       collate_fn=self.collate_batch, shuffle=True)
-        if storage_path is None:
-            storage_path = self.knowledge_base_path[:self.knowledge_base_path.rfind("/")]
-        elif not os.path.exists(storage_path) and (record_runtime or save_model):
-            os.mkdir(storage_path)
+
         trainer = NCESTrainer(self, epochs=epochs, learning_rate=learning_rate, decay_rate=decay_rate,
                               clip_value=clip_value, num_workers=num_workers, storage_path=storage_path)
         trainer.train(train_dataloader, save_model, optimizer, record_runtime)
+        self.refresh(storage_path+"/trained_models")
diff --git a/ontolearn/nces_trainer.py b/ontolearn/nces_trainer.py
index 5b4caee3..2123b399 100644
--- a/ontolearn/nces_trainer.py
+++ b/ontolearn/nces_trainer.py
@@ -103,9 +103,10 @@ def get_optimizer(self, synthesizer, optimizer='Adam'):  # pragma: no cover
             raise ValueError
             print('Unsupported optimizer')
 
-    def show_num_learnable_params(self):
+    @staticmethod
+    def show_num_learnable_params(model):
         print("*"*20+"Trainable model size"+"*"*20)
-        size = sum([p.numel() for p in self.nces.model.parameters()])
+        size = sum([p.numel() for p in model.parameters()])
         size_ = 0
         print("Synthesizer: ", size)
         print("*"*20+"Trainable model size"+"*"*20)
@@ -134,95 +135,94 @@ def collate_batch(self, batch):  # pragma: no cover
         return pos_emb_list, neg_emb_list, target_labels
 
     def map_to_token(self, idx_array):
-        return self.nces.model.inv_vocab[idx_array]
+        return self.nces.model[0].inv_vocab[idx_array]
 
     def train(self, train_dataloader, save_model=True, optimizer='Adam', record_runtime=True):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        if isinstance(self.nces.model, list):
-            self.nces.model = copy.deepcopy(self.nces.model[0])
-        model_size = self.show_num_learnable_params()
-        if device.type == "cpu":
-            print("Training on CPU, it may take long...")
-        else:
-            print("GPU available !")
-        print()
-        print("#"*50)
-        print()
-        print("{} starts training... \n".format(self.nces.model.name))
-        print("#"*50, "\n")
-        synthesizer = copy.deepcopy(self.nces.model).train()
-        desc = synthesizer.name
-        if device.type == "cuda":
-            synthesizer.cuda()
-        opt = self.get_optimizer(synthesizer=synthesizer, optimizer=optimizer)
-        if self.decay_rate:
-            self.scheduler = ExponentialLR(opt, self.decay_rate)
-        Train_loss = []
-        Train_acc = defaultdict(list)
-        best_score = 0.
-        if record_runtime:
-            t0 = time.time()
-        s_acc, h_acc = 0, 0
-        Epochs = trange(self.epochs, desc=f'Loss: {np.nan}, Soft Acc: {s_acc}, Hard Acc: {h_acc}', leave=True)
-        for e in Epochs:
-            soft_acc, hard_acc = [], []
-            train_losses = []
-            for x1, x2, labels in train_dataloader:
-                target_sequence = self.map_to_token(labels)
-                if device.type == "cuda":
-                    x1, x2, labels = x1.cuda(), x2.cuda(), labels.cuda()
-                pred_sequence, scores = synthesizer(x1, x2)
-                loss = synthesizer.loss(scores, labels)
-                s_acc, h_acc = self.compute_accuracy(pred_sequence, target_sequence)
-                soft_acc.append(s_acc)
-                hard_acc.append(h_acc)
-                train_losses.append(loss.item())
-                opt.zero_grad()
-                loss.backward()
-                clip_grad_value_(synthesizer.parameters(), clip_value=self.clip_value)
-                opt.step()
-                if self.decay_rate:
-                    self.scheduler.step()
-            train_soft_acc, train_hard_acc = np.mean(soft_acc), np.mean(hard_acc)
-            Train_loss.append(np.mean(train_losses))
-            Train_acc['soft'].append(train_soft_acc)
-            Train_acc['hard'].append(train_hard_acc)
-            Epochs.set_description('Loss: {:.4f}, Soft Acc: {:.2f}%, Hard Acc: {:.2f}%'.format(Train_loss[-1],
-                                                                                               train_soft_acc,
-                                                                                               train_hard_acc))
-            Epochs.refresh()
-            weights = copy.deepcopy(synthesizer.state_dict())
-            if Train_acc['hard'] and Train_acc['hard'][-1] > best_score:
-                best_score = Train_acc['hard'][-1]
-                best_weights = weights
-        synthesizer.load_state_dict(best_weights)
-        if record_runtime:  # pragma: no cover
-            duration = time.time()-t0
-            runtime_info = {"Architecture": synthesizer.name,
-                            "Number of Epochs": self.epochs, "Runtime (s)": duration}
-            if not os.path.exists(self.storage_path+"/runtime/"):
-                os.mkdir(self.storage_path+"/runtime/")
-            with open(self.storage_path+"/runtime/runtime"+"_"+desc+".json", "w") as file:
-                json.dump(runtime_info, file, indent=3)
-        results_dict = dict()
-        print("Top performance: loss: {:.4f}, soft accuracy: {:.2f}% ... "
-              "hard accuracy: {:.2f}%".format(min(Train_loss), max(Train_acc['soft']), max(Train_acc['hard'])))
-        print()
-        results_dict.update({"Train Max Soft Acc": max(Train_acc['soft']), "Train Max Hard Acc": max(Train_acc['hard']),
-                             "Train Min Loss": min(Train_loss)})
-
-        if save_model:  # pragma: no cover
-            if not os.path.exists(self.storage_path+"/results/"):
-                os.mkdir(self.storage_path+"/results/")
-            with open(self.storage_path+"/results/"+"results"+"_"+desc+".json", "w") as file:
-                json.dump(results_dict, file, indent=3)
+        for model in self.nces.model:
+            model_size = self.show_num_learnable_params(model)
+            if device.type == "cpu":
+                print("Training on CPU, it may take long...")
+            else:
+                print("GPU available !")
+            print()
+            print("#"*50)
+            print()
+            print("{} starts training... \n".format(model.name))
+            print("#"*50, "\n")
+            synthesizer = copy.deepcopy(model).train()
+            desc = synthesizer.name
+            if device.type == "cuda":
+                synthesizer.cuda()
+            opt = self.get_optimizer(synthesizer=synthesizer, optimizer=optimizer)
+            if self.decay_rate:
+                self.scheduler = ExponentialLR(opt, self.decay_rate)
+            Train_loss = []
+            Train_acc = defaultdict(list)
+            best_score = 0.
+            if record_runtime:
+                t0 = time.time()
+            s_acc, h_acc = 0, 0
+            Epochs = trange(self.epochs, desc=f'Loss: {np.nan}, Soft Acc: {s_acc}, Hard Acc: {h_acc}', leave=True)
+            for e in Epochs:
+                soft_acc, hard_acc = [], []
+                train_losses = []
+                for x1, x2, labels in train_dataloader:
+                    target_sequence = self.map_to_token(labels)
+                    if device.type == "cuda":
+                        x1, x2, labels = x1.cuda(), x2.cuda(), labels.cuda()
+                    pred_sequence, scores = synthesizer(x1, x2)
+                    loss = synthesizer.loss(scores, labels)
+                    s_acc, h_acc = self.compute_accuracy(pred_sequence, target_sequence)
+                    soft_acc.append(s_acc)
+                    hard_acc.append(h_acc)
+                    train_losses.append(loss.item())
+                    opt.zero_grad()
+                    loss.backward()
+                    clip_grad_value_(synthesizer.parameters(), clip_value=self.clip_value)
+                    opt.step()
+                    if self.decay_rate:
+                        self.scheduler.step()
+                train_soft_acc, train_hard_acc = np.mean(soft_acc), np.mean(hard_acc)
+                Train_loss.append(np.mean(train_losses))
+                Train_acc['soft'].append(train_soft_acc)
+                Train_acc['hard'].append(train_hard_acc)
+                Epochs.set_description('Loss: {:.4f}, Soft Acc: {:.2f}%, Hard Acc: {:.2f}%'.format(Train_loss[-1],
+                                                                                                   train_soft_acc,
+                                                                                                   train_hard_acc))
+                Epochs.refresh()
+                weights = copy.deepcopy(synthesizer.state_dict())
+                if Train_acc['hard'] and Train_acc['hard'][-1] > best_score:
+                    best_score = Train_acc['hard'][-1]
+                    best_weights = weights
+            synthesizer.load_state_dict(best_weights)
+            if record_runtime:  # pragma: no cover
+                duration = time.time()-t0
+                runtime_info = {"Architecture": synthesizer.name,
+                                "Number of Epochs": self.epochs, "Runtime (s)": duration}
+                if not os.path.exists(self.storage_path+"/runtime/"):
+                    os.mkdir(self.storage_path+"/runtime/")
+                with open(self.storage_path+"/runtime/runtime"+"_"+desc+".json", "w") as file:
+                    json.dump(runtime_info, file, indent=3)
+            results_dict = dict()
+            print("Top performance: loss: {:.4f}, soft accuracy: {:.2f}% ... "
+                  "hard accuracy: {:.2f}%".format(min(Train_loss), max(Train_acc['soft']), max(Train_acc['hard'])))
+            print()
+            results_dict.update({"Train Max Soft Acc": max(Train_acc['soft']), "Train Max Hard Acc": max(Train_acc['hard']),
+                                 "Train Min Loss": min(Train_loss)})
+
+            if save_model:  # pragma: no cover
+                if not os.path.exists(self.storage_path+"/results/"):
+                    os.mkdir(self.storage_path+"/results/")
+                with open(self.storage_path+"/results/"+"results"+"_"+desc+".json", "w") as file:
+                    json.dump(results_dict, file, indent=3)
 
-            if not os.path.exists(self.storage_path+"/trained_models/"):
-                os.mkdir(self.storage_path+"/trained_models/")
-            torch.save(synthesizer.state_dict(), self.storage_path+"/trained_models/"+"trained_"+desc+".pt")
-            print("{} saved".format(synthesizer.name))
-            if not os.path.exists(self.storage_path+"/metrics/"):
-                os.mkdir(self.storage_path+"/metrics/")
-            with open(self.storage_path+"/metrics/"+"metrics_"+desc+".json", "w") as plot_file:
-                json.dump({"soft acc": Train_acc['soft'], "hard acc": Train_acc['hard'], "loss": Train_loss}, plot_file,
-                          indent=3)
+                if not os.path.exists(self.storage_path+"/trained_models/"):
+                    os.mkdir(self.storage_path+"/trained_models/")
+                torch.save(synthesizer.state_dict(), self.storage_path+"/trained_models/"+"trained_"+desc+".pt")
+                print("{} saved".format(synthesizer.name))
+                if not os.path.exists(self.storage_path+"/metrics/"):
+                    os.mkdir(self.storage_path+"/metrics/")
+                with open(self.storage_path+"/metrics/"+"metrics_"+desc+".json", "w") as plot_file:
+                    json.dump({"soft acc": Train_acc['soft'], "hard acc": Train_acc['hard'], "loss": Train_loss}, plot_file,
+                              indent=3)
diff --git a/ontolearn/scripts/run.py b/ontolearn/scripts/run.py
index 84ac29ca..cb08d56e 100644
--- a/ontolearn/scripts/run.py
+++ b/ontolearn/scripts/run.py
@@ -148,7 +148,7 @@ def get_nces(data: dict):
     nces = NCES(knowledge_base_path=args.path_knowledge_base,
                 path_of_embeddings=get_embedding_path("https://files.dice-research.org/projects/NCES/NCES_Ontolearn_Data/NCESData.zip", args.path_knowledge_base),
                 quality_func=F1(),
-                pretrained_model_name=["LSTM", "GRU", "SetTransformer"],
+                learner_names=["LSTM", "GRU", "SetTransformer"],
                 num_predictions=64
                 )
     return nces
diff --git a/tests/test_nces.py b/tests/test_nces.py
index cddc7ae3..56076471 100644
--- a/tests/test_nces.py
+++ b/tests/test_nces.py
@@ -33,7 +33,7 @@ class TestNCES(unittest.TestCase):
     def test_prediction_quality_family(self):
         nces = NCES(knowledge_base_path="./NCESData/family/family.owl", quality_func=F1(), num_predictions=100,
                     path_of_embeddings="./NCESData/family/embeddings/ConEx_entity_embeddings.csv",
-                    pretrained_model_name=["LSTM", "GRU", "SetTransformer"])
+                    learner_names=["LSTM", "GRU", "SetTransformer"])
         KB = KnowledgeBase(path=nces.knowledge_base_path)
         dl_parser = DLSyntaxParser(nces.kb_namespace)
         brother = dl_parser.parse('Brother')
@@ -47,7 +47,7 @@ def test_prediction_quality_family(self):
     def test_prediction_quality_mutagenesis(self):
         nces = NCES(knowledge_base_path="./NCESData/mutagenesis/mutagenesis.owl", quality_func=F1(), num_predictions=100,
                     path_of_embeddings="./NCESData/mutagenesis/embeddings/ConEx_entity_embeddings.csv",
-                    pretrained_model_name=["LSTM", "GRU", "SetTransformer"])
+                    learner_names=["LSTM", "GRU", "SetTransformer"])
         KB = KnowledgeBase(path=nces.knowledge_base_path)
         dl_parser = DLSyntaxParser(nces.kb_namespace)
         exists_inbond = dl_parser.parse('∃ hasStructure.Benzene')