diff --git a/azureml-designer-modules/entries/score_sar_entry.py b/azureml-designer-modules/entries/score_sar_entry.py
index 6aaeaacb83..f7aeb7abd3 100644
--- a/azureml-designer-modules/entries/score_sar_entry.py
+++ b/azureml-designer-modules/entries/score_sar_entry.py
@@ -3,11 +3,11 @@
 from enum import Enum
 from pathlib import Path
 import joblib
-import time
 
 from azureml.studio.core.data_frame_schema import DataFrameSchema
 from azureml.studio.core.logger import module_logger as logger
 from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory, save_data_frame_to_directory
+from azureml.studio.core.io.model_directory import load_model_from_directory
 
 
 class ScoreType(Enum):
@@ -26,26 +26,41 @@ class ItemSet(Enum):
     SCORE_ONLY = 'Items in score set'
 
 
-MODEL_NAME = 'sar_model'
+def joblib_loader(load_from_dir, model_spec):
+    file_name = model_spec['file_name']
+    with open(Path(load_from_dir) / file_name, 'rb') as fin:
+        return joblib.load(fin)
 
 
-def recommend_items(model, data, ranking_metric, top_k, sort_top_k, remove_seen, normalize):
-    if ranking_metric == RankingMetric.RATING:
-        return model.recommend_k_items(test=data, top_k=top_k, sort_top_k=sort_top_k, remove_seen=remove_seen,
-                                       normalize=normalize)
-    if ranking_metric == RankingMetric.SIMILARITY:
-        return model.get_item_based_topk(items=data, top_k=top_k, sort_top_k=sort_top_k)
-    if ranking_metric == RankingMetric.POPULARITY:
-        return model.get_popularity_based_topk(top_k=top_k, sort_top_k=sort_top_k)
-    raise ValueError(f"Got unexpected ranking metric: {ranking_metric}.")
+class ScoreSARModule:
+    def __init__(self, model, input_data):
+        self._model = model
+        self._input_data = input_data
 
+    @property
+    def model(self):
+        return self._model
 
-def predict_ratings(model, data, items_to_predict, normalize):
-    if items_to_predict == ItemSet.TRAIN_ONLY:
-        return model.predict_training_items(test=data, normalize=normalize)
-    if items_to_predict == ItemSet.SCORE_ONLY:
-        return model.predict(test=data, normalize=normalize)
-    raise ValueError(f"Got unexpected 'items to predict': {items_to_predict}.")
+    @property
+    def input_data(self):
+        return self._input_data
+
+    def recommend_items(self, ranking_metric, top_k, sort_top_k, remove_seen, normalize):
+        if ranking_metric == RankingMetric.RATING:
+            return self.model.recommend_k_items(test=self.input_data, top_k=top_k, sort_top_k=sort_top_k,
+                                                remove_seen=remove_seen, normalize=normalize)
+        if ranking_metric == RankingMetric.SIMILARITY:
+            return self.model.get_item_based_topk(items=self.input_data, top_k=top_k, sort_top_k=sort_top_k)
+        if ranking_metric == RankingMetric.POPULARITY:
+            return self.model.get_popularity_based_topk(top_k=top_k, sort_top_k=sort_top_k)
+        raise ValueError(f"Got unexpected ranking metric: {ranking_metric}.")
+
+    def predict_ratings(self, items_to_predict, normalize):
+        if items_to_predict == ItemSet.TRAIN_ONLY:
+            return self.model.predict_training_items(test=self.input_data, normalize=normalize)
+        if items_to_predict == ItemSet.SCORE_ONLY:
+            return self.model.predict(test=self.input_data, normalize=normalize)
+        raise ValueError(f"Got unexpected 'items to predict': {items_to_predict}.")
 
 
 if __name__ == '__main__':
@@ -75,38 +90,26 @@ def predict_ratings(model, data, items_to_predict, normalize):
     args, _ = parser.parse_known_args()
     logger.info(f"Arguments: {args}")
 
-
-    with open(Path(args.trained_model) / MODEL_NAME, 'rb') as f:
-        sar_model = joblib.load(f)
-
-    dataset_to_score = load_data_frame_from_directory(args.dataset_to_score).data
-    logger.debug(f"Shape of loaded DataFrame: {dataset_to_score.shape}")
-
     sort_top_k = strtobool(args.sort_top_k) if args.sort_top_k else None
     remove_seen_items = strtobool(args.remove_seen_items) if args.remove_seen_items else None
     normalize = strtobool(args.normalize) if args.normalize else None
 
-    start_time = time.time()
+    sar_model = load_model_from_directory(args.trained_model, model_loader=joblib_loader).data
+    dataset_to_score = load_data_frame_from_directory(args.dataset_to_score).data
+    logger.debug(f"Shape of loaded DataFrame: {dataset_to_score.shape}")
+
+    score_sar_module = ScoreSARModule(model=sar_model, input_data=dataset_to_score)
 
     score_type = ScoreType(args.score_type)
     if score_type == ScoreType.ITEM_RECOMMENDATION:
-        score_result = recommend_items(model=sar_model,
-                                       data=dataset_to_score,
-                                       ranking_metric=RankingMetric(args.ranking_metric),
-                                       top_k=args.top_k,
-                                       sort_top_k=sort_top_k,
-                                       remove_seen=args.remove_seen_items,
-                                       normalize=normalize)
+        score_result = score_sar_module.recommend_items(ranking_metric=RankingMetric(args.ranking_metric),
+                                                        top_k=args.top_k, sort_top_k=sort_top_k,
+                                                        remove_seen=args.remove_seen_items, normalize=normalize)
     elif score_type == ScoreType.RATING_PREDICTION:
-        score_result = predict_ratings(model=sar_model,
-                                       data=dataset_to_score,
-                                       items_to_predict=ItemSet(args.items_to_predict),
-                                       normalize=normalize)
+        score_result = score_sar_module.predict_ratings(items_to_predict=ItemSet(args.items_to_predict),
+                                                        normalize=normalize)
     else:
         raise ValueError(f"Got unexpected score type: {score_type}.")
 
-    test_time = time.time() - start_time
-    logger.debug("Took {} seconds for score.\n".format(test_time))
-
     save_data_frame_to_directory(args.score_result, data=score_result,
                                  schema=DataFrameSchema.data_frame_to_dict(score_result))
diff --git a/azureml-designer-modules/entries/train_sar_entry.py b/azureml-designer-modules/entries/train_sar_entry.py
index 1d81aa1f27..b49e5c8441 100644
--- a/azureml-designer-modules/entries/train_sar_entry.py
+++ b/azureml-designer-modules/entries/train_sar_entry.py
@@ -7,10 +7,26 @@
 from reco_utils.recommender.sar import SAR
 
 from azureml.studio.core.logger import module_logger as logger
+from azureml.studio.core.utils.fileutils import ensure_folder
 from azureml.studio.core.io.data_frame_directory import load_data_frame_from_directory
+from azureml.studio.core.io.model_directory import save_model_to_directory
 
 
-MODEL_NAME = 'sar_model'
+def joblib_dumper(data, file_name=None):
+    """Return a dumper to dump a model with pickle."""
+    if not file_name:
+        file_name = '_data.pkl'
+
+    def model_dumper(save_to):
+        full_path = Path(save_to) / file_name
+        ensure_folder(Path(save_to))
+        with open(full_path, 'wb') as fout:
+            joblib.dump(data, fout, protocol=4)
+
+        model_spec = {'model_type': 'joblib', 'file_name': file_name}
+        return model_spec
+
+    return model_dumper
 
 
 if __name__ == '__main__':
@@ -49,8 +65,5 @@
     train_time = time.time() - start_time
     print("Took {} seconds for training.".format(train_time))
 
-    model_dir = Path(args.output_model)
-    if not model_dir.exists():
-        model_dir.mkdir()
-    with open(model_dir / MODEL_NAME, 'wb') as f:
-        joblib.dump(model, f, protocol=4)
+    save_model_to_directory(save_to=args.output_model, model_dumper=joblib_dumper(data=model))
+
diff --git a/azureml-designer-modules/module_specs/sar_score.yaml b/azureml-designer-modules/module_specs/sar_score.yaml
index c0bb9ec2dc..78c21e89b7 100644
--- a/azureml-designer-modules/module_specs/sar_score.yaml
+++ b/azureml-designer-modules/module_specs/sar_score.yaml
@@ -1,5 +1,5 @@
 name: SAR Score
-version: 0.0.21
+version: 0.0.23
 category: Experimentation
 description: |
   Python SAR Recommender
diff --git a/azureml-designer-modules/module_specs/sar_train.yaml b/azureml-designer-modules/module_specs/sar_train.yaml
index fdacf7e487..1388ecf393 100644
--- a/azureml-designer-modules/module_specs/sar_train.yaml
+++ b/azureml-designer-modules/module_specs/sar_train.yaml
@@ -1,6 +1,6 @@
 name: SAR Train
 id: efd1af54-0d31-42e1-b3d5-ce3b7c538705
-version: 0.0.11
+version: 0.0.18
 category: Experimentation
 description: "SAR Train from CAT Recommender repo: https://github.com/Microsoft/Recommenders/tree/master/."
 inputs: