From e47076c7c9d228c440d9aae57a7594c63ffc8c13 Mon Sep 17 00:00:00 2001
From: john savvas <149728671+johnsaveus@users.noreply.github.com>
Date: Wed, 23 Oct 2024 13:18:48 +0300
Subject: [PATCH] feat: dataframe,probabilities,jaqpot_row_label (#36)

* dataframe_and_probabilities

* added_row_label

* small_bug

* remove_duplicate

---------

Co-authored-by: Alex Arvanitidis
---
 src/handlers/predict_pyg.py | 58 ++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/src/handlers/predict_pyg.py b/src/handlers/predict_pyg.py
index abe13c1..7683984 100644
--- a/src/handlers/predict_pyg.py
+++ b/src/handlers/predict_pyg.py
@@ -9,20 +9,26 @@
 
 
 def graph_post_handler(request: PredictionRequestPydantic):
-
     feat_config = request.extraConfig["torchConfig"]["featurizerConfig"]
     featurizer = _load_featurizer(feat_config)
     target_name = request.model["dependentFeatures"][0]["name"]
     model_task = request.model["task"]
-    smiles = request.dataset["input"][0]["SMILES"]
-    data = featurizer.featurize(smiles)
+    user_input = request.dataset["input"]
     raw_model = request.model["rawModel"]
+    preds = []
     if request.model["type"] == "TORCH_ONNX":
-        model_output = onnx_post_handler(raw_model, data)
-        return check_model_task(model_task, target_name, model_output)
+        for inp in user_input:
+            model_output = onnx_post_handler(
+                raw_model, featurizer.featurize(inp["SMILES"])
+            )
+            preds.append(check_model_task(model_task, target_name, model_output, inp))
     elif request.model["type"] == "TORCHSCRIPT":
-        model_output = torchscript_post_handler(raw_model, data)
-        return check_model_task(model_task, target_name, model_output)
+        for inp in user_input:
+            model_output = torchscript_post_handler(
+                raw_model, featurizer.featurize(inp["SMILES"])
+            )
+            preds.append(check_model_task(model_task, target_name, model_output, inp))
+    return {"predictions": preds}
 
 
 def onnx_post_handler(raw_model, data):
@@ -60,38 +66,42 @@ def _to_numpy(tensor):
 
 
 def _load_featurizer(config):
-
     featurizer = SmilesGraphFeaturizer()
     featurizer.load_dict(config)
     featurizer.sort_allowable_sets()
     return featurizer
 
 
-def graph_regression(target_name, output):
-    preds = [output.squeeze().tolist()]
+def graph_regression(target_name, output, inp):
+    pred = [output.squeeze().tolist()]
     results = {}
-    results[target_name] = [str(pred) for pred in preds]
-    final_all = {"predictions": [dict(zip(results, t)) for t in zip(*results.values())]}
-    return final_all
+    results["jaqpotMetadata"] = {"jaqpotRowId": inp["jaqpotRowId"]}
+    if "jaqpotRowLabel" in inp:
+        results["jaqpotMetadata"]["jaqpotRowLabel"] = inp["jaqpotRowLabel"]
+    results[target_name] = pred
+    return results
 
 
-def graph_binary_classification(target_name, output):
-    probs = [F.sigmoid(output).squeeze().tolist()]
-    preds = [int(prob > 0.5) for prob in probs]
+def graph_binary_classification(target_name, output, inp):
+    proba = F.sigmoid(output).squeeze().tolist()
+    pred = int(proba > 0.5)
     # UI Results
     results = {}
-    results["Probabilities"] = [str(prob) for prob in probs]
-    results[target_name] = [str(pred) for pred in preds]
-    final_all = {"predictions": [dict(zip(results, t)) for t in zip(*results.values())]}
-    return final_all
-
+    results["jaqpotMetadata"] = {
+        "probabilities": [round((1 - proba), 3), round(proba, 3)],
+        "jaqpotRowId": inp["jaqpotRowId"],
+    }
+    if "jaqpotRowLabel" in inp:
+        results["jaqpotMetadata"]["jaqpotRowLabel"] = inp["jaqpotRowLabel"]
+    results[target_name] = pred
+    return results
 
 
-def check_model_task(model_task, target_name, out):
+def check_model_task(model_task, target_name, out, row_id):
     if model_task == "BINARY_CLASSIFICATION":
-        return graph_binary_classification(target_name, out)
+        return graph_binary_classification(target_name, out, row_id)
     elif model_task == "REGRESSION":
-        return graph_regression(target_name, out)
+        return graph_regression(target_name, out, row_id)
     else:
         raise ValueError(
             "Only BINARY_CLASSIFICATION and REGRESSION tasks are supported"