From e706635c1547b1fbae4b54c00ac7a448234b629e Mon Sep 17 00:00:00 2001 From: Sergei Rybakov Date: Wed, 11 Dec 2024 21:00:22 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20very=20long=20runtimes=20f?= =?UTF-8?q?or=20Artifact.describe=20(#2273)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/core/_django.py | 47 ++++++++++++++++++++------------ lamindb/core/_feature_manager.py | 6 ++-- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/lamindb/core/_django.py b/lamindb/core/_django.py index 0e174e120..4c72dfa02 100644 --- a/lamindb/core/_django.py +++ b/lamindb/core/_django.py @@ -1,5 +1,7 @@ from __future__ import annotations +from functools import reduce + from django.contrib.postgres.aggregates import ArrayAgg from django.db import connection from django.db.models import F, OuterRef, Q, Subquery @@ -81,15 +83,6 @@ def get_artifact_with_related( id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}") ) - for name in m2m_relations: - related_model = get_related_model(model, name) - name_field = get_name_field(related_model) - annotations[f"m2mfield_{name}"] = ArrayAgg( - JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}")), - filter=Q(**{f"{name}__isnull": False}), - distinct=True, - ) - for link in link_tables: link_model = getattr(model, link).rel.related_model if not hasattr(link_model, "feature"): @@ -137,9 +130,7 @@ def get_artifact_with_related( related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}} for k, v in artifact_meta.items(): - if k.startswith("m2mfield_"): - related_data["m2m"][k[9:]] = v - elif k.startswith("fkfield_"): + if k.startswith("fkfield_"): related_data["fk"][k[8:]] = v elif k.startswith("linkfield_"): related_data["link"][k[10:]] = v @@ -149,11 +140,33 @@ def get_artifact_with_related( artifact, {i["featureset"]: i["slot"] for i in v} ) - related_data["m2m"] = { - k: {item["id"]: item["name"] for item in v} - for k, v in related_data["m2m"].items() - if v - } + if len(m2m_relations) == 0: + m2m_any = False + else: + m2m_any_expr = reduce( + lambda a, b: a | b, + (Q(**{f"{m2m_name}__isnull": False}) for m2m_name in m2m_relations), + ) + # this is needed to avoid querying all m2m relations even if they are all empty + # this checks if non-empty m2m relations are present in the record + m2m_any = ( + model.objects.using(artifact._state.db) + .filter(uid=artifact.uid) + .filter(m2m_any_expr) + .exists() + ) + if m2m_any: + m2m_data = related_data["m2m"] + for m2m_name in m2m_relations: + related_model = get_related_model(model, m2m_name) + name_field = get_name_field(related_model) + m2m_records = ( + getattr(artifact, m2m_name).values_list("id", name_field).distinct() + ) + for rec_id, rec_name in m2m_records: + if m2m_name not in m2m_data: + m2m_data[m2m_name] = {} + m2m_data[m2m_name][rec_id] = rec_name return { **{name: artifact_meta[name] for name in ["id", "uid"]}, diff --git a/lamindb/core/_feature_manager.py b/lamindb/core/_feature_manager.py index f07a3bb2a..43968e1e7 100644 --- a/lamindb/core/_feature_manager.py +++ b/lamindb/core/_feature_manager.py @@ -333,7 +333,7 @@ def describe_features( fs_data = _get_featuresets_postgres(self, related_data=related_data) for fs_id, (slot, data) in fs_data.items(): for registry_str, feature_names in data.items(): - feature_set = FeatureSet.get(id=fs_id) + feature_set = FeatureSet.objects.using(self._state.db).get(id=fs_id) feature_set_data[slot] = (feature_set, feature_names) for feature_name in feature_names: feature_data[feature_name] = (slot, registry_str) @@ -492,7 +492,9 @@ def describe_features( for child in ext_features_tree_children: ext_features_tree.add(child) if with_labels: - labels_tree = describe_labels(self, as_subtree=True) + # avoid querying the db if the labels were queried already + labels_data = related_data.get("m2m") if related_data is not None else None + labels_tree = describe_labels(self, labels_data=labels_data, as_subtree=True) if labels_tree: tree.add(labels_tree)