Skip to content

Commit

Permalink
🐛 Fix very long runtimes for Artifact.describe (#2273)
Browse files Browse the repository at this point in the history
  • Loading branch information
Koncopd authored Dec 11, 2024
1 parent 0f90711 commit e706635
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 19 deletions.
47 changes: 30 additions & 17 deletions lamindb/core/_django.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from functools import reduce

from django.contrib.postgres.aggregates import ArrayAgg
from django.db import connection
from django.db.models import F, OuterRef, Q, Subquery
Expand Down Expand Up @@ -81,15 +83,6 @@ def get_artifact_with_related(
id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}")
)

for name in m2m_relations:
related_model = get_related_model(model, name)
name_field = get_name_field(related_model)
annotations[f"m2mfield_{name}"] = ArrayAgg(
JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}")),
filter=Q(**{f"{name}__isnull": False}),
distinct=True,
)

for link in link_tables:
link_model = getattr(model, link).rel.related_model
if not hasattr(link_model, "feature"):
Expand Down Expand Up @@ -137,9 +130,7 @@ def get_artifact_with_related(

related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}}
for k, v in artifact_meta.items():
if k.startswith("m2mfield_"):
related_data["m2m"][k[9:]] = v
elif k.startswith("fkfield_"):
if k.startswith("fkfield_"):
related_data["fk"][k[8:]] = v
elif k.startswith("linkfield_"):
related_data["link"][k[10:]] = v
Expand All @@ -149,11 +140,33 @@ def get_artifact_with_related(
artifact, {i["featureset"]: i["slot"] for i in v}
)

related_data["m2m"] = {
k: {item["id"]: item["name"] for item in v}
for k, v in related_data["m2m"].items()
if v
}
if len(m2m_relations) == 0:
m2m_any = False
else:
m2m_any_expr = reduce(
lambda a, b: a | b,
(Q(**{f"{m2m_name}__isnull": False}) for m2m_name in m2m_relations),
)
# Check once whether the record has any non-empty m2m relation; this avoids
# querying every m2m relation individually when all of them are empty.
m2m_any = (
model.objects.using(artifact._state.db)
.filter(uid=artifact.uid)
.filter(m2m_any_expr)
.exists()
)
if m2m_any:
m2m_data = related_data["m2m"]
for m2m_name in m2m_relations:
related_model = get_related_model(model, m2m_name)
name_field = get_name_field(related_model)
m2m_records = (
getattr(artifact, m2m_name).values_list("id", name_field).distinct()
)
for rec_id, rec_name in m2m_records:
if m2m_name not in m2m_data:
m2m_data[m2m_name] = {}
m2m_data[m2m_name][rec_id] = rec_name

return {
**{name: artifact_meta[name] for name in ["id", "uid"]},
Expand Down
6 changes: 4 additions & 2 deletions lamindb/core/_feature_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def describe_features(
fs_data = _get_featuresets_postgres(self, related_data=related_data)
for fs_id, (slot, data) in fs_data.items():
for registry_str, feature_names in data.items():
feature_set = FeatureSet.get(id=fs_id)
feature_set = FeatureSet.objects.using(self._state.db).get(id=fs_id)
feature_set_data[slot] = (feature_set, feature_names)
for feature_name in feature_names:
feature_data[feature_name] = (slot, registry_str)
Expand Down Expand Up @@ -492,7 +492,9 @@ def describe_features(
for child in ext_features_tree_children:
ext_features_tree.add(child)
if with_labels:
labels_tree = describe_labels(self, as_subtree=True)
# avoid querying the db if the labels were queried already
labels_data = related_data.get("m2m") if related_data is not None else None
labels_tree = describe_labels(self, labels_data=labels_data, as_subtree=True)
if labels_tree:
tree.add(labels_tree)

Expand Down

0 comments on commit e706635

Please sign in to comment.