Skip to content

Commit

Permalink
refactor: drop django-tree-queries & implement array-based tree funct…
Browse files Browse the repository at this point in the history
…ions

The Django-Tree-Queries, together with some visibility queries, causes
a ton of heavy performance problems. Therefore, we now implement the
tree functionality for Scopes by using an array field to "cache" all
the parent PKs of every scope.

This allows us to write some much more efficient queries that don't
rely on CTEs, and enables subqueries where we previously had to explode
a QS to perform the same functionality.
  • Loading branch information
winged committed Jun 13, 2024
1 parent 03c2216 commit 30ed6c4
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 538 deletions.
2 changes: 1 addition & 1 deletion emeis/core/migrations/0010_scope_full_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def set_full_name(apps, schema_editor):
scope_model = apps.get_model("emeis_core.scope")
for scope in scope_model.objects.all().iterator():
# explicitly trigger the set_full_name signal handler
models.set_full_name(instance=scope, sender=set_full_name)
models.set_full_name_and_parents(instance=scope, sender=set_full_name)
scope.save()


Expand Down
54 changes: 54 additions & 0 deletions emeis/core/migrations/0012_parents_as_arrayfield.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Generated by Django 3.2.25 on 2024-06-13 07:05

import django.contrib.postgres.fields
import django.contrib.postgres.indexes
from django.db import migrations, models
import django.db.models.deletion


def set_all_parents(apps, schema_editor):
from emeis.core.models import set_full_name_and_parents

scope_model = apps.get_model("emeis_core.scope")
for scope in scope_model.objects.all().iterator():
# explicitly trigger the set_full_name signal handler
set_full_name_and_parents(instance=scope, sender=set_all_parents)
scope.save()


class Migration(migrations.Migration):
dependencies = [
("emeis_core", "0011_mptt_to_tree_queries"),
]

operations = [
migrations.AlterModelOptions(
name="scope",
options={"ordering": ["name"]},
),
migrations.AddField(
model_name="scope",
name="all_parents",
field=django.contrib.postgres.fields.ArrayField(
base_field=models.UUIDField(), default=list, size=None
),
),
migrations.AlterField(
model_name="scope",
name="parent",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="children",
to="emeis_core.scope",
),
),
migrations.AddIndex(
model_name="scope",
index=django.contrib.postgres.indexes.GinIndex(
fields=["all_parents"], name="emeis_core__all_par_f8231c_gin"
),
),
migrations.RunPython(set_all_parents, migrations.RunPython.noop),
]
140 changes: 85 additions & 55 deletions emeis/core/models.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import operator
import unicodedata
import uuid
from functools import reduce

from django.conf import settings
from django.contrib.auth.models import AbstractBaseUser, UserManager
from django.contrib.auth.validators import UnicodeUsernameValidator
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.core.exceptions import ValidationError
from django.db import models
from django.db.models import Exists, OuterRef, Q, Subquery
from django.db.models.signals import pre_save
from django.dispatch import receiver
from django.utils import timezone, translation
from django.utils.translation import gettext_lazy as _
from localized_fields.fields import LocalizedCharField, LocalizedTextField
from tree_queries.models import TreeNode, TreeQuerySet


def make_uuid():
Expand Down Expand Up @@ -160,59 +162,34 @@ def is_authenticated(self):
return True


class ScopeQuerySet(TreeQuerySet):
# django-tree-queries sadly does not (yet?) support ancestors query
# for QS - only for single nodes. So we're providing all_descendants()
# and all_ancestors() queryset methods.

class ScopeQuerySet(models.QuerySet):
def all_descendants(self, include_self=False):
"""Return a QS that contains all descendants of the given QS.
"""Return a QS that contains all descendants."""
expr = Q(all_parents__overlap=self.aggregate(all_pks=ArrayAgg("pk"))["all_pks"])

This is a workaround for django-tree-queries, which currently does
not support this query (it can only do it on single nodes).
if include_self:
expr = expr | Q(pk__in=self)

This is in contrast to .descendants(), which can only give the descendants
of one model instance.
"""
descendants_q = reduce(
operator.or_,
[
models.Q(
pk__in=entry.descendants(include_self=include_self).values("pk")
)
for entry in self
],
models.Q(),
)
return self.model.objects.filter(descendants_q)
return Scope.objects.filter(expr)

def all_ancestors(self, include_self=False):
"""Return a QS that contains all ancestors of the given QS.
"""Return a QS that contains all ancestors."""

This is a workaround for django-tree-queries, which currently does
not support this query (it can only do it on single nodes).
filter_qs = self.filter(all_parents__contains=[OuterRef("pk")])

This is in contrast to .ancestors(), which can only give the ancestors
of one model instance.
"""
new_qs = Scope.objects.all().annotate(_is_ancestor=Exists(Subquery(filter_qs)))
expr = Q(_is_ancestor=True)

descendants_q = reduce(
operator.or_,
[
models.Q(pk__in=entry.ancestors(include_self=include_self).values("pk"))
for entry in self
],
models.Q(),
)
return self.model.objects.filter(descendants_q)
if include_self:
expr = expr | Q(pk__in=self)

return new_qs.filter(expr)

def all_roots(self):
return Scope.objects.all().filter(
pk__in=[scope.ancestors(include_self=True).first().pk for scope in self]
)
return self.all_ancestors(include_self=True).filter(parent__isnull=True)


class Scope(TreeNode, UUIDModel):
class Scope(UUIDModel):
name = LocalizedCharField(_("scope name"), blank=False, null=False, required=False)

full_name = LocalizedCharField(
Expand All @@ -224,26 +201,67 @@ class Scope(TreeNode, UUIDModel):
)
is_active = models.BooleanField(default=True)

objects = ScopeQuerySet.as_manager(with_tree_fields=True)
objects = ScopeQuerySet.as_manager()

parent = models.ForeignKey(
"Scope",
null=True,
blank=True,
on_delete=models.CASCADE,
related_name="children",
)

all_parents = ArrayField(models.UUIDField(null=False), default=list)

def ancestors(self, include_self=False):
expr = Q(pk__in=self.all_parents)
if include_self:
expr = expr | Q(pk=self.pk)
return Scope.objects.all().filter(expr)

def descendants(self, include_self=False):
expr = Q(all_parents__contains=[self.pk])

if include_self:
expr = expr | Q(pk=self.pk)

return Scope.objects.all().filter(expr)

def get_root(self):
return self.ancestors(include_self=True).first()
if self.parent_id:
return Scope.objects.get(pk=self.all_parents[0])
else:
return self

def save(self, *args, **kwargs):
# django-tree-queries does validation in TreeNode.clean(), which is not
# called by DRF (only by django forms), so we have to do this here
self.clean()
self._ensure_no_loop()
return super().save(*args, **kwargs)

def _ensure_no_loop(self):
parent = self.parent
while parent:
if parent == self:
raise ValidationError(
"A node cannot be made a descendant or parent of itself"
)
parent = parent.parent

def __str__(self):
return f"{type(self).__name__} ({self.full_name}, pk={self.pk})"

class Meta:
ordering = ["name"]
indexes = [GinIndex(fields=["all_parents"])]


@receiver(pre_save, sender=Scope)
def set_full_name(instance, sender, **kwargs):
def set_full_name_and_parents(instance, sender, **kwargs):
"""Update the `full_name` and `all_parents` properties of the Scope.
The full name depends on the complete list of parents of the Scope.
And to ensure correct behaviour in the queries, the `all_parents`
attribute needs to be updated as well
"""
if kwargs.get("raw"): # pragma: no cover
# Raw is set while loading fixtures. In those
# cases we don't want to mess with data that
Expand All @@ -255,6 +273,9 @@ def set_full_name(instance, sender, **kwargs):

forced_lang = settings.EMEIS_FORCE_MODEL_LOCALE.get("scope", None)

old_all_parents = [*instance.all_parents]
old_full_name = {**instance.full_name}

if forced_lang:
# If scope is forced monolingual, do not fill non-forced language fields
languages = [forced_lang]
Expand All @@ -263,25 +284,34 @@ def set_full_name(instance, sender, **kwargs):
with translation.override(lang):
instance.full_name[lang] = str(instance.name)

parent_ids = []
parent = instance.parent
while parent:
parent_ids.append(parent.pk)
for lang in languages:
with translation.override(lang):
new_fullname = f"{parent.name} {sep} {instance.full_name[lang]}"
instance.full_name[lang] = new_fullname
parent = parent.parent

# make it root-first
parent_ids.reverse()
instance.all_parents = parent_ids

if forced_lang:
# Ensure only the "forced" language full_name is set, and nothing else
full_name = instance.full_name[forced_lang]
instance.full_name.clear()
instance.full_name[forced_lang] = full_name

# Force update of all children (recursively)
for child in instance.children.all():
# save() triggers the set_full_name signal handler, which will
# recurse all the way down, updating the full_name
child.save()
if old_all_parents != instance.all_parents or old_full_name != dict(
instance.full_name
):
# Something changed - force update all children (recursively)
for child in instance.children.all():
# save() triggers the signal handler, which will
# recurse all the way down, updating the full_name
child.save()


class Role(SlugModel):
Expand Down
18 changes: 3 additions & 15 deletions emeis/core/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,24 +80,12 @@ class ScopeSerializer(BaseSerializer):
level = serializers.SerializerMethodField()

def get_level(self, obj):
depth = getattr(obj, "tree_depth", None)
if depth is not None:
return depth

# Note: This should only happen on CREATE, never in GET (Either list,
# detail, or include!) In CREATE, it's a new object that doesn't come
# from a QS

# Sometimes, the model object may come out of a non-django-tree-queries
# QS, and thus would not have the `tree_*` attributes amended. Then we
# need to go the "slow path"
if not obj.pk and obj.parent_id:
# unsaved object, sometimes used in unit tests etc
# unsaved object, sometimes used in unit tests etc. Can't rely on
# `all_parents` being set just yet
return self.get_level(obj.parent) + 1

if obj.parent_id:
return obj.ancestors().count()
return 0
return len(obj.all_parents)

class Meta:
model = Scope
Expand Down
Loading

0 comments on commit 30ed6c4

Please sign in to comment.