Skip to content

Commit

Permalink
Fixed #29984 -- Added QuerySet.iterator() support for prefetching rel…
Browse files Browse the repository at this point in the history
…ated objects.

Co-authored-by: Raphael Kimmig <[email protected]>
Co-authored-by: Simon Charette <[email protected]>
  • Loading branch information
3 people authored and felixxm committed Jan 25, 2022
1 parent c27932e commit edbf930
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 14 deletions.
39 changes: 33 additions & 6 deletions django/db/models/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import copy
import operator
import warnings
from itertools import chain
from itertools import chain, islice

import django
from django.conf import settings
Expand All @@ -23,6 +23,7 @@
from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
from django.db.models.utils import create_namedtuple_class, resolve_callables
from django.utils import timezone
from django.utils.deprecation import RemovedInDjango50Warning
from django.utils.functional import cached_property, partition

# The maximum number of results to fetch in a get() query.
Expand Down Expand Up @@ -356,14 +357,40 @@ def __or__(self, other):
####################################

def _iterator(self, use_chunked_fetch, chunk_size):
yield from self._iterable_class(self, chunked_fetch=use_chunked_fetch, chunk_size=chunk_size)
iterable = self._iterable_class(
self,
chunked_fetch=use_chunked_fetch,
chunk_size=chunk_size or 2000,
)
if not self._prefetch_related_lookups or chunk_size is None:
yield from iterable
return

iterator = iter(iterable)
while results := list(islice(iterator, chunk_size)):
prefetch_related_objects(results, *self._prefetch_related_lookups)
yield from results

def iterator(self, chunk_size=2000):
def iterator(self, chunk_size=None):
"""
An iterator over the results from applying this QuerySet to the
database.
"""
if chunk_size <= 0:
database. chunk_size must be provided for QuerySets that prefetch
related objects. Otherwise, a default chunk_size of 2000 is supplied.
"""
if chunk_size is None:
if self._prefetch_related_lookups:
# When the deprecation ends, replace with:
# raise ValueError(
# 'chunk_size must be provided when using '
# 'QuerySet.iterator() after prefetch_related().'
# )
warnings.warn(
'Using QuerySet.iterator() after prefetch_related() '
'without specifying chunk_size is deprecated.',
category=RemovedInDjango50Warning,
stacklevel=2,
)
elif chunk_size <= 0:
raise ValueError('Chunk size must be strictly positive.')
use_chunked_fetch = not connections[self.db].settings_dict.get('DISABLE_SERVER_SIDE_CURSORS')
return self._iterator(use_chunked_fetch, chunk_size)
Expand Down
4 changes: 4 additions & 0 deletions docs/internals/deprecation.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ details on these changes.

* ``django.contrib.sessions.serializers.PickleSerializer`` will be removed.

* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
objects without providing the ``chunk_size`` argument will no longer be
allowed.

.. _deprecation-removed-in-4.1:

4.1
Expand Down
40 changes: 33 additions & 7 deletions docs/ref/models/querysets.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1215,8 +1215,10 @@ could be generated, which, depending on the database, might have performance
problems of its own when it comes to parsing or executing the SQL query. Always
profile for your use case!

Note that if you use ``iterator()`` to run the query, ``prefetch_related()``
calls will be ignored since these two optimizations do not make sense together.
.. versionchanged:: 4.1

If you use ``iterator()`` to run the query, ``prefetch_related()``
calls will only be observed if a value for ``chunk_size`` is provided.

You can use the :class:`~django.db.models.Prefetch` object to further control
the prefetch operation.
Expand Down Expand Up @@ -2341,7 +2343,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary.
``iterator()``
~~~~~~~~~~~~~~

.. method:: iterator(chunk_size=2000)
.. method:: iterator(chunk_size=None)

Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
(see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
Expand All @@ -2355,12 +2357,34 @@ performance and a significant reduction in memory.
Note that using ``iterator()`` on a ``QuerySet`` which has already been
evaluated will force it to evaluate again, repeating the query.

Also, use of ``iterator()`` causes previous ``prefetch_related()`` calls to be
ignored since these two optimizations do not make sense together.
``iterator()`` is compatible with previous calls to ``prefetch_related()`` as
long as ``chunk_size`` is given. Larger values will necessitate fewer queries
to accomplish the prefetching at the cost of greater memory usage.

On some databases (e.g. Oracle, `SQLite
<https://www.sqlite.org/limits.html#max_variable_number>`_), the maximum number
of terms in an SQL ``IN`` clause might be limited. Hence values below this
limit should be used. (In particular, when prefetching across two or more
relations, a ``chunk_size`` should be small enough that the anticipated number
of results for each prefetched relation still falls below the limit.)

So long as the QuerySet does not prefetch any related objects, providing no
value for ``chunk_size`` will result in Django using an implicit default of
2000.

Depending on the database backend, query results will either be loaded all at
once or streamed from the database using server-side cursors.

.. versionchanged:: 4.1

Support for prefetching related objects was added.

.. deprecated:: 4.1

Using ``iterator()`` on a queryset that prefetches related objects without
providing the ``chunk_size`` is deprecated. In Django 5.0, an exception
will be raise.

With server-side cursors
^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -2399,8 +2423,10 @@ The ``chunk_size`` parameter controls the size of batches Django retrieves from
the database driver. Larger batches decrease the overhead of communicating with
the database driver at the expense of a slight increase in memory consumption.

The default value of ``chunk_size``, 2000, comes from `a calculation on the
psycopg mailing list <https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:
So long as the QuerySet does not prefetch any related objects, providing no
value for ``chunk_size`` will result in Django using an implicit default of
2000, a value derived from `a calculation on the psycopg mailing list
<https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:

Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
is going to fetch less than 100KB of data, which seems a good compromise
Expand Down
9 changes: 9 additions & 0 deletions docs/releases/4.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,10 @@ Models
insertion fails uniqueness constraints. This is supported on MariaDB, MySQL,
PostgreSQL, and SQLite 3.24+.

* :meth:`.QuerySet.iterator` now supports prefetching related objects as long
as the ``chunk_size`` argument is provided. In older versions, no prefetching
was done.

Requests and Responses
~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -430,6 +434,11 @@ Miscellaneous
* ``django.contrib.sessions.serializers.PickleSerializer`` is deprecated due to
the risk of remote code execution.

* The usage of ``QuerySet.iterator()`` on a queryset that prefetches related
objects without providing the ``chunk_size`` argument is deprecated. In older
versions, no prefetching was done. Providing a value for ``chunk_size``
signifies that the additional query per chunk needed to prefetch is desired.

Features removed in 4.1
=======================

Expand Down
35 changes: 34 additions & 1 deletion tests/prefetch_related/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from django.db.models.query import get_prefetcher
from django.db.models.sql import Query
from django.test import TestCase, override_settings
from django.test.utils import CaptureQueriesContext
from django.test.utils import CaptureQueriesContext, ignore_warnings
from django.utils.deprecation import RemovedInDjango50Warning

from .models import (
Article, Author, Author2, AuthorAddress, AuthorWithAge, Bio, Book,
Expand Down Expand Up @@ -316,6 +317,38 @@ def test_named_values_list(self):
['Anne', 'Charlotte', 'Emily', 'Jane'],
)

def test_m2m_prefetching_iterator_with_chunks(self):
with self.assertNumQueries(3):
authors = [
b.authors.first()
for b in Book.objects.prefetch_related('authors').iterator(chunk_size=2)
]
self.assertEqual(
authors,
[self.author1, self.author1, self.author3, self.author4],
)

@ignore_warnings(category=RemovedInDjango50Warning)
def test_m2m_prefetching_iterator_without_chunks(self):
# prefetch_related() is ignored.
with self.assertNumQueries(5):
authors = [
b.authors.first()
for b in Book.objects.prefetch_related('authors').iterator()
]
self.assertEqual(
authors,
[self.author1, self.author1, self.author3, self.author4],
)

def test_m2m_prefetching_iterator_without_chunks_warning(self):
msg = (
'Using QuerySet.iterator() after prefetch_related() without '
'specifying chunk_size is deprecated.'
)
with self.assertWarnsMessage(RemovedInDjango50Warning, msg):
Book.objects.prefetch_related('authors').iterator()


class RawQuerySetTests(TestDataMixin, TestCase):
def test_basic(self):
Expand Down

0 comments on commit edbf930

Please sign in to comment.