Performance helpers

Issue #337
django-json-api · Jul 22, 2017 · 779cd9b · 779cd9b
2 parents 1659d8c + 7343def
commit 779cd9b
Show file tree

Hide file tree

Showing 16 changed files with 235 additions and 18 deletions.
diff --git a/.gitignore b/.gitignore
@@ -40,4 +40,7 @@ pip-delete-this-directory.txt
 # VirtualEnv
 .venv/
 
+# Developers
 *.sw*
+manage.py
+.DS_Store
diff --git a/.travis.yml b/.travis.yml
@@ -49,7 +49,7 @@ script:
   - isort --check-only --verbose --recursive --diff rest_framework_json_api
   # example has extra dependencies that are installed in a dev environment
   # but are not installed in CI. Explicitly set those packages.
-  - isort --check-only --verbose --recursive --diff --thirdparty pytest --thirdparty polymorphic --thirdparty pytest_factoryboy example
+  - isort --check-only --verbose --recursive --diff --thirdparty pytest --thirdparty polymorphic --thirdparty pytest_factoryboy --thirdparty packaging  example
   - coverage run setup.py -v test
 after_success:
   - codecov
diff --git a/docs/usage.md b/docs/usage.md
@@ -23,7 +23,12 @@ REST_FRAMEWORK = {
     ),
     'DEFAULT_RENDERER_CLASSES': (
         'rest_framework_json_api.renderers.JSONRenderer',
-        'rest_framework.renderers.BrowsableAPIRenderer',
+        # If you're performance testing, you will want to use the browsable API
+        # without forms, as the forms can generate their own queries.
+        # If performance testing, enable:
+        # 'example.utils.BrowsableAPIRendererWithoutForms',
+        # Otherwise, to play around with the browsable API, enable:
+        'rest_framework.renderers.BrowsableAPIRenderer'
     ),
     'DEFAULT_METADATA_CLASS': 'rest_framework_json_api.metadata.JSONAPIMetadata',
 }
@@ -36,6 +41,12 @@ retrieve the page can be customized by subclassing `PageNumberPagination` and
 overriding the `page_query_param`.  Page size can be controlled per request via
 the `PAGINATE_BY_PARAM` query parameter (`page_size` by default).
 
+#### Performance Testing
+
+If you are trying to see if your viewsets are configured properly to optimize performance,
+it is preferable to use `example.utils.BrowsableAPIRendererWithoutForms` instead of the default `BrowsableAPIRenderer`
+to remove queries introduced by the forms themselves.
+
 ### Serializers
 
 It is recommended to import the base serializer classes from this package
@@ -558,6 +569,43 @@ class QuestSerializer(serializers.ModelSerializer):
 `included_resources` informs DJA of **what** you would like to include.
 `included_serializers` tells DJA **how** you want to include it.
 
+#### Performance improvements
+
+Be aware that using included resources without any form of prefetching **WILL HURT PERFORMANCE** as it will introduce m*(n+1) queries.
+
+A viewset helper was designed to allow for greater flexibility and it is automatically available when subclassing
+`views.ModelViewSet`
+```
+# When MyViewSet is called with ?include=author it will dynamically prefetch author and author.bio
+class MyViewSet(views.ModelViewSet):
+    queryset = Book.objects.all()
+    prefetch_for_includes = {
+        '__all__': [],
+        'author': ['author', 'author__bio'],
+        'category.section': ['category']
+    }
+```
+
+The special keyword `__all__` can be used to specify a prefetch which should be done regardless of the include, similar to making the prefetch yourself on the QuerySet.
+
+Using the helper to prefetch, rather than attempting to minimise queries via select_related, might give you better performance depending on the characteristics of your data and database.
+
+For example:
+
+If you have a single model, e.g. Book, which has four relations e.g. Author, Publisher, CopyrightHolder, Category.
+
+To display 25 books and related models, you would need to either do:
+
+a) 1 query via select_related, e.g. SELECT * FROM books LEFT JOIN author LEFT JOIN publisher LEFT JOIN CopyrightHolder LEFT JOIN Category
+
+b) 4 small queries via prefetch_related.
+
+If you have 1M books, 50k authors, 10k categories, 10k copyrightholders
+in the select_related scenario, you've just created an in-memory table
+with 1e18 rows which will likely exhaust any available memory and
+slow your database to a crawl.
+
+The prefetch_related case will issue 4 queries, but they will be small and fast queries.
 <!--
 ### Relationships
 ### Errors

diff --git a/example/factories/__init__.py → example/factories.py b/example/factories/__init__.py → example/factories.py
@@ -2,8 +2,17 @@
 
 import factory
 from faker import Factory as FakerFactory
+
 from example.models import (
-    Blog, Author, AuthorBio, Entry, Comment, TaggedItem, ArtProject, ResearchProject, Company
+    ArtProject,
+    Author,
+    AuthorBio,
+    Blog,
+    Comment,
+    Company,
+    Entry,
+    ResearchProject,
+    TaggedItem
 )
 
 faker = FakerFactory.create()
@@ -64,7 +73,6 @@ class Meta:
 
 
 class TaggedItemFactory(factory.django.DjangoModelFactory):
-
     class Meta:
         model = TaggedItem
 

diff --git a/example/models.py b/example/models.py
@@ -28,6 +28,9 @@ class TaggedItem(BaseModel):
     def __str__(self):
         return self.tag
 
+    class Meta:
+        ordering = ('id',)
+
 
 @python_2_unicode_compatible
 class Blog(BaseModel):
@@ -38,6 +41,9 @@ class Blog(BaseModel):
     def __str__(self):
         return self.name
 
+    class Meta:
+        ordering = ('id',)
+
 
 @python_2_unicode_compatible
 class Author(BaseModel):
@@ -47,6 +53,9 @@ class Author(BaseModel):
     def __str__(self):
         return self.name
 
+    class Meta:
+        ordering = ('id',)
+
 
 @python_2_unicode_compatible
 class AuthorBio(BaseModel):
@@ -56,6 +65,9 @@ class AuthorBio(BaseModel):
     def __str__(self):
         return self.author.name
 
+    class Meta:
+        ordering = ('id',)
+
 
 @python_2_unicode_compatible
 class Entry(BaseModel):
@@ -73,6 +85,9 @@ class Entry(BaseModel):
     def __str__(self):
         return self.headline
 
+    class Meta:
+        ordering = ('id',)
+
 
 @python_2_unicode_compatible
 class Comment(BaseModel):
@@ -87,6 +102,9 @@ class Comment(BaseModel):
     def __str__(self):
         return self.body
 
+    class Meta:
+        ordering = ('id',)
+
 
 class Project(PolymorphicModel):
     topic = models.CharField(max_length=30)

diff --git a/example/serializers.py b/example/serializers.py
@@ -1,8 +1,8 @@
 from datetime import datetime
 
 import rest_framework
-
 from packaging import version
+
 from rest_framework_json_api import relations, serializers
 
 from example.models import (
@@ -20,14 +20,12 @@
 
 
 class TaggedItemSerializer(serializers.ModelSerializer):
-
     class Meta:
         model = TaggedItem
-        fields = ('tag', )
+        fields = ('tag',)
 
 
 class BlogSerializer(serializers.ModelSerializer):
-
     copyright = serializers.SerializerMethodField()
     tags = TaggedItemSerializer(many=True, read_only=True)
 
@@ -46,12 +44,11 @@ def get_root_meta(self, resource, many):
     class Meta:
         model = Blog
         fields = ('name', 'url', 'tags')
-        read_only_fields = ('tags', )
+        read_only_fields = ('tags',)
         meta_fields = ('copyright',)
 
 
 class EntrySerializer(serializers.ModelSerializer):
-
     def __init__(self, *args, **kwargs):
         super(EntrySerializer, self).__init__(*args, **kwargs)
         # to make testing more concise we'll only output the
@@ -97,15 +94,14 @@ class Meta:
         model = Entry
         fields = ('blog', 'headline', 'body_text', 'pub_date', 'mod_date',
                   'authors', 'comments', 'featured', 'suggested', 'tags')
-        read_only_fields = ('tags', )
+        read_only_fields = ('tags',)
         meta_fields = ('body_format',)
 
     class JSONAPIMeta:
         included_resources = ['comments']
 
 
 class AuthorBioSerializer(serializers.ModelSerializer):
-
     class Meta:
         model = AuthorBio
         fields = ('author', 'body')

diff --git a/example/settings/dev.py b/example/settings/dev.py
@@ -25,6 +25,7 @@
     'rest_framework',
     'polymorphic',
     'example',
+    'debug_toolbar',
 ]
 
 TEMPLATES = [
@@ -58,7 +59,11 @@
 
 PASSWORD_HASHERS = ('django.contrib.auth.hashers.UnsaltedMD5PasswordHasher', )
 
-MIDDLEWARE_CLASSES = ()
+MIDDLEWARE_CLASSES = (
+    'debug_toolbar.middleware.DebugToolbarMiddleware',
+)
+
+INTERNAL_IPS = ('127.0.0.1', )
 
 JSON_API_FORMAT_KEYS = 'camelize'
 JSON_API_FORMAT_TYPES = 'camelize'
@@ -74,6 +79,12 @@
     ),
     'DEFAULT_RENDERER_CLASSES': (
         'rest_framework_json_api.renderers.JSONRenderer',
+
+        # If you're performance testing, you will want to use the browsable API
+        # without forms, as the forms can generate their own queries.
+        # If performance testing, enable:
+        # 'example.utils.BrowsableAPIRendererWithoutForms',
+        # Otherwise, to play around with the browsable API, enable:
         'rest_framework.renderers.BrowsableAPIRenderer',
     ),
     'DEFAULT_METADATA_CLASS': 'rest_framework_json_api.metadata.JSONAPIMetadata',

diff --git a/example/tests/test_performance.py b/example/tests/test_performance.py
@@ -0,0 +1,57 @@
+from django.utils import timezone
+from rest_framework.test import APITestCase
+
+from example.factories import CommentFactory
+from example.models import Author, Blog, Comment, Entry
+
+
+class PerformanceTestCase(APITestCase):
+    def setUp(self):
+        self.author = Author.objects.create(name='Super powerful superhero', email='[email protected]')
+        self.blog = Blog.objects.create(name='Some Blog', tagline="It's a blog")
+        self.other_blog = Blog.objects.create(name='Other blog', tagline="It's another blog")
+        self.first_entry = Entry.objects.create(
+            blog=self.blog,
+            headline='headline one',
+            body_text='body_text two',
+            pub_date=timezone.now(),
+            mod_date=timezone.now(),
+            n_comments=0,
+            n_pingbacks=0,
+            rating=3
+        )
+        self.second_entry = Entry.objects.create(
+            blog=self.blog,
+            headline='headline two',
+            body_text='body_text one',
+            pub_date=timezone.now(),
+            mod_date=timezone.now(),
+            n_comments=0,
+            n_pingbacks=0,
+            rating=1
+        )
+        self.comment = Comment.objects.create(entry=self.first_entry)
+        CommentFactory.create_batch(50)  # 51 comments total, so a page of 25 is full
+
+    def test_query_count_no_includes(self):
+        """ We expect a simple list view to issue only two queries.
+
+        1. The number of results in the set (e.g. a COUNT query),
+           only necessary because we're using PageNumberPagination
+        2. The SELECT query for the set
+        """
+        with self.assertNumQueries(2):
+            response = self.client.get('/comments?page_size=25')
+            self.assertEqual(len(response.data['results']), 25)
+
+    def test_query_count_include_author(self):
+        """ We expect a list view with an include to have four queries:
+
+        1. Primary resource COUNT query
+        2. Primary resource SELECT
+        3. Authors prefetched
+        4. Entries prefetched
+        """
+        with self.assertNumQueries(4):
+            response = self.client.get('/comments?include=author&page_size=25')
+            self.assertEqual(len(response.data['results']), 25)
diff --git a/example/urls.py b/example/urls.py
@@ -1,3 +1,4 @@
+from django.conf import settings
 from django.conf.urls import include, url
 from rest_framework import routers
 
@@ -22,3 +23,10 @@
 urlpatterns = [
     url(r'^', include(router.urls)),
 ]
+
+
+if settings.DEBUG:
+    import debug_toolbar
+    urlpatterns = [
+        url(r'^__debug__/', include(debug_toolbar.urls)),
+    ] + urlpatterns
diff --git a/example/utils.py b/example/utils.py
@@ -0,0 +1,20 @@
+from rest_framework.renderers import BrowsableAPIRenderer
+
+
+class BrowsableAPIRendererWithoutForms(BrowsableAPIRenderer):
+    """Renders the browsable api, but excludes the forms."""
+
+    def get_context(self, *args, **kwargs):
+        """Return the parent context with ``display_edit_forms`` set to False."""
+        # Two-argument super(): the zero-argument form is Python 3 only.
+        ctx = super(BrowsableAPIRendererWithoutForms, self).get_context(*args, **kwargs)
+        ctx['display_edit_forms'] = False
+        return ctx
+
+    def show_form_for_method(self, view, method, request, obj):
+        """We never want to do this! So just return False."""
+        return False
+
+    def get_rendered_html_form(self, data, view, method, request):
+        """Return an empty string in place of rendered HTML so no form is built."""
+        return ""
diff --git a/example/views.py b/example/views.py
@@ -74,8 +74,13 @@ class AuthorViewSet(ModelViewSet):
 
 
 class CommentViewSet(ModelViewSet):
-    queryset = Comment.objects.all()
+    queryset = Comment.objects.select_related('author', 'entry')
     serializer_class = CommentSerializer
+    prefetch_for_includes = {
+        '__all__': [],
+        'author': ['author', 'author__bio', 'author__entries'],
+        'entry': ['author', 'author__bio', 'author__entries']
+    }
 
 
 class CompanyViewset(ModelViewSet):

diff --git a/requirements-development.txt b/requirements-development.txt
@@ -10,3 +10,6 @@ recommonmark
 Sphinx
 sphinx_rtd_theme
 tox
+mock
+django-debug-toolbar
+packaging==16.8