Skip to content

Commit

Permalink
Performance helpers
Browse files Browse the repository at this point in the history
Issue #337
  • Loading branch information
jsenecal authored Jul 22, 2017
2 parents 1659d8c + 7343def commit 779cd9b
Show file tree
Hide file tree
Showing 16 changed files with 235 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ pip-delete-this-directory.txt
# VirtualEnv
.venv/

# Developers
*.sw*
manage.py
.DS_Store
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ script:
- isort --check-only --verbose --recursive --diff rest_framework_json_api
# example has extra dependencies that are installed in a dev environment
# but are not installed in CI. Explicitly set those packages.
- isort --check-only --verbose --recursive --diff --thirdparty pytest --thirdparty polymorphic --thirdparty pytest_factoryboy example
- isort --check-only --verbose --recursive --diff --thirdparty pytest --thirdparty polymorphic --thirdparty pytest_factoryboy --thirdparty packaging example
- coverage run setup.py -v test
after_success:
- codecov
50 changes: 49 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ REST_FRAMEWORK = {
),
'DEFAULT_RENDERER_CLASSES': (
'rest_framework_json_api.renderers.JSONRenderer',
'rest_framework.renderers.BrowsableAPIRenderer',
# If you're performance testing, you will want to use the browseable API
# without forms, as the forms can generate their own queries.
# If performance testing, enable:
# 'example.utils.BrowsableAPIRendererWithoutForms',
# Otherwise, to play around with the browseable API, enable:
'rest_framework.renderers.BrowsableAPIRenderer'
),
'DEFAULT_METADATA_CLASS': 'rest_framework_json_api.metadata.JSONAPIMetadata',
}
Expand All @@ -36,6 +41,12 @@ retrieve the page can be customized by subclassing `PageNumberPagination` and
overriding the `page_query_param`. Page size can be controlled per request via
the `PAGINATE_BY_PARAM` query parameter (`page_size` by default).

#### Performance Testing

If you are trying to see if your viewsets are configured properly to optimize performance,
it is preferable to use `example.utils.BrowsableAPIRendererWithoutForms` instead of the default `BrowsableAPIRenderer`
to remove queries introduced by the forms themselves.

### Serializers

It is recommended to import the base serializer classes from this package
Expand Down Expand Up @@ -558,6 +569,43 @@ class QuestSerializer(serializers.ModelSerializer):
`included_resources` informs DJA of **what** you would like to include.
`included_serializers` tells DJA **how** you want to include it.

#### Performance improvements

Be aware that using included resources without any form of prefetching **WILL HURT PERFORMANCE** as it will introduce m*(n+1) queries.

A viewset helper was designed to allow for greater flexibility and it is automatically available when subclassing
`views.ModelViewSet`
```
# When MyViewSet is called with ?include=author it will dynamically prefetch author and author.bio
class MyViewSet(viewsets.ModelViewSet):
queryset = Book.objects.all()
prefetch_for_includes = {
'__all__': [],
'author': ['author', 'author__bio']
'category.section': ['category']
}
```

The special keyword `__all__` can be used to specify a prefetch which should be done regardless of the include, similar to making the prefetch yourself on the QuerySet.

Using the helper to prefetch, rather than attempting to minimise queries via select_related might give you better performance depending on the characteristics of your data and database.

For example:

If you have a single model, e.g. Book, which has four relations e.g. Author, Publisher, CopyrightHolder, Category.

To display 25 books and related models, you would need to either do:

a) 1 query via selected_related, e.g. SELECT * FROM books LEFT JOIN author LEFT JOIN publisher LEFT JOIN CopyrightHolder LEFT JOIN Category

b) 4 small queries via prefetch_related.

If you have 1M books, 50k authors, 10k categories, 10k copyrightholders
in the select_related scenario, you've just created a in-memory table
with 1e18 rows which will likely exhaust any available memory and
slow your database to crawl.

The prefetch_related case will issue 4 queries, but they will be small and fast queries.
<!--
### Relationships
### Errors
Expand Down
12 changes: 10 additions & 2 deletions example/factories/__init__.py → example/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,17 @@

import factory
from faker import Factory as FakerFactory

from example.models import (
Blog, Author, AuthorBio, Entry, Comment, TaggedItem, ArtProject, ResearchProject, Company
ArtProject,
Author,
AuthorBio,
Blog,
Comment,
Company,
Entry,
ResearchProject,
TaggedItem
)

faker = FakerFactory.create()
Expand Down Expand Up @@ -64,7 +73,6 @@ class Meta:


class TaggedItemFactory(factory.django.DjangoModelFactory):

class Meta:
model = TaggedItem

Expand Down
18 changes: 18 additions & 0 deletions example/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class TaggedItem(BaseModel):
def __str__(self):
return self.tag

class Meta:
ordering = ('id',)


@python_2_unicode_compatible
class Blog(BaseModel):
Expand All @@ -38,6 +41,9 @@ class Blog(BaseModel):
def __str__(self):
return self.name

class Meta:
ordering = ('id',)


@python_2_unicode_compatible
class Author(BaseModel):
Expand All @@ -47,6 +53,9 @@ class Author(BaseModel):
def __str__(self):
return self.name

class Meta:
ordering = ('id',)


@python_2_unicode_compatible
class AuthorBio(BaseModel):
Expand All @@ -56,6 +65,9 @@ class AuthorBio(BaseModel):
def __str__(self):
return self.author.name

class Meta:
ordering = ('id',)


@python_2_unicode_compatible
class Entry(BaseModel):
Expand All @@ -73,6 +85,9 @@ class Entry(BaseModel):
def __str__(self):
return self.headline

class Meta:
ordering = ('id',)


@python_2_unicode_compatible
class Comment(BaseModel):
Expand All @@ -87,6 +102,9 @@ class Comment(BaseModel):
def __str__(self):
return self.body

class Meta:
ordering = ('id',)


class Project(PolymorphicModel):
topic = models.CharField(max_length=30)
Expand Down
12 changes: 4 additions & 8 deletions example/serializers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime

import rest_framework

from packaging import version

from rest_framework_json_api import relations, serializers

from example.models import (
Expand All @@ -20,14 +20,12 @@


class TaggedItemSerializer(serializers.ModelSerializer):

class Meta:
model = TaggedItem
fields = ('tag', )
fields = ('tag',)


class BlogSerializer(serializers.ModelSerializer):

copyright = serializers.SerializerMethodField()
tags = TaggedItemSerializer(many=True, read_only=True)

Expand All @@ -46,12 +44,11 @@ def get_root_meta(self, resource, many):
class Meta:
model = Blog
fields = ('name', 'url', 'tags')
read_only_fields = ('tags', )
read_only_fields = ('tags',)
meta_fields = ('copyright',)


class EntrySerializer(serializers.ModelSerializer):

def __init__(self, *args, **kwargs):
super(EntrySerializer, self).__init__(*args, **kwargs)
# to make testing more concise we'll only output the
Expand Down Expand Up @@ -97,15 +94,14 @@ class Meta:
model = Entry
fields = ('blog', 'headline', 'body_text', 'pub_date', 'mod_date',
'authors', 'comments', 'featured', 'suggested', 'tags')
read_only_fields = ('tags', )
read_only_fields = ('tags',)
meta_fields = ('body_format',)

class JSONAPIMeta:
included_resources = ['comments']


class AuthorBioSerializer(serializers.ModelSerializer):

class Meta:
model = AuthorBio
fields = ('author', 'body')
Expand Down
13 changes: 12 additions & 1 deletion example/settings/dev.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
'rest_framework',
'polymorphic',
'example',
'debug_toolbar',
]

TEMPLATES = [
Expand Down Expand Up @@ -58,7 +59,11 @@

PASSWORD_HASHERS = ('django.contrib.auth.hashers.UnsaltedMD5PasswordHasher', )

MIDDLEWARE_CLASSES = ()
MIDDLEWARE_CLASSES = (
'debug_toolbar.middleware.DebugToolbarMiddleware',
)

INTERNAL_IPS = ('127.0.0.1', )

JSON_API_FORMAT_KEYS = 'camelize'
JSON_API_FORMAT_TYPES = 'camelize'
Expand All @@ -74,6 +79,12 @@
),
'DEFAULT_RENDERER_CLASSES': (
'rest_framework_json_api.renderers.JSONRenderer',

# If you're performance testing, you will want to use the browseable API
# without forms, as the forms can generate their own queries.
# If performance testing, enable:
# 'example.utils.BrowsableAPIRendererWithoutForms',
# Otherwise, to play around with the browseable API, enable:
'rest_framework.renderers.BrowsableAPIRenderer',
),
'DEFAULT_METADATA_CLASS': 'rest_framework_json_api.metadata.JSONAPIMetadata',
Expand Down
57 changes: 57 additions & 0 deletions example/tests/test_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from django.utils import timezone
from rest_framework.test import APITestCase

from example.factories import CommentFactory
from example.models import Author, Blog, Comment, Entry


class PerformanceTestCase(APITestCase):
def setUp(self):
self.author = Author.objects.create(name='Super powerful superhero', email='[email protected]')
self.blog = Blog.objects.create(name='Some Blog', tagline="It's a blog")
self.other_blog = Blog.objects.create(name='Other blog', tagline="It's another blog")
self.first_entry = Entry.objects.create(
blog=self.blog,
headline='headline one',
body_text='body_text two',
pub_date=timezone.now(),
mod_date=timezone.now(),
n_comments=0,
n_pingbacks=0,
rating=3
)
self.second_entry = Entry.objects.create(
blog=self.blog,
headline='headline two',
body_text='body_text one',
pub_date=timezone.now(),
mod_date=timezone.now(),
n_comments=0,
n_pingbacks=0,
rating=1
)
self.comment = Comment.objects.create(entry=self.first_entry)
CommentFactory.create_batch(50)

def test_query_count_no_includes(self):
""" We expect a simple list view to issue only two queries.
1. The number of results in the set (e.g. a COUNT query),
only necessary because we're using PageNumberPagination
2. The SELECT query for the set
"""
with self.assertNumQueries(2):
response = self.client.get('/comments?page_size=25')
self.assertEqual(len(response.data['results']), 25)

def test_query_count_include_author(self):
""" We expect a list view with an include have three queries:
1. Primary resource COUNT query
2. Primary resource SELECT
3. Authors prefetched
3. Entries prefetched
"""
with self.assertNumQueries(4):
response = self.client.get('/comments?include=author&page_size=25')
self.assertEqual(len(response.data['results']), 25)
8 changes: 8 additions & 0 deletions example/urls.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.conf import settings
from django.conf.urls import include, url
from rest_framework import routers

Expand All @@ -22,3 +23,10 @@
urlpatterns = [
url(r'^', include(router.urls)),
]


if settings.DEBUG:
import debug_toolbar
urlpatterns = [
url(r'^__debug__/', include(debug_toolbar.urls)),
] + urlpatterns
20 changes: 20 additions & 0 deletions example/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from rest_framework.renderers import BrowsableAPIRenderer


class BrowsableAPIRendererWithoutForms(BrowsableAPIRenderer):
"""Renders the browsable api, but excludes the forms."""

def get_context(self, *args, **kwargs):
ctx = super().get_context(*args, **kwargs)
ctx['display_edit_forms'] = False
return ctx

def show_form_for_method(self, view, method, request, obj):
"""We never want to do this! So just return False."""
return False

def get_rendered_html_form(self, data, view, method, request):
"""Why render _any_ forms at all. This method should return
rendered HTML, so let's simply return an empty string.
"""
return ""
7 changes: 6 additions & 1 deletion example/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,13 @@ class AuthorViewSet(ModelViewSet):


class CommentViewSet(ModelViewSet):
queryset = Comment.objects.all()
queryset = Comment.objects.select_related('author', 'entry')
serializer_class = CommentSerializer
prefetch_for_includes = {
'__all__': [],
'author': ['author', 'author__bio', 'author__entries'],
'entry': ['author', 'author__bio', 'author__entries']
}


class CompanyViewset(ModelViewSet):
Expand Down
3 changes: 3 additions & 0 deletions requirements-development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ recommonmark
Sphinx
sphinx_rtd_theme
tox
mock
django-debug-toolbar
packaging==16.8
Loading

0 comments on commit 779cd9b

Please sign in to comment.