Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Page views: use origin URL instead of page name #7293

Merged
merged 9 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 28 additions & 16 deletions readthedocs/analytics/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,41 @@
from django.utils import timezone

from readthedocs.api.v2.signals import footer_response
from readthedocs.core.unresolver import unresolve_from_request
from readthedocs.projects.models import Feature

from .models import PageView


@receiver(footer_response)
def increase_page_view_count(sender, **kwargs):
def increase_page_view_count(sender, *, request, context, origin, **kwargs):
"""Increase the page view count for the given project."""
del sender # unused
context = kwargs['context']
# unused
del sender
del kwargs

project = context['project']
version = context['version']
# footer_response sends an empty path for the index
path = context['path'] or '/'

if project.has_feature(Feature.STORE_PAGEVIEWS):
page_view, _ = PageView.objects.get_or_create(
project=project,
version=version,
path=path,
date=timezone.now().date(),
)
PageView.objects.filter(pk=page_view.pk).update(
view_count=F('view_count') + 1
)

if not origin or not project.has_feature(Feature.STORE_PAGEVIEWS):
return

unresolved = unresolve_from_request(request, origin)
if not unresolved:
return

path = unresolved.filename

fields = dict(
project=project,
version=version,
path=path,
date=timezone.now().date(),
)

page_view = PageView.objects.filter(**fields).first()
if page_view:
page_view.view_count = F('view_count') + 1
page_view.save(update_fields=['view_count'])
else:
PageView.objects.create(**fields, view_count=1)
86 changes: 46 additions & 40 deletions readthedocs/analytics/tests.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from unittest import mock

from django_dynamic_fixture import get
from django.test import TestCase, RequestFactory
import pytest
from django.test import RequestFactory, TestCase, override_settings
from django.urls import reverse
from django.utils import timezone
from django_dynamic_fixture import get

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project, Feature
from readthedocs.projects.models import Feature, Project

from .models import PageView
from .signals import increase_page_view_count
from .utils import (
anonymize_ip_address,
anonymize_user_agent,
get_client_ip,
)
from .utils import anonymize_ip_address, anonymize_user_agent, get_client_ip


class UtilsTests(TestCase):
Expand Down Expand Up @@ -97,82 +94,91 @@ def test_get_client_ip_with_remote_addr(self):
self.assertEqual(client_ip, '203.0.113.195')


class AnalyticsTasksTests(TestCase):
def test_increase_page_view_count(self):
project = get(
@pytest.mark.proxito
@override_settings(PUBLIC_DOMAIN='readthedocs.io')
class AnalyticsPageViewsTests(TestCase):

def setUp(self):
self.project = get(
Project,
slug='project-1',
slug='pip',
)
self.version = get(Version, slug='1.8', project=self.project)
self.origin = f'https://{self.project.slug}.readthedocs.io/en/latest/index.html'
self.host = f'{self.project.slug}.readthedocs.io'
self.url = (
reverse('footer_html') +
f'?project={self.project.slug}&version={self.version.slug}&page=index&docroot=/docs/' +
f'&origin={self.origin}'
)
version = get(Version, slug='1.8', project=project)
path = "index"

today = timezone.now()
tomorrow = timezone.now() + timezone.timedelta(days=1)
yesterday = timezone.now() - timezone.timedelta(days=1)
self.today = timezone.now()
self.tomorrow = timezone.now() + timezone.timedelta(days=1)
self.yesterday = timezone.now() - timezone.timedelta(days=1)

def test_increase_page_view_count(self):
assert (
PageView.objects.all().count() == 0
), 'There\'s no PageView object created yet.'

context = {
"project": project,
"version": version,
"path": path,
}

# Without the feature flag, no PageView is created
increase_page_view_count(None, context=context)
self.client.get(self.url, HTTP_HOST=self.host)
assert (
PageView.objects.all().count() == 0
)

feature, _ = Feature.objects.get_or_create(
feature_id=Feature.STORE_PAGEVIEWS,
)
project.feature_set.add(feature)
self.project.feature_set.add(feature)

# testing for yesterday
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = yesterday
mocked_timezone.return_value = self.yesterday

increase_page_view_count(None, context=context)
self.client.get(self.url, HTTP_HOST=self.host)

assert (
PageView.objects.all().count() == 1
), f'PageView object for path \'{path}\' is created'
), f'PageView object for path \'{origin}\' is created'
assert (
PageView.objects.all().first().view_count == 1
), '\'index\' has 1 view'

increase_page_view_count(None, context=context)
self.client.get(self.url, HTTP_HOST=self.host)

assert (
PageView.objects.all().count() == 1
), f'PageView object for path \'{path}\' is already created'
), f'PageView object for path \'{origin}\' is already created'
assert PageView.objects.filter(path='index.html').count() == 1
assert (
PageView.objects.all().first().view_count == 2
), f'\'{path}\' has 2 views now'
), f'\'{origin}\' has 2 views now'

# testing for today
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = today
increase_page_view_count(None, context=context)
mocked_timezone.return_value = self.today

self.client.get(self.url, HTTP_HOST=self.host)

assert (
PageView.objects.all().count() == 2
), f'PageView object for path \'{path}\' is created for two days (yesterday and today)'
), f'PageView object for path \'{origin}\' is created for two days (yesterday and today)'
assert PageView.objects.filter(path='index.html').count() == 2
assert (
PageView.objects.all().order_by('-date').first().view_count == 1
), f'\'{path}\' has 1 view today'
), f'\'{origin}\' has 1 view today'

# testing for tomorrow
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = tomorrow
increase_page_view_count(None, context=context)
mocked_timezone.return_value = self.tomorrow

self.client.get(self.url, HTTP_HOST=self.host)

assert (
PageView.objects.all().count() == 3
), f'PageView object for path \'{path}\' is created for three days (yesterday, today & tomorrow)'
), f'PageView object for path \'{origin}\' is created for three days (yesterday, today & tomorrow)'
assert PageView.objects.filter(path='index.html').count() == 3
assert (
PageView.objects.all().order_by('-date').first().view_count == 1
), f'\'{path}\' has 1 view tomorrow'
), f'\'{origin}\' has 1 view tomorrow'
2 changes: 1 addition & 1 deletion readthedocs/api/v2/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@


footer_response = django.dispatch.Signal(
providing_args=['request', 'context', 'response_data'],
providing_args=['request', 'context', 'response_data', 'origin'],
)
18 changes: 16 additions & 2 deletions readthedocs/api/v2/views/footer_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from readthedocs.builds.models import Version
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects.constants import MKDOCS, SPHINX_HTMLDIR
from readthedocs.projects.models import Project, Feature
from readthedocs.projects.models import Feature, Project
from readthedocs.projects.version_handling import (
highest_version,
parse_version_failsafe,
Expand Down Expand Up @@ -81,6 +81,19 @@ class BaseFooterHTML(APIView):
"""
Render and return footer markup.

Query parameters:

- project
- version
- page: Sphinx's page name, used for path operations,
like change between languages (deprecated in favor of ``origin``).
- origin: Full path with domain, used for path operations.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we call it full_path?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or absolute_uri as Django calls it when it has the domain on it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm avoiding naming this path since it includes the domain as well, we use full path to refer to a path in other parts of the code. Not sure about uri

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, origin isn't the right name, since that's used in HTTP for just the origin hostname: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Origin

I think absolute_uri or similar is best: https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpRequest.build_absolute_uri

- theme: Used to decide how to integrate the flyout menu.
- docroot: Path where all the source documents are.
Used to build the ``edit_on`` URL.
- source_suffix: Suffix from the source document.
Used to build the ``edit_on`` URL.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are passing subproject too, but we are not using it. I can remove it in another PR if that's ok


.. note::

The methods `_get_project` and `_get_version`
Expand Down Expand Up @@ -228,7 +241,8 @@ def get(self, request, format=None):
sender=None,
request=request,
context=context,
resp_data=resp_data,
response_data=resp_data,
origin=self.request.GET.get('origin'),
)

return Response(resp_data)
Expand Down
1 change: 1 addition & 0 deletions readthedocs/core/static-src/core/js/doc-embed/footer.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function init() {
project: rtd['project'],
version: rtd['version'],
page: rtd['page'],
origin: window.location.href,
theme: rtd.get_theme_name(),
format: "jsonp",
};
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/core/static/core/js/readthedocs-doc-embed.js

Large diffs are not rendered by default.

35 changes: 27 additions & 8 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import logging
from urllib.parse import urlparse
from collections import namedtuple
from urllib.parse import urlparse

from django.urls import resolve as url_resolve
from django.test.client import RequestFactory
from django.urls import resolve as url_resolve

from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.proxito.middleware import map_host_to_project_slug
from readthedocs.proxito.views.utils import _get_project_data_from_request
from readthedocs.proxito.views.mixins import ServeDocsMixin
from readthedocs.proxito.views.utils import _get_project_data_from_request

log = logging.getLogger(__name__)

Expand All @@ -27,17 +27,35 @@ def unresolve(self, url):
"""
parsed = urlparse(url)
domain = parsed.netloc.split(':', 1)[0]
path = parsed.path

# TODO: Make this not depend on the request object,
# but instead move all this logic here working on strings.
request = RequestFactory().get(path=path, HTTP_HOST=domain)
project_slug = request.host_project_slug = map_host_to_project_slug(request)
request = RequestFactory().get(path=parsed.path, HTTP_HOST=domain)
project_slug = map_host_to_project_slug(request)

# Handle returning a response
if hasattr(project_slug, 'status_code'):
return None

request.host_project_slug = request.slug = project_slug
return self.unresolve_from_request(request, url)

def unresolve_from_request(self, request, path):
"""
Unresolve using a request.

``path`` can be a full URL, but the domain will be ignored,
since that information is already in the request object.

None is returned if the request isn't valid.
"""
parsed = urlparse(path)
path = parsed.path
project_slug = getattr(request, 'slug', None)
stsewd marked this conversation as resolved.
Show resolved Hide resolved

if not project_slug:
return None

_, __, kwargs = url_resolve(
path,
urlconf='readthedocs.proxito.urls',
Expand All @@ -63,8 +81,8 @@ def unresolve(self, url):

log.info(
'Unresolver parsed: '
'url=%s project=%s lang_slug=%s version_slug=%s filename=%s',
url, final_project.slug, lang_slug, version_slug, filename
'project=%s lang_slug=%s version_slug=%s filename=%s',
final_project.slug, lang_slug, version_slug, filename
)
return UnresolvedObject(final_project, lang_slug, version_slug, filename, parsed.fragment)

Expand All @@ -76,3 +94,4 @@ class Unresolver(SettingsOverrideObject):

unresolver = Unresolver()
unresolve = unresolver.unresolve
unresolve_from_request = unresolver.unresolve_from_request
Loading