From 0998e8aa14ea232e4db0cec93a8d4cff548ef399 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 28 Aug 2019 23:13:44 +0530 Subject: [PATCH 01/72] initial work --- readthedocs/api/v2/views/footer_views.py | 8 +++++ readthedocs/search/admin.py | 11 +++++- .../search/migrations/0002_pageview.py | 36 +++++++++++++++++++ .../migrations/0003_auto_20190828_1231.py | 20 +++++++++++ readthedocs/search/models.py | 19 ++++++++++ readthedocs/search/tasks.py | 25 ++++++++++++- 6 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 readthedocs/search/migrations/0002_pageview.py create mode 100644 readthedocs/search/migrations/0003_auto_20190828_1231.py diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py index 1036ea961b1..6b44d124ee2 100644 --- a/readthedocs/api/v2/views/footer_views.py +++ b/readthedocs/api/v2/views/footer_views.py @@ -16,6 +16,7 @@ highest_version, parse_version_failsafe, ) +from readthedocs.search.tasks import increase_page_view_count def get_version_compare_data(project, base_version=None): @@ -163,6 +164,13 @@ def footer_html(request): 'version_supported': version.supported, } + # increase the page view count + increase_page_view_count.delay( + project=context['project'], + version=context['version'], + path=context['path'] if context['path'] else 'index.html' + ) + # Allow folks to hook onto the footer response for various information # collection, or to modify the resp_data. footer_response.send( diff --git a/readthedocs/search/admin.py b/readthedocs/search/admin.py index 77a11e798f9..dce251a6197 100644 --- a/readthedocs/search/admin.py +++ b/readthedocs/search/admin.py @@ -2,7 +2,7 @@ from django.contrib import admin -from .models import SearchQuery +from .models import SearchQuery, PageView class SearchQueryAdmin(admin.ModelAdmin): @@ -14,4 +14,13 @@ class SearchQueryAdmin(admin.ModelAdmin): list_select_related = ('project', 'version', 'version__project') +class PageViewAdmin(admin.ModelAdmin): + raw_id_fields = ('project', 'version') + list_display = ('__str__', 'view_count') + search_fields = ('project__slug', 'version__slug', 'path') + readonly_fields = ('created', 'modified') + list_select_related = ('project', 'version', 'version__project') + + admin.site.register(SearchQuery, SearchQueryAdmin) +admin.site.register(PageView, PageViewAdmin) diff --git a/readthedocs/search/migrations/0002_pageview.py b/readthedocs/search/migrations/0002_pageview.py new file mode 100644 index 00000000000..eb562aa0608 --- /dev/null +++ b/readthedocs/search/migrations/0002_pageview.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.23 on 2019-08-28 12:14 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +import django_extensions.db.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('builds', '0010_add-description-field-to-automation-rule'), + ('projects', '0044_auto_20190703_1300'), + ('search', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='PageView', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created', django_extensions.db.fields.CreationDateTimeField(auto_now_add=True, verbose_name='created')), + ('modified', django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')), + ('path', models.CharField(max_length=4096)), + ('view_count', models.PositiveIntegerField(default=0)), + 
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')), + ('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')), + ], + options={ + 'ordering': ('-modified', '-created'), + 'get_latest_by': 'modified', + 'abstract': False, + }, + ), + ] diff --git a/readthedocs/search/migrations/0003_auto_20190828_1231.py b/readthedocs/search/migrations/0003_auto_20190828_1231.py new file mode 100644 index 00000000000..c69a944e218 --- /dev/null +++ b/readthedocs/search/migrations/0003_auto_20190828_1231.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.23 on 2019-08-28 12:31 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('search', '0002_pageview'), + ] + + operations = [ + migrations.AlterField( + model_name='pageview', + name='view_count', + field=models.PositiveIntegerField(default=1), + ), + ] diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 45f13640294..fc45ac6b1c6 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -137,3 +137,22 @@ def generate_distribution_of_top_queries(cls, project_slug, n): final_data['int_data'].append(count_of_other) return final_data + + +class PageView(TimeStampedModel): + project = models.ForeignKey( + Project, + related_name='page_views', + on_delete=models.CASCADE, + ) + version = models.ForeignKey( + Version, + verbose_name=_('Version'), + related_name='page_views', + on_delete=models.CASCADE, + ) + path = models.CharField(max_length=4096) + view_count = models.PositiveIntegerField(default=1) + + def __str__(self): + return f'[{self.project.slug}:{self.version.slug}]: {self.path}' diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index 0057f90b497..cd333b97627 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -6,7 +6,7 @@ from readthedocs.builds.models import Version from readthedocs.projects.models import Project -from readthedocs.search.models import SearchQuery +from readthedocs.search.models import SearchQuery, PageView from readthedocs.worker import app from .utils import _get_index, _get_document @@ -176,3 +176,26 @@ def record_search_query(project_slug, version_slug, query, total_results): version=version, query=query, ) + + +@app.task(queue='web') +def increase_page_view_count(project_slug, version_slug, path): + today_date = timezone.now().date() + page_view_obj = PageView.objects.filter( + project__slug=project_slug, + version__slug=version_slug, + path=path, + created__date=today_date, + ).first() + + if page_view_obj: + page_view_obj.view_count += 1 + page_view_obj.save() + else: + project_obj = Project.objects.get(slug=project_slug) + version_obj = Version.objects.get(slug=version_slug, project=project_obj) + PageView.objects.create( + project=project_obj, + version=version_obj, + path=path, + ) From 3e8a99a1e3ea83f1da3d7caffe2526a75c497f1f Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Thu, 29 Aug 2019 23:30:17 +0530 Subject: [PATCH 02/72] fix arguments --- readthedocs/search/tasks.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index cd333b97627..5d52b3cf547 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -179,11 +179,11 @@ def 
record_search_query(project_slug, version_slug, query, total_results): @app.task(queue='web') -def increase_page_view_count(project_slug, version_slug, path): +def increase_page_view_count(project, version, path): today_date = timezone.now().date() page_view_obj = PageView.objects.filter( - project__slug=project_slug, - version__slug=version_slug, + project=project, + version=version, path=path, created__date=today_date, ).first() @@ -192,10 +192,8 @@ def increase_page_view_count(project_slug, version_slug, path): page_view_obj.view_count += 1 page_view_obj.save() else: - project_obj = Project.objects.get(slug=project_slug) - version_obj = Version.objects.get(slug=version_slug, project=project_obj) PageView.objects.create( - project=project_obj, - version=version_obj, + project=project, + version=version, path=path, ) From f5c82cf34bd0a19b9710322942d9e215b29e4ffb Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Thu, 29 Aug 2019 23:48:54 +0530 Subject: [PATCH 03/72] update migration file --- ...eview.py => 0002_create_pageview_model.py} | 6 +++--- .../migrations/0003_auto_20190828_1231.py | 20 ------------------- 2 files changed, 3 insertions(+), 23 deletions(-) rename readthedocs/search/migrations/{0002_pageview.py => 0002_create_pageview_model.py} (96%) delete mode 100644 readthedocs/search/migrations/0003_auto_20190828_1231.py diff --git a/readthedocs/search/migrations/0002_pageview.py b/readthedocs/search/migrations/0002_create_pageview_model.py similarity index 96% rename from readthedocs/search/migrations/0002_pageview.py rename to readthedocs/search/migrations/0002_create_pageview_model.py index eb562aa0608..8284a80660a 100644 --- a/readthedocs/search/migrations/0002_pageview.py +++ b/readthedocs/search/migrations/0002_create_pageview_model.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11.23 on 2019-08-28 12:14 +# Generated by Django 1.11.23 on 2019-08-29 18:07 from __future__ import unicode_literals from django.db import migrations, models @@ -10,8 +10,8 @@ class Migration(migrations.Migration): dependencies = [ - ('builds', '0010_add-description-field-to-automation-rule'), ('projects', '0044_auto_20190703_1300'), + ('builds', '0010_add-description-field-to-automation-rule'), ('search', '0001_initial'), ] @@ -23,7 +23,7 @@ class Migration(migrations.Migration): ('created', django_extensions.db.fields.CreationDateTimeField(auto_now_add=True, verbose_name='created')), ('modified', django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')), ('path', models.CharField(max_length=4096)), - ('view_count', models.PositiveIntegerField(default=0)), + ('view_count', models.PositiveIntegerField(default=1)), ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')), ('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')), ], diff --git a/readthedocs/search/migrations/0003_auto_20190828_1231.py b/readthedocs/search/migrations/0003_auto_20190828_1231.py deleted file mode 100644 index c69a944e218..00000000000 --- a/readthedocs/search/migrations/0003_auto_20190828_1231.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.23 on 2019-08-28 12:31 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('search', '0002_pageview'), - ] - - operations = [ - 
migrations.AlterField( - model_name='pageview', - name='view_count', - field=models.PositiveIntegerField(default=1), - ), - ] From 9b03012a18568ec44d736cbadcda4d88bf37b88d Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Tue, 3 Sep 2019 20:20:51 +0530 Subject: [PATCH 04/72] show top 10 viewed page to the users. --- readthedocs/projects/urls/private.py | 4 ++ readthedocs/projects/views/private.py | 26 +++++++++- readthedocs/search/models.py | 40 +++++++++++++- .../templates/projects/project_edit_base.html | 1 + .../projects/project_page_views.html | 52 +++++++++++++++++++ 5 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 readthedocs/templates/projects/project_page_views.html diff --git a/readthedocs/projects/urls/private.py b/readthedocs/projects/urls/private.py index d9df7a3f970..cb2c714aa8e 100644 --- a/readthedocs/projects/urls/private.py +++ b/readthedocs/projects/urls/private.py @@ -109,6 +109,10 @@ r'^(?P[-\w]+)/search-analytics/$', private.search_analytics_view, name='projects_search_analytics', ), + url( + r'^(?P[-\w]+)/page-views/$', + private.page_views, name='projects_page_views', + ), ] domain_urls = [ diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index c20d3a088c8..a2cd3e25b94 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -64,7 +64,7 @@ from readthedocs.projects.utils import Echo from readthedocs.projects.views.base import ProjectAdminMixin, ProjectSpamMixin from readthedocs.projects.views.mixins import ProjectImportMixin -from readthedocs.search.models import SearchQuery +from readthedocs.search.models import SearchQuery, PageView from ..tasks import retry_domain_verification @@ -1020,3 +1020,27 @@ def _search_analytics_csv_data(request, project_slug): ) response['Content-Disposition'] = f'attachment; filename="{file_name}"' return response + + +def page_views(request, project_slug): + """View for page views.""" + + project = get_object_or_404( + Project.objects.for_admin_user(request.user), + slug=project_slug, + ) + + top_viewed_pages = PageView.get_top_viewed_pages(project) + top_viewed_pages_iter = zip( + top_viewed_pages['pages'], + top_viewed_pages['view_counts'] + ) + + return render( + request, + 'projects/project_page_views.html', + { + 'project': project, + 'top_viewed_pages_iter': top_viewed_pages_iter, + }, + ) diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index fc45ac6b1c6..8bc7e8d2af2 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -1,7 +1,7 @@ """Search Queries.""" from django.db import models -from django.db.models import Count +from django.db.models import Count, Sum from django.db.models.functions import TruncDate from django.utils import timezone from django.utils.translation import ugettext_lazy as _ @@ -156,3 +156,41 @@ class PageView(TimeStampedModel): def __str__(self): return f'[{self.project.slug}:{self.version.slug}]: {self.path}' + + @classmethod + def get_top_viewed_pages(cls, project): + """ + Returns top 10 pages according to view counts. + + Structure of returned data is compatible to make graphs. + Sample returned data:: + { + 'pages': ['index.html', 'contribute.html', 'sponsors.html'], + 'view_counts': [150, 200, 143] + } + This data shows that `index.html` is the most viewed page having 150 total views, + followed by `contribute.html` and `sponsors.html` having 200 and + 143 total page views respectively. 
+ """ + qs = ( + cls.objects + .filter(project=project) + .values_list('path') + .annotate(total_views=Sum('view_count')) + .values_list('path', 'total_views') + .order_by('-total_views')[:10] + ) + + pages = [] + view_counts = [] + + for data in qs.iterator(): + pages.append(data[0]) + view_counts.append(data[1]) + + final_data = { + 'pages': pages, + 'view_counts': view_counts, + } + + return final_data diff --git a/readthedocs/templates/projects/project_edit_base.html b/readthedocs/templates/projects/project_edit_base.html index e7fa71af179..8abf01537e9 100644 --- a/readthedocs/templates/projects/project_edit_base.html +++ b/readthedocs/templates/projects/project_edit_base.html @@ -25,6 +25,7 @@
  • {% trans "Environment Variables" %}
  • {% trans "Notifications" %}
  • {% trans "Search Analytics" %}
  • +
  • {% trans "Page Views" %}
  • {% if USE_PROMOS %}
  • {% trans "Advertising" %}
  • {% endif %} diff --git a/readthedocs/templates/projects/project_page_views.html b/readthedocs/templates/projects/project_page_views.html new file mode 100644 index 00000000000..b2f168f78d3 --- /dev/null +++ b/readthedocs/templates/projects/project_page_views.html @@ -0,0 +1,52 @@ +{% extends "projects/project_edit_base.html" %} + +{% load i18n %} +{% load static %} + +{% block title %}{% trans "Page Views" %}{% endblock %} + +{% block nav-dashboard %} class="active"{% endblock %} + +{% block project-page-views-active %}active{% endblock %} +{% block project_edit_content_header %}{% trans "Page Views" %}{% endblock %} + +{% block project_edit_content %} +
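+{# `top_viewed_pages_iter` is provided by the page_views view: a zip of the
+   top ten page paths and their total view counts. #}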

    {% trans "Top Viewed Pages" %}

    +
    +
    +
      + {% for page, count in top_viewed_pages_iter %} +
    • + {{ page }} + + {{ count }} {{ view|pluralize:"s" }} + +
    • + {% empty %} +
    • +

      + {% trans 'No date available.' %} +

      +
    • + {% endfor %} +
    +
    +
    + +
    + +

    {% trans "Page Views Per Page" %}

    + + +{% endblock %} + +{% block extra_scripts %} +{% endblock %} + +{% block extra_links %} + + +{% endblock %} + +{% block footerjs %} +{% endblock %} From f3894768f44b97259f67f8746601a255fcb72158 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Sun, 8 Sep 2019 13:21:00 +0530 Subject: [PATCH 05/72] initial work for showing graphs to the user --- readthedocs/projects/views/private.py | 15 +++++++ readthedocs/search/models.py | 41 ++++++++++++++++--- readthedocs/search/utils.py | 22 +++++++++- .../projects/project_page_views.html | 26 ++++++++++++ 4 files changed, 98 insertions(+), 6 deletions(-) diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index a2cd3e25b94..a8a5a39c870 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -1036,11 +1036,26 @@ def page_views(request, project_slug): top_viewed_pages['view_counts'] ) + all_pages = PageView.objects.filter(project=project).values_list('path', flat=True) + if all_pages.exists(): + all_pages = sorted(list(set(all_pages))) + page_path = request.GET.get('page', all_pages[0]) + else: + all_pages = [] + page_path = '' + + page_data = PageView.get_page_view_count_of_one_month( + project_slug=project.slug, + page_path=page_path + ) return render( request, 'projects/project_page_views.html', { 'project': project, 'top_viewed_pages_iter': top_viewed_pages_iter, + 'page_data': page_data, + 'page_path': page_path, + 'all_pages': all_pages, }, ) diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 8bc7e8d2af2..63574a3d541 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -11,6 +11,7 @@ from readthedocs.builds.models import Version from readthedocs.projects.models import Project from readthedocs.projects.querysets import RelatedProjectQuerySet +from readthedocs.search.utils import _get_last_31_days_iter, _get_last_31_days_str class SearchQuery(TimeStampedModel): @@ -59,7 +60,7 @@ def generate_queries_count_of_one_month(cls, project_slug): last_30th_day = timezone.now().date() - timezone.timedelta(days=30) # this includes the current day also - last_31_days_iter = [last_30th_day + timezone.timedelta(days=n) for n in range(31)] + last_31_days_iter = _get_last_31_days_iter() qs = cls.objects.filter( project__slug=project_slug, @@ -81,10 +82,7 @@ def generate_queries_count_of_one_month(cls, project_slug): # format the date value to a more readable form # Eg. `16 Jul` - last_31_days_str = [ - timezone.datetime.strftime(date, '%d %b') - for date in last_31_days_iter - ] + last_31_days_str = _get_last_31_days_str() final_data = { 'labels': last_31_days_str, @@ -194,3 +192,36 @@ def get_top_viewed_pages(cls, project): } return final_data + + @classmethod + def get_page_view_count_of_one_month(cls, project_slug, page_path): + today = timezone.now().date() + last_30th_day = timezone.now().date() - timezone.timedelta(days=30) + + # this includes the current day also + last_31_days_iter = _get_last_31_days_iter() + + qs = cls.objects.filter( + project__slug=project_slug, + path=page_path, + ).order_by('-created') + + count_dict = dict( + qs.annotate(created_date=TruncDate('created')) + .values('created_date') + .order_by('created_date') + .values_list('created_date', 'view_count') + ) + + count_data = [count_dict.get(date) or 0 for date in last_31_days_iter] + + # format the date value to a more readable form + # Eg. 
`16 Jul` + last_31_days_str = _get_last_31_days_str() + + final_data = { + 'labels': last_31_days_str, + 'int_data': count_data, + } + + return final_data diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index a747c45f115..0e548842c94 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -3,13 +3,13 @@ import logging from operator import attrgetter +from django.utils import timezone from django.shortcuts import get_object_or_404 from django_elasticsearch_dsl.apps import DEDConfig from django_elasticsearch_dsl.registries import registry from readthedocs.builds.models import Version from readthedocs.projects.models import HTMLFile, Project -from readthedocs.search.documents import PageDocument log = logging.getLogger(__name__) @@ -132,6 +132,7 @@ def _indexing_helper(html_objs_qs, wipe=False): If ``wipe`` is set to False, html_objs are deleted from the ES index, else, html_objs are indexed. """ + from readthedocs.search.documents import PageDocument from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es if html_objs_qs: @@ -168,3 +169,22 @@ def _get_sorted_results(results, source_key='_source'): ] return sorted_results + + +def _get_last_31_days_iter(): + today = timezone.now().date() + last_30th_day = timezone.now().date() - timezone.timedelta(days=30) + + # this includes the current day also + last_31_days_iter = [last_30th_day + timezone.timedelta(days=n) for n in range(31)] + return last_31_days_iter + + +def _get_last_31_days_str(): + last_31_days_iter = _get_last_31_days_iter() + last_31_days_str = [ + timezone.datetime.strftime(date, '%d %b') + for date in last_31_days_iter + ] + return last_31_days_str + diff --git a/readthedocs/templates/projects/project_page_views.html b/readthedocs/templates/projects/project_page_views.html index b2f168f78d3..cb875f97ab2 100644 --- a/readthedocs/templates/projects/project_page_views.html +++ b/readthedocs/templates/projects/project_page_views.html @@ -36,11 +36,20 @@

    {% trans "Top Viewed Pages" %}


    {% trans "Page Views Per Page" %}

+    <canvas id="page-views-per-page"></canvas>
    {% endblock %} {% block extra_scripts %} + + {% endblock %} {% block extra_links %} @@ -49,4 +58,21 @@

    {% trans "Page Views Per Page" %}

    {% endblock %} {% block footerjs %} + var line_chart = document.getElementById("page-views-per-page").getContext("2d"); + var line_chart_labels = {{ page_data.labels|safe }}; + var line_chart_data = {{ page_data.int_data|safe }}; + + var line_chart = new Chart(line_chart, { + type: "line", + data: { + labels: line_chart_labels, + datasets: [{ + label: "# of views for {{ page_path }}", + data: line_chart_data, + fill: false, + borderColor: "rgba(75, 192, 192, 1)", + pointBorderColor: "rgba(75, 192, 192, 1)", + }] + }, + }); {% endblock %} From a19823f7e1377514958f949814ea0043ef8dded7 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 18 Sep 2019 11:50:51 +0530 Subject: [PATCH 06/72] show pageviews for a specific page --- readthedocs/api/v2/views/footer_views.py | 4 ++-- readthedocs/search/admin.py | 2 +- ...del.py => 0002_create_page_views_model.py} | 13 +++------- readthedocs/search/models.py | 24 ++++++++++++++----- readthedocs/search/tasks.py | 11 +++++---- 5 files changed, 31 insertions(+), 23 deletions(-) rename readthedocs/search/migrations/{0002_create_pageview_model.py => 0002_create_page_views_model.py} (67%) diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py index 8c562275cee..f0cc5cb7b85 100644 --- a/readthedocs/api/v2/views/footer_views.py +++ b/readthedocs/api/v2/views/footer_views.py @@ -213,8 +213,8 @@ def get(self, request, format=None): # increase the page view count increase_page_view_count.delay( - project=context['project'], - version=context['version'], + project_slug=context['project'].slug, + version_slug=context['version'].slug, path=context['path'] if context['path'] else 'index.html' ) diff --git a/readthedocs/search/admin.py b/readthedocs/search/admin.py index dce251a6197..80b88d0d75b 100644 --- a/readthedocs/search/admin.py +++ b/readthedocs/search/admin.py @@ -18,7 +18,7 @@ class PageViewAdmin(admin.ModelAdmin): raw_id_fields = ('project', 'version') list_display = ('__str__', 'view_count') search_fields = ('project__slug', 'version__slug', 'path') - readonly_fields = ('created', 'modified') + readonly_fields = ('date',) list_select_related = ('project', 'version', 'version__project') diff --git a/readthedocs/search/migrations/0002_create_pageview_model.py b/readthedocs/search/migrations/0002_create_page_views_model.py similarity index 67% rename from readthedocs/search/migrations/0002_create_pageview_model.py rename to readthedocs/search/migrations/0002_create_page_views_model.py index 8284a80660a..8352ea28791 100644 --- a/readthedocs/search/migrations/0002_create_pageview_model.py +++ b/readthedocs/search/migrations/0002_create_page_views_model.py @@ -1,17 +1,16 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11.23 on 2019-08-29 18:07 +# Generated by Django 1.11.24 on 2019-09-18 06:11 from __future__ import unicode_literals from django.db import migrations, models import django.db.models.deletion -import django_extensions.db.fields class Migration(migrations.Migration): dependencies = [ - ('projects', '0044_auto_20190703_1300'), ('builds', '0010_add-description-field-to-automation-rule'), + ('projects', '0044_auto_20190703_1300'), ('search', '0001_initial'), ] @@ -20,17 +19,11 @@ class Migration(migrations.Migration): name='PageView', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created', django_extensions.db.fields.CreationDateTimeField(auto_now_add=True, verbose_name='created')), - ('modified', 
django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')), ('path', models.CharField(max_length=4096)), ('view_count', models.PositiveIntegerField(default=1)), + ('date', models.DateField()), ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')), ('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')), ], - options={ - 'ordering': ('-modified', '-created'), - 'get_latest_by': 'modified', - 'abstract': False, - }, ), ] diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 63574a3d541..8245061f53e 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -137,7 +137,7 @@ def generate_distribution_of_top_queries(cls, project_slug, n): return final_data -class PageView(TimeStampedModel): +class PageView(models.Model): project = models.ForeignKey( Project, related_name='page_views', @@ -151,6 +151,7 @@ class PageView(TimeStampedModel): ) path = models.CharField(max_length=4096) view_count = models.PositiveIntegerField(default=1) + date = models.DateField() def __str__(self): return f'[{self.project.slug}:{self.version.slug}]: {self.path}' @@ -195,6 +196,18 @@ def get_top_viewed_pages(cls, project): @classmethod def get_page_view_count_of_one_month(cls, project_slug, page_path): + """ + Returns the total page views count for last 30 days (including today) for a particular `page_path`. + + Structure of returned data is compatible to make graphs. + Sample returned data:: + { + 'labels': ['01 Jul', '02 Jul', '03 Jul'], + 'int_data': [150, 200, 143] + } + This data shows that there were 150 page views on 01 July, + 200 page views on 02 July and 143 page views on 03 July for a particular `page_path`. 
+ """ today = timezone.now().date() last_30th_day = timezone.now().date() - timezone.timedelta(days=30) @@ -204,13 +217,12 @@ def get_page_view_count_of_one_month(cls, project_slug, page_path): qs = cls.objects.filter( project__slug=project_slug, path=page_path, - ).order_by('-created') + ).order_by('-date') count_dict = dict( - qs.annotate(created_date=TruncDate('created')) - .values('created_date') - .order_by('created_date') - .values_list('created_date', 'view_count') + qs.values('date') + .order_by('date') + .values_list('date', 'view_count') ) count_data = [count_dict.get(date) or 0 for date in last_31_days_iter] diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index 36c81e02c11..ebece39f744 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -215,21 +215,24 @@ def record_search_query(project_slug, version_slug, query, total_results, time_s @app.task(queue='web') -def increase_page_view_count(project, version, path): +def increase_page_view_count(project_slug, version_slug, path): today_date = timezone.now().date() page_view_obj = PageView.objects.filter( - project=project, - version=version, + project__slug=project_slug, + version__slug=version_slug, path=path, - created__date=today_date, + date=today_date, ).first() if page_view_obj: page_view_obj.view_count += 1 page_view_obj.save() else: + project = Project.objects.get(slug=project_slug) + version = Version.objects.get(project=project, slug=version_slug) PageView.objects.create( project=project, version=version, path=path, + date=today_date, ) From a1d0a9b55a65ae55a0adcd1045561e08746f325b Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 2 Oct 2019 22:27:35 +0530 Subject: [PATCH 07/72] change view to class based view --- readthedocs/projects/urls/private.py | 3 +- readthedocs/projects/views/private.py | 57 +++++++++++++-------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/readthedocs/projects/urls/private.py b/readthedocs/projects/urls/private.py index 114fa153eb0..2f68ec67138 100644 --- a/readthedocs/projects/urls/private.py +++ b/readthedocs/projects/urls/private.py @@ -23,6 +23,7 @@ IntegrationExchangeDetail, IntegrationList, IntegrationWebhookSync, + PageViewAdmin, ProjectAdvancedUpdate, ProjectAdvertisingUpdate, ProjectDashboard, @@ -116,7 +117,7 @@ ), url( r'^(?P[-\w]+)/page-views/$', - private.page_views, name='projects_page_views', + PageViewAdmin.as_view(), name='projects_page_views', ), ] diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index 8e306ce69f9..0cd4a7b528b 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -1004,40 +1004,39 @@ def _search_analytics_csv_data(request, project_slug): return response -def page_views(request, project_slug): - """View for page views.""" +class PageViewAdmin(ProjectAdminMixin, PrivateViewMixin, TemplateView): - project = get_object_or_404( - Project.objects.for_admin_user(request.user), - slug=project_slug, - ) + template_name = 'projects/project_page_views.html' + http_method_names = ['get'] - top_viewed_pages = PageView.get_top_viewed_pages(project) - top_viewed_pages_iter = zip( - top_viewed_pages['pages'], - top_viewed_pages['view_counts'] - ) + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + project = self.get_project() - all_pages = PageView.objects.filter(project=project).values_list('path', flat=True) - if all_pages.exists(): - all_pages = sorted(list(set(all_pages))) - 
page_path = request.GET.get('page', all_pages[0]) - else: - all_pages = [] - page_path = '' + top_viewed_pages = PageView.get_top_viewed_pages(project) + top_viewed_pages_iter = zip( + top_viewed_pages['pages'], + top_viewed_pages['view_counts'] + ) - page_data = PageView.get_page_view_count_of_one_month( - project_slug=project.slug, - page_path=page_path - ) - return render( - request, - 'projects/project_page_views.html', - { - 'project': project, + all_pages = PageView.objects.filter(project=project).values_list('path', flat=True) + if all_pages.exists(): + all_pages = sorted(list(set(all_pages))) + page_path = self.request.GET.get('page', all_pages[0]) + else: + all_pages = [] + page_path = '' + + page_data = PageView.get_page_view_count_of_one_month( + project_slug=project.slug, + page_path=page_path + ) + + context.update({ 'top_viewed_pages_iter': top_viewed_pages_iter, 'page_data': page_data, 'page_path': page_path, 'all_pages': all_pages, - }, - ) + }) + + return context From 415a3b31770d5b89d2f839f2663f5641f87ed63a Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Fri, 4 Oct 2019 00:54:03 +0530 Subject: [PATCH 08/72] fix lint --- readthedocs/search/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 0e548842c94..69c1c42b993 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -187,4 +187,3 @@ def _get_last_31_days_str(): for date in last_31_days_iter ] return last_31_days_str - From 2073a823bfb91cd03a9a1f6e2df6783317603d6f Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Sat, 5 Oct 2019 12:05:25 +0530 Subject: [PATCH 09/72] fix more lint --- readthedocs/search/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 1aa2f8ed6b0..fd4eefa2331 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -152,7 +152,7 @@ def get_top_viewed_pages(cls, project): @classmethod def get_page_view_count_of_one_month(cls, project_slug, page_path): """ - Returns the total page views count for last 30 days (including today) for a particular `page_path`. + Returns the total page views count for last 30 days for a particular `page_path`. Structure of returned data is compatible to make graphs. 
Sample returned data:: From 3ec59dc150cd79fdbcd193f730ee9654cf5101b9 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Tue, 15 Oct 2019 18:45:05 +0530 Subject: [PATCH 10/72] store page_slug instead of page_path --- readthedocs/api/v2/views/footer_views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py index f0cc5cb7b85..369dbad6a71 100644 --- a/readthedocs/api/v2/views/footer_views.py +++ b/readthedocs/api/v2/views/footer_views.py @@ -212,10 +212,11 @@ def get(self, request, format=None): } # increase the page view count + page_slug = request.GET.get('page', 'index') increase_page_view_count.delay( project_slug=context['project'].slug, version_slug=context['version'].slug, - path=context['path'] if context['path'] else 'index.html' + path=page_slug ) # Allow folks to hook onto the footer response for various information From 108fd5fe7fefc17f63f7cf92e1aad9d67510fd3c Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Tue, 15 Oct 2019 23:26:26 +0530 Subject: [PATCH 11/72] little refactor --- readthedocs/search/models.py | 14 +++++++------- readthedocs/search/utils.py | 6 ++++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index fd4eefa2331..51c28b0ca00 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -82,7 +82,7 @@ def generate_queries_count_of_one_month(cls, project_slug): # format the date value to a more readable form # Eg. `16 Jul` - last_31_days_str = _get_last_31_days_str() + last_31_days_str = _get_last_31_days_str(date_format='%d %b') final_data = { 'labels': last_31_days_str, @@ -119,12 +119,12 @@ def get_top_viewed_pages(cls, project): Structure of returned data is compatible to make graphs. Sample returned data:: { - 'pages': ['index.html', 'contribute.html', 'sponsors.html'], - 'view_counts': [150, 200, 143] + 'pages': ['index', 'config-file/v1', 'intro/import-guide'], + 'view_counts': [150, 120, 100] } - This data shows that `index.html` is the most viewed page having 150 total views, - followed by `contribute.html` and `sponsors.html` having 200 and - 143 total page views respectively. + This data shows that `index` is the most viewed page having 150 total views, + followed by `config-file/v1` and `intro/import-guide` having 120 and + 100 total page views respectively. """ qs = ( cls.objects @@ -184,7 +184,7 @@ def get_page_view_count_of_one_month(cls, project_slug, page_path): # format the date value to a more readable form # Eg. 
`16 Jul` - last_31_days_str = _get_last_31_days_str() + last_31_days_str = _get_last_31_days_str(date_format='%d %b') final_data = { 'labels': last_31_days_str, diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 69c1c42b993..c1ff956ecf6 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -172,6 +172,7 @@ def _get_sorted_results(results, source_key='_source'): def _get_last_31_days_iter(): + """Returns iterator for last 31 days (including today).""" today = timezone.now().date() last_30th_day = timezone.now().date() - timezone.timedelta(days=30) @@ -180,10 +181,11 @@ def _get_last_31_days_iter(): return last_31_days_iter -def _get_last_31_days_str(): +def _get_last_31_days_str(date_format): + """Returns the list of dates in string format for last 31 days (including today).""" last_31_days_iter = _get_last_31_days_iter() last_31_days_str = [ - timezone.datetime.strftime(date, '%d %b') + timezone.datetime.strftime(date, date_format) for date in last_31_days_iter ] return last_31_days_str From ddc96d9ff909936b8e2f29233c0ef506963187c0 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 16 Oct 2019 01:38:33 +0530 Subject: [PATCH 12/72] update test --- readthedocs/rtd_tests/tests/test_footer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_footer.py b/readthedocs/rtd_tests/tests/test_footer.py index 0c48c2ad3b9..9affc2010d6 100644 --- a/readthedocs/rtd_tests/tests/test_footer.py +++ b/readthedocs/rtd_tests/tests/test_footer.py @@ -233,9 +233,11 @@ class TestFooterPerformance(APITestCase): url = '/api/v2/footer_html/?project=pip&version=latest&page=index&docroot=/' factory = APIRequestFactory() - # The expected number of queries for generating the footer - # This shouldn't increase unless we modify the footer API - EXPECTED_QUERIES = 9 + # The expected number of queries for generating the footer. + # This shouldn't increase unless we modify the footer API. + # Here 9 queries are for serving footer views and 4 queries + # are for storing page views in the db. + EXPECTED_QUERIES = 9 + 4 def setUp(self): self.pip = Project.objects.get(slug='pip') From 8da0f9300f3bfc315f50dcac23cdbffd8bcf7806 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 16 Oct 2019 02:57:08 +0530 Subject: [PATCH 13/72] fix tests --- readthedocs/rtd_tests/tests/test_footer.py | 56 ++++++++++++---------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_footer.py b/readthedocs/rtd_tests/tests/test_footer.py index 9affc2010d6..452df85cb1f 100644 --- a/readthedocs/rtd_tests/tests/test_footer.py +++ b/readthedocs/rtd_tests/tests/test_footer.py @@ -235,9 +235,7 @@ class TestFooterPerformance(APITestCase): # The expected number of queries for generating the footer. # This shouldn't increase unless we modify the footer API. - # Here 9 queries are for serving footer views and 4 queries - # are for storing page views in the db. 
- EXPECTED_QUERIES = 9 + 4 + EXPECTED_QUERIES = 9 def setUp(self): self.pip = Project.objects.get(slug='pip') @@ -251,30 +249,36 @@ def render(self): def test_version_queries(self): # The number of Versions shouldn't impact the number of queries - with self.assertNumQueries(self.EXPECTED_QUERIES): - response = self.render() - self.assertContains(response, '0.8.1') - - for patch in range(3): - identifier = '0.99.{}'.format(patch) - self.pip.versions.create( - verbose_name=identifier, - identifier=identifier, - type=TAG, - active=True, - ) - - with self.assertNumQueries(self.EXPECTED_QUERIES): - response = self.render() - self.assertContains(response, '0.99.0') + with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked: + mocked.side_effect = None + + with self.assertNumQueries(self.EXPECTED_QUERIES): + response = self.render() + self.assertContains(response, '0.8.1') + + for patch in range(3): + identifier = '0.99.{}'.format(patch) + self.pip.versions.create( + verbose_name=identifier, + identifier=identifier, + type=TAG, + active=True, + ) + + with self.assertNumQueries(self.EXPECTED_QUERIES): + response = self.render() + self.assertContains(response, '0.99.0') def test_domain_queries(self): # Setting up a custom domain shouldn't impact the number of queries - self.pip.domains.create( - domain='http://docs.foobar.com', - canonical=True, - ) + with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked: + mocked.side_effect = None - with self.assertNumQueries(self.EXPECTED_QUERIES): - response = self.render() - self.assertContains(response, 'docs.foobar.com') + self.pip.domains.create( + domain='http://docs.foobar.com', + canonical=True, + ) + + with self.assertNumQueries(self.EXPECTED_QUERIES): + response = self.render() + self.assertContains(response, 'docs.foobar.com') From d0e1317ff247a865d7eceb8595626d96794d255a Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 16 Oct 2019 20:49:36 +0530 Subject: [PATCH 14/72] add test for search tasks --- readthedocs/search/tests/test_search_tasks.py | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/tests/test_search_tasks.py b/readthedocs/search/tests/test_search_tasks.py index 86d2742ee67..81376c7c41c 100644 --- a/readthedocs/search/tests/test_search_tasks.py +++ b/readthedocs/search/tests/test_search_tasks.py @@ -8,7 +8,7 @@ from readthedocs.projects.models import Project from readthedocs.builds.models import Version -from readthedocs.search.models import SearchQuery +from readthedocs.search.models import SearchQuery, PageView from readthedocs.search import tasks @@ -124,3 +124,74 @@ def test_delete_old_search_queries_from_db(self, project): assert SearchQuery.objects.all().count() == 1 tasks.delete_old_search_queries_from_db() assert SearchQuery.objects.all().count() == 0 + + def test_increase_page_view_count(self, project): + today = timezone.now() + tomorrow = timezone.now() + timezone.timedelta(days=1) + yesterday = timezone.now() - timezone.timedelta(days=1) + + assert ( + PageView.objects.all().count() == 0 + ), 'There\'s no PageView object created yet.' 
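+        # increase_page_view_count keys rows on (project, version, path, date):
+        # hits on the same day increment view_count on one row, while hits on
+        # other days create separate PageView rows.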
+ + # testing for yesterday + with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + mocked_timezone.return_value = yesterday + + tasks.increase_page_view_count( + project_slug=project.slug, + version_slug=project.versions.all().first().slug, + path='index', + ) + + assert ( + PageView.objects.all().count() == 1, + ), 'PageView object for path \'index\' is created' + assert ( + PageView.objects.all().first().view_count == 1 + ), '\'index\' has 1 view' + + tasks.increase_page_view_count( + project_slug=project.slug, + version_slug=project.versions.all().first().slug, + path='index', + ) + + assert ( + PageView.objects.all().count() == 1, + ), 'PageView object for path \'index\' is already created' + assert ( + PageView.objects.all().first().view_count == 2 + ), '\'index\' has 2 views now' + + # testing for today + with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + mocked_timezone.return_value = today + tasks.increase_page_view_count( + project_slug=project.slug, + version_slug=project.versions.all().first().slug, + path='index', + ) + + assert ( + PageView.objects.all().count() == 2, + ), 'PageView object for path \'index\' is created for two days (yesterday and today)' + assert ( + PageView.objects.all().order_by('-created').first().view_count == 1 + ), '\'index\' has 1 view today' + + # testing for tomorrow + with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + mocked_timezone.return_value = tomorrow + tasks.increase_page_view_count( + project_slug=project.slug, + version_slug=project.versions.all().first().slug, + path='index', + ) + + assert ( + PageView.objects.all().count() == 3, + ), 'PageView object for path \'index\' is created for three days (yesterday, today & tomorrow)' + assert ( + PageView.objects.all().order_by('-created').first().view_count == 1 + ), '\'index\' has 1 view tomorrow' From 39463981c5bd9da162082ddf0991bd90d006ca10 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Thu, 17 Oct 2019 19:04:58 +0530 Subject: [PATCH 15/72] use F expression --- readthedocs/search/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index ebece39f744..6e1a552dfa9 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -2,6 +2,7 @@ from dateutil.parser import parse from django.apps import apps +from django.db.models import F from django.utils import timezone from django_elasticsearch_dsl.registries import registry @@ -225,7 +226,7 @@ def increase_page_view_count(project_slug, version_slug, path): ).first() if page_view_obj: - page_view_obj.view_count += 1 + page_view_obj.view_count = F('view_count') + 1 page_view_obj.save() else: project = Project.objects.get(slug=project_slug) From 11eb6acaacf11cebb6b8efcba4e5c9857807d4d8 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 23 Oct 2019 00:35:53 +0530 Subject: [PATCH 16/72] fix tests --- readthedocs/search/tests/test_search_tasks.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/readthedocs/search/tests/test_search_tasks.py b/readthedocs/search/tests/test_search_tasks.py index 81376c7c41c..33dbe012d68 100644 --- a/readthedocs/search/tests/test_search_tasks.py +++ b/readthedocs/search/tests/test_search_tasks.py @@ -135,7 +135,7 @@ def test_increase_page_view_count(self, project): ), 'There\'s no PageView object created yet.' 
# testing for yesterday - with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + with mock.patch('readthedocs.search.tasks.timezone.now') as mocked_timezone: mocked_timezone.return_value = yesterday tasks.increase_page_view_count( @@ -145,7 +145,7 @@ def test_increase_page_view_count(self, project): ) assert ( - PageView.objects.all().count() == 1, + PageView.objects.all().count() == 1 ), 'PageView object for path \'index\' is created' assert ( PageView.objects.all().first().view_count == 1 @@ -158,14 +158,14 @@ def test_increase_page_view_count(self, project): ) assert ( - PageView.objects.all().count() == 1, + PageView.objects.all().count() == 1 ), 'PageView object for path \'index\' is already created' assert ( PageView.objects.all().first().view_count == 2 ), '\'index\' has 2 views now' # testing for today - with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + with mock.patch('readthedocs.search.tasks.timezone.now') as mocked_timezone: mocked_timezone.return_value = today tasks.increase_page_view_count( project_slug=project.slug, @@ -174,14 +174,14 @@ def test_increase_page_view_count(self, project): ) assert ( - PageView.objects.all().count() == 2, + PageView.objects.all().count() == 2 ), 'PageView object for path \'index\' is created for two days (yesterday and today)' assert ( - PageView.objects.all().order_by('-created').first().view_count == 1 + PageView.objects.all().order_by('-date').first().view_count == 1 ), '\'index\' has 1 view today' # testing for tomorrow - with mock.patch('readthedocs.search.tasks.timezone') as mocked_timezone: + with mock.patch('readthedocs.search.tasks.timezone.now') as mocked_timezone: mocked_timezone.return_value = tomorrow tasks.increase_page_view_count( project_slug=project.slug, @@ -190,8 +190,8 @@ def test_increase_page_view_count(self, project): ) assert ( - PageView.objects.all().count() == 3, + PageView.objects.all().count() == 3 ), 'PageView object for path \'index\' is created for three days (yesterday, today & tomorrow)' assert ( - PageView.objects.all().order_by('-created').first().view_count == 1 + PageView.objects.all().order_by('-date').first().view_count == 1 ), '\'index\' has 1 view tomorrow' From da4673ace6c3df3c7cf149061345e2cb87d6534b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 May 2020 13:56:35 -0500 Subject: [PATCH 17/72] RTDFacetedSearch: pass filters in one way only Currently, we can pass filters as keywords (if they match a valid filter) or in the `filters` keyword. The original model FacetedSearch allows to pass filter only using the filfilters keyword. All places were refactored so we always pass the filters in the filters arg. --- readthedocs/search/api.py | 4 +++- readthedocs/search/faceted_search.py | 32 ++++++++++------------------ readthedocs/search/views.py | 9 ++++---- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index d5e123d2863..d6fedbf8779 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -120,13 +120,15 @@ def get_queryset(self): kwargs = {'filter_by_user': False, 'filters': {}} kwargs['filters']['project'] = [p.slug for p in self.get_all_projects()] kwargs['filters']['version'] = self._get_version().slug - # Check to avoid searching all projects in case project is empty. + + # Check to avoid searching all projects in case these filters are empty. 
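+        # Returning an empty queryset here short-circuits the search entirely;
+        # PageSearch is never constructed for an empty project or version filter.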
if not kwargs['filters']['project']: log.info("Unable to find a project to search") return HTMLFile.objects.none() if not kwargs['filters']['version']: log.info("Unable to find a version to search") return HTMLFile.objects.none() + user = self.request.user queryset = PageSearch( query=query, user=user, **kwargs diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 912e1ac99d9..7153f8628d1 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -1,18 +1,13 @@ import logging +from django.conf import settings from elasticsearch import Elasticsearch from elasticsearch_dsl import FacetedSearch, TermsFacet from elasticsearch_dsl.faceted_search import NestedFacet -from elasticsearch_dsl.query import Bool, SimpleQueryString, Nested, Match - -from django.conf import settings +from elasticsearch_dsl.query import Bool, Match, Nested, SimpleQueryString from readthedocs.core.utils.extend import SettingsOverrideObject -from readthedocs.search.documents import ( - PageDocument, - ProjectDocument, -) - +from readthedocs.search.documents import PageDocument, ProjectDocument log = logging.getLogger(__name__) @@ -21,7 +16,11 @@ class RTDFacetedSearch(FacetedSearch): - def __init__(self, user, **kwargs): + """Custom wrapper around FacetedSearch.""" + + operators = [] + + def __init__(self, query=None, filters=None, user=None, **kwargs): """ Pass in a user in order to filter search results by privacy. @@ -33,23 +32,14 @@ def __init__(self, user, **kwargs): self.user = user self.filter_by_user = kwargs.pop('filter_by_user', True) - # Set filters properly - for facet in self.facets: - if facet in kwargs: - kwargs.setdefault('filters', {})[facet] = kwargs.pop(facet) - - # Don't pass along unnecessary filters - for f in ALL_FACETS: - if f in kwargs: - del kwargs[f] - # Hack a fix to our broken connection pooling # This creates a new connection on every request, # but actually works :) log.info('Hacking Elastic to fix search connection pooling') self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default']) - super().__init__(**kwargs) + filters = filters or {} + super().__init__(query, filters, **kwargs) def query(self, search, query): """ @@ -57,7 +47,7 @@ def query(self, search, query): Also: - * Adds SimpleQueryString instead of default query. + * Adds SimpleQueryString with `self.operators` instead of default query. * Adds HTML encoding of results to avoid XSS issues. 
""" search = search.highlight_options(encoder='html', number_of_fragments=3) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 94a63c43bee..23a6cd8dc32 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -8,13 +8,12 @@ from readthedocs.builds.constants import LATEST from readthedocs.projects.models import Project +from readthedocs.search import utils from readthedocs.search.faceted_search import ( ALL_FACETS, PageSearch, ProjectSearch, ) -from readthedocs.search import utils - log = logging.getLogger(__name__) LOG_TEMPLATE = '(Elastic Search) [%(user)s:%(type)s] [%(project)s:%(version)s:%(language)s] %(msg)s' @@ -71,15 +70,15 @@ def elastic_search(request, project_slug=None): facets = {} if user_input.query: - kwargs = {} + filters = {} for avail_facet in ALL_FACETS: value = getattr(user_input, avail_facet, None) if value: - kwargs[avail_facet] = value + filters[avail_facet] = value search = search_facets[user_input.type]( - query=user_input.query, user=request.user, **kwargs + query=user_input.query, filters=filters, user=request.user, ) results = search[:50].execute() facets = results.facets From 6c3bb51a326bb74b00907d13f9347dd8bca0bfbd Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 May 2020 15:42:19 -0500 Subject: [PATCH 18/72] Search view can still pass invalid values --- readthedocs/search/faceted_search.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 7153f8628d1..e0aea1b6b68 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -39,7 +39,14 @@ def __init__(self, query=None, filters=None, user=None, **kwargs): self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default']) filters = filters or {} - super().__init__(query, filters, **kwargs) + + # We may recieve invalid filters + valid_filters = { + k: v + for k, v in filters.items() + if k in self.facets + } + super().__init__(query=query, filters=valid_filters, **kwargs) def query(self, search, query): """ From a5e8a3ad43ff30f2a5f35fe8acf3060396aafaff Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 May 2020 16:42:53 -0500 Subject: [PATCH 19/72] Search: iterate over valid facets only This logic is the same as the previous one, but it iterates over the valid facets only. --- readthedocs/search/views.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 94a63c43bee..38590dac1d9 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -97,12 +97,10 @@ def elastic_search(request, project_slug=None): ) # Make sure our selected facets are displayed even when they return 0 results - for avail_facet in ALL_FACETS: - value = getattr(user_input, avail_facet, None) - if not value or avail_facet not in facets: - continue - if value not in [val[0] for val in facets[avail_facet]]: - facets[avail_facet].insert(0, (value, 0, True)) + for facet in facets: + value = getattr(user_input, facet, None) + if value and value not in (val[0] for val in facets[facet]): + facets[facet].insert(0, (value, 0, True)) if results: From a2f1503e1e385ab47785e704d653464077ee9e9f Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 6 May 2020 17:12:40 -0500 Subject: [PATCH 20/72] Search Document: remove unused class methods This was only being used for testing. 
--- readthedocs/search/documents.py | 37 ------------------- .../search/tests/test_faceted_search.py | 6 +-- readthedocs/search/tests/test_xss.py | 4 +- 3 files changed, 5 insertions(+), 42 deletions(-) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 68dd80ee8d3..55efe0b4ca5 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -2,12 +2,10 @@ from django.conf import settings from django_elasticsearch_dsl import DocType, Index, fields - from elasticsearch import Elasticsearch from readthedocs.projects.models import HTMLFile, Project - project_conf = settings.ES_INDEXES['project'] project_index = Index(project_conf['name']) project_index.settings(**project_conf['settings']) @@ -50,19 +48,6 @@ class Meta: fields = ('name', 'slug', 'description') ignore_signals = True - @classmethod - def faceted_search(cls, query, user, language=None): - from readthedocs.search.faceted_search import ProjectSearch - kwargs = { - 'user': user, - 'query': query, - } - - if language: - kwargs['filters'] = {'language': language} - - return ProjectSearch(**kwargs) - @page_index.doc_type class PageDocument(RTDDocTypeMixin, DocType): @@ -148,28 +133,6 @@ def prepare_domains(self, html_file): return all_domains - @classmethod - def faceted_search( - cls, query, user, projects_list=None, versions_list=None, - filter_by_user=True - ): - from readthedocs.search.faceted_search import PageSearch - kwargs = { - 'user': user, - 'query': query, - 'filter_by_user': filter_by_user, - } - - filters = {} - if projects_list is not None: - filters['project'] = projects_list - if versions_list is not None: - filters['version'] = versions_list - - kwargs['filters'] = filters - - return PageSearch(**kwargs) - def get_queryset(self): """Overwrite default queryset to filter certain files to index.""" queryset = super().get_queryset() diff --git a/readthedocs/search/tests/test_faceted_search.py b/readthedocs/search/tests/test_faceted_search.py index 74112321682..d62a6903300 100644 --- a/readthedocs/search/tests/test_faceted_search.py +++ b/readthedocs/search/tests/test_faceted_search.py @@ -1,6 +1,6 @@ import pytest -from readthedocs.search.documents import PageDocument +from readthedocs.search.faceted_search import PageSearch @pytest.mark.django_db @@ -21,7 +21,7 @@ def test_search_exact_match(self, client, project, case): cased_query = getattr(query_text, case) query = cased_query() - page_search = PageDocument.faceted_search(query=query, user='') + page_search = PageSearch(query=query, user='') results = page_search.execute() assert len(results) == 1 @@ -37,7 +37,7 @@ def test_search_combined_result(self, client, project): - Where `Foo` or `Bar` is present """ query = 'Elasticsearch Query' - page_search = PageDocument.faceted_search(query=query, user='') + page_search = PageSearch(query=query, user='') results = page_search.execute() assert len(results) == 3 diff --git a/readthedocs/search/tests/test_xss.py b/readthedocs/search/tests/test_xss.py index ed5d674f668..31353061e0d 100644 --- a/readthedocs/search/tests/test_xss.py +++ b/readthedocs/search/tests/test_xss.py @@ -1,6 +1,6 @@ import pytest -from readthedocs.search.documents import PageDocument +from readthedocs.search.faceted_search import PageSearch @pytest.mark.django_db @@ -9,7 +9,7 @@ class TestXSS: def test_facted_page_xss(self, client, project): query = 'XSS' - page_search = PageDocument.faceted_search(query=query, user='') + page_search = PageSearch(query=query, user='') results = page_search.execute() 
expected = """ <h3>XSS exploit</h3> From 3ce38c82e16285f12e1754afacdf422bb34ba4a2 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Thu, 7 May 2020 17:01:44 -0700 Subject: [PATCH 21/72] Index and refactor page view counting --- .../0002_create_page_views_model.py | 16 +++++----- readthedocs/search/models.py | 11 +++++-- readthedocs/search/tasks.py | 29 +++++++------------ 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/readthedocs/search/migrations/0002_create_page_views_model.py b/readthedocs/search/migrations/0002_create_page_views_model.py index 8352ea28791..20281a18832 100644 --- a/readthedocs/search/migrations/0002_create_page_views_model.py +++ b/readthedocs/search/migrations/0002_create_page_views_model.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.24 on 2019-09-18 06:11 -from __future__ import unicode_literals +# Generated by Django 2.2.12 on 2020-05-07 23:32 +import datetime from django.db import migrations, models import django.db.models.deletion @@ -9,8 +8,8 @@ class Migration(migrations.Migration): dependencies = [ - ('builds', '0010_add-description-field-to-automation-rule'), - ('projects', '0044_auto_20190703_1300'), + ('projects', '0048_remove_version_privacy_field'), + ('builds', '0019_migrate_protected_versions_to_hidden'), ('search', '0001_initial'), ] @@ -20,10 +19,13 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('path', models.CharField(max_length=4096)), - ('view_count', models.PositiveIntegerField(default=1)), - ('date', models.DateField()), + ('view_count', models.PositiveIntegerField(default=0)), + ('date', models.DateField(db_index=True, default=datetime.date.today)), ('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')), ('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')), ], + options={ + 'unique_together': {('project', 'version', 'path', 'date')}, + }, ), ] diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 51c28b0ca00..12366085964 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -1,5 +1,7 @@ """Search Queries.""" +import datetime + from django.db import models from django.db.models import Count, Sum from django.db.models.functions import TruncDate @@ -105,11 +107,14 @@ class PageView(models.Model): on_delete=models.CASCADE, ) path = models.CharField(max_length=4096) - view_count = models.PositiveIntegerField(default=1) - date = models.DateField() + view_count = models.PositiveIntegerField(default=0) + date = models.DateField(default=datetime.date.today, db_index=True) + + class Meta: + unique_together = ("project", "version", "path", "date") def __str__(self): - return f'[{self.project.slug}:{self.version.slug}]: {self.path}' + return f'PageView: [{self.project.slug}:{self.version.slug}] - {self.path}' @classmethod def get_top_viewed_pages(cls, project): diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index 14db55bfdfb..69a3d7961a9 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -214,23 +214,14 @@ def record_search_query(project_slug, version_slug, query, total_results, time_s @app.task(queue='web') def increase_page_view_count(project_slug, version_slug, path): - today_date = timezone.now().date() - page_view_obj = PageView.objects.filter( 
- project__slug=project_slug, - version__slug=version_slug, + project = Project.objects.get(slug=project_slug) + + page_view, _ = PageView.objects.get_or_create( + project=project, + version=Version.objects.get(project=project, slug=version_slug), path=path, - date=today_date, - ).first() - - if page_view_obj: - page_view_obj.view_count = F('view_count') + 1 - page_view_obj.save() - else: - project = Project.objects.get(slug=project_slug) - version = Version.objects.get(project=project, slug=version_slug) - PageView.objects.create( - project=project, - version=version, - path=path, - date=today_date, - ) + date=timezone.now().date(), + ) + PageView.objects.filter(pk=page_view.pk).update( + view_count=F('view_count') + 1 + ) From 46747b0be7447e79b9b16b0e4ee0728b38bc3c90 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Fri, 8 May 2020 14:14:41 -0700 Subject: [PATCH 22/72] Feedback updates and renames for clarity - Added a periodic task to remove old pageviews - Show aggregated traffic on the admin view --- readthedocs/api/v2/views/footer_views.py | 15 ++--- readthedocs/projects/urls/private.py | 6 +- readthedocs/projects/views/private.py | 40 ++++---------- readthedocs/search/models.py | 55 ++++++++++--------- readthedocs/search/tasks.py | 11 ++++ readthedocs/search/utils.py | 23 ++++---- readthedocs/settings/base.py | 7 ++- .../templates/projects/project_edit_base.html | 2 +- ...ws.html => project_traffic_analytics.html} | 33 +++++------ .../projects/projects_search_analytics.html | 2 +- 10 files changed, 92 insertions(+), 102 deletions(-) rename readthedocs/templates/projects/{project_page_views.html => project_traffic_analytics.html} (61%) diff --git a/readthedocs/api/v2/views/footer_views.py b/readthedocs/api/v2/views/footer_views.py index 81e03b224ef..5fe91e54240 100644 --- a/readthedocs/api/v2/views/footer_views.py +++ b/readthedocs/api/v2/views/footer_views.py @@ -222,13 +222,14 @@ def get(self, request, format=None): 'version_supported': version.supported, } - # increase the page view count - page_slug = request.GET.get('page', 'index') - increase_page_view_count.delay( - project_slug=context['project'].slug, - version_slug=context['version'].slug, - path=page_slug - ) + # increase the page view count for the given page + page_slug = request.GET.get('page', '') + if page_slug: + increase_page_view_count.delay( + project_slug=context['project'].slug, + version_slug=context['version'].slug, + path=page_slug + ) # Allow folks to hook onto the footer response for various information # collection, or to modify the resp_data. 
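The counting task refactored above leans on `get_or_create` plus an `F()` expression: a read-modify-write of `view_count += 1` in Python is racy under concurrent footer requests, while `F('view_count') + 1` pushes the increment into a single SQL UPDATE. A minimal sketch of the same pattern, assuming the `PageView` model from this patch series (`bump_page_view` is a hypothetical helper, not part of the diff):

    from django.db.models import F
    from django.utils import timezone

    from readthedocs.search.models import PageView


    def bump_page_view(project, version, path):
        # One counter row per (project, version, path, date); created lazily.
        page_view, _ = PageView.objects.get_or_create(
            project=project,
            version=version,
            path=path,
            date=timezone.now().date(),
        )
        # Racy alternative: page_view.view_count += 1; page_view.save()
        # With F(), the database computes view_count = view_count + 1 itself,
        # so two concurrent workers never overwrite each other's increment.
        PageView.objects.filter(pk=page_view.pk).update(
            view_count=F('view_count') + 1,
        )

The trade-off of updating via `QuerySet.update()` is that the in-memory `page_view.view_count` goes stale, which is fine here since the task never reads it back.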
diff --git a/readthedocs/projects/urls/private.py b/readthedocs/projects/urls/private.py index 0272f78b1cb..1db0be47af3 100644 --- a/readthedocs/projects/urls/private.py +++ b/readthedocs/projects/urls/private.py @@ -26,7 +26,7 @@ IntegrationExchangeDetail, IntegrationList, IntegrationWebhookSync, - PageViewAdmin, + TrafficAnalyticsView, ProjectAdvancedUpdate, ProjectAdvertisingUpdate, ProjectDashboard, @@ -141,8 +141,8 @@ name='projects_search_analytics', ), url( - r'^(?P[-\w]+)/page-views/$', - PageViewAdmin.as_view(), name='projects_page_views', + r'^(?P[-\w]+)/traffic-analytics/$', + TrafficAnalyticsView.as_view(), name='projects_traffic_analytics', ), ] diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index 0399946a270..27dcb758259 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -6,7 +6,6 @@ from allauth.socialaccount.models import SocialAccount from django.conf import settings from django.contrib import messages -from django.contrib.auth.decorators import login_required from django.db.models import Count from django.http import ( Http404, @@ -41,7 +40,6 @@ ) from readthedocs.core.mixins import ( ListViewWithForm, - LoginRequiredMixin, PrivateViewMixin, ) from readthedocs.core.utils import broadcast, trigger_build @@ -977,15 +975,6 @@ class RegexAutomationRuleUpdate(RegexAutomationRuleMixin, UpdateView): pass -@login_required -def search_analytics_view(request, project_slug): - """View for search analytics.""" - project = get_object_or_404( - Project.objects.for_admin_user(request.user), - slug=project_slug, - ) - - class SearchAnalytics(ProjectAdminMixin, PrivateViewMixin, TemplateView): template_name = 'projects/projects_search_analytics.html' @@ -1065,39 +1054,30 @@ def _search_analytics_csv_data(self): return response -class PageViewAdmin(ProjectAdminMixin, PrivateViewMixin, TemplateView): +class TrafficAnalyticsView(ProjectAdminMixin, PrivateViewMixin, TemplateView): - template_name = 'projects/project_page_views.html' + template_name = 'projects/project_traffic_analytics.html' http_method_names = ['get'] def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) project = self.get_project() - top_viewed_pages = PageView.get_top_viewed_pages(project) - top_viewed_pages_iter = zip( - top_viewed_pages['pages'], - top_viewed_pages['view_counts'] + # Count of views for top pages over the month + top_pages = PageView.top_viewed_pages(project) + top_viewed_pages = zip( + top_pages['pages'], + top_pages['view_counts'] ) - all_pages = PageView.objects.filter(project=project).values_list('path', flat=True) - if all_pages.exists(): - all_pages = sorted(list(set(all_pages))) - page_path = self.request.GET.get('page', all_pages[0]) - else: - all_pages = [] - page_path = '' - - page_data = PageView.get_page_view_count_of_one_month( + # Aggregate pageviews grouped by day + page_data = PageView.page_views_by_date( project_slug=project.slug, - page_path=page_path ) context.update({ - 'top_viewed_pages_iter': top_viewed_pages_iter, + 'top_viewed_pages': top_viewed_pages, 'page_data': page_data, - 'page_path': page_path, - 'all_pages': all_pages, }) return context diff --git a/readthedocs/search/models.py b/readthedocs/search/models.py index 12366085964..7a6d169dfc5 100644 --- a/readthedocs/search/models.py +++ b/readthedocs/search/models.py @@ -13,7 +13,7 @@ from readthedocs.builds.models import Version from readthedocs.projects.models import Project from 
readthedocs.projects.querysets import RelatedProjectQuerySet -from readthedocs.search.utils import _get_last_31_days_iter, _get_last_31_days_str +from readthedocs.search.utils import _last_30_days_iter class SearchQuery(TimeStampedModel): @@ -61,9 +61,6 @@ def generate_queries_count_of_one_month(cls, project_slug): today = timezone.now().date() last_30th_day = timezone.now().date() - timezone.timedelta(days=30) - # this includes the current day also - last_31_days_iter = _get_last_31_days_iter() - qs = cls.objects.filter( project__slug=project_slug, created__date__lte=today, @@ -80,14 +77,17 @@ def generate_queries_count_of_one_month(cls, project_slug): .values_list('created_date', 'count') ) - count_data = [count_dict.get(date) or 0 for date in last_31_days_iter] + count_data = [count_dict.get(date) or 0 for date in _last_30_days_iter()] # format the date value to a more readable form # Eg. `16 Jul` - last_31_days_str = _get_last_31_days_str(date_format='%d %b') + last_30_days_str = [ + timezone.datetime.strftime(date, '%d %b') + for date in _last_30_days_iter() + ] final_data = { - 'labels': last_31_days_str, + 'labels': last_30_days_str, 'int_data': count_data, } @@ -114,10 +114,10 @@ class Meta: unique_together = ("project", "version", "path", "date") def __str__(self): - return f'PageView: [{self.project.slug}:{self.version.slug}] - {self.path}' + return f'PageView: [{self.project.slug}:{self.version.slug}] - {self.path} for {self.date}' @classmethod - def get_top_viewed_pages(cls, project): + def top_viewed_pages(cls, project, since=None): """ Returns top 10 pages according to view counts. @@ -131,9 +131,12 @@ def get_top_viewed_pages(cls, project): followed by `config-file/v1` and `intro/import-guide` having 120 and 100 total page views respectively. """ + if since is None: + since = timezone.now().date() - timezone.timedelta(days=30) + qs = ( cls.objects - .filter(project=project) + .filter(project=project, date__gte=since) .values_list('path') .annotate(total_views=Sum('view_count')) .values_list('path', 'total_views') @@ -155,9 +158,9 @@ def get_top_viewed_pages(cls, project): return final_data @classmethod - def get_page_view_count_of_one_month(cls, project_slug, page_path): + def page_views_by_date(cls, project_slug, since=None): """ - Returns the total page views count for last 30 days for a particular `page_path`. + Returns the total page views count for last 30 days for a particular project. Structure of returned data is compatible to make graphs. Sample returned data:: @@ -166,33 +169,33 @@ def get_page_view_count_of_one_month(cls, project_slug, page_path): 'int_data': [150, 200, 143] } This data shows that there were 150 page views on 01 July, - 200 page views on 02 July and 143 page views on 03 July for a particular `page_path`. + 200 page views on 02 July and 143 page views on 03 July. 
""" - today = timezone.now().date() - last_30th_day = timezone.now().date() - timezone.timedelta(days=30) - - # this includes the current day also - last_31_days_iter = _get_last_31_days_iter() + if since is None: + since = timezone.now().date() - timezone.timedelta(days=30) qs = cls.objects.filter( project__slug=project_slug, - path=page_path, - ).order_by('-date') + date__gt=since, + ).values('date').annotate(total_views=Sum('view_count')).order_by('date') count_dict = dict( - qs.values('date') - .order_by('date') - .values_list('date', 'view_count') + qs.order_by('date').values_list('date', 'total_views') ) - count_data = [count_dict.get(date) or 0 for date in last_31_days_iter] + # This fills in any dates where there is no data + # to make sure we have a full 30 days of dates + count_data = [count_dict.get(date) or 0 for date in _last_30_days_iter()] # format the date value to a more readable form # Eg. `16 Jul` - last_31_days_str = _get_last_31_days_str(date_format='%d %b') + last_30_days_str = [ + timezone.datetime.strftime(date, '%d %b') + for date in _last_30_days_iter() + ] final_data = { - 'labels': last_31_days_str, + 'labels': last_30_days_str, 'int_data': count_data, } diff --git a/readthedocs/search/tasks.py b/readthedocs/search/tasks.py index 69a3d7961a9..221e4bd34df 100644 --- a/readthedocs/search/tasks.py +++ b/readthedocs/search/tasks.py @@ -225,3 +225,14 @@ def increase_page_view_count(project_slug, version_slug, path): PageView.objects.filter(pk=page_view.pk).update( view_count=F('view_count') + 1 ) + + +@app.task(queue='web') +def delete_old_page_counts(): + """ + Delete page counts older than 30 days. + + This is intended to run from a periodic task daily. + """ + thirty_days_ago = timezone.now().date() - timezone.timedelta(days=30) + return PageView.objects.filter(date__lt=thirty_days_ago).delete() diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 89237dc78d3..70b0e5f0e80 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -156,21 +156,18 @@ def _get_sorted_results(results, source_key='_source'): return sorted_results -def _get_last_31_days_iter(): - """Returns iterator for last 31 days (including today).""" - today = timezone.now().date() - last_30th_day = timezone.now().date() - timezone.timedelta(days=30) +def _last_30_days_iter(): + """Returns iterator for previous 30 days (including today).""" + thirty_days_ago = timezone.now().date() - timezone.timedelta(days=30) - # this includes the current day also - last_31_days_iter = [last_30th_day + timezone.timedelta(days=n) for n in range(31)] - return last_31_days_iter + # this includes the current day, len() = 31 + return (thirty_days_ago + timezone.timedelta(days=n) for n in range(31)) -def _get_last_31_days_str(date_format): - """Returns the list of dates in string format for last 31 days (including today).""" - last_31_days_iter = _get_last_31_days_iter() - last_31_days_str = [ +def _get_last_30_days_str(date_format='%Y-%m-%d'): + """Returns the list of dates in string format for previous 30 days (including today).""" + last_30_days_str = [ timezone.datetime.strftime(date, date_format) - for date in last_31_days_iter + for date in _last_30_days_iter() ] - return last_31_days_str + return last_30_days_str diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 0dace8774a4..3f07ea307d7 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -350,7 +350,12 @@ def USE_PROMOS(self): # noqa 'task': 
'readthedocs.search.tasks.delete_old_search_queries_from_db', 'schedule': crontab(minute=0, hour=0), 'options': {'queue': 'web'}, - } + }, + 'every-day-delete-old-page-views': { + 'task': 'readthedocs.search.tasks.delete_old_page_counts', + 'schedule': crontab(minute=0, hour=1), + 'options': {'queue': 'web'}, + }, } MULTIPLE_APP_SERVERS = [CELERY_DEFAULT_QUEUE] MULTIPLE_BUILD_SERVERS = [CELERY_DEFAULT_QUEUE] diff --git a/readthedocs/templates/projects/project_edit_base.html b/readthedocs/templates/projects/project_edit_base.html index 43de55ddac9..2199bd0005d 100644 --- a/readthedocs/templates/projects/project_edit_base.html +++ b/readthedocs/templates/projects/project_edit_base.html @@ -27,8 +27,8 @@
  • {% trans "Environment Variables" %}
  • {% trans "Automation Rules" %}
  • {% trans "Notifications" %}
  • +
  • {% trans "Traffic Analytics" %}
  • {% trans "Search Analytics" %}
  • -
  • {% trans "Page Views" %}
  • {% if USE_PROMOS %}
  • {% trans "Advertising" %}
  • {% endif %} diff --git a/readthedocs/templates/projects/project_page_views.html b/readthedocs/templates/projects/project_traffic_analytics.html similarity index 61% rename from readthedocs/templates/projects/project_page_views.html rename to readthedocs/templates/projects/project_traffic_analytics.html index cb875f97ab2..0de7e4e7b19 100644 --- a/readthedocs/templates/projects/project_page_views.html +++ b/readthedocs/templates/projects/project_traffic_analytics.html @@ -3,24 +3,22 @@ {% load i18n %} {% load static %} -{% block title %}{% trans "Page Views" %}{% endblock %} +{% block title %}{% trans "Traffic Analytics" %}{% endblock %} {% block nav-dashboard %} class="active"{% endblock %} -{% block project-page-views-active %}active{% endblock %} -{% block project_edit_content_header %}{% trans "Page Views" %}{% endblock %} +{% block project-traffic-analytics-active %}active{% endblock %} +{% block project_edit_content_header %}{% trans "Traffic Analytics" %}{% endblock %} {% block project_edit_content %} -

    {% trans "Top Viewed Pages" %}

    -
    +

    {% trans "Top viewed pages of the past month" %}

    +
      - {% for page, count in top_viewed_pages_iter %} + {% for page, count in top_viewed_pages %}
    • {{ page }} - - {{ count }} {{ view|pluralize:"s" }} - + {{ count }}
    • {% empty %}
    • @@ -33,17 +31,10 @@

      {% trans "Top Viewed Pages" %}

    -
    +
    -

    {% trans "Page Views Per Page" %}

    -
    - -
    - +

    {% trans "Overview of the past month" %}

    + {% endblock %} @@ -59,6 +50,8 @@

    {% trans "Page Views Per Page" %}

    {% block footerjs %} var line_chart = document.getElementById("page-views-per-page").getContext("2d"); + + {# Using |safe here is ok since this is just integers and formatted dates #} var line_chart_labels = {{ page_data.labels|safe }}; var line_chart_data = {{ page_data.int_data|safe }}; @@ -67,7 +60,7 @@

    {% trans "Page Views Per Page" %}

    data: { labels: line_chart_labels, datasets: [{ - label: "# of views for {{ page_path }}", + label: "# of views", data: line_chart_data, fill: false, borderColor: "rgba(75, 192, 192, 1)", diff --git a/readthedocs/templates/projects/projects_search_analytics.html b/readthedocs/templates/projects/projects_search_analytics.html index 93fa412f4ad..f713e4ebf1c 100644 --- a/readthedocs/templates/projects/projects_search_analytics.html +++ b/readthedocs/templates/projects/projects_search_analytics.html @@ -36,7 +36,7 @@

    {% trans "Top queries" %}


    {% if query_count_of_1_month.labels and query_count_of_1_month.int_data %} -

    {% trans "Overview of the past 1 month:" %}

    +

    {% trans "Overview of the past month" %}

    {% endif %} From 92bd4a371652c8b0a449f8e26a129fe44ec8ee80 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 11 May 2020 20:37:55 -0500 Subject: [PATCH 23/72] Search: Index more content from sphinx Closes https://github.com/readthedocs/readthedocs.org/issues/7059 --- readthedocs/search/parse_json.py | 99 ++++++++++++++++---------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py index 715bd81c866..72eb37e8a09 100644 --- a/readthedocs/search/parse_json.py +++ b/readthedocs/search/parse_json.py @@ -13,7 +13,7 @@ log = logging.getLogger(__name__) -def generate_page_sections(body, fjson_storage_path): +def generate_page_sections(page_title, body, fjson_storage_path): """Generate section dicts for each section.""" # Removing all
    tags to prevent duplicate indexing with Sphinx Domains. @@ -38,54 +38,49 @@ def generate_page_sections(body, fjson_storage_path): for node in nodes_to_be_removed: node.decompose() - # Capture text inside h1 before the first h2 - h1_section = body.css('.section > h1') - if h1_section: - h1_section = h1_section[0] - div = h1_section.parent - h1_title = h1_section.text().replace('¶', '').strip() - h1_id = div.attributes.get('id', '') - h1_content = '' - next_p = body.css_first('h1').next - while next_p: - if next_p.tag == 'div' and 'class' in next_p.attributes: - if 'section' in next_p.attributes['class']: - break - - text = parse_content(next_p.text(), remove_first_line=False) - - if h1_content: - if text: - h1_content = f'{h1_content} {text}' - else: - h1_content = text - - next_p = next_p.next - - if h1_content: - yield { - 'id': h1_id, - 'title': h1_title, - 'content': h1_content, - } + # Index content for pages that don't start with a title. + content = _get_content_from_tag(body.body.child) + if content: + yield { + 'id': '', + 'title': page_title, + 'content': content, + } - # Capture text inside h2's - section_list = body.css('.section > h2') - for tag in section_list: - div = tag.parent - title = tag.text().replace('¶', '').strip() - section_id = div.attributes.get('id', '') + # sub-sections are nested, so they are children of the outer section. + # sections with the same level are neighbors. + for head_level in range(1, 7): + tags = body.css(f'.section > h{head_level}') + for tag in tags: + title = tag.text().replace('¶', '').strip() - content = div.text() - content = parse_content(content, remove_first_line=True) + div = tag.parent + section_id = div.attributes.get('id', '') - if content: yield { 'id': section_id, 'title': title, - 'content': content, + 'content': _get_content_from_tag(tag.next), } +def _get_content_from_tag(tag): + contents = [] + next_tag = tag + while next_tag and not _is_section(next_tag): + content = parse_content(next_tag.text()) + if content: + contents.append(content) + next_tag = next_tag.next + return ' '.join(contents) + + +def _is_section(tag): + """Check if the `tag` is a sphinx section (linkeable header).""" + return ( + tag.tag == 'div' and + 'section' in tag.attributes.get('class', []) + ) + def process_file(fjson_storage_path): """Read the fjson file from disk and parse it into a structured dict.""" @@ -111,10 +106,20 @@ def process_file(fjson_storage_path): else: log.info('Unable to index file due to no name %s', fjson_storage_path) + if 'title' in data: + title = data['title'] + title = HTMLParser(title).text().replace('¶', '').strip() + else: + log.info('Unable to index title for: %s', fjson_storage_path) + if data.get('body'): body = HTMLParser(data['body']) body_copy = HTMLParser(data['body']) - sections = generate_page_sections(body, fjson_storage_path) + sections = generate_page_sections( + page_title=title, + body=body, + fjson_storage_path=fjson_storage_path, + ) # pass a copy of `body` so that the removed # nodes in the original don't reflect here. 
@@ -122,12 +127,6 @@ def process_file(fjson_storage_path): else: log.info('Unable to index content for: %s', fjson_storage_path) - if 'title' in data: - title = data['title'] - title = HTMLParser(title).text().replace('¶', '').strip() - else: - log.info('Unable to index title for: %s', fjson_storage_path) - return { 'path': path, 'title': title, @@ -195,8 +194,8 @@ def parse_content(content, remove_first_line=False): if remove_first_line and len(content) > 1: content = content[1:] - # converting newlines to ". " - content = ' '.join(text.strip() for text in content if text) + content = map(lambda x: x.strip(), content) + content = ' '.join(text for text in content if text) return content From 1c859c234677c4ca3796ae39ea011835cc67d9f1 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 12 May 2020 12:02:39 -0500 Subject: [PATCH 24/72] Add tests --- readthedocs/search/parse_json.py | 27 ++++++--- .../search/tests/data/sphinx/in/no-title.html | 7 +++ .../search/tests/data/sphinx/in/no-title.json | 59 +++++++++++++++++++ .../search/tests/data/sphinx/in/page.html | 22 +++++++ .../search/tests/data/sphinx/in/page.json | 59 +++++++++++++++++++ .../tests/data/sphinx/out/no-title.json | 12 ++++ .../search/tests/data/sphinx/out/page.json | 31 ++++++++++ readthedocs/search/tests/test_parse_json.py | 56 +++++++++++++++++- 8 files changed, 264 insertions(+), 9 deletions(-) create mode 100644 readthedocs/search/tests/data/sphinx/in/no-title.html create mode 100644 readthedocs/search/tests/data/sphinx/in/no-title.json create mode 100644 readthedocs/search/tests/data/sphinx/in/page.html create mode 100644 readthedocs/search/tests/data/sphinx/in/page.json create mode 100644 readthedocs/search/tests/data/sphinx/out/no-title.json create mode 100644 readthedocs/search/tests/data/sphinx/out/page.json diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py index 72eb37e8a09..3c356785da6 100644 --- a/readthedocs/search/parse_json.py +++ b/readthedocs/search/parse_json.py @@ -2,19 +2,29 @@ import logging from urllib.parse import urlparse -import orjson as json +import orjson as json from django.conf import settings from django.core.files.storage import get_storage_class - from selectolax.parser import HTMLParser - log = logging.getLogger(__name__) def generate_page_sections(page_title, body, fjson_storage_path): - """Generate section dicts for each section.""" + """ + Generate section dicts for each section of a Sphinx page. + + In Sphinx, sub-sections are nested, so they are children of the outer section, + and sections with the same level are neighbors. + We index the content under a section up to the start of the next one. + + Pages can have content before the first title, or no title at all; + that content is indexed first, under the title of the original page (`page_title`). + + Contents that are likely to be a Sphinx domain are deleted, + since we already index those in another step. + """ # Removing all
tags to prevent duplicate indexing with Sphinx Domains. nodes_to_be_removed = [] @@ -47,8 +57,7 @@ def generate_page_sections(page_title, body, fjson_storage_path): 'content': content, } - # sub-sections are nested, so they are children of the outer section. - # sections with the same level are neighbors. + # Index content from h1 to h6 headers. for head_level in range(1, 7): tags = body.css(f'.section > h{head_level}') for tag in tags: @@ -63,7 +72,9 @@ def generate_page_sections(page_title, body, fjson_storage_path): 'content': _get_content_from_tag(tag.next), } + def _get_content_from_tag(tag): + """Gets the content from `tag` up to the start of the next section.""" contents = [] next_tag = tag while next_tag and not _is_section(next_tag): @@ -75,7 +86,7 @@ def _get_content_from_tag(tag): def _is_section(tag): - """Check if the `tag` is a sphinx section (linkeable header).""" + """Check if `tag` is a Sphinx section (linkable header).""" return ( tag.tag == 'div' and 'section' in tag.attributes.get('class', []) @@ -130,7 +141,7 @@ def process_file(fjson_storage_path): return { 'path': path, 'title': title, - 'sections': tuple(sections), + 'sections': list(sections), 'domain_data': domain_data, } diff --git a/readthedocs/search/tests/data/sphinx/in/no-title.html b/readthedocs/search/tests/data/sphinx/in/no-title.html new file mode 100644 index 00000000000..5af77ac2e7f --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/in/no-title.html @@ -0,0 +1,7 @@
+<p>A page without a title.</p>
+<p>Only content.</p>
+<ul>
+<li>One</li>
+<li>Two</li>
+<li>Three</li>
+</ul>
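The traversal this patch adds tests for is the heart of `generate_page_sections`: find each header via the `.section > h{n}` selector, then walk its following siblings until the next section `div`, so nested sub-sections never leak into their parent's content. A self-contained sketch of that walk over a small inline fixture (a standalone script mirroring the logic above, not the patch's exact function):

    from selectolax.parser import HTMLParser

    HTML = '''
    <div class="section" id="intro">
    <h1>Intro¶</h1>
    <p>First paragraph.</p>
    <div class="section" id="details">
    <h2>Details¶</h2>
    <p>Nested content belongs to "details", not "intro".</p>
    </div>
    </div>
    '''

    tree = HTMLParser(HTML)
    for level in range(1, 7):
        for header in tree.css(f'.section > h{level}'):
            title = header.text().replace('¶', '').strip()
            section_id = header.parent.attributes.get('id', '')
            # Collect text from following siblings until the next section div.
            contents = []
            node = header.next
            while node is not None:
                if node.tag == 'div' and 'section' in (node.attributes.get('class') or ''):
                    break
                text = node.text().strip()
                if text:
                    contents.append(text)
                node = node.next
            print(f'{section_id}: {title} -> {" ".join(contents)}')

Running this prints one line per section ("intro" with only the first paragraph, "details" with the nested paragraph), which is exactly the partitioning the fixtures below assert.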
    diff --git a/readthedocs/search/tests/data/sphinx/in/no-title.json b/readthedocs/search/tests/data/sphinx/in/no-title.json new file mode 100644 index 00000000000..ae78faddcef --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/in/no-title.json @@ -0,0 +1,59 @@ +{ + "parents": [ + { + "link": "../", + "title": "Guides" + } + ], + "prev": { + "link": "../conda/", + "title": "Conda Support" + }, + "next": { + "link": "../feature-flags/", + "title": "Feature Flags" + }, + "title": "<no title>", + "meta": {}, + "body": "", + "metatags": "", + "rellinks": [ + [ + "genindex", + "General Index", + "I", + "index" + ], + [ + "http-routingtable", + "HTTP Routing Table", + "", + "routing table" + ], + [ + "guides/feature-flags", + "Feature Flags", + "N", + "next" + ], + [ + "guides/conda", + "Conda Support", + "P", + "previous" + ] + ], + "sourcename": "guides/environment-variables.rst.txt", + "toc": "
      \n
    \n", + "display_toc": false, + "page_source_suffix": ".rst", + "current_page_name": "guides/environment-variables", + "sidebars": [ + "localtoc.html", + "relations.html", + "sourcelink.html", + "searchbox.html" + ], + "customsidebar": null, + "alabaster_version": "0.7.12" +} diff --git a/readthedocs/search/tests/data/sphinx/in/page.html b/readthedocs/search/tests/data/sphinx/in/page.html new file mode 100644 index 00000000000..dcdcbac030e --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/in/page.html @@ -0,0 +1,22 @@ +

+<p>Content at the beginning.</p>
+
+<div class="section" id="i-need-secrets-or-environment-variables-in-my-build">
+<h1>I Need Secrets (or Environment Variables) in my Build¶</h1>
+<p>It may happen that your documentation depends on an authenticated service to be built properly.</p>
+</div>
+
+<div class="section" id="title-one">
+<h1>Title One¶</h1>
+<p>This is another H1 title.</p>
+
+<div class="section" id="sub-title-one">
+<h2>Sub-title one¶</h2>
+<p>Sub title</p>
+
+<div class="section" id="subsub-title">
+<h3>Subsub title¶</h3>
+<p>This is a H3 title.</p>
+</div>
+</div>
+</div>
+
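For completeness, a fixture like the one above can be fed straight into the patched parser to reproduce the expected sections in the out/ files below. A quick interactive sketch (the local file path and scaffolding are illustrative, not part of the test suite):

    from selectolax.parser import HTMLParser

    from readthedocs.search.parse_json import generate_page_sections

    # Assumes the in/page.html fixture above has been saved locally.
    with open('page.html') as fixture:
        body = HTMLParser(fixture.read())

    sections = generate_page_sections(
        page_title='I Need Secrets (or Environment Variables) in my Build',
        body=body,
        fjson_storage_path='guides/environment-variables.fjson',  # illustrative path
    )
    for section in sections:
        # Expected ids: '', 'i-need-secrets-...', 'title-one',
        # 'sub-title-one', 'subsub-title'
        print(repr(section['id']), section['title'])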
    diff --git a/readthedocs/search/tests/data/sphinx/in/page.json b/readthedocs/search/tests/data/sphinx/in/page.json new file mode 100644 index 00000000000..464200077c7 --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/in/page.json @@ -0,0 +1,59 @@ +{ + "parents": [ + { + "link": "../", + "title": "Guides" + } + ], + "prev": { + "link": "../conda/", + "title": "Conda Support" + }, + "next": { + "link": "../feature-flags/", + "title": "Feature Flags" + }, + "title": "I Need Secrets (or Environment Variables) in my Build", + "meta": {}, + "body": "", + "metatags": "", + "rellinks": [ + [ + "genindex", + "General Index", + "I", + "index" + ], + [ + "http-routingtable", + "HTTP Routing Table", + "", + "routing table" + ], + [ + "guides/feature-flags", + "Feature Flags", + "N", + "next" + ], + [ + "guides/conda", + "Conda Support", + "P", + "previous" + ] + ], + "sourcename": "guides/environment-variables.rst.txt", + "toc": "\n", + "display_toc": true, + "page_source_suffix": ".rst", + "current_page_name": "guides/environment-variables", + "sidebars": [ + "localtoc.html", + "relations.html", + "sourcelink.html", + "searchbox.html" + ], + "customsidebar": null, + "alabaster_version": "0.7.12" +} diff --git a/readthedocs/search/tests/data/sphinx/out/no-title.json b/readthedocs/search/tests/data/sphinx/out/no-title.json new file mode 100644 index 00000000000..5ad8077ab2e --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/out/no-title.json @@ -0,0 +1,12 @@ +{ + "title": "", + "path": "guides/environment-variables", + "sections": [ + { + "id": "", + "title": "", + "content": "A page without a title. Only content. One Two Three" + } + ], + "domain_data": {} +} diff --git a/readthedocs/search/tests/data/sphinx/out/page.json b/readthedocs/search/tests/data/sphinx/out/page.json new file mode 100644 index 00000000000..1996fb2d976 --- /dev/null +++ b/readthedocs/search/tests/data/sphinx/out/page.json @@ -0,0 +1,31 @@ +{ + "title": "I Need Secrets (or Environment Variables) in my Build", + "path": "guides/environment-variables", + "sections": [ + { + "id": "", + "title": "I Need Secrets (or Environment Variables) in my Build", + "content": "Content at the beginning." + }, + { + "id": "i-need-secrets-or-environment-variables-in-my-build", + "title": "I Need Secrets (or Environment Variables) in my Build", + "content": "It may happen that your documentation depends on an authenticated service to be built properly." 
+ }, + { + "content": "This is another H1 title.", + "id": "title-one", + "title": "Title One" + }, + { + "content": "Sub title", + "id": "sub-title-one", + "title": "Sub-title one"}, + { + "content": "This is a H3 title.", + "id": "subsub-title", + "title": "Subsub title" + } + ], + "domain_data": {} +} diff --git a/readthedocs/search/tests/test_parse_json.py b/readthedocs/search/tests/test_parse_json.py index 3d6b3860041..e5ab1927da3 100644 --- a/readthedocs/search/tests/test_parse_json.py +++ b/readthedocs/search/tests/test_parse_json.py @@ -7,7 +7,7 @@ from django_dynamic_fixture import get from readthedocs.builds.storage import BuildMediaFileSystemStorage -from readthedocs.projects.constants import MKDOCS +from readthedocs.projects.constants import MKDOCS, SPHINX from readthedocs.projects.models import HTMLFile, Project data_path = Path(__file__).parent.resolve() / 'data' @@ -103,3 +103,57 @@ def test_mkdocs_old_version(self, storage_open, storage_exists): ] expected_json = json.load(open(data_path / 'mkdocs/out/search_index_old.json')) assert parsed_json == expected_json + + @mock.patch.object(BuildMediaFileSystemStorage, 'exists') + @mock.patch.object(BuildMediaFileSystemStorage, 'open') + def test_sphinx(self, storage_open, storage_exists): + json_file = data_path / 'sphinx/in/page.json' + html_content = data_path / 'sphinx/in/page.html' + + json_content = json.load(json_file.open()) + json_content['body'] = html_content.open().read() + storage_open.side_effect = self._mock_open( + json.dumps(json_content) + ) + storage_exists.return_value = True + + self.version.documentation_type = SPHINX + self.version.save() + + page_file = get( + HTMLFile, + project=self.project, + version=self.version, + path='page.html', + ) + + parsed_json = page_file.processed_json + expected_json = json.load(open(data_path / 'sphinx/out/page.json')) + assert parsed_json == expected_json + + @mock.patch.object(BuildMediaFileSystemStorage, 'exists') + @mock.patch.object(BuildMediaFileSystemStorage, 'open') + def test_sphinx_page_without_title(self, storage_open, storage_exists): + json_file = data_path / 'sphinx/in/no-title.json' + html_content = data_path / 'sphinx/in/no-title.html' + + json_content = json.load(json_file.open()) + json_content['body'] = html_content.open().read() + storage_open.side_effect = self._mock_open( + json.dumps(json_content) + ) + storage_exists.return_value = True + + self.version.documentation_type = SPHINX + self.version.save() + + page_file = get( + HTMLFile, + project=self.project, + version=self.version, + path='no-title.html', + ) + + parsed_json = page_file.processed_json + expected_json = json.load(open(data_path / 'sphinx/out/no-title.json')) + assert parsed_json == expected_json From 2066a46b6a9efaf5cc9816b5614ebc0880f7c54c Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 13:28:16 -0700 Subject: [PATCH 25/72] Add a page about choosing between .com/.org MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an initial attempt, but I think it’s important to document this. We get a lot of questions so it seems obvious we need to explain it better. 
--- docs/choosing-a-site.rst | 43 ++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 44 insertions(+) create mode 100644 docs/choosing-a-site.rst diff --git a/docs/choosing-a-site.rst b/docs/choosing-a-site.rst new file mode 100644 index 00000000000..0f02918845e --- /dev/null +++ b/docs/choosing-a-site.rst @@ -0,0 +1,43 @@ +Choosing between our two platforms +================================== + +A question our users often have is what the difference is between |org_brand| and |com_brand|. +This page will lay out the functional and philosophical differences between the two sites, +which should help you choose which is a better fit for your organization. + +The features available on both platforms are the same. +The primary difference is the audience and use cases that are supported. + +|org_brand| +-------------------- + +|org_brand| is meant for open source projects to use for documentation hosting. +This is great for user and developer documentation for your project. + +Important points: + +* All documentation sites have advertising +* Only supports public VCS repositories +* All documentation is publicly accessible to the world +* Less build time and fewer build resources (memory & CPU) + +|com_brand| +------------------- + +|com_brand| is meant for companies and users who have private documentation. +It works well for product documentation as well as internal docs for your developers. + +Important points: + +* No advertising +* Allows importing *private*and public repositories from VCS. +* Supports private versions that only your organization or people you give access to can see. +* More build time and more build resources (memory & CPU) + + +Questions? +---------- + +If you have a question about which platform would be best, +you can email us at support@readthedocs.org. + diff --git a/docs/index.rst b/docs/index.rst index 7b0bd26f8f8..8f107ee6a89 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,6 +45,7 @@ to help you create fantastic documentation for your project. :doc:`With Sphinx ` | :doc:`With MkDocs ` | :doc:`Feature Overview ` + :doc:`Choosing between our two sites ` * **Importing your existing documentation**: :doc:`Import guide ` From 76b5361c64b542cc243f68ca046521b753bf5b33 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 13:35:08 -0700 Subject: [PATCH 26/72] Missed a spot --- docs/choosing-a-site.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/choosing-a-site.rst b/docs/choosing-a-site.rst index 0f02918845e..4b11654b1a1 100644 --- a/docs/choosing-a-site.rst +++ b/docs/choosing-a-site.rst @@ -1,5 +1,5 @@ -Choosing between our two platforms -================================== +Choosing between our two sites +============================== A question our users often have is what the difference is between |org_brand| and |com_brand|. 
This page will lay out the functional and philosophical differences between the two sites, From b533db07d14bce546147989aa3619057a58ab771 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 13:47:51 -0700 Subject: [PATCH 27/72] Review fixes --- docs/choosing-a-site.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/choosing-a-site.rst b/docs/choosing-a-site.rst index 4b11654b1a1..e79749ce546 100644 --- a/docs/choosing-a-site.rst +++ b/docs/choosing-a-site.rst @@ -1,4 +1,4 @@ -Choosing between our two sites +Choosing Between Our Two Sites ============================== A question our users often have is what the difference is between |org_brand| and |com_brand|. @@ -30,7 +30,7 @@ It works well for product documentation as well as internal docs for your develo Important points: * No advertising -* Allows importing *private*and public repositories from VCS. +* Allows importing private and public repositories from VCS. * Supports private versions that only your organization or people you give access to can see. * More build time and more build resources (memory & CPU) From 116b72ce373ae2db4dd52abf83d7082a6f361b88 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 13:52:41 -0700 Subject: [PATCH 28/72] Mention orgs --- docs/choosing-a-site.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/choosing-a-site.rst b/docs/choosing-a-site.rst index e79749ce546..5376e27939b 100644 --- a/docs/choosing-a-site.rst +++ b/docs/choosing-a-site.rst @@ -20,6 +20,7 @@ Important points: * Only supports public VCS repositories * All documentation is publicly accessible to the world * Less build time and fewer build resources (memory & CPU) +* Documentation is organized by projects |com_brand| ------------------- @@ -30,9 +31,10 @@ It works well for product documentation as well as internal docs for your develo Important points: * No advertising -* Allows importing private and public repositories from VCS. -* Supports private versions that only your organization or people you give access to can see. +* Allows importing private and public repositories from VCS +* Supports private versions that only your organization or people you give access to can see * More build time and more build resources (memory & CPU) +* Documentation is organized by organization, giving more control over permissions Questions? From e129b330d453414056385466b063c44f8c917078 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 14:19:01 -0700 Subject: [PATCH 29/72] Fix PR builds being marked built This was removed in https://github.com/readthedocs/readthedocs.org/pull/7044, and it's needed to properly link to the PR builds and update their status. --- readthedocs/projects/tasks.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 48d9e94d8ab..06ccde4300f 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -787,14 +787,13 @@ def run_build(self, record): # We upload EXTERNAL version media files to blob storage # We should have this check here to make sure # the files don't get re-uploaded on web. 
- if self.version.type != EXTERNAL: - self.update_app_instances( - html=bool(outcomes['html']), - search=bool(outcomes['search']), - localmedia=bool(outcomes['localmedia']), - pdf=bool(outcomes['pdf']), - epub=bool(outcomes['epub']), - ) + self.update_app_instances( + html=bool(outcomes['html']), + search=bool(outcomes['search']), + localmedia=bool(outcomes['localmedia']), + pdf=bool(outcomes['pdf']), + epub=bool(outcomes['epub']), + ) else: log.warning('No build ID, not syncing files') @@ -1114,9 +1113,6 @@ def update_app_instances( 'Updating version failed, skipping file sync: version=%s', self.version, ) - hostname = socket.gethostname() - - delete_unsynced_media = True # Broadcast finalization steps to web application instances fileify.delay( @@ -1272,7 +1268,7 @@ def fileify(version_pk, commit, build): This is so we have an idea of what files we have in the database. """ version = Version.objects.get_object_or_log(pk=version_pk) - if not version: + if not version or version.type == EXTERNAL: return project = version.project From 90ac6731442570fa1a8bc7514114e57e25e95203 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 14:27:19 -0700 Subject: [PATCH 30/72] Fix comments --- readthedocs/projects/tasks.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 06ccde4300f..79df64cfb6d 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -783,10 +783,7 @@ def run_build(self, record): epub=bool(outcomes['epub']), ) - # Finalize build and update web servers - # We upload EXTERNAL version media files to blob storage - # We should have this check here to make sure - # the files don't get re-uploaded on web. + # TODO: Remove this function and just update the DB and index search directly self.update_app_instances( html=bool(outcomes['html']), search=bool(outcomes['search']), @@ -1268,6 +1265,7 @@ def fileify(version_pk, commit, build): This is so we have an idea of what files we have in the database. """ version = Version.objects.get_object_or_log(pk=version_pk) + # Don't index external version builds for now if not version or version.type == EXTERNAL: return project = version.project From d663294511751782fd63306998540848ce21d31f Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 14:53:05 -0700 Subject: [PATCH 31/72] Add to toc --- docs/index.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 8f107ee6a89..379cbf895f9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -56,11 +56,12 @@ to help you create fantastic documentation for your project. :hidden: :caption: First steps - intro/getting-started-with-sphinx - intro/getting-started-with-mkdocs + /intro/getting-started-with-sphinx + /intro/getting-started-with-mkdocs - intro/import-guide - features + /intro/import-guide + /features + /choosing-a-site Getting started with Read the Docs From 82921ac0b727f25d41da28f592347ee60ed3f4f9 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 May 2020 14:53:41 -0700 Subject: [PATCH 32/72] Use title directly --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 379cbf895f9..c8057acc7df 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,7 +45,7 @@ to help you create fantastic documentation for your project. 
:doc:`With Sphinx ` | :doc:`With MkDocs ` | :doc:`Feature Overview ` - :doc:`Choosing between our two sites ` + :doc:`/choosing-a-site` * **Importing your existing documentation**: :doc:`Import guide ` From 4c487755fb658f2361526f9db40959521518a770 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 12 May 2020 17:34:33 -0500 Subject: [PATCH 33/72] Search: generate full link from the server side We were returning an invalid link from the API, and the client was in charge of guessing the correct link. This also drops our builder as a dependency for search :tada: Half fixes https://github.com/readthedocs/readthedocs.org/issues/5821 Fixes https://github.com/readthedocs/readthedocs.org/issues/6102 --- .../static-src/core/js/doc-embed/search.js | 9 +--- .../static/core/js/readthedocs-doc-embed.js | 2 +- readthedocs/search/api.py | 52 +++++++++++++------ 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/readthedocs/core/static-src/core/js/doc-embed/search.js b/readthedocs/core/static-src/core/js/doc-embed/search.js index e16cddf4671..d3b774ce749 100644 --- a/readthedocs/core/static-src/core/js/doc-embed/search.js +++ b/readthedocs/core/static-src/core/js/doc-embed/search.js @@ -74,14 +74,7 @@ function attach_elastic_search_query(data) { } } - // Creating the result from elements - var suffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; - // Since sphinx 2.2.1 FILE_SUFFIX is .html for all builders, - // and there is a new BUILDER option. - if ('BUILDER' in DOCUMENTATION_OPTIONS && DOCUMENTATION_OPTIONS.BUILDER === 'readthedocsdirhtml') { - suffix = ''; - } - var link = doc.link + suffix + "?highlight=" + $.urlencode(query); + var link = doc.link + "?highlight=" + $.urlencode(query); var item = $('', {'href': link}); diff --git a/readthedocs/core/static/core/js/readthedocs-doc-embed.js b/readthedocs/core/static/core/js/readthedocs-doc-embed.js index 19496750196..97675bf4529 100644 --- a/readthedocs/core/static/core/js/readthedocs-doc-embed.js +++ b/readthedocs/core/static/core/js/readthedocs-doc-embed.js @@ -1 +1 @@ -!function o(s,a,l){function d(t,e){if(!a[t]){if(!s[t]){var i="function"==typeof require&&require;if(!e&&i)return i(t,!0);if(c)return c(t,!0);var n=new Error("Cannot find module '"+t+"'");throw n.code="MODULE_NOT_FOUND",n}var r=a[t]={exports:{}};s[t][0].call(r.exports,function(e){return d(s[t][1][e]||e)},r,r.exports,o,s,a,l)}return a[t].exports}for(var c="function"==typeof require&&require,e=0;e
    "),i("table.docutils.footnote").wrap("
    "),i("table.docutils.citation").wrap("
    "),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var t=i(this);expand=i(''),expand.on("click",function(e){return n.toggleCurrent(t),e.stopPropagation(),!1}),t.prepend(expand)})},reset:function(){var e=encodeURI(window.location.hash)||"#";try{var t=$(".wy-menu-vertical"),i=t.find('[href="'+e+'"]');if(0===i.length){var n=$('.document [id="'+e.substring(1)+'"]').closest("div.section");0===(i=t.find('[href="#'+n.attr("id")+'"]')).length&&(i=t.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=e)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(e){var t=e.closest("li");t.siblings("li.current").removeClass("current"),t.siblings().find("li.current").removeClass("current"),t.find("> ul li.current").removeClass("current"),t.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:t.exports.ThemeNav,StickyNav:t.exports.ThemeNav}),function(){for(var o=0,e=["ms","moz","webkit","o"],t=0;t/g,u=/"/g,h=/"/g,p=/&#([a-zA-Z0-9]*);?/gim,f=/:?/gim,g=/&newline;?/gim,m=/((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/gi,v=/e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi,w=/u\s*r\s*l\s*\(.*/gi;function b(e){return e.replace(u,""")}function _(e){return e.replace(h,'"')}function y(e){return e.replace(p,function(e,t){return"x"===t[0]||"X"===t[0]?String.fromCharCode(parseInt(t.substr(1),16)):String.fromCharCode(parseInt(t,10))})}function x(e){return e.replace(f,":").replace(g," ")}function k(e){for(var t="",i=0,n=e.length;i/g;i.whiteList={a:["target","href","title"],abbr:["title"],address:[],area:["shape","coords","href","alt"],article:[],aside:[],audio:["autoplay","controls","loop","preload","src"],b:[],bdi:["dir"],bdo:["dir"],big:[],blockquote:["cite"],br:[],caption:[],center:[],cite:[],code:[],col:["align","valign","span","width"],colgroup:["align","valign","span","width"],dd:[],del:["datetime"],details:["open"],div:[],dl:[],dt:[],em:[],font:["color","size","face"],footer:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],header:[],hr:[],i:[],img:["src","alt","title","width","height"],ins:["datetime"],li:[],mark:[],nav:[],ol:[],p:[],pre:[],s:[],section:[],small:[],span:[],sub:[],sup:[],strong:[],table:["width","border","align","valign"],tbody:["align","valign"],td:["width","rowspan","colspan","align","valign"],tfoot:["align","valign"],th:["width","rowspan","colspan","align","valign"],thead:["align","valign"],tr:["rowspan","align","valign"],tt:[],u:[],ul:[],video:["autoplay","controls","loop","preload","src","height","width"]},i.getDefaultWhiteList=o,i.onTag=function(e,t,i){},i.onIgnoreTag=function(e,t,i){},i.onTagAttr=function(e,t,i){},i.onIgnoreTagAttr=function(e,t,i){},i.safeAttrValue=function(e,t,i,n){if(i=T(i),"href"===t||"src"===t){if("#"===(i=c.trim(i)))return"#";if("http://"!==i.substr(0,7)&&"https://"!==i.substr(0,8)&&"mailto:"!==i.substr(0,7)&&"tel:"!==i.substr(0,4)&&"#"!==i[0]&&"/"!==i[0])return""}else if("background"===t){if(m.lastIndex=0,m.test(i))return""}else if("style"===t){if(v.lastIndex=0,v.test(i))return"";if(w.lastIndex=0,w.test(i)&&(m.lastIndex=0,m.test(i)))return"";!1!==n&&(i=(n=n||s).process(i))}return 
[Minified JavaScript elided: the remainder of this hunk replaces the bundled
doc-embed JavaScript build output (the footer, search, sphinx, and sponsorship
modules plus vendored sphinx-rtd-theme, xss, and bowser code); the HTML
fragments embedded in its strings were corrupted in extraction and the
minified source is not recoverable. The one functional change visible in it is
in the embedded search: result links are now taken directly from the `link`
value returned by the search API, instead of appending
DOCUMENTATION_OPTIONS.FILE_SUFFIX on the client (with a special case for the
`readthedocsdirhtml` builder).]
diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index d5e123d2863..e7155df61e2 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -9,6 +9,7 @@
 
 from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion
 from readthedocs.builds.models import Version
+from readthedocs.projects.constants import MKDOCS, SPHINX_HTMLDIR
 from readthedocs.projects.models import HTMLFile, Project
 from readthedocs.search import tasks, utils
 from readthedocs.search.faceted_search import PageSearch
@@ -27,15 +28,28 @@ class PageSearchSerializer(serializers.Serializer):
     version = serializers.CharField()
     title = serializers.CharField()
     path = serializers.CharField()
+    full_path = serializers.CharField()
     link = serializers.SerializerMethodField()
     highlight = serializers.SerializerMethodField()
     inner_hits = serializers.SerializerMethodField()
 
     def get_link(self, obj):
-        projects_url = self.context.get('projects_url')
-        if projects_url:
-            docs_url = projects_url[obj.project]
-            return docs_url + obj.path
+        project_data = self.context['projects_data'].get(obj.project)
+        if not project_data:
+            return None
+
+        docs_url, doctype = project_data
+        path = obj.full_path
+
+        # Generate an appropriate link for the doctype,
+        # And always end it with / so it goes directly to proxito.
+        if (
+            doctype in {SPHINX_HTMLDIR, MKDOCS} and
+            path == 'index.html' or path.endswith('/index.html')
+        ):
+            path = path[:-len('index.html')].rstrip('/') + '/'
+
+        return docs_url + path
 
     def get_highlight(self, obj):
         highlight = getattr(obj.meta, 'highlight', None)
@@ -155,7 +169,7 @@ def validate_query_params(self):
 
     def get_serializer_context(self):
         context = super().get_serializer_context()
-        context['projects_url'] = self.get_all_projects_url()
+        context['projects_data'] = self.get_all_projects_data()
         return context
 
     def get_all_projects(self):
@@ -183,19 +197,17 @@ def get_all_projects(self):
             all_projects.append(version.project)
         return all_projects
 
-    def get_all_projects_url(self):
+    def get_all_projects_data(self):
         """
-        Return a dict containing the project slug and its version URL.
-
-        The dictionary contains the project and its subprojects . Each project's
-        slug is used as a key and the documentation URL for that project and
-        version as the value.
+        Return a dict containing the project slug and its version URL and version's doctype.
 
-        Example:
+        The dictionary contains the project and its subprojects. Each project's
+        slug is used as a key and a tuple with the documentation URL and doctype
+        from the version.
 
+        Example:
 
         {
-            "requests": "https://requests.readthedocs.io/en/latest/",
-            "requests-oauth": "https://requests-oauth.readthedocs.io/en/latest/",
+            "requests": ("https://requests.readthedocs.io/en/latest/", "sphinx"),
+            "requests-oauth": ("https://requests-oauth.readthedocs.io/en/latest/", "sphinx_htmldir"),
         }
 
         :rtype: dict
@@ -205,7 +217,17 @@ def get_all_projects_url(self):
         projects_url = {}
         for project in all_projects:
             projects_url[project.slug] = project.get_docs_url(version_slug=version_slug)
-        return projects_url
+
+        versions_doctype = (
+            Version.objects
+            .filter(project__slug__in=projects_url.keys(), slug=version_slug)
+            .values_list('project__slug', 'documentation_type')
+        )
+        projects_data = {
+            project_slug: (projects_url[project_slug], doctype)
+            for project_slug, doctype in versions_doctype
+        }
+        return projects_data
 
     def list(self, request, *args, **kwargs):
        """Overriding ``list`` method to record query in database."""
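The context shape this introduces is easiest to see with concrete values. A
minimal sketch, reusing the slugs and URLs from the docstring example above
(the page path is invented):

    # projects_data maps project slug -> (docs_url, doctype).
    projects_data = {
        'requests': ('https://requests.readthedocs.io/en/latest/', 'sphinx'),
    }

    docs_url, doctype = projects_data['requests']
    # For a plain-HTML doctype, get_link() simply concatenates:
    assert docs_url + 'api.html' == 'https://requests.readthedocs.io/en/latest/api.html'
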
From 6f2ce2cbc34974a196c58c98e264c33fd18194f9 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Tue, 12 May 2020 18:24:14 -0500
Subject: [PATCH 34/72] Use regex

---
 readthedocs/search/api.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index e6ca62a3b78..bacd39f9b88 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -1,5 +1,6 @@
 import itertools
 import logging
+import re
 
 from django.shortcuts import get_object_or_404
 from django.utils import timezone
@@ -43,11 +44,9 @@ def get_link(self, obj):
 
         # Generate an appropriate link for the doctype,
         # And always end it with / so it goes directly to proxito.
-        if (
-            doctype in {SPHINX_HTMLDIR, MKDOCS} and
-            path == 'index.html' or path.endswith('/index.html')
-        ):
-            path = path[:-len('index.html')].rstrip('/') + '/'
+        if doctype in {SPHINX_HTMLDIR, MKDOCS}:
+            new_path = re.sub('(^|/)index.html$', '', path)
+            path = path.rstrip('/') + '/'
 
         return docs_url + path
 
@@ -208,8 +207,14 @@ def get_all_projects_data(self):
         from the version.
 
         Example:
 
         {
-            "requests": ("https://requests.readthedocs.io/en/latest/", "sphinx"),
-            "requests-oauth": ("https://requests-oauth.readthedocs.io/en/latest/", "sphinx_htmldir"),
+            "requests": (
+                "https://requests.readthedocs.io/en/latest/",
+                "sphinx",
+            ),
+            "requests-oauth": (
+                "https://requests-oauth.readthedocs.io/en/latest/",
+                "sphinx_htmldir",
+            ),
         }
 
         :rtype: dict
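A quick illustration of what this substitution does to a few representative
paths (inputs invented; the pattern is anchored, so only a trailing
index.html segment is affected):

    import re

    assert re.sub('(^|/)index.html$', '', 'index.html') == ''
    assert re.sub('(^|/)index.html$', '', 'guides/index.html') == 'guides'
    assert re.sub('(^|/)index.html$', '', 'support.html') == 'support.html'

Note that this revision computes `new_path` but still derives `path` from the
old value; later patches in this series rework that branch.
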
From 247cf60e9e960b8c8c158ea49cd14f66570f175b Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Tue, 12 May 2020 18:25:14 -0500
Subject: [PATCH 35/72] Better wording

---
 readthedocs/search/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index bacd39f9b88..ffb2a054607 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -42,8 +42,8 @@ def get_link(self, obj):
         docs_url, doctype = project_data
         path = obj.full_path
 
-        # Generate an appropriate link for the doctype,
-        # And always end it with / so it goes directly to proxito.
+        # Generate an appropriate link for the doctypes that use htmldir,
+        # and always end it with / so it goes directly to proxito.
         if doctype in {SPHINX_HTMLDIR, MKDOCS}:
             new_path = re.sub('(^|/)index.html$', '', path)
             path = path.rstrip('/') + '/'
From 75623f1224315b0bc223f32685f65f97ea89c583 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Tue, 12 May 2020 18:29:41 -0500
Subject: [PATCH 36/72] Sphinx Search: don't skip indexing if one file fails

Changed the var names to make them more clear

---
 readthedocs/projects/models.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index d20cd8048f1..5dd705c70b0 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -1242,9 +1242,6 @@ def get_processed_json_sphinx(self):
         Both lead to `foo/index.html`
         https://github.com/rtfd/readthedocs.org/issues/5368
         """
-        file_path = None
-        storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
-
         fjson_paths = []
         basename = os.path.splitext(self.path)[0]
         fjson_paths.append(basename + '.fjson')
@@ -1252,22 +1249,23 @@ def get_processed_json_sphinx(self):
             new_basename = re.sub(r'\/index$', '', basename)
             fjson_paths.append(new_basename + '.fjson')
 
+        storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
         storage_path = self.project.get_storage_path(
             type_='json', version_slug=self.version.slug, include_file=False
         )
-        try:
-            for fjson_path in fjson_paths:
-                file_path = storage.join(storage_path, fjson_path)
-                if storage.exists(file_path):
-                    return process_file(file_path)
-        except Exception:
-            log.warning(
-                'Unhandled exception during search processing file: %s',
-                file_path,
-            )
+        for fjson_path in fjson_paths:
+            try:
+                fjson_storage_path = storage.join(storage_path, fjson_path)
+                if storage.exists(fjson_storage_path):
+                    return process_file(fjson_storage_path)
+            except Exception:
+                log.warning(
+                    'Unhandled exception during search processing file: %s',
+                    fjson_path,
+                )
 
         return {
-            'path': file_path,
+            'path': self.path,
             'title': '',
             'sections': [],
             'domain_data': {},
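Abstracted away from the storage details, the change moves the try/except
inside the loop. A self-contained sketch of the pattern, with invented names:

    import logging

    log = logging.getLogger(__name__)

    def process_first_available(candidates, process):
        # Try each candidate independently so one bad file no longer
        # aborts processing of the remaining candidates.
        for candidate in candidates:
            try:
                return process(candidate)
            except Exception:
                log.warning('Unhandled exception during search processing file: %s', candidate)
        return None
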
From b144d494a2f6a26fa728fb949e8bd49fc24e0e27 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Tue, 12 May 2020 18:34:03 -0500
Subject: [PATCH 37/72] Replace map with generator expression

---
 readthedocs/search/parse_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py
index 3c356785da6..c71bce1549f 100644
--- a/readthedocs/search/parse_json.py
+++ b/readthedocs/search/parse_json.py
@@ -205,7 +205,7 @@ def parse_content(content, remove_first_line=False):
     if remove_first_line and len(content) > 1:
         content = content[1:]
 
-    content = map(lambda x: x.strip(), content)
+    content = (text.strip() for text in content)
     content = ' '.join(text for text in content if text)
     return content
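Both forms are lazy and yield the same values; the generator expression just
drops the lambda. A runnable illustration with invented sample data:

    lines = ['  foo ', 'bar  ', '']
    assert list(map(lambda x: x.strip(), lines)) == [text.strip() for text in lines]
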
From 88ad2d10968a006546a54670044dcebea14d6484 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Tue, 12 May 2020 18:36:49 -0500
Subject: [PATCH 38/72] Add comment

---
 readthedocs/search/parse_json.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py
index c71bce1549f..051ac19a2e8 100644
--- a/readthedocs/search/parse_json.py
+++ b/readthedocs/search/parse_json.py
@@ -199,12 +199,13 @@ def _get_text_for_domain_data(desc):
 def parse_content(content, remove_first_line=False):
     """Removes new line characters and ¶."""
     content = content.replace('¶', '').strip()
+    content = content.split('\n')
 
     # removing the starting text of each
-    content = content.split('\n')
     if remove_first_line and len(content) > 1:
         content = content[1:]
 
+    # Convert all new lines to " "
     content = (text.strip() for text in content)
     content = ' '.join(text for text in content if text)
     return content
From 774054246f5669b1b213f7b915d3ad0d9ab03880 Mon Sep 17 00:00:00 2001
From: David Fischer
Date: Wed, 13 May 2020 10:22:01 -0700
Subject: [PATCH 39/72] Canonical/HTTPS redirect fix

- Only canonicalize domain when HTTPS and canonical
- On RTD for Business there was an infinite redirect issue here
  as the resolver won't use a custom domain without HTTPS.

---
 readthedocs/proxito/middleware.py            | 5 ++++-
 readthedocs/proxito/tests/test_middleware.py | 5 +++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py
index 5493de7f33d..d95eba84df5 100644
--- a/readthedocs/proxito/middleware.py
+++ b/readthedocs/proxito/middleware.py
@@ -56,7 +56,10 @@ def map_host_to_project_slug(request):  # pylint: disable=too-many-return-statem
         project_slug = host_parts[0]
         request.subdomain = True
         log.debug('Proxito Public Domain: host=%s', host)
-        if Domain.objects.filter(project__slug=project_slug).filter(canonical=True).exists():
+        if Domain.objects.filter(project__slug=project_slug).filter(
+            canonical=True,
+            https=True,
+        ).exists():
             log.debug('Proxito Public Domain -> Canonical Domain Redirect: host=%s', host)
             request.canonicalize = 'canonical-cname'
         return project_slug
diff --git a/readthedocs/proxito/tests/test_middleware.py b/readthedocs/proxito/tests/test_middleware.py
index a7296ab7bfb..a71d404cce0 100644
--- a/readthedocs/proxito/tests/test_middleware.py
+++ b/readthedocs/proxito/tests/test_middleware.py
@@ -50,7 +50,7 @@ def test_proper_cname_https_upgrade(self):
             self.assertEqual(request.canonicalize, 'https')
 
     def test_canonical_cname_redirect(self):
-        """Requests to the public domain URL should redirect to the custom domain only if the domain is canonical."""
+        """Requests to the public domain URL should redirect to the custom domain if the domain is canonical/https."""
         cname = 'docs.random.com'
         domain = get(Domain, project=self.pip, domain=cname, canonical=False, https=False)
 
@@ -59,8 +59,9 @@ def test_canonical_cname_redirect(self):
         self.assertIsNone(res)
         self.assertFalse(hasattr(request, 'canonicalize'))
 
-        # Make the domain canonical and make sure we redirect
+        # Make the domain canonical/https and make sure we redirect
         domain.canonical = True
+        domain.https = True
         domain.save()
         for url in (self.url, '/subdir/'):
             request = self.request(url, HTTP_HOST='pip.dev.readthedocs.io')
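The new rule reduces to a two-input predicate; a tiny sketch of the behavior
being fixed (not code from the patch):

    def should_canonicalize(canonical, https):
        # Redirect to the custom domain only when it is canonical AND HTTPS;
        # canonical-but-not-HTTPS was the case that redirected forever.
        return canonical and https

    assert should_canonicalize(True, True)
    assert not should_canonicalize(True, False)
    assert not should_canonicalize(False, True)
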
From 12e6c076f5528bab3bb50d82758053d2b00cc7d1 Mon Sep 17 00:00:00 2001
From: santos22
Date: Sun, 10 May 2020 22:05:45 -0700
Subject: [PATCH 40/72] Hide unbuilt versions in footer flyout

---
 readthedocs/builds/querysets.py            |  5 +-
 readthedocs/projects/models.py             |  1 +
 readthedocs/rtd_tests/tests/test_footer.py | 59 +++++++++++++++++++++-
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/readthedocs/builds/querysets.py b/readthedocs/builds/querysets.py
index 79f2362b6b3..ecaadd64d0e 100644
--- a/readthedocs/builds/querysets.py
+++ b/readthedocs/builds/querysets.py
@@ -24,7 +24,8 @@ def _add_user_repos(self, queryset, user):
             queryset = user_queryset | queryset
         return queryset
 
-    def public(self, user=None, project=None, only_active=True, include_hidden=True):
+    def public(self, user=None, project=None, only_active=True,
+               include_hidden=True, only_built=False):
         queryset = self.filter(privacy_level=constants.PUBLIC)
         if user:
             queryset = self._add_user_repos(queryset, user)
@@ -32,6 +33,8 @@ def public(self, user=None, project=None, only_active=True, include_hidden=True):
             queryset = queryset.filter(project=project)
         if only_active:
             queryset = queryset.filter(active=True)
+        if only_built:
+            queryset = queryset.filter(built=True)
         if not include_hidden:
             queryset = queryset.filter(hidden=False)
         return queryset.distinct()
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
index 5dd705c70b0..0006ccdd914 100644
--- a/readthedocs/projects/models.py
+++ b/readthedocs/projects/models.py
@@ -881,6 +881,7 @@ def ordered_active_versions(self, **kwargs):
             {
                 'project': self,
                 'only_active': True,
+                'only_built': True,
             },
         )
         versions = (
diff --git a/readthedocs/rtd_tests/tests/test_footer.py b/readthedocs/rtd_tests/tests/test_footer.py
index a77d4cbb1f0..568d89adeae 100644
--- a/readthedocs/rtd_tests/tests/test_footer.py
+++ b/readthedocs/rtd_tests/tests/test_footer.py
@@ -27,7 +27,7 @@ def setUp(self):
             privacy_level=PUBLIC,
             main_language_project=None,
         )
-        self.pip.versions.update(privacy_level=PUBLIC)
+        self.pip.versions.update(privacy_level=PUBLIC, built=True)
         self.latest = self.pip.versions.get(slug=LATEST)
 
         self.url = (
@@ -224,6 +224,62 @@ def test_hidden_versions(self):
         self.assertIn('/en/latest/', response.data['html'])
         self.assertNotIn('/en/2.0/', response.data['html'])
 
+    def test_built_versions(self):
+        built_version = get(
+            Version,
+            slug='2.0',
+            active=True,
+            built=True,
+            privacy_level=PUBLIC,
+            project=self.pip,
+        )
+
+        # The built versions appears on the footer
+        self.url = (
+            reverse('footer_html') +
+            f'?project={self.pip.slug}&version={self.latest.slug}&page=index&docroot=/'
+        )
+        response = self.render()
+        self.assertIn('/en/latest/', response.data['html'])
+        self.assertIn('/en/2.0/', response.data['html'])
+
+        # We can access the built version, and it appears on the footer
+        self.url = (
+            reverse('footer_html') +
+            f'?project={self.pip.slug}&version={built_version.slug}&page=index&docroot=/'
+        )
+        response = self.render()
+        self.assertIn('/en/latest/', response.data['html'])
+        self.assertIn('/en/2.0/', response.data['html'])
+
+    def test_not_built_versions(self):
+        not_built_version = get(
+            Version,
+            slug='2.0',
+            active=True,
+            built=False,
+            privacy_level=PUBLIC,
+            project=self.pip,
+        )
+
+        # The un-built version doesn't appear on the footer
+        self.url = (
+            reverse('footer_html') +
+            f'?project={self.pip.slug}&version={self.latest.slug}&page=index&docroot=/'
+        )
+        response = self.render()
+        self.assertIn('/en/latest/', response.data['html'])
+        self.assertNotIn('/en/2.0/', response.data['html'])
+
+        # We can access the unbuilt version, but it doesn't appear on the footer
+        self.url = (
+            reverse('footer_html') +
+            f'?project={self.pip.slug}&version={not_built_version.slug}&page=index&docroot=/'
+        )
+        response = self.render()
+        self.assertIn('/en/latest/', response.data['html'])
+        self.assertNotIn('/en/2.0/', response.data['html'])
+
 
 class TestFooterHTML(BaseTestFooterHTML, TestCase):
 
@@ -389,6 +445,7 @@ def test_version_queries(self):
             identifier=identifier,
             type=TAG,
             active=True,
+            built=True,
         )
 
         with self.assertNumQueries(self.EXPECTED_QUERIES):
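A hedged usage sketch of the new flag (assumes a `Project` instance named
`project`, and that this queryset method is reachable from `Version`'s
default manager, as `ordered_active_versions()` above relies on):

    from readthedocs.builds.models import Version

    # Public, active versions that have also been built at least once:
    versions = Version.objects.public(
        project=project,
        only_active=True,
        only_built=True,
    )
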
"id": "title", + "title": "Title", + "content": "Some content from index" + } + ], + "domains": [], + "domain_data": {} +} diff --git a/readthedocs/search/tests/dummy_data.py b/readthedocs/search/tests/dummy_data.py index 8c1cc9e5951..3b998c17474 100644 --- a/readthedocs/search/tests/dummy_data.py +++ b/readthedocs/search/tests/dummy_data.py @@ -1,7 +1,7 @@ PROJECT_DATA_FILES = { 'pipeline': ['installation', 'signals'], 'kuma': ['documentation', 'docker'], - 'docs': ['support', 'wiping'], + 'docs': ['support', 'wiping', 'index', 'guides/index'], } ALL_PROJECTS = PROJECT_DATA_FILES.keys() diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 72d9b403edc..94dc37eeb6b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -6,7 +6,14 @@ from django_dynamic_fixture import G from readthedocs.builds.models import Version -from readthedocs.projects.constants import PUBLIC +from readthedocs.projects.constants import ( + MKDOCS, + MKDOCS_HTML, + PUBLIC, + SPHINX, + SPHINX_HTMLDIR, + SPHINX_SINGLEHTML, +) from readthedocs.projects.models import HTMLFile, Project from readthedocs.search.api import PageSearchAPIView from readthedocs.search.documents import PageDocument @@ -324,6 +331,102 @@ def test_doc_search_hidden_versions(self, api_client, all_projects): first_result = data[0] assert first_result['project'] == subproject.slug + @pytest.mark.parametrize('doctype', [SPHINX, SPHINX_SINGLEHTML, MKDOCS_HTML]) + def test_search_correct_link_html_projects(self, api_client, doctype): + project = Project.objects.get(slug='docs') + project.versions.update(documentation_type=doctype) + version = project.versions.all().first() + + # Check for a normal page. + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': 'Support', + } + resp = self.get_search(api_client, search_params) + assert resp.status_code == 200 + + result = resp.data['results'][0] + + assert result['project'] == project.slug + assert result['link'].endswith('en/latest/support.html') + + # Check the main index page. + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': 'Some content from index', + } + resp = self.get_search(api_client, search_params) + assert resp.status_code == 200 + + result = resp.data['results'][0] + + assert result['project'] == project.slug + assert result['link'].endswith('en/latest/index.html') + + # Check the index page of a subdirectory. + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': 'Some content from guides/index', + } + resp = self.get_search(api_client, search_params) + assert resp.status_code == 200 + + result = resp.data['results'][0] + + assert result['project'] == project.slug + assert result['link'].endswith('en/latest/guides/index.html') + + @pytest.mark.parametrize('doctype', [SPHINX_HTMLDIR, MKDOCS]) + def test_search_correct_link_htmldir_projects(self, api_client, doctype): + project = Project.objects.get(slug='docs') + project.versions.update(documentation_type=doctype) + version = project.versions.all().first() + + # Check for a normal page. + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': 'Support', + } + resp = self.get_search(api_client, search_params) + assert resp.status_code == 200 + + result = resp.data['results'][0] + + assert result['project'] == project.slug + assert result['link'].endswith('en/latest/support.html') + + # Check the main index page. 
From 6b9fb1a009025f93c006260c7607edfb5e405b9e Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Wed, 13 May 2020 18:05:34 -0500
Subject: [PATCH 43/72] Add tests

---
 readthedocs/search/api.py                     |  15 ++-
 readthedocs/search/tests/conftest.py          |   8 +-
 .../search/tests/data/docs/guides/index.json  |  13 +++
 readthedocs/search/tests/data/docs/index.json |  13 +++
 readthedocs/search/tests/dummy_data.py        |   2 +-
 readthedocs/search/tests/test_api.py          | 105 +++++++++++++++++-
 6 files changed, 147 insertions(+), 9 deletions(-)
 create mode 100644 readthedocs/search/tests/data/docs/guides/index.json
 create mode 100644 readthedocs/search/tests/data/docs/index.json

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index ffb2a054607..cdf6fc14448 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -45,8 +45,10 @@ def get_link(self, obj):
         # Generate an appropriate link for the doctypes that use htmldir,
         # and always end it with / so it goes directly to proxito.
         if doctype in {SPHINX_HTMLDIR, MKDOCS}:
-            new_path = re.sub('(^|/)index.html$', '', path)
-            path = path.rstrip('/') + '/'
+            new_path = re.sub('(^|/)index.html$', '/', path)
+            # docs_url already ends with /,
+            # make sure to not include it twice.
+            path = new_path.lstrip('/')
 
         return docs_url + path
 
@@ -221,17 +223,18 @@ def get_all_projects_data(self):
         """
         all_projects = self.get_all_projects()
         version_slug = self._get_version().slug
-        projects_url = {}
+        project_urls = {}
         for project in all_projects:
-            projects_url[project.slug] = project.get_docs_url(version_slug=version_slug)
+            project_urls[project.slug] = project.get_docs_url(version_slug=version_slug)
 
         versions_doctype = (
             Version.objects
-            .filter(project__slug__in=projects_url.keys(), slug=version_slug)
+            .filter(project__slug__in=project_urls.keys(), slug=version_slug)
             .values_list('project__slug', 'documentation_type')
         )
+
         projects_data = {
-            project_slug: (projects_url[project_slug], doctype)
+            project_slug: (project_urls[project_slug], doctype)
             for project_slug, doctype in versions_doctype
         }
         return projects_data
diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py
index 1705118b380..ff8caeda745 100644
--- a/readthedocs/search/tests/conftest.py
+++ b/readthedocs/search/tests/conftest.py
@@ -41,7 +41,13 @@ def all_projects(es_index, mock_processed_json, db, settings):
         # file_basename in config are without extension so add html extension
         file_name = file_basename + '.html'
         version = project.versions.all()[0]
-        html_file = G(HTMLFile, project=project, version=version, name=file_name)
+        html_file = G(
+            HTMLFile,
+            project=project,
+            version=version,
+            name=file_name,
+            path=file_name,
+        )
 
         # creating sphinx domain test objects
         file_path = get_json_file_path(project.slug, file_basename)
diff --git a/readthedocs/search/tests/data/docs/guides/index.json b/readthedocs/search/tests/data/docs/guides/index.json
new file mode 100644
index 00000000000..baffc4d0f7e
--- /dev/null
+++ b/readthedocs/search/tests/data/docs/guides/index.json
@@ -0,0 +1,13 @@
+{
+    "path": "guides/index",
+    "title": "Guides",
+    "sections": [
+        {
+            "id": "guides",
+            "title": "Guides",
+            "content": "Content from guides"
+        }
+    ],
+    "domains": [],
+    "domain_data": {}
+}
diff --git a/readthedocs/search/tests/data/docs/index.json b/readthedocs/search/tests/data/docs/index.json
new file mode 100644
index 00000000000..c85846aa5bb
--- /dev/null
+++ b/readthedocs/search/tests/data/docs/index.json
@@ -0,0 +1,13 @@
+{
+    "path": "index",
+    "title": "Index",
+    "sections": [
+        {
+            "id": "title",
+            "title": "Title",
+            "content": "Some content from index"
+        }
+    ],
+    "domains": [],
+    "domain_data": {}
+}
diff --git a/readthedocs/search/tests/dummy_data.py b/readthedocs/search/tests/dummy_data.py
index 8c1cc9e5951..3b998c17474 100644
--- a/readthedocs/search/tests/dummy_data.py
+++ b/readthedocs/search/tests/dummy_data.py
@@ -1,7 +1,7 @@
 PROJECT_DATA_FILES = {
     'pipeline': ['installation', 'signals'],
     'kuma': ['documentation', 'docker'],
-    'docs': ['support', 'wiping'],
+    'docs': ['support', 'wiping', 'index', 'guides/index'],
 }
 
 ALL_PROJECTS = PROJECT_DATA_FILES.keys()
diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py
index 72d9b403edc..94dc37eeb6b 100644
--- a/readthedocs/search/tests/test_api.py
+++ b/readthedocs/search/tests/test_api.py
@@ -6,7 +6,14 @@
 from django_dynamic_fixture import G
 
 from readthedocs.builds.models import Version
-from readthedocs.projects.constants import PUBLIC
+from readthedocs.projects.constants import (
+    MKDOCS,
+    MKDOCS_HTML,
+    PUBLIC,
+    SPHINX,
+    SPHINX_HTMLDIR,
+    SPHINX_SINGLEHTML,
+)
 from readthedocs.projects.models import HTMLFile, Project
 from readthedocs.search.api import PageSearchAPIView
 from readthedocs.search.documents import PageDocument
@@ -324,6 +331,102 @@ def test_doc_search_hidden_versions(self, api_client, all_projects):
         first_result = data[0]
         assert first_result['project'] == subproject.slug
 
+    @pytest.mark.parametrize('doctype', [SPHINX, SPHINX_SINGLEHTML, MKDOCS_HTML])
+    def test_search_correct_link_html_projects(self, api_client, doctype):
+        project = Project.objects.get(slug='docs')
+        project.versions.update(documentation_type=doctype)
+        version = project.versions.all().first()
+
+        # Check for a normal page.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Support',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/support.html')
+
+        # Check the main index page.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Some content from index',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/index.html')
+
+        # Check the index page of a subdirectory.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Some content from guides/index',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/guides/index.html')
+
+    @pytest.mark.parametrize('doctype', [SPHINX_HTMLDIR, MKDOCS])
+    def test_search_correct_link_htmldir_projects(self, api_client, doctype):
+        project = Project.objects.get(slug='docs')
+        project.versions.update(documentation_type=doctype)
+        version = project.versions.all().first()
+
+        # Check for a normal page.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Support',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/support.html')
+
+        # Check the main index page.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Some content from index',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/')
+
+        # Check the index page of a subdirectory.
+        search_params = {
+            'project': project.slug,
+            'version': version.slug,
+            'q': 'Some content from guides/index',
+        }
+        resp = self.get_search(api_client, search_params)
+        assert resp.status_code == 200
+
+        result = resp.data['results'][0]
+
+        assert result['project'] == project.slug
+        assert result['link'].endswith('en/latest/guides/')
+
 
 class TestDocumentSearch(BaseTestDocumentSearch):
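With this revision the htmldir branch takes its final shape; a runnable
sketch of how it maps paths (the helper name is invented):

    import re

    def _htmldir_path(path):
        # Mirrors the SPHINX_HTMLDIR/MKDOCS branch of get_link() above.
        new_path = re.sub('(^|/)index.html$', '/', path)
        return new_path.lstrip('/')

    assert _htmldir_path('support.html') == 'support.html'
    assert _htmldir_path('index.html') == ''           # project root
    assert _htmldir_path('guides/index.html') == 'guides/'
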
From 1f01d2c7d79323e54e3c5c168eab6e2796dc6d89 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Wed, 13 May 2020 18:09:27 -0500
Subject: [PATCH 44/72] Update data

---
 readthedocs/search/tests/data/docs/guides/index.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs/search/tests/data/docs/guides/index.json b/readthedocs/search/tests/data/docs/guides/index.json
index baffc4d0f7e..5ca9b20764a 100644
--- a/readthedocs/search/tests/data/docs/guides/index.json
+++ b/readthedocs/search/tests/data/docs/guides/index.json
@@ -5,7 +5,7 @@
         {
             "id": "guides",
             "title": "Guides",
-            "content": "Content from guides"
+            "content": "Content from guides/index"
         }
     ],
     "domains": [],
+ @pytest.mark.parametrize('doctype', [SPHINX, SPHINX_SINGLEHTML, MKDOCS_HTML]) + def test_search_correct_link_for_index_page_subdirectory_html_projects(self, api_client, doctype): + project = Project.objects.get(slug='docs') + project.versions.update(documentation_type=doctype) + version = project.versions.all().first() + search_params = { 'project': project.slug, 'version': version.slug, @@ -375,17 +382,15 @@ def test_search_correct_link_html_projects(self, api_client, doctype): assert resp.status_code == 200 result = resp.data['results'][0] - assert result['project'] == project.slug assert result['link'].endswith('en/latest/guides/index.html') @pytest.mark.parametrize('doctype', [SPHINX_HTMLDIR, MKDOCS]) - def test_search_correct_link_htmldir_projects(self, api_client, doctype): + def test_search_correct_link_for_normal_page_htmldir_projects(self, api_client, doctype): project = Project.objects.get(slug='docs') project.versions.update(documentation_type=doctype) version = project.versions.all().first() - # Check for a normal page. search_params = { 'project': project.slug, 'version': version.slug, @@ -395,11 +400,15 @@ def test_search_correct_link_htmldir_projects(self, api_client, doctype): assert resp.status_code == 200 result = resp.data['results'][0] - assert result['project'] == project.slug assert result['link'].endswith('en/latest/support.html') - # Check the main index page. + @pytest.mark.parametrize('doctype', [SPHINX_HTMLDIR, MKDOCS]) + def test_search_correct_link_for_index_page_htmldir_projects(self, api_client, doctype): + project = Project.objects.get(slug='docs') + project.versions.update(documentation_type=doctype) + version = project.versions.all().first() + search_params = { 'project': project.slug, 'version': version.slug, @@ -409,11 +418,15 @@ def test_search_correct_link_htmldir_projects(self, api_client, doctype): assert resp.status_code == 200 result = resp.data['results'][0] - assert result['project'] == project.slug assert result['link'].endswith('en/latest/') - # Check the index page of a subdirectory. + @pytest.mark.parametrize('doctype', [SPHINX_HTMLDIR, MKDOCS]) + def test_search_correct_link_for_index_page_subdirectory_htmldir_projects(self, api_client, doctype): + project = Project.objects.get(slug='docs') + project.versions.update(documentation_type=doctype) + version = project.versions.all().first() + search_params = { 'project': project.slug, 'version': version.slug, @@ -423,7 +436,6 @@ def test_search_correct_link_htmldir_projects(self, api_client, doctype): assert resp.status_code == 200 result = resp.data['results'][0] - assert result['project'] == project.slug assert result['link'].endswith('en/latest/guides/') From 2237fbfffbc0dbfe870c3300adab4ab03f80be42 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 14 May 2020 19:28:25 +0200 Subject: [PATCH 46/72] Proxy /api/v2/embed/ URL to be used on custom domains --- readthedocs/api/v2/proxied_urls.py | 6 ++++++ readthedocs/proxito/urls.py | 1 + 2 files changed, 7 insertions(+) diff --git a/readthedocs/api/v2/proxied_urls.py b/readthedocs/api/v2/proxied_urls.py index 4a01e23a581..5ec02bb73fd 100644 --- a/readthedocs/api/v2/proxied_urls.py +++ b/readthedocs/api/v2/proxied_urls.py @@ -5,6 +5,7 @@ so they can make use of features that require to have access to their cookies. 
""" +from django.conf import settings from django.conf.urls import include, url from .views.proxied import ProxiedFooterHTML @@ -16,3 +17,8 @@ ] urlpatterns = api_footer_urls + +if 'readthedocsext.embed' in settings.INSTALLED_APPS: + urlpatterns += [ + url(r'embed/', include('readthedocsext.embed.urls')) + ] diff --git a/readthedocs/proxito/urls.py b/readthedocs/proxito/urls.py index 413731268d5..e657edc0e7b 100644 --- a/readthedocs/proxito/urls.py +++ b/readthedocs/proxito/urls.py @@ -82,6 +82,7 @@ ), # Serve proxied API + # /_/api/v2/ url( r'^{DOC_PATH_PREFIX}api/v2/'.format( DOC_PATH_PREFIX=DOC_PATH_PREFIX, From e7fdce15918beb4736681233b9c6454acd137d9c Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 14 May 2020 19:28:50 +0200 Subject: [PATCH 47/72] Small logging string fix --- readthedocs/core/unresolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index e2b35502001..cc848586f68 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -56,8 +56,8 @@ def unresolve(self, url): ) log.info( - 'Unresolver parsed:' - 'url=%s, project=%s lang_slug=%s version_slug=%s filename=%s', + 'Unresolver parsed: ' + 'url=%s project=%s lang_slug=%s version_slug=%s filename=%s', url, final_project.slug, lang_slug, version_slug, filename ) return UnresolvedObject(final_project, lang_slug, version_slug, filename, parsed.fragment) From 20b5412f0063f5a47d450fa671416e4615bc118e Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 14 May 2020 12:43:06 -0500 Subject: [PATCH 48/72] Search: track total results Closes https://github.com/readthedocs/readthedocs.org/issues/7047 --- readthedocs/projects/views/private.py | 6 +++--- .../migrations/0002_add_total_results_field.py | 18 ++++++++++++++++++ readthedocs/search/models.py | 6 ++++++ readthedocs/search/tasks.py | 16 ++++++++-------- 4 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 readthedocs/search/migrations/0002_add_total_results_field.py diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index 34e59e30c7f..54dfd3f40b1 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -1040,7 +1040,7 @@ def _search_analytics_csv_data(self): created__date__lte=now, ) .order_by('-created') - .values_list('created', 'query') + .values_list('created', 'query', 'total_results') ) file_name = '{project_slug}_from_{start}_to_{end}.csv'.format( @@ -1052,8 +1052,8 @@ def _search_analytics_csv_data(self): file_name = '-'.join([text for text in file_name.split() if text]) csv_data = ( - [timezone.datetime.strftime(time, '%Y-%m-%d %H:%M:%S'), query] - for time, query in data + [timezone.datetime.strftime(time, '%Y-%m-%d %H:%M:%S'), query, total_results] + for time, query, total_results in data ) pseudo_buffer = Echo() writer = csv.writer(pseudo_buffer) diff --git a/readthedocs/search/migrations/0002_add_total_results_field.py b/readthedocs/search/migrations/0002_add_total_results_field.py new file mode 100644 index 00000000000..12704622569 --- /dev/null +++ b/readthedocs/search/migrations/0002_add_total_results_field.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.12 on 2020-05-14 17:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('search', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='searchquery', + name='total_results', + 
From c8b9f09a9a09c9c9994515ef8ceb4bcd06b14ea9 Mon Sep 17 00:00:00 2001
From: Santos Gallegos
Date: Thu, 14 May 2020 13:40:21 -0500
Subject: [PATCH 49/72] Show number of results in the UI

---
 readthedocs/projects/views/private.py                         | 2 +-
 readthedocs/templates/projects/projects_search_analytics.html | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py
index 54dfd3f40b1..c27d0ec7cae 100644
--- a/readthedocs/projects/views/private.py
+++ b/readthedocs/projects/views/private.py
@@ -1013,7 +1013,7 @@ def get_context_data(self, **kwargs):
             qs.values('query')
             .annotate(count=Count('id'))
             .order_by('-count', 'query')
-            .values_list('query', 'count')
+            .values_list('query', 'count', 'total_results')
         )
 
         # only show top 100 queries
diff --git a/readthedocs/templates/projects/projects_search_analytics.html b/readthedocs/templates/projects/projects_search_analytics.html
index 93fa412f4ad..335d0fb971e 100644
--- a/readthedocs/templates/projects/projects_search_analytics.html
+++ b/readthedocs/templates/projects/projects_search_analytics.html
@@ -16,9 +16,10 @@

    {% trans "Top queries" %}