From bb4e5aacc4f5c766ef2c8469e570e857d2325714 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Thu, 28 Jun 2018 03:04:48 +0600 Subject: [PATCH 1/9] [fix #4265] Port Document search API for Elasticsearch 6.x --- readthedocs/search/api.py | 17 ++++++++++++++ readthedocs/search/documents.py | 34 +++++++++++++++++++++++++++- readthedocs/search/faceted_search.py | 18 ++++----------- readthedocs/search/filters.py | 15 ++++++++++++ readthedocs/search/pagination.py | 7 ++++++ readthedocs/search/serializers.py | 8 +++++++ readthedocs/search/utils.py | 18 +++++++++++++++ readthedocs/search/views.py | 12 ++++------ readthedocs/urls.py | 3 ++- 9 files changed, 108 insertions(+), 24 deletions(-) create mode 100644 readthedocs/search/api.py create mode 100644 readthedocs/search/filters.py create mode 100644 readthedocs/search/pagination.py create mode 100644 readthedocs/search/serializers.py diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py new file mode 100644 index 00000000000..f0151859fed --- /dev/null +++ b/readthedocs/search/api.py @@ -0,0 +1,17 @@ +from rest_framework import generics + +from readthedocs.search.documents import PageDocument +from readthedocs.search.filters import SearchFilterBackend +from readthedocs.search.pagination import SearchPagination +from readthedocs.search.serializers import PageSearchSerializer + + +class PageSearchAPIView(generics.ListAPIView): + pagination_class = SearchPagination + filter_backends = [SearchFilterBackend] + serializer_class = PageSearchSerializer + + def get_queryset(self): + query = self.request.query_params.get('query') + queryset = PageDocument.search(query=query) + return queryset diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 2b8259e8ce2..e468c162d3c 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -1,5 +1,6 @@ from django.conf import settings from django_elasticsearch_dsl import DocType, Index, fields +from elasticsearch_dsl.query import SimpleQueryString, Bool from readthedocs.projects.models import Project, HTMLFile from .conf import SEARCH_EXCLUDED_FILE @@ -60,14 +61,19 @@ class Meta(object): content = fields.TextField(attr='processed_json.content') path = fields.TextField(attr='processed_json.path') + # Fields to perform search with weight + search_fields = ['title^10', 'headers^5', 'content'] + @classmethod def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None): + es_query = cls.get_es_query(query=query) kwargs = { 'using': using or cls._doc_type.using, 'index': index or cls._doc_type.index, 'doc_types': [cls], 'model': cls._doc_type.model, - 'query': query + 'query': es_query, + 'fields': cls.search_fields } filters = {} @@ -80,6 +86,32 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non return FileSearch(**kwargs) + @classmethod + def search(cls, using=None, index=None, **kwargs): + es_search = super(PageDocument, cls).search(using=using, index=index) + query = kwargs.pop('query') + es_query = cls.get_es_query(query=query) + + es_search = es_search.query(es_query) + return es_search + + @classmethod + def get_es_query(cls, query): + """Return the Elasticsearch query generated from the query string""" + all_queries = [] + + # Need to search for both 'AND' and 'OR' operations + # The score of AND should be higher as it satisfies both OR and AND + for operator in ['AND', 'OR']: + query_string = SimpleQueryString(query=query, fields=cls.search_fields, + default_operator=operator) + all_queries.append(query_string) + + # Run bool query with should, so it returns result where either of the query matches + bool_query = Bool(should=all_queries) + + return bool_query + def get_queryset(self): """Overwrite default queryset to filter certain files to index""" queryset = super(PageDocument, self).get_queryset() diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 77d9f13cc76..b483c8c5ea9 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -9,11 +9,13 @@ class RTDFacetedSearch(FacetedSearch): # TODO: Remove the overwrite when the elastic/elasticsearch-dsl-py#916 # See more: https://github.com/elastic/elasticsearch-dsl-py/issues/916 - def __init__(self, using, index, doc_types, model, **kwargs): + def __init__(self, using, index, doc_types, model, fields=None, **kwargs): self.using = using self.index = index self.doc_types = doc_types self._model = model + if fields: + self.fields = fields super(RTDFacetedSearch, self).__init__(**kwargs) @@ -25,7 +27,6 @@ class ProjectSearch(RTDFacetedSearch): class FileSearch(RTDFacetedSearch): - fields = ['title^10', 'headers^5', 'content'] facets = { 'project': TermsFacet(field='project'), 'version': TermsFacet(field='version') @@ -34,17 +35,6 @@ class FileSearch(RTDFacetedSearch): def query(self, search, query): """Add query part to ``search``""" if query: - all_queries = [] - - # Need to search for both 'AND' and 'OR' operations - # The score of AND should be higher as it comes first - for operator in ['AND', 'OR']: - query_string = SimpleQueryString(query=query, fields=self.fields, - default_operator=operator) - all_queries.append(query_string) - - # Run bool query with should, so it returns result where either of the query matches - bool_query = Bool(should=all_queries) - search = search.query(bool_query) + search = search.query(query) return search diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py new file mode 100644 index 00000000000..4efdcf4a269 --- /dev/null +++ b/readthedocs/search/filters.py @@ -0,0 +1,15 @@ +from rest_framework import filters + +from readthedocs.search.utils import get_project_slug_list_or_404 + + +class SearchFilterBackend(filters.BaseFilterBackend): + """ + Filter search result with project + """ + + def filter_queryset(self, request, queryset, view): + project_slug = request.query_params.get('project') + project_slug_list = get_project_slug_list_or_404(project_slug=project_slug, + user=request.user) + return queryset.filter('terms', project=project_slug_list) diff --git a/readthedocs/search/pagination.py b/readthedocs/search/pagination.py new file mode 100644 index 00000000000..6abfba31b98 --- /dev/null +++ b/readthedocs/search/pagination.py @@ -0,0 +1,7 @@ +from rest_framework.pagination import PageNumberPagination + + +class SearchPagination(PageNumberPagination): + page_size = 10 + page_size_query_param = 'page_size' + max_page_size = 100 diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py new file mode 100644 index 00000000000..99dbf9e7685 --- /dev/null +++ b/readthedocs/search/serializers.py @@ -0,0 +1,8 @@ +from rest_framework import serializers + + +class PageSearchSerializer(serializers.Serializer): + title = serializers.CharField() + headers = serializers.ListField() + content = serializers.CharField() + path = serializers.CharField() diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 081104c201d..5d1f3403813 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -11,8 +11,10 @@ import json from builtins import next, range +from django.shortcuts import get_object_or_404 from pyquery import PyQuery +from readthedocs.projects.models import Project log = logging.getLogger(__name__) @@ -306,3 +308,19 @@ def parse_sections(documentation_type, content): return '' return sections + + +# TODO: Rewrite all the views using this in Class Based View, +# and move this function to a mixin +def get_project_slug_list_or_404(project_slug, user): + """Return list of subproject's slug including own slug. + If the project is not available to user, redirect to 404 + """ + queryset = Project.objects.api(user).only('slug') + project = get_object_or_404(queryset, slug=project_slug) + + subprojects_slug = (queryset.filter(superprojects__parent_id=project.id) + .values_list('slug', flat=True)) + + slug_list = [project.slug] + list(subprojects_slug) + return slug_list diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 385b7d371ca..e7989023109 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -14,6 +14,7 @@ from readthedocs.projects.models import Project from readthedocs.search import lib as search_lib from readthedocs.search.documents import ProjectDocument, PageDocument +from readthedocs.search.utils import get_project_slug_list_or_404 log = logging.getLogger(__name__) LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}' @@ -54,14 +55,9 @@ def elastic_search(request): elif user_input.type == 'file': kwargs = {} if user_input.project: - queryset = Project.objects.api(request.user).only('slug') - project = get_object_or_404(queryset, slug=user_input.project) - - subprojects_slug = (queryset.filter(superprojects__parent_id=project.id) - .values_list('slug', flat=True)) - - projects_list = [project.slug] + list(subprojects_slug) - kwargs['projects_list'] = projects_list + project_slug_list = get_project_slug_list_or_404(project_slug=user_input.project, + user=request.user) + kwargs['projects_list'] = project_slug_list if user_input.version: kwargs['versions_list'] = user_input.version diff --git a/readthedocs/urls.py b/readthedocs/urls.py index 19914cd72e7..7503a0a435f 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -22,7 +22,7 @@ do_not_track, ) from readthedocs.search import views as search_views - +from readthedocs.search.api import PageSearchAPIView v1_api = Api(api_name='v1') v1_api.register(UserResource()) @@ -67,6 +67,7 @@ url(r'^api/', include(v1_api.urls)), url(r'^api/v2/', include('readthedocs.restapi.urls')), url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')), + url(r'^api/search/', PageSearchAPIView.as_view()), ] i18n_urls = [ From 4dc3e353f358ec61d3347a5fbc1304ef72dd1d8b Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Fri, 29 Jun 2018 02:24:34 +0600 Subject: [PATCH 2/9] fixup and adding test --- conftest.py | 6 + readthedocs/restapi/urls.py | 17 ++- readthedocs/search/api.py | 8 +- readthedocs/search/documents.py | 6 +- readthedocs/search/filters.py | 15 ++- readthedocs/search/serializers.py | 8 +- readthedocs/search/tests/test_api.py | 164 +++++++++++++++++++++++++++ readthedocs/search/utils.py | 3 +- readthedocs/urls.py | 2 - 9 files changed, 205 insertions(+), 24 deletions(-) create mode 100644 readthedocs/search/tests/test_api.py diff --git a/conftest.py b/conftest.py index 73474ea6726..e19351615de 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,7 @@ import logging import pytest +from rest_framework.test import APIClient def pytest_addoption(parser): @@ -17,3 +18,8 @@ def pytest_configure(config): @pytest.fixture(autouse=True) def settings_modification(settings): settings.CELERY_ALWAYS_EAGER = True + + +@pytest.fixture +def api_client(): + return APIClient() diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 51fc72e17c6..594d81882b2 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -11,6 +11,7 @@ from readthedocs.restapi.views import ( core_views, footer_views, search_views, task_views, integrations ) +from readthedocs.search.api import PageSearchAPIView from .views.model_views import (BuildViewSet, BuildCommandViewSet, ProjectViewSet, NotificationViewSet, @@ -85,19 +86,15 @@ name='api_webhook'), ] +api_search_urls = [ + url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), +] + urlpatterns += function_urls -urlpatterns += search_urls urlpatterns += task_urls +urlpatterns += search_urls urlpatterns += integration_urls - -try: - from readthedocsext.search.docsearch import DocSearch - api_search_urls = [ - url(r'^docsearch/$', DocSearch.as_view(), name='doc_search'), - ] - urlpatterns += api_search_urls -except ImportError: - pass +urlpatterns += api_search_urls try: from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index f0151859fed..a9675c28fd8 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -12,6 +12,12 @@ class PageSearchAPIView(generics.ListAPIView): serializer_class = PageSearchSerializer def get_queryset(self): - query = self.request.query_params.get('query') + """Return Elasticsearch DSL Search object instead of Django Queryset. + + Django Queryset and elasticsearch-dsl ``Search`` object is similar pattern. + So for searching, its possible to return ``Search`` object instead of queryset. + The ``filter_backends`` and ``pagination_class`` is compatible with ``Search`` + """ + query = self.request.query_params.get('query', '') queryset = PageDocument.search(query=query) return queryset diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index e468c162d3c..f2cf9deda16 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -87,12 +87,12 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non return FileSearch(**kwargs) @classmethod - def search(cls, using=None, index=None, **kwargs): - es_search = super(PageDocument, cls).search(using=using, index=index) + def search(cls, *args, **kwargs): query = kwargs.pop('query') + es_search = super(PageDocument, cls).search(*args, **kwargs) es_query = cls.get_es_query(query=query) - es_search = es_search.query(es_query) + es_search = es_search.query(es_query).highlight('content') return es_search @classmethod diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py index 4efdcf4a269..16a498a7505 100644 --- a/readthedocs/search/filters.py +++ b/readthedocs/search/filters.py @@ -4,12 +4,17 @@ class SearchFilterBackend(filters.BaseFilterBackend): - """ - Filter search result with project - """ - def filter_queryset(self, request, queryset, view): + """Filter search result with project""" + + def filter_queryset(self, request, es_search, view): + """Overwrite the method to compatible with Elasticsearch DSL Search object.""" project_slug = request.query_params.get('project') + version_slug = request.query_params.get('version') project_slug_list = get_project_slug_list_or_404(project_slug=project_slug, user=request.user) - return queryset.filter('terms', project=project_slug_list) + # Elasticsearch ``terms`` query can take multiple values as list, + # while ``term`` query takes single value. + filtered_es_search = (es_search.filter('terms', project=project_slug_list) + .filter('term', version=version_slug)) + return filtered_es_search diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 99dbf9e7685..4327c07d0ae 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -3,6 +3,10 @@ class PageSearchSerializer(serializers.Serializer): title = serializers.CharField() - headers = serializers.ListField() - content = serializers.CharField() path = serializers.CharField() + highlight = serializers.SerializerMethodField() + + def get_highlight(self, obj): + highlight = getattr(obj.meta, 'highlight', None) + if highlight: + return highlight.to_dict() diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py new file mode 100644 index 00000000000..4133fb5aa85 --- /dev/null +++ b/readthedocs/search/tests/test_api.py @@ -0,0 +1,164 @@ +import pytest +from django.core.urlresolvers import reverse + +from readthedocs.search.tests.utils import get_search_query_from_project_file + + +@pytest.mark.django_db +@pytest.mark.search +class TestPageSearch(object): + url = reverse('doc_search') + + @pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) + @pytest.mark.parametrize('page_num', [0, 1]) + def test_search_works(self, api_client, project, data_type, page_num): + query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num, + data_type=data_type) + + version = project.versions.all()[0] + url = reverse('doc_search') + resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query}) + data = resp.data + assert len(data['results']) == 1 + + # @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) + # def test_file_search_case_insensitive(self, client, project, case): + # """Check File search is case insensitive + # + # It tests with uppercase, lowercase and camelcase + # """ + # query_text = get_search_query_from_project_file(project_slug=project.slug) + # + # cased_query = getattr(query_text, case) + # query = cased_query() + # + # result, _ = self._get_search_result(url=self.url, client=client, + # search_params={'q': query, 'type': 'file'}) + # + # assert len(result) == 1 + # # Check the actual text is in the result, not the cased one + # assert query_text in result.text() + # + # def test_file_search_exact_match(self, client, project): + # """Check quoted query match exact phrase + # + # Making a query with quoted text like ``"foo bar"`` should match + # exactly ``foo bar`` phrase. + # """ + # + # # `Github` word is present both in `kuma` and `pipeline` files + # # But the phrase Github can is available only in kuma docs. + # # So search with this phrase to check + # query = r'"GitHub can"' + # + # result, _ = self._get_search_result(url=self.url, client=client, + # search_params={'q': query, 'type': 'file'}) + # + # assert len(result) == 1 + # + # def test_page_search_not_return_removed_page(self, client, project): + # """Check removed page are not in the search index""" + # query = get_search_query_from_project_file(project_slug=project.slug) + # # Make a query to check it returns result + # result, _ = self._get_search_result(url=self.url, client=client, + # search_params={'q': query, 'type': 'file'}) + # assert len(result) == 1 + # + # # Delete all the HTML files of the project + # HTMLFile.objects.filter(project=project).delete() + # # Run the query again and this time there should not be any result + # result, _ = self._get_search_result(url=self.url, client=client, + # search_params={'q': query, 'type': 'file'}) + # assert len(result) == 0 + # + # def test_file_search_show_projects(self, client, all_projects): + # """Test that search result page shows list of projects while searching for files""" + # + # # `Github` word is present both in `kuma` and `pipeline` files + # # so search with this phrase + # result, page = self._get_search_result(url=self.url, client=client, + # search_params={'q': 'GitHub', 'type': 'file'}) + # + # # There should be 2 search result + # assert len(result) == 2 + # + # # there should be 2 projects in the left side column + # content = page.find('.navigable .project-list') + # assert len(content) == 2 + # text = content.text() + # + # # kuma and pipeline should be there + # assert 'kuma' and 'pipeline' in text + # + # def test_file_search_filter_by_project(self, client): + # """Test that search result are filtered according to project""" + # + # # `Github` word is present both in `kuma` and `pipeline` files + # # so search with this phrase but filter through `kuma` project + # search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'} + # result, page = self._get_search_result(url=self.url, client=client, + # search_params=search_params) + # + # # There should be 1 search result as we have filtered + # assert len(result) == 1 + # content = page.find('.navigable .project-list') + # + # # kuma should should be there only + # assert 'kuma' in result.text() + # assert 'pipeline' not in result.text() + # + # # But there should be 2 projects in the left side column + # # as the query is present in both projects + # content = page.find('.navigable .project-list') + # if len(content) != 2: + # pytest.xfail("failing because currently all projects are not showing in project list") + # else: + # assert 'kuma' and 'pipeline' in content.text() + # + # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!") + # def test_file_search_show_versions(self, client, all_projects, es_index, settings): + # # override the settings to index all versions + # settings.INDEX_ONLY_LATEST = False + # + # project = all_projects[0] + # # Create some versions of the project + # versions = [G(Version, project=project) for _ in range(3)] + # + # query = get_search_query_from_project_file(project_slug=project.slug) + # + # result, page = self._get_search_result(url=self.url, client=client, + # search_params={'q': query, 'type': 'file'}) + # + # # There should be only one result because by default + # # only latest version result should be there + # assert len(result) == 1 + # + # content = page.find('.navigable .version-list') + # # There should be total 4 versions + # # one is latest, and other 3 that we created above + # assert len(content) == 4 + # + # project_versions = [v.slug for v in versions] + [LATEST] + # content_versions = [] + # for element in content: + # text = element.text_content() + # # strip and split to keep the version slug only + # slug = text.strip().split('\n')[0] + # content_versions.append(slug) + # + # assert sorted(project_versions) == sorted(content_versions) + # + # def test_file_search_subprojects(self, client, all_projects, es_index): + # """File search should return results from subprojects also""" + # project = all_projects[0] + # subproject = all_projects[1] + # # Add another project as subproject of the project + # project.add_subproject(subproject) + # + # # Now search with subproject content but explicitly filter by the parent project + # query = get_search_query_from_project_file(project_slug=subproject.slug) + # search_params = {'q': query, 'type': 'file', 'project': project.slug} + # result, page = self._get_search_result(url=self.url, client=client, + # search_params=search_params) + # + # assert len(result) == 1 \ No newline at end of file diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 5d1f3403813..0e769d872dc 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -313,7 +313,8 @@ def parse_sections(documentation_type, content): # TODO: Rewrite all the views using this in Class Based View, # and move this function to a mixin def get_project_slug_list_or_404(project_slug, user): - """Return list of subproject's slug including own slug. + """ + Return list of subproject's slug including own slug. If the project is not available to user, redirect to 404 """ queryset = Project.objects.api(user).only('slug') diff --git a/readthedocs/urls.py b/readthedocs/urls.py index 7503a0a435f..d6dbc593fdf 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -22,7 +22,6 @@ do_not_track, ) from readthedocs.search import views as search_views -from readthedocs.search.api import PageSearchAPIView v1_api = Api(api_name='v1') v1_api.register(UserResource()) @@ -67,7 +66,6 @@ url(r'^api/', include(v1_api.urls)), url(r'^api/v2/', include('readthedocs.restapi.urls')), url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')), - url(r'^api/search/', PageSearchAPIView.as_view()), ] i18n_urls = [ From fe2aef1adaa296b0eb802e5e5f582d006173cb03 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Fri, 29 Jun 2018 02:32:12 +0600 Subject: [PATCH 3/9] more fixup --- readthedocs/search/api.py | 5 +++-- readthedocs/search/documents.py | 5 ++--- readthedocs/search/filters.py | 5 ++++- readthedocs/search/utils.py | 1 + 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index a9675c28fd8..2b2855618c0 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -12,12 +12,13 @@ class PageSearchAPIView(generics.ListAPIView): serializer_class = PageSearchSerializer def get_queryset(self): - """Return Elasticsearch DSL Search object instead of Django Queryset. + """ + Return Elasticsearch DSL Search object instead of Django Queryset. Django Queryset and elasticsearch-dsl ``Search`` object is similar pattern. So for searching, its possible to return ``Search`` object instead of queryset. The ``filter_backends`` and ``pagination_class`` is compatible with ``Search`` """ query = self.request.query_params.get('query', '') - queryset = PageDocument.search(query=query) + queryset = PageDocument.simple_search(query=query) return queryset diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index f2cf9deda16..575f4f83322 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -87,9 +87,8 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non return FileSearch(**kwargs) @classmethod - def search(cls, *args, **kwargs): - query = kwargs.pop('query') - es_search = super(PageDocument, cls).search(*args, **kwargs) + def simple_search(cls, query, using=None, index=None): + es_search = cls.search(using=using, index=index) es_query = cls.get_es_query(query=query) es_search = es_search.query(es_query).highlight('content') diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py index 16a498a7505..8b7dc9c1b20 100644 --- a/readthedocs/search/filters.py +++ b/readthedocs/search/filters.py @@ -7,8 +7,11 @@ class SearchFilterBackend(filters.BaseFilterBackend): """Filter search result with project""" - def filter_queryset(self, request, es_search, view): + def filter_queryset(self, request, queryset, view): """Overwrite the method to compatible with Elasticsearch DSL Search object.""" + # ``queryset`` is actually a Elasticsearch DSL ``Search`` object. + # So change the variable name + es_search = queryset project_slug = request.query_params.get('project') version_slug = request.query_params.get('version') project_slug_list = get_project_slug_list_or_404(project_slug=project_slug, diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 0e769d872dc..09ca5c0f1f6 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -315,6 +315,7 @@ def parse_sections(documentation_type, content): def get_project_slug_list_or_404(project_slug, user): """ Return list of subproject's slug including own slug. + If the project is not available to user, redirect to 404 """ queryset = Project.objects.api(user).only('slug') From 41a67a33b9b494038f6d7133ef39336e5bc89083 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Sat, 30 Jun 2018 03:24:28 +0600 Subject: [PATCH 4/9] adding more tests --- readthedocs/restapi/urls.py | 7 +- readthedocs/search/api.py | 16 ++ readthedocs/search/documents.py | 3 +- readthedocs/search/serializers.py | 2 + readthedocs/search/tests/test_api.py | 218 +++++++++------------------ 5 files changed, 93 insertions(+), 153 deletions(-) diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 594d81882b2..39f9dcd7a58 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -49,13 +49,14 @@ url(r'index_search/', search_views.index_search, name='index_search'), - url(r'search/$', views.search_views.search, name='api_search'), + # url(r'search/$', views.search_views.search, name='api_search'), url(r'search/project/$', search_views.project_search, name='api_project_search'), url(r'search/section/$', search_views.section_search, name='api_section_search'), + url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), ] task_urls = [ @@ -86,15 +87,11 @@ name='api_webhook'), ] -api_search_urls = [ - url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), -] urlpatterns += function_urls urlpatterns += task_urls urlpatterns += search_urls urlpatterns += integration_urls -urlpatterns += api_search_urls try: from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 2b2855618c0..8e72547d195 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,4 +1,6 @@ from rest_framework import generics +from rest_framework import exceptions +from rest_framework.exceptions import ValidationError from readthedocs.search.documents import PageDocument from readthedocs.search.filters import SearchFilterBackend @@ -19,6 +21,20 @@ def get_queryset(self): So for searching, its possible to return ``Search`` object instead of queryset. The ``filter_backends`` and ``pagination_class`` is compatible with ``Search`` """ + # Validate all the required params are there + self.validate_query_params() query = self.request.query_params.get('query', '') queryset = PageDocument.simple_search(query=query) return queryset + + def validate_query_params(self): + required_query_params = set(['query', 'project', 'version']) + request_params = set(self.request.query_params.keys()) + missing_params = required_query_params - request_params + if missing_params: + errors = [] + for param in missing_params: + e = {param: "This query param is required"} + errors.append(e) + + raise ValidationError(errors) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 575f4f83322..3cc70343c35 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -90,8 +90,9 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non def simple_search(cls, query, using=None, index=None): es_search = cls.search(using=using, index=index) es_query = cls.get_es_query(query=query) + highlighted_fields = [f.split('^', 1)[0] for f in cls.search_fields] - es_search = es_search.query(es_query).highlight('content') + es_search = es_search.query(es_query).highlight(*highlighted_fields) return es_search @classmethod diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 4327c07d0ae..dd3ad346eef 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -2,6 +2,8 @@ class PageSearchSerializer(serializers.Serializer): + project = serializers.CharField() + version = serializers.CharField() title = serializers.CharField() path = serializers.CharField() highlight = serializers.SerializerMethodField() diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 4133fb5aa85..f02ab7b9e78 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -1,12 +1,15 @@ import pytest from django.core.urlresolvers import reverse +from django_dynamic_fixture import G +from readthedocs.builds.models import Version +from readthedocs.projects.models import HTMLFile from readthedocs.search.tests.utils import get_search_query_from_project_file @pytest.mark.django_db @pytest.mark.search -class TestPageSearch(object): +class TestDocumentSearch(object): url = reverse('doc_search') @pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) @@ -16,149 +19,70 @@ def test_search_works(self, api_client, project, data_type, page_num): data_type=data_type) version = project.versions.all()[0] - url = reverse('doc_search') - resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query}) - data = resp.data - assert len(data['results']) == 1 - - # @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) - # def test_file_search_case_insensitive(self, client, project, case): - # """Check File search is case insensitive - # - # It tests with uppercase, lowercase and camelcase - # """ - # query_text = get_search_query_from_project_file(project_slug=project.slug) - # - # cased_query = getattr(query_text, case) - # query = cased_query() - # - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # assert len(result) == 1 - # # Check the actual text is in the result, not the cased one - # assert query_text in result.text() - # - # def test_file_search_exact_match(self, client, project): - # """Check quoted query match exact phrase - # - # Making a query with quoted text like ``"foo bar"`` should match - # exactly ``foo bar`` phrase. - # """ - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # But the phrase Github can is available only in kuma docs. - # # So search with this phrase to check - # query = r'"GitHub can"' - # - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # assert len(result) == 1 - # - # def test_page_search_not_return_removed_page(self, client, project): - # """Check removed page are not in the search index""" - # query = get_search_query_from_project_file(project_slug=project.slug) - # # Make a query to check it returns result - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # assert len(result) == 1 - # - # # Delete all the HTML files of the project - # HTMLFile.objects.filter(project=project).delete() - # # Run the query again and this time there should not be any result - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # assert len(result) == 0 - # - # def test_file_search_show_projects(self, client, all_projects): - # """Test that search result page shows list of projects while searching for files""" - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # so search with this phrase - # result, page = self._get_search_result(url=self.url, client=client, - # search_params={'q': 'GitHub', 'type': 'file'}) - # - # # There should be 2 search result - # assert len(result) == 2 - # - # # there should be 2 projects in the left side column - # content = page.find('.navigable .project-list') - # assert len(content) == 2 - # text = content.text() - # - # # kuma and pipeline should be there - # assert 'kuma' and 'pipeline' in text - # - # def test_file_search_filter_by_project(self, client): - # """Test that search result are filtered according to project""" - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # so search with this phrase but filter through `kuma` project - # search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'} - # result, page = self._get_search_result(url=self.url, client=client, - # search_params=search_params) - # - # # There should be 1 search result as we have filtered - # assert len(result) == 1 - # content = page.find('.navigable .project-list') - # - # # kuma should should be there only - # assert 'kuma' in result.text() - # assert 'pipeline' not in result.text() - # - # # But there should be 2 projects in the left side column - # # as the query is present in both projects - # content = page.find('.navigable .project-list') - # if len(content) != 2: - # pytest.xfail("failing because currently all projects are not showing in project list") - # else: - # assert 'kuma' and 'pipeline' in content.text() - # - # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!") - # def test_file_search_show_versions(self, client, all_projects, es_index, settings): - # # override the settings to index all versions - # settings.INDEX_ONLY_LATEST = False - # - # project = all_projects[0] - # # Create some versions of the project - # versions = [G(Version, project=project) for _ in range(3)] - # - # query = get_search_query_from_project_file(project_slug=project.slug) - # - # result, page = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # # There should be only one result because by default - # # only latest version result should be there - # assert len(result) == 1 - # - # content = page.find('.navigable .version-list') - # # There should be total 4 versions - # # one is latest, and other 3 that we created above - # assert len(content) == 4 - # - # project_versions = [v.slug for v in versions] + [LATEST] - # content_versions = [] - # for element in content: - # text = element.text_content() - # # strip and split to keep the version slug only - # slug = text.strip().split('\n')[0] - # content_versions.append(slug) - # - # assert sorted(project_versions) == sorted(content_versions) - # - # def test_file_search_subprojects(self, client, all_projects, es_index): - # """File search should return results from subprojects also""" - # project = all_projects[0] - # subproject = all_projects[1] - # # Add another project as subproject of the project - # project.add_subproject(subproject) - # - # # Now search with subproject content but explicitly filter by the parent project - # query = get_search_query_from_project_file(project_slug=subproject.slug) - # search_params = {'q': query, 'type': 'file', 'project': project.slug} - # result, page = self._get_search_result(url=self.url, client=client, - # search_params=search_params) - # - # assert len(result) == 1 \ No newline at end of file + search_params = {'project': project.slug, 'version': version.slug, 'query': query} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + project_data = data[0] + assert project_data['project'] == project.slug + + # Check highlight return correct object + all_highlights = project_data['highlight'][data_type] + for highlight in all_highlights: + # Make it lower because our search is case insensitive + assert query.lower() in highlight.lower() + + def test_doc_search_filter_by_project(self, api_client): + """Test Doc search result are filtered according to project""" + + # `Github` word is present both in `kuma` and `pipeline` files + # so search with this phrase but filter through `kuma` project + search_params = {'query': 'GitHub', 'project': 'kuma', 'version': 'latest'} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == 'kuma' + + def test_doc_search_filter_by_version(self, api_client, project): + """Test Doc search result are filtered according to version""" + query = get_search_query_from_project_file(project_slug=project.slug) + latest_version = project.versions.all()[0] + # Create another version + dummy_version = G(Version, project=project) + # Create HTMLFile same as the latest version + latest_version_files = HTMLFile.objects.all().filter(version=latest_version) + for f in latest_version_files: + f.version = dummy_version + # Make primary key to None, so django will create new object + f.pk = None + f.save() + + search_params = {'query': query, 'project': project.slug, 'version': dummy_version.slug} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == project.slug + + def test_doc_search_subprojects(self, api_client, all_projects): + """Test Document search return results from subprojects also""" + project = all_projects[0] + subproject = all_projects[1] + version = project.versions.all()[0] + # Add another project as subproject of the project + project.add_subproject(subproject) + + # Now search with subproject content but explicitly filter by the parent project + query = get_search_query_from_project_file(project_slug=subproject.slug) + search_params = {'query': query, 'project': project.slug, 'version': version.slug} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == subproject.slug From 85b4686865c03b926c70c2e3618f6cf0c27f5101 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Sun, 1 Jul 2018 01:59:35 +0600 Subject: [PATCH 5/9] adding more tests and fixup --- readthedocs/restapi/urls.py | 2 -- readthedocs/search/api.py | 7 +++-- readthedocs/search/faceted_search.py | 5 +++- readthedocs/search/tests/test_api.py | 38 ++++++++++++++++++++++++++++ readthedocs/urls.py | 3 +++ 5 files changed, 48 insertions(+), 7 deletions(-) diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 39f9dcd7a58..0a973a39d64 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -11,7 +11,6 @@ from readthedocs.restapi.views import ( core_views, footer_views, search_views, task_views, integrations ) -from readthedocs.search.api import PageSearchAPIView from .views.model_views import (BuildViewSet, BuildCommandViewSet, ProjectViewSet, NotificationViewSet, @@ -56,7 +55,6 @@ url(r'search/section/$', search_views.section_search, name='api_section_search'), - url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), ] task_urls = [ diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 8e72547d195..82f735b7f76 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -28,13 +28,12 @@ def get_queryset(self): return queryset def validate_query_params(self): - required_query_params = set(['query', 'project', 'version']) + required_query_params = {'query', 'project', 'version'} # python `set` literal is `{}` request_params = set(self.request.query_params.keys()) missing_params = required_query_params - request_params if missing_params: - errors = [] + errors = {} for param in missing_params: - e = {param: "This query param is required"} - errors.append(e) + errors[param] = ["This query param is required"] raise ValidationError(errors) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index b483c8c5ea9..15819b0b642 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -33,7 +33,10 @@ class FileSearch(RTDFacetedSearch): } def query(self, search, query): - """Add query part to ``search``""" + """Add query part to ``search`` + + Overriding because we pass ES Query object instead of string + """ if query: search = search.query(query) diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index f02ab7b9e78..ca951b509b5 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -69,6 +69,44 @@ def test_doc_search_filter_by_version(self, api_client, project): assert len(data) == 1 assert data[0]['project'] == project.slug + def test_doc_search_pagination(self, api_client, project): + """Test Doc search result can be paginated""" + latest_version = project.versions.all()[0] + html_file = HTMLFile.objects.filter(version=latest_version)[0] + title = html_file.processed_json['title'] + query = title.split()[0] + + # Create 15 more same html file + for _ in range(15): + # Make primary key to None, so django will create new object + html_file.pk = None + html_file.save() + + search_params = {'query': query, 'project': project.slug, 'version': latest_version.slug} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + # Check the count is 16 (1 existing and 15 new created) + assert resp.data['count'] == 16 + # Check there are next url + assert resp.data['next'] is not None + # There should be only 10 data as the pagination is 10 by default + assert len(resp.data['results']) == 10 + + # Add `page_size` parameter and check the data is paginated accordingly + search_params['page_size'] = 5 + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + assert len(resp.data['results']) == 5 + + def test_doc_search_without_parameters(self, api_client, project): + """Hitting Document Search endpoint without query parameters should return error""" + resp = api_client.get(self.url) + assert resp.status_code == 400 + # Check error message is there + assert sorted(['query', 'project', 'version']) == sorted(resp.data.keys()) + def test_doc_search_subprojects(self, api_client, all_projects): """Test Document search return results from subprojects also""" project = all_projects[0] diff --git a/readthedocs/urls.py b/readthedocs/urls.py index d6dbc593fdf..9a50b9b7619 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -22,6 +22,7 @@ do_not_track, ) from readthedocs.search import views as search_views +from readthedocs.search.api import PageSearchAPIView v1_api = Api(api_name='v1') v1_api.register(UserResource()) @@ -65,6 +66,8 @@ api_urls = [ url(r'^api/', include(v1_api.urls)), url(r'^api/v2/', include('readthedocs.restapi.urls')), + # Keep the `doc_search` at root level, so the test does not fail for other API + url(r'^api/v2/docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')), ] From b01981fd3e614fc3b6172c95b965c9446ca9e8e5 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Sun, 1 Jul 2018 02:10:57 +0600 Subject: [PATCH 6/9] fixing lint --- readthedocs/search/faceted_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 15819b0b642..4a14cb8c541 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -33,7 +33,8 @@ class FileSearch(RTDFacetedSearch): } def query(self, search, query): - """Add query part to ``search`` + """ + Add query part to ``search`` Overriding because we pass ES Query object instead of string """ From 75de7f6a6fb554bcd6438fce7f43e94b2042c899 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Mon, 2 Jul 2018 23:28:58 +0600 Subject: [PATCH 7/9] fixing regex --- readthedocs/restapi/urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 0a973a39d64..7e019ba597c 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -48,7 +48,7 @@ url(r'index_search/', search_views.index_search, name='index_search'), - # url(r'search/$', views.search_views.search, name='api_search'), + url(r'^search/$', views.search_views.search, name='api_search'), url(r'search/project/$', search_views.project_search, name='api_project_search'), From e2e8cbba9a368ea9c65e880cedd441bc7c540fa4 Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Thu, 5 Jul 2018 03:35:21 +0600 Subject: [PATCH 8/9] Adding link to serialized data --- readthedocs/search/api.py | 28 ++++++++++++++++++++++++++++ readthedocs/search/filters.py | 7 ++----- readthedocs/search/serializers.py | 10 ++++++++++ readthedocs/search/tests/test_api.py | 3 +++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 82f735b7f76..c60dd1c040d 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -2,6 +2,7 @@ from rest_framework import exceptions from rest_framework.exceptions import ValidationError +from readthedocs.projects.models import Project from readthedocs.search.documents import PageDocument from readthedocs.search.filters import SearchFilterBackend from readthedocs.search.pagination import SearchPagination @@ -37,3 +38,30 @@ def validate_query_params(self): errors[param] = ["This query param is required"] raise ValidationError(errors) + + def get_serializer_context(self): + context = super(PageSearchAPIView, self).get_serializer_context() + context['projects_info'] = self.get_projects_info() + return context + + def _get_all_projects(self): + """Return list of project and its subprojects.""" + project_slug = self.request.query_params.get('project') + queryset = Project.objects.api(self.request.user).only('slug') + + project = generics.get_object_or_404(queryset, slug=project_slug) + subprojects = queryset.filter(superprojects__parent_id=project.id) + + project_list = list(subprojects) + [project] + return project_list + + def get_projects_info(self): + version_slug = self.request.query_params.get('version') + all_projects = self._get_all_projects() + projects_info = {} + + for project in all_projects: + data = {'docs_url': project.get_docs_url(version_slug=version_slug)} + projects_info[project.slug] = data + + return projects_info diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py index 8b7dc9c1b20..9c1d3cdd560 100644 --- a/readthedocs/search/filters.py +++ b/readthedocs/search/filters.py @@ -1,7 +1,5 @@ from rest_framework import filters -from readthedocs.search.utils import get_project_slug_list_or_404 - class SearchFilterBackend(filters.BaseFilterBackend): @@ -12,10 +10,9 @@ def filter_queryset(self, request, queryset, view): # ``queryset`` is actually a Elasticsearch DSL ``Search`` object. # So change the variable name es_search = queryset - project_slug = request.query_params.get('project') version_slug = request.query_params.get('version') - project_slug_list = get_project_slug_list_or_404(project_slug=project_slug, - user=request.user) + projects_info = view.get_projects_info() + project_slug_list = projects_info.keys() # Elasticsearch ``terms`` query can take multiple values as list, # while ``term`` query takes single value. filtered_es_search = (es_search.filter('terms', project=project_slug_list) diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index dd3ad346eef..efbd9d510de 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -1,13 +1,23 @@ from rest_framework import serializers +from readthedocs.projects.models import Project + class PageSearchSerializer(serializers.Serializer): project = serializers.CharField() version = serializers.CharField() title = serializers.CharField() path = serializers.CharField() + link = serializers.SerializerMethodField() highlight = serializers.SerializerMethodField() + def get_link(self, obj): + projects_info = self.context.get('projects_info') + if projects_info: + project_data = projects_info[obj.project] + docs_url = project_data['docs_url'] + return docs_url + obj.path + def get_highlight(self, obj): highlight = getattr(obj.meta, 'highlight', None) if highlight: diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index ca951b509b5..23fc879cf2b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -124,3 +124,6 @@ def test_doc_search_subprojects(self, api_client, all_projects): data = resp.data['results'] assert len(data) == 1 assert data[0]['project'] == subproject.slug + # Check the link is the subproject document link + document_link = subproject.get_docs_url(version_slug=version.slug) + assert document_link in data[0]['link'] From 733b0309d5a7d733434e7921f553823793ef95ca Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Thu, 5 Jul 2018 19:50:02 +0600 Subject: [PATCH 9/9] fixing python3 compatibility --- readthedocs/search/api.py | 26 ++++++++------------------ readthedocs/search/filters.py | 4 ++-- readthedocs/search/serializers.py | 7 +++---- readthedocs/search/utils.py | 17 ++++++----------- readthedocs/search/views.py | 7 ++++--- 5 files changed, 23 insertions(+), 38 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index c60dd1c040d..2602bdabcf4 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -7,6 +7,7 @@ from readthedocs.search.filters import SearchFilterBackend from readthedocs.search.pagination import SearchPagination from readthedocs.search.serializers import PageSearchSerializer +from readthedocs.search.utils import get_project_list_or_404 class PageSearchAPIView(generics.ListAPIView): @@ -41,27 +42,16 @@ def validate_query_params(self): def get_serializer_context(self): context = super(PageSearchAPIView, self).get_serializer_context() - context['projects_info'] = self.get_projects_info() + context['projects_url'] = self.get_all_projects_url() return context - def _get_all_projects(self): - """Return list of project and its subprojects.""" - project_slug = self.request.query_params.get('project') - queryset = Project.objects.api(self.request.user).only('slug') - - project = generics.get_object_or_404(queryset, slug=project_slug) - subprojects = queryset.filter(superprojects__parent_id=project.id) - - project_list = list(subprojects) + [project] - return project_list - - def get_projects_info(self): + def get_all_projects_url(self): version_slug = self.request.query_params.get('version') - all_projects = self._get_all_projects() - projects_info = {} + project_slug = self.request.query_params.get('project') + all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user) + projects_url = {} for project in all_projects: - data = {'docs_url': project.get_docs_url(version_slug=version_slug)} - projects_info[project.slug] = data + projects_url[project.slug] = project.get_docs_url(version_slug=version_slug) - return projects_info + return projects_url diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py index 9c1d3cdd560..ba0154def93 100644 --- a/readthedocs/search/filters.py +++ b/readthedocs/search/filters.py @@ -11,8 +11,8 @@ def filter_queryset(self, request, queryset, view): # So change the variable name es_search = queryset version_slug = request.query_params.get('version') - projects_info = view.get_projects_info() - project_slug_list = projects_info.keys() + projects_info = view.get_all_projects_url() + project_slug_list = list(projects_info.keys()) # Elasticsearch ``terms`` query can take multiple values as list, # while ``term`` query takes single value. filtered_es_search = (es_search.filter('terms', project=project_slug_list) diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index efbd9d510de..7aa8f01c93f 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -12,10 +12,9 @@ class PageSearchSerializer(serializers.Serializer): highlight = serializers.SerializerMethodField() def get_link(self, obj): - projects_info = self.context.get('projects_info') - if projects_info: - project_data = projects_info[obj.project] - docs_url = project_data['docs_url'] + projects_url = self.context.get('projects_url') + if projects_url: + docs_url = projects_url[obj.project] return docs_url + obj.path def get_highlight(self, obj): diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index 09ca5c0f1f6..659effb1544 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -312,17 +312,12 @@ def parse_sections(documentation_type, content): # TODO: Rewrite all the views using this in Class Based View, # and move this function to a mixin -def get_project_slug_list_or_404(project_slug, user): - """ - Return list of subproject's slug including own slug. - - If the project is not available to user, redirect to 404 - """ +def get_project_list_or_404(project_slug, user): + """Return list of project and its subprojects.""" queryset = Project.objects.api(user).only('slug') - project = get_object_or_404(queryset, slug=project_slug) - subprojects_slug = (queryset.filter(superprojects__parent_id=project.id) - .values_list('slug', flat=True)) + project = get_object_or_404(queryset, slug=project_slug) + subprojects = queryset.filter(superprojects__parent_id=project.id) - slug_list = [project.slug] + list(subprojects_slug) - return slug_list + project_list = list(subprojects) + [project] + return project_list diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index e7989023109..062ee1fed50 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -14,7 +14,7 @@ from readthedocs.projects.models import Project from readthedocs.search import lib as search_lib from readthedocs.search.documents import ProjectDocument, PageDocument -from readthedocs.search.utils import get_project_slug_list_or_404 +from readthedocs.search.utils import get_project_list_or_404 log = logging.getLogger(__name__) LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}' @@ -55,8 +55,9 @@ def elastic_search(request): elif user_input.type == 'file': kwargs = {} if user_input.project: - project_slug_list = get_project_slug_list_or_404(project_slug=user_input.project, - user=request.user) + projects_list = get_project_list_or_404(project_slug=user_input.project, + user=request.user) + project_slug_list = [project.slug for project in projects_list] kwargs['projects_list'] = project_slug_list if user_input.version: kwargs['versions_list'] = user_input.version