From bb4e5aacc4f5c766ef2c8469e570e857d2325714 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Thu, 28 Jun 2018 03:04:48 +0600
Subject: [PATCH 1/9] [fix #4265] Port Document search API for Elasticsearch
 6.x

---
 readthedocs/search/api.py            | 17 ++++++++++++++
 readthedocs/search/documents.py      | 34 +++++++++++++++++++++++++++-
 readthedocs/search/faceted_search.py | 18 ++++-----------
 readthedocs/search/filters.py        | 15 ++++++++++++
 readthedocs/search/pagination.py     |  7 ++++++
 readthedocs/search/serializers.py    |  8 +++++++
 readthedocs/search/utils.py          | 18 +++++++++++++++
 readthedocs/search/views.py          | 12 ++++------
 readthedocs/urls.py                  |  3 ++-
 9 files changed, 108 insertions(+), 24 deletions(-)
 create mode 100644 readthedocs/search/api.py
 create mode 100644 readthedocs/search/filters.py
 create mode 100644 readthedocs/search/pagination.py
 create mode 100644 readthedocs/search/serializers.py

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
new file mode 100644
index 00000000000..f0151859fed
--- /dev/null
+++ b/readthedocs/search/api.py
@@ -0,0 +1,17 @@
+from rest_framework import generics
+
+from readthedocs.search.documents import PageDocument
+from readthedocs.search.filters import SearchFilterBackend
+from readthedocs.search.pagination import SearchPagination
+from readthedocs.search.serializers import PageSearchSerializer
+
+
+class PageSearchAPIView(generics.ListAPIView):
+    pagination_class = SearchPagination
+    filter_backends = [SearchFilterBackend]
+    serializer_class = PageSearchSerializer
+
+    def get_queryset(self):
+        query = self.request.query_params.get('query')
+        queryset = PageDocument.search(query=query)
+        return queryset
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index 2b8259e8ce2..e468c162d3c 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -1,5 +1,6 @@
 from django.conf import settings
 from django_elasticsearch_dsl import DocType, Index, fields
+from elasticsearch_dsl.query import SimpleQueryString, Bool
 
 from readthedocs.projects.models import Project, HTMLFile
 from .conf import SEARCH_EXCLUDED_FILE
@@ -60,14 +61,19 @@ class Meta(object):
     content = fields.TextField(attr='processed_json.content')
     path = fields.TextField(attr='processed_json.path')
 
+    # Fields to perform search with weight
+    search_fields = ['title^10', 'headers^5', 'content']
+
     @classmethod
     def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None):
+        es_query = cls.get_es_query(query=query)
         kwargs = {
             'using': using or cls._doc_type.using,
             'index': index or cls._doc_type.index,
             'doc_types': [cls],
             'model': cls._doc_type.model,
-            'query': query
+            'query': es_query,
+            'fields': cls.search_fields
         }
         filters = {}
 
@@ -80,6 +86,32 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non
 
         return FileSearch(**kwargs)
 
+    @classmethod
+    def search(cls, using=None, index=None, **kwargs):
+        es_search = super(PageDocument, cls).search(using=using, index=index)
+        query = kwargs.pop('query')
+        es_query = cls.get_es_query(query=query)
+
+        es_search = es_search.query(es_query)
+        return es_search
+
+    @classmethod
+    def get_es_query(cls, query):
+        """Return the Elasticsearch query generated from the query string"""
+        all_queries = []
+
+        # Need to search for both 'AND' and 'OR' operations
+        # The score of AND should be higher as it satisfies both OR and AND
+        for operator in ['AND', 'OR']:
+            query_string = SimpleQueryString(query=query, fields=cls.search_fields,
+                                             default_operator=operator)
+            all_queries.append(query_string)
+
+        # Run bool query with should, so it returns result where either of the query matches
+        bool_query = Bool(should=all_queries)
+
+        return bool_query
+
     def get_queryset(self):
         """Overwrite default queryset to filter certain files to index"""
         queryset = super(PageDocument, self).get_queryset()
diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
index 77d9f13cc76..b483c8c5ea9 100644
--- a/readthedocs/search/faceted_search.py
+++ b/readthedocs/search/faceted_search.py
@@ -9,11 +9,13 @@ class RTDFacetedSearch(FacetedSearch):
     # TODO: Remove the overwrite when the elastic/elasticsearch-dsl-py#916
     # See more: https://github.com/elastic/elasticsearch-dsl-py/issues/916
 
-    def __init__(self, using, index, doc_types, model, **kwargs):
+    def __init__(self, using, index, doc_types, model, fields=None, **kwargs):
         self.using = using
         self.index = index
         self.doc_types = doc_types
         self._model = model
+        if fields:
+            self.fields = fields
         super(RTDFacetedSearch, self).__init__(**kwargs)
 
 
@@ -25,7 +27,6 @@ class ProjectSearch(RTDFacetedSearch):
 
 
 class FileSearch(RTDFacetedSearch):
-    fields = ['title^10', 'headers^5', 'content']
     facets = {
         'project': TermsFacet(field='project'),
         'version': TermsFacet(field='version')
@@ -34,17 +35,6 @@ class FileSearch(RTDFacetedSearch):
     def query(self, search, query):
         """Add query part to ``search``"""
         if query:
-            all_queries = []
-
-            # Need to search for both 'AND' and 'OR' operations
-            # The score of AND should be higher as it comes first
-            for operator in ['AND', 'OR']:
-                query_string = SimpleQueryString(query=query, fields=self.fields,
-                                                 default_operator=operator)
-                all_queries.append(query_string)
-
-            # Run bool query with should, so it returns result where either of the query matches
-            bool_query = Bool(should=all_queries)
-            search = search.query(bool_query)
+            search = search.query(query)
 
         return search
diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py
new file mode 100644
index 00000000000..4efdcf4a269
--- /dev/null
+++ b/readthedocs/search/filters.py
@@ -0,0 +1,15 @@
+from rest_framework import filters
+
+from readthedocs.search.utils import get_project_slug_list_or_404
+
+
+class SearchFilterBackend(filters.BaseFilterBackend):
+    """
+    Filter search result with project
+    """
+
+    def filter_queryset(self, request, queryset, view):
+        project_slug = request.query_params.get('project')
+        project_slug_list = get_project_slug_list_or_404(project_slug=project_slug,
+                                                         user=request.user)
+        return queryset.filter('terms', project=project_slug_list)
diff --git a/readthedocs/search/pagination.py b/readthedocs/search/pagination.py
new file mode 100644
index 00000000000..6abfba31b98
--- /dev/null
+++ b/readthedocs/search/pagination.py
@@ -0,0 +1,7 @@
+from rest_framework.pagination import PageNumberPagination
+
+
+class SearchPagination(PageNumberPagination):
+    page_size = 10
+    page_size_query_param = 'page_size'
+    max_page_size = 100
diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py
new file mode 100644
index 00000000000..99dbf9e7685
--- /dev/null
+++ b/readthedocs/search/serializers.py
@@ -0,0 +1,8 @@
+from rest_framework import serializers
+
+
+class PageSearchSerializer(serializers.Serializer):
+    title = serializers.CharField()
+    headers = serializers.ListField()
+    content = serializers.CharField()
+    path = serializers.CharField()
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 081104c201d..5d1f3403813 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -11,8 +11,10 @@
 import json
 
 from builtins import next, range
+from django.shortcuts import get_object_or_404
 from pyquery import PyQuery
 
+from readthedocs.projects.models import Project
 
 log = logging.getLogger(__name__)
 
@@ -306,3 +308,19 @@ def parse_sections(documentation_type, content):
             return ''
 
     return sections
+
+
+# TODO: Rewrite all the views using this in Class Based View,
+# and move this function to a mixin
+def get_project_slug_list_or_404(project_slug, user):
+    """Return list of subproject's slug including own slug.
+    If the project is not available to user, redirect to 404
+    """
+    queryset = Project.objects.api(user).only('slug')
+    project = get_object_or_404(queryset, slug=project_slug)
+
+    subprojects_slug = (queryset.filter(superprojects__parent_id=project.id)
+                        .values_list('slug', flat=True))
+
+    slug_list = [project.slug] + list(subprojects_slug)
+    return slug_list
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index 385b7d371ca..e7989023109 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -14,6 +14,7 @@
 from readthedocs.projects.models import Project
 from readthedocs.search import lib as search_lib
 from readthedocs.search.documents import ProjectDocument, PageDocument
+from readthedocs.search.utils import get_project_slug_list_or_404
 
 log = logging.getLogger(__name__)
 LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}'
@@ -54,14 +55,9 @@ def elastic_search(request):
         elif user_input.type == 'file':
             kwargs = {}
             if user_input.project:
-                queryset = Project.objects.api(request.user).only('slug')
-                project = get_object_or_404(queryset, slug=user_input.project)
-
-                subprojects_slug = (queryset.filter(superprojects__parent_id=project.id)
-                                            .values_list('slug', flat=True))
-
-                projects_list = [project.slug] + list(subprojects_slug)
-                kwargs['projects_list'] = projects_list
+                project_slug_list = get_project_slug_list_or_404(project_slug=user_input.project,
+                                                                 user=request.user)
+                kwargs['projects_list'] = project_slug_list
             if user_input.version:
                 kwargs['versions_list'] = user_input.version
 
diff --git a/readthedocs/urls.py b/readthedocs/urls.py
index 19914cd72e7..7503a0a435f 100644
--- a/readthedocs/urls.py
+++ b/readthedocs/urls.py
@@ -22,7 +22,7 @@
     do_not_track,
 )
 from readthedocs.search import views as search_views
-
+from readthedocs.search.api import PageSearchAPIView
 
 v1_api = Api(api_name='v1')
 v1_api.register(UserResource())
@@ -67,6 +67,7 @@
     url(r'^api/', include(v1_api.urls)),
     url(r'^api/v2/', include('readthedocs.restapi.urls')),
     url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')),
+    url(r'^api/search/', PageSearchAPIView.as_view()),
 ]
 
 i18n_urls = [

From 4dc3e353f358ec61d3347a5fbc1304ef72dd1d8b Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Fri, 29 Jun 2018 02:24:34 +0600
Subject: [PATCH 2/9] fixup and adding test

---
 conftest.py                          |   6 +
 readthedocs/restapi/urls.py          |  17 ++-
 readthedocs/search/api.py            |   8 +-
 readthedocs/search/documents.py      |   6 +-
 readthedocs/search/filters.py        |  15 ++-
 readthedocs/search/serializers.py    |   8 +-
 readthedocs/search/tests/test_api.py | 164 +++++++++++++++++++++++++++
 readthedocs/search/utils.py          |   3 +-
 readthedocs/urls.py                  |   2 -
 9 files changed, 205 insertions(+), 24 deletions(-)
 create mode 100644 readthedocs/search/tests/test_api.py

diff --git a/conftest.py b/conftest.py
index 73474ea6726..e19351615de 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,6 +1,7 @@
 import logging
 
 import pytest
+from rest_framework.test import APIClient
 
 
 def pytest_addoption(parser):
@@ -17,3 +18,8 @@ def pytest_configure(config):
 @pytest.fixture(autouse=True)
 def settings_modification(settings):
     settings.CELERY_ALWAYS_EAGER = True
+
+
+@pytest.fixture
+def api_client():
+    return APIClient()
diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 51fc72e17c6..594d81882b2 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -11,6 +11,7 @@
 from readthedocs.restapi.views import (
     core_views, footer_views, search_views, task_views, integrations
 )
+from readthedocs.search.api import PageSearchAPIView
 
 from .views.model_views import (BuildViewSet, BuildCommandViewSet,
                                 ProjectViewSet, NotificationViewSet,
@@ -85,19 +86,15 @@
         name='api_webhook'),
 ]
 
+api_search_urls = [
+    url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
+]
+
 urlpatterns += function_urls
-urlpatterns += search_urls
 urlpatterns += task_urls
+urlpatterns += search_urls
 urlpatterns += integration_urls
-
-try:
-    from readthedocsext.search.docsearch import DocSearch
-    api_search_urls = [
-        url(r'^docsearch/$', DocSearch.as_view(), name='doc_search'),
-    ]
-    urlpatterns += api_search_urls
-except ImportError:
-    pass
+urlpatterns += api_search_urls
 
 try:
     from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls
diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index f0151859fed..a9675c28fd8 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -12,6 +12,12 @@ class PageSearchAPIView(generics.ListAPIView):
     serializer_class = PageSearchSerializer
 
     def get_queryset(self):
-        query = self.request.query_params.get('query')
+        """Return Elasticsearch DSL Search object instead of Django Queryset.
+
+        Django Queryset and elasticsearch-dsl ``Search`` object is similar pattern.
+        So for searching, its possible to return ``Search`` object instead of queryset.
+        The ``filter_backends`` and ``pagination_class`` is compatible with ``Search``
+        """
+        query = self.request.query_params.get('query', '')
         queryset = PageDocument.search(query=query)
         return queryset
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index e468c162d3c..f2cf9deda16 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -87,12 +87,12 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non
         return FileSearch(**kwargs)
 
     @classmethod
-    def search(cls, using=None, index=None, **kwargs):
-        es_search = super(PageDocument, cls).search(using=using, index=index)
+    def search(cls, *args, **kwargs):
         query = kwargs.pop('query')
+        es_search = super(PageDocument, cls).search(*args, **kwargs)
         es_query = cls.get_es_query(query=query)
 
-        es_search = es_search.query(es_query)
+        es_search = es_search.query(es_query).highlight('content')
         return es_search
 
     @classmethod
diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py
index 4efdcf4a269..16a498a7505 100644
--- a/readthedocs/search/filters.py
+++ b/readthedocs/search/filters.py
@@ -4,12 +4,17 @@
 
 
 class SearchFilterBackend(filters.BaseFilterBackend):
-    """
-    Filter search result with project
-    """
 
-    def filter_queryset(self, request, queryset, view):
+    """Filter search result with project"""
+
+    def filter_queryset(self, request, es_search, view):
+        """Overwrite the method to compatible with Elasticsearch DSL Search object."""
         project_slug = request.query_params.get('project')
+        version_slug = request.query_params.get('version')
         project_slug_list = get_project_slug_list_or_404(project_slug=project_slug,
                                                          user=request.user)
-        return queryset.filter('terms', project=project_slug_list)
+        # Elasticsearch ``terms`` query can take multiple values as list,
+        # while ``term`` query takes single value.
+        filtered_es_search = (es_search.filter('terms', project=project_slug_list)
+                                       .filter('term', version=version_slug))
+        return filtered_es_search
diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py
index 99dbf9e7685..4327c07d0ae 100644
--- a/readthedocs/search/serializers.py
+++ b/readthedocs/search/serializers.py
@@ -3,6 +3,10 @@
 
 class PageSearchSerializer(serializers.Serializer):
     title = serializers.CharField()
-    headers = serializers.ListField()
-    content = serializers.CharField()
     path = serializers.CharField()
+    highlight = serializers.SerializerMethodField()
+
+    def get_highlight(self, obj):
+        highlight = getattr(obj.meta, 'highlight', None)
+        if highlight:
+            return highlight.to_dict()
diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py
new file mode 100644
index 00000000000..4133fb5aa85
--- /dev/null
+++ b/readthedocs/search/tests/test_api.py
@@ -0,0 +1,164 @@
+import pytest
+from django.core.urlresolvers import reverse
+
+from readthedocs.search.tests.utils import get_search_query_from_project_file
+
+
+@pytest.mark.django_db
+@pytest.mark.search
+class TestPageSearch(object):
+    url = reverse('doc_search')
+
+    @pytest.mark.parametrize('data_type', ['content', 'headers', 'title'])
+    @pytest.mark.parametrize('page_num', [0, 1])
+    def test_search_works(self, api_client, project, data_type, page_num):
+        query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num,
+                                                   data_type=data_type)
+
+        version = project.versions.all()[0]
+        url = reverse('doc_search')
+        resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query})
+        data = resp.data
+        assert len(data['results']) == 1
+
+    # @pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
+    # def test_file_search_case_insensitive(self, client, project, case):
+    #     """Check File search is case insensitive
+    #
+    #     It tests with uppercase, lowercase and camelcase
+    #     """
+    #     query_text = get_search_query_from_project_file(project_slug=project.slug)
+    #
+    #     cased_query = getattr(query_text, case)
+    #     query = cased_query()
+    #
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #
+    #     assert len(result) == 1
+    #     # Check the actual text is in the result, not the cased one
+    #     assert query_text in result.text()
+    #
+    # def test_file_search_exact_match(self, client, project):
+    #     """Check quoted query match exact phrase
+    #
+    #     Making a query with quoted text like ``"foo bar"`` should match
+    #     exactly ``foo bar`` phrase.
+    #     """
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # But the phrase Github can is available only in kuma docs.
+    #     # So search with this phrase to check
+    #     query = r'"GitHub can"'
+    #
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #
+    #     assert len(result) == 1
+    #
+    # def test_page_search_not_return_removed_page(self, client, project):
+    #     """Check removed page are not in the search index"""
+    #     query = get_search_query_from_project_file(project_slug=project.slug)
+    #     # Make a query to check it returns result
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #     assert len(result) == 1
+    #
+    #     # Delete all the HTML files of the project
+    #     HTMLFile.objects.filter(project=project).delete()
+    #     # Run the query again and this time there should not be any result
+    #     result, _ = self._get_search_result(url=self.url, client=client,
+    #                                         search_params={'q': query, 'type': 'file'})
+    #     assert len(result) == 0
+    #
+    # def test_file_search_show_projects(self, client, all_projects):
+    #     """Test that search result page shows list of projects while searching for files"""
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # so search with this phrase
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params={'q': 'GitHub', 'type': 'file'})
+    #
+    #     # There should be 2 search result
+    #     assert len(result) == 2
+    #
+    #     # there should be 2 projects in the left side column
+    #     content = page.find('.navigable .project-list')
+    #     assert len(content) == 2
+    #     text = content.text()
+    #
+    #     # kuma and pipeline should be there
+    #     assert 'kuma' and 'pipeline' in text
+    #
+    # def test_file_search_filter_by_project(self, client):
+    #     """Test that search result are filtered according to project"""
+    #
+    #     # `Github` word is present both in `kuma` and `pipeline` files
+    #     # so search with this phrase but filter through `kuma` project
+    #     search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'}
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params=search_params)
+    #
+    #     # There should be 1 search result as we have filtered
+    #     assert len(result) == 1
+    #     content = page.find('.navigable .project-list')
+    #
+    #     # kuma should should be there only
+    #     assert 'kuma' in result.text()
+    #     assert 'pipeline' not in result.text()
+    #
+    #     # But there should be 2 projects in the left side column
+    #     # as the query is present in both projects
+    #     content = page.find('.navigable .project-list')
+    #     if len(content) != 2:
+    #         pytest.xfail("failing because currently all projects are not showing in project list")
+    #     else:
+    #         assert 'kuma' and 'pipeline' in content.text()
+    #
+    # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!")
+    # def test_file_search_show_versions(self, client, all_projects, es_index, settings):
+    #     # override the settings to index all versions
+    #     settings.INDEX_ONLY_LATEST = False
+    #
+    #     project = all_projects[0]
+    #     # Create some versions of the project
+    #     versions = [G(Version, project=project) for _ in range(3)]
+    #
+    #     query = get_search_query_from_project_file(project_slug=project.slug)
+    #
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params={'q': query, 'type': 'file'})
+    #
+    #     # There should be only one result because by default
+    #     # only latest version result should be there
+    #     assert len(result) == 1
+    #
+    #     content = page.find('.navigable .version-list')
+    #     # There should be total 4 versions
+    #     # one is latest, and other 3 that we created above
+    #     assert len(content) == 4
+    #
+    #     project_versions = [v.slug for v in versions] + [LATEST]
+    #     content_versions = []
+    #     for element in content:
+    #         text = element.text_content()
+    #         # strip and split to keep the version slug only
+    #         slug = text.strip().split('\n')[0]
+    #         content_versions.append(slug)
+    #
+    #     assert sorted(project_versions) == sorted(content_versions)
+    #
+    # def test_file_search_subprojects(self, client, all_projects, es_index):
+    #     """File search should return results from subprojects also"""
+    #     project = all_projects[0]
+    #     subproject = all_projects[1]
+    #     # Add another project as subproject of the project
+    #     project.add_subproject(subproject)
+    #
+    #     # Now search with subproject content but explicitly filter by the parent project
+    #     query = get_search_query_from_project_file(project_slug=subproject.slug)
+    #     search_params = {'q': query, 'type': 'file', 'project': project.slug}
+    #     result, page = self._get_search_result(url=self.url, client=client,
+    #                                            search_params=search_params)
+    #
+    #     assert len(result) == 1
\ No newline at end of file
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 5d1f3403813..0e769d872dc 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -313,7 +313,8 @@ def parse_sections(documentation_type, content):
 # TODO: Rewrite all the views using this in Class Based View,
 # and move this function to a mixin
 def get_project_slug_list_or_404(project_slug, user):
-    """Return list of subproject's slug including own slug.
+    """
+    Return list of subproject's slug including own slug.
     If the project is not available to user, redirect to 404
     """
     queryset = Project.objects.api(user).only('slug')
diff --git a/readthedocs/urls.py b/readthedocs/urls.py
index 7503a0a435f..d6dbc593fdf 100644
--- a/readthedocs/urls.py
+++ b/readthedocs/urls.py
@@ -22,7 +22,6 @@
     do_not_track,
 )
 from readthedocs.search import views as search_views
-from readthedocs.search.api import PageSearchAPIView
 
 v1_api = Api(api_name='v1')
 v1_api.register(UserResource())
@@ -67,7 +66,6 @@
     url(r'^api/', include(v1_api.urls)),
     url(r'^api/v2/', include('readthedocs.restapi.urls')),
     url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')),
-    url(r'^api/search/', PageSearchAPIView.as_view()),
 ]
 
 i18n_urls = [

From fe2aef1adaa296b0eb802e5e5f582d006173cb03 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Fri, 29 Jun 2018 02:32:12 +0600
Subject: [PATCH 3/9] more fixup

---
 readthedocs/search/api.py       | 5 +++--
 readthedocs/search/documents.py | 5 ++---
 readthedocs/search/filters.py   | 5 ++++-
 readthedocs/search/utils.py     | 1 +
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index a9675c28fd8..2b2855618c0 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -12,12 +12,13 @@ class PageSearchAPIView(generics.ListAPIView):
     serializer_class = PageSearchSerializer
 
     def get_queryset(self):
-        """Return Elasticsearch DSL Search object instead of Django Queryset.
+        """
+        Return Elasticsearch DSL Search object instead of Django Queryset.
 
         Django Queryset and elasticsearch-dsl ``Search`` object is similar pattern.
         So for searching, its possible to return ``Search`` object instead of queryset.
         The ``filter_backends`` and ``pagination_class`` is compatible with ``Search``
         """
         query = self.request.query_params.get('query', '')
-        queryset = PageDocument.search(query=query)
+        queryset = PageDocument.simple_search(query=query)
         return queryset
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index f2cf9deda16..575f4f83322 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -87,9 +87,8 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non
         return FileSearch(**kwargs)
 
     @classmethod
-    def search(cls, *args, **kwargs):
-        query = kwargs.pop('query')
-        es_search = super(PageDocument, cls).search(*args, **kwargs)
+    def simple_search(cls, query, using=None, index=None):
+        es_search = cls.search(using=using, index=index)
         es_query = cls.get_es_query(query=query)
 
         es_search = es_search.query(es_query).highlight('content')
diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py
index 16a498a7505..8b7dc9c1b20 100644
--- a/readthedocs/search/filters.py
+++ b/readthedocs/search/filters.py
@@ -7,8 +7,11 @@ class SearchFilterBackend(filters.BaseFilterBackend):
 
     """Filter search result with project"""
 
-    def filter_queryset(self, request, es_search, view):
+    def filter_queryset(self, request, queryset, view):
         """Overwrite the method to compatible with Elasticsearch DSL Search object."""
+        # ``queryset`` is actually a Elasticsearch DSL ``Search`` object.
+        # So change the variable name
+        es_search = queryset
         project_slug = request.query_params.get('project')
         version_slug = request.query_params.get('version')
         project_slug_list = get_project_slug_list_or_404(project_slug=project_slug,
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 0e769d872dc..09ca5c0f1f6 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -315,6 +315,7 @@ def parse_sections(documentation_type, content):
 def get_project_slug_list_or_404(project_slug, user):
     """
     Return list of subproject's slug including own slug.
+
     If the project is not available to user, redirect to 404
     """
     queryset = Project.objects.api(user).only('slug')

From 41a67a33b9b494038f6d7133ef39336e5bc89083 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Sat, 30 Jun 2018 03:24:28 +0600
Subject: [PATCH 4/9] adding more tests

---
 readthedocs/restapi/urls.py          |   7 +-
 readthedocs/search/api.py            |  16 ++
 readthedocs/search/documents.py      |   3 +-
 readthedocs/search/serializers.py    |   2 +
 readthedocs/search/tests/test_api.py | 218 +++++++++------------------
 5 files changed, 93 insertions(+), 153 deletions(-)

diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 594d81882b2..39f9dcd7a58 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -49,13 +49,14 @@
     url(r'index_search/',
         search_views.index_search,
         name='index_search'),
-    url(r'search/$', views.search_views.search, name='api_search'),
+    # url(r'search/$', views.search_views.search, name='api_search'),
     url(r'search/project/$',
         search_views.project_search,
         name='api_project_search'),
     url(r'search/section/$',
         search_views.section_search,
         name='api_section_search'),
+    url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
 ]
 
 task_urls = [
@@ -86,15 +87,11 @@
         name='api_webhook'),
 ]
 
-api_search_urls = [
-    url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
-]
 
 urlpatterns += function_urls
 urlpatterns += task_urls
 urlpatterns += search_urls
 urlpatterns += integration_urls
-urlpatterns += api_search_urls
 
 try:
     from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls
diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index 2b2855618c0..8e72547d195 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -1,4 +1,6 @@
 from rest_framework import generics
+from rest_framework import exceptions
+from rest_framework.exceptions import ValidationError
 
 from readthedocs.search.documents import PageDocument
 from readthedocs.search.filters import SearchFilterBackend
@@ -19,6 +21,20 @@ def get_queryset(self):
         So for searching, its possible to return ``Search`` object instead of queryset.
         The ``filter_backends`` and ``pagination_class`` is compatible with ``Search``
         """
+        # Validate all the required params are there
+        self.validate_query_params()
         query = self.request.query_params.get('query', '')
         queryset = PageDocument.simple_search(query=query)
         return queryset
+
+    def validate_query_params(self):
+        required_query_params = set(['query', 'project', 'version'])
+        request_params = set(self.request.query_params.keys())
+        missing_params = required_query_params - request_params
+        if missing_params:
+            errors = []
+            for param in missing_params:
+                e = {param: "This query param is required"}
+                errors.append(e)
+
+            raise ValidationError(errors)
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
index 575f4f83322..3cc70343c35 100644
--- a/readthedocs/search/documents.py
+++ b/readthedocs/search/documents.py
@@ -90,8 +90,9 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non
     def simple_search(cls, query, using=None, index=None):
         es_search = cls.search(using=using, index=index)
         es_query = cls.get_es_query(query=query)
+        highlighted_fields = [f.split('^', 1)[0] for f in cls.search_fields]
 
-        es_search = es_search.query(es_query).highlight('content')
+        es_search = es_search.query(es_query).highlight(*highlighted_fields)
         return es_search
 
     @classmethod
diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py
index 4327c07d0ae..dd3ad346eef 100644
--- a/readthedocs/search/serializers.py
+++ b/readthedocs/search/serializers.py
@@ -2,6 +2,8 @@
 
 
 class PageSearchSerializer(serializers.Serializer):
+    project = serializers.CharField()
+    version = serializers.CharField()
     title = serializers.CharField()
     path = serializers.CharField()
     highlight = serializers.SerializerMethodField()
diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py
index 4133fb5aa85..f02ab7b9e78 100644
--- a/readthedocs/search/tests/test_api.py
+++ b/readthedocs/search/tests/test_api.py
@@ -1,12 +1,15 @@
 import pytest
 from django.core.urlresolvers import reverse
+from django_dynamic_fixture import G
 
+from readthedocs.builds.models import Version
+from readthedocs.projects.models import HTMLFile
 from readthedocs.search.tests.utils import get_search_query_from_project_file
 
 
 @pytest.mark.django_db
 @pytest.mark.search
-class TestPageSearch(object):
+class TestDocumentSearch(object):
     url = reverse('doc_search')
 
     @pytest.mark.parametrize('data_type', ['content', 'headers', 'title'])
@@ -16,149 +19,70 @@ def test_search_works(self, api_client, project, data_type, page_num):
                                                    data_type=data_type)
 
         version = project.versions.all()[0]
-        url = reverse('doc_search')
-        resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query})
-        data = resp.data
-        assert len(data['results']) == 1
-
-    # @pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
-    # def test_file_search_case_insensitive(self, client, project, case):
-    #     """Check File search is case insensitive
-    #
-    #     It tests with uppercase, lowercase and camelcase
-    #     """
-    #     query_text = get_search_query_from_project_file(project_slug=project.slug)
-    #
-    #     cased_query = getattr(query_text, case)
-    #     query = cased_query()
-    #
-    #     result, _ = self._get_search_result(url=self.url, client=client,
-    #                                         search_params={'q': query, 'type': 'file'})
-    #
-    #     assert len(result) == 1
-    #     # Check the actual text is in the result, not the cased one
-    #     assert query_text in result.text()
-    #
-    # def test_file_search_exact_match(self, client, project):
-    #     """Check quoted query match exact phrase
-    #
-    #     Making a query with quoted text like ``"foo bar"`` should match
-    #     exactly ``foo bar`` phrase.
-    #     """
-    #
-    #     # `Github` word is present both in `kuma` and `pipeline` files
-    #     # But the phrase Github can is available only in kuma docs.
-    #     # So search with this phrase to check
-    #     query = r'"GitHub can"'
-    #
-    #     result, _ = self._get_search_result(url=self.url, client=client,
-    #                                         search_params={'q': query, 'type': 'file'})
-    #
-    #     assert len(result) == 1
-    #
-    # def test_page_search_not_return_removed_page(self, client, project):
-    #     """Check removed page are not in the search index"""
-    #     query = get_search_query_from_project_file(project_slug=project.slug)
-    #     # Make a query to check it returns result
-    #     result, _ = self._get_search_result(url=self.url, client=client,
-    #                                         search_params={'q': query, 'type': 'file'})
-    #     assert len(result) == 1
-    #
-    #     # Delete all the HTML files of the project
-    #     HTMLFile.objects.filter(project=project).delete()
-    #     # Run the query again and this time there should not be any result
-    #     result, _ = self._get_search_result(url=self.url, client=client,
-    #                                         search_params={'q': query, 'type': 'file'})
-    #     assert len(result) == 0
-    #
-    # def test_file_search_show_projects(self, client, all_projects):
-    #     """Test that search result page shows list of projects while searching for files"""
-    #
-    #     # `Github` word is present both in `kuma` and `pipeline` files
-    #     # so search with this phrase
-    #     result, page = self._get_search_result(url=self.url, client=client,
-    #                                            search_params={'q': 'GitHub', 'type': 'file'})
-    #
-    #     # There should be 2 search result
-    #     assert len(result) == 2
-    #
-    #     # there should be 2 projects in the left side column
-    #     content = page.find('.navigable .project-list')
-    #     assert len(content) == 2
-    #     text = content.text()
-    #
-    #     # kuma and pipeline should be there
-    #     assert 'kuma' and 'pipeline' in text
-    #
-    # def test_file_search_filter_by_project(self, client):
-    #     """Test that search result are filtered according to project"""
-    #
-    #     # `Github` word is present both in `kuma` and `pipeline` files
-    #     # so search with this phrase but filter through `kuma` project
-    #     search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'}
-    #     result, page = self._get_search_result(url=self.url, client=client,
-    #                                            search_params=search_params)
-    #
-    #     # There should be 1 search result as we have filtered
-    #     assert len(result) == 1
-    #     content = page.find('.navigable .project-list')
-    #
-    #     # kuma should should be there only
-    #     assert 'kuma' in result.text()
-    #     assert 'pipeline' not in result.text()
-    #
-    #     # But there should be 2 projects in the left side column
-    #     # as the query is present in both projects
-    #     content = page.find('.navigable .project-list')
-    #     if len(content) != 2:
-    #         pytest.xfail("failing because currently all projects are not showing in project list")
-    #     else:
-    #         assert 'kuma' and 'pipeline' in content.text()
-    #
-    # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!")
-    # def test_file_search_show_versions(self, client, all_projects, es_index, settings):
-    #     # override the settings to index all versions
-    #     settings.INDEX_ONLY_LATEST = False
-    #
-    #     project = all_projects[0]
-    #     # Create some versions of the project
-    #     versions = [G(Version, project=project) for _ in range(3)]
-    #
-    #     query = get_search_query_from_project_file(project_slug=project.slug)
-    #
-    #     result, page = self._get_search_result(url=self.url, client=client,
-    #                                            search_params={'q': query, 'type': 'file'})
-    #
-    #     # There should be only one result because by default
-    #     # only latest version result should be there
-    #     assert len(result) == 1
-    #
-    #     content = page.find('.navigable .version-list')
-    #     # There should be total 4 versions
-    #     # one is latest, and other 3 that we created above
-    #     assert len(content) == 4
-    #
-    #     project_versions = [v.slug for v in versions] + [LATEST]
-    #     content_versions = []
-    #     for element in content:
-    #         text = element.text_content()
-    #         # strip and split to keep the version slug only
-    #         slug = text.strip().split('\n')[0]
-    #         content_versions.append(slug)
-    #
-    #     assert sorted(project_versions) == sorted(content_versions)
-    #
-    # def test_file_search_subprojects(self, client, all_projects, es_index):
-    #     """File search should return results from subprojects also"""
-    #     project = all_projects[0]
-    #     subproject = all_projects[1]
-    #     # Add another project as subproject of the project
-    #     project.add_subproject(subproject)
-    #
-    #     # Now search with subproject content but explicitly filter by the parent project
-    #     query = get_search_query_from_project_file(project_slug=subproject.slug)
-    #     search_params = {'q': query, 'type': 'file', 'project': project.slug}
-    #     result, page = self._get_search_result(url=self.url, client=client,
-    #                                            search_params=search_params)
-    #
-    #     assert len(result) == 1
\ No newline at end of file
+        search_params = {'project': project.slug, 'version': version.slug, 'query': query}
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        data = resp.data['results']
+        assert len(data) == 1
+        project_data = data[0]
+        assert project_data['project'] == project.slug
+
+        # Check highlight return correct object
+        all_highlights = project_data['highlight'][data_type]
+        for highlight in all_highlights:
+            # Make it lower because our search is case insensitive
+            assert query.lower() in highlight.lower()
+
+    def test_doc_search_filter_by_project(self, api_client):
+        """Test Doc search result are filtered according to project"""
+
+        # `Github` word is present both in `kuma` and `pipeline` files
+        # so search with this phrase but filter through `kuma` project
+        search_params = {'query': 'GitHub', 'project': 'kuma', 'version': 'latest'}
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        data = resp.data['results']
+        assert len(data) == 1
+        assert data[0]['project'] == 'kuma'
+
+    def test_doc_search_filter_by_version(self, api_client, project):
+        """Test Doc search result are filtered according to version"""
+        query = get_search_query_from_project_file(project_slug=project.slug)
+        latest_version = project.versions.all()[0]
+        # Create another version
+        dummy_version = G(Version, project=project)
+        # Create HTMLFile same as the latest version
+        latest_version_files = HTMLFile.objects.all().filter(version=latest_version)
+        for f in latest_version_files:
+            f.version = dummy_version
+            # Make primary key to None, so django will create new object
+            f.pk = None
+            f.save()
+
+        search_params = {'query': query, 'project': project.slug, 'version': dummy_version.slug}
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        data = resp.data['results']
+        assert len(data) == 1
+        assert data[0]['project'] == project.slug
+
+    def test_doc_search_subprojects(self, api_client, all_projects):
+        """Test Document search return results from subprojects also"""
+        project = all_projects[0]
+        subproject = all_projects[1]
+        version = project.versions.all()[0]
+        # Add another project as subproject of the project
+        project.add_subproject(subproject)
+
+        # Now search with subproject content but explicitly filter by the parent project
+        query = get_search_query_from_project_file(project_slug=subproject.slug)
+        search_params = {'query': query, 'project': project.slug, 'version': version.slug}
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        data = resp.data['results']
+        assert len(data) == 1
+        assert data[0]['project'] == subproject.slug

From 85b4686865c03b926c70c2e3618f6cf0c27f5101 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Sun, 1 Jul 2018 01:59:35 +0600
Subject: [PATCH 5/9] adding more tests and fixup

---
 readthedocs/restapi/urls.py          |  2 --
 readthedocs/search/api.py            |  7 +++--
 readthedocs/search/faceted_search.py |  5 +++-
 readthedocs/search/tests/test_api.py | 38 ++++++++++++++++++++++++++++
 readthedocs/urls.py                  |  3 +++
 5 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 39f9dcd7a58..0a973a39d64 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -11,7 +11,6 @@
 from readthedocs.restapi.views import (
     core_views, footer_views, search_views, task_views, integrations
 )
-from readthedocs.search.api import PageSearchAPIView
 
 from .views.model_views import (BuildViewSet, BuildCommandViewSet,
                                 ProjectViewSet, NotificationViewSet,
@@ -56,7 +55,6 @@
     url(r'search/section/$',
         search_views.section_search,
         name='api_section_search'),
-    url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
 ]
 
 task_urls = [
diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index 8e72547d195..82f735b7f76 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -28,13 +28,12 @@ def get_queryset(self):
         return queryset
 
     def validate_query_params(self):
-        required_query_params = set(['query', 'project', 'version'])
+        required_query_params = {'query', 'project', 'version'}  # python `set` literal is `{}`
         request_params = set(self.request.query_params.keys())
         missing_params = required_query_params - request_params
         if missing_params:
-            errors = []
+            errors = {}
             for param in missing_params:
-                e = {param: "This query param is required"}
-                errors.append(e)
+                errors[param] = ["This query param is required"]
 
             raise ValidationError(errors)
diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
index b483c8c5ea9..15819b0b642 100644
--- a/readthedocs/search/faceted_search.py
+++ b/readthedocs/search/faceted_search.py
@@ -33,7 +33,10 @@ class FileSearch(RTDFacetedSearch):
     }
 
     def query(self, search, query):
-        """Add query part to ``search``"""
+        """Add query part to ``search``
+
+        Overriding because we pass ES Query object instead of string
+        """
         if query:
             search = search.query(query)
 
diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py
index f02ab7b9e78..ca951b509b5 100644
--- a/readthedocs/search/tests/test_api.py
+++ b/readthedocs/search/tests/test_api.py
@@ -69,6 +69,44 @@ def test_doc_search_filter_by_version(self, api_client, project):
         assert len(data) == 1
         assert data[0]['project'] == project.slug
 
+    def test_doc_search_pagination(self, api_client, project):
+        """Test Doc search result can be paginated"""
+        latest_version = project.versions.all()[0]
+        html_file = HTMLFile.objects.filter(version=latest_version)[0]
+        title = html_file.processed_json['title']
+        query = title.split()[0]
+
+        # Create 15 more same html file
+        for _ in range(15):
+            # Make primary key to None, so django will create new object
+            html_file.pk = None
+            html_file.save()
+
+        search_params = {'query': query, 'project': project.slug, 'version': latest_version.slug}
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        # Check the count is 16 (1 existing and 15 new created)
+        assert resp.data['count'] == 16
+        # Check there are next url
+        assert resp.data['next'] is not None
+        # There should be only 10 data as the pagination is 10 by default
+        assert len(resp.data['results']) == 10
+
+        # Add `page_size` parameter and check the data is paginated accordingly
+        search_params['page_size'] = 5
+        resp = api_client.get(self.url, search_params)
+        assert resp.status_code == 200
+
+        assert len(resp.data['results']) == 5
+
+    def test_doc_search_without_parameters(self, api_client, project):
+        """Hitting Document Search endpoint without query parameters should return error"""
+        resp = api_client.get(self.url)
+        assert resp.status_code == 400
+        # Check error message is there
+        assert sorted(['query', 'project', 'version']) == sorted(resp.data.keys())
+
     def test_doc_search_subprojects(self, api_client, all_projects):
         """Test Document search return results from subprojects also"""
         project = all_projects[0]
diff --git a/readthedocs/urls.py b/readthedocs/urls.py
index d6dbc593fdf..9a50b9b7619 100644
--- a/readthedocs/urls.py
+++ b/readthedocs/urls.py
@@ -22,6 +22,7 @@
     do_not_track,
 )
 from readthedocs.search import views as search_views
+from readthedocs.search.api import PageSearchAPIView
 
 v1_api = Api(api_name='v1')
 v1_api.register(UserResource())
@@ -65,6 +66,8 @@
 api_urls = [
     url(r'^api/', include(v1_api.urls)),
     url(r'^api/v2/', include('readthedocs.restapi.urls')),
+    # Keep the `doc_search` at root level, so the test does not fail for other API
+    url(r'^api/v2/docsearch/$', PageSearchAPIView.as_view(), name='doc_search'),
     url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')),
 ]
 

From b01981fd3e614fc3b6172c95b965c9446ca9e8e5 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Sun, 1 Jul 2018 02:10:57 +0600
Subject: [PATCH 6/9] fixing lint

---
 readthedocs/search/faceted_search.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
index 15819b0b642..4a14cb8c541 100644
--- a/readthedocs/search/faceted_search.py
+++ b/readthedocs/search/faceted_search.py
@@ -33,7 +33,8 @@ class FileSearch(RTDFacetedSearch):
     }
 
     def query(self, search, query):
-        """Add query part to ``search``
+        """
+        Add query part to ``search``
 
         Overriding because we pass ES Query object instead of string
         """

From 75de7f6a6fb554bcd6438fce7f43e94b2042c899 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Mon, 2 Jul 2018 23:28:58 +0600
Subject: [PATCH 7/9] fixing regex

---
 readthedocs/restapi/urls.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 0a973a39d64..7e019ba597c 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -48,7 +48,7 @@
     url(r'index_search/',
         search_views.index_search,
         name='index_search'),
-    # url(r'search/$', views.search_views.search, name='api_search'),
+    url(r'^search/$', views.search_views.search, name='api_search'),
     url(r'search/project/$',
         search_views.project_search,
         name='api_project_search'),

From e2e8cbba9a368ea9c65e880cedd441bc7c540fa4 Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Thu, 5 Jul 2018 03:35:21 +0600
Subject: [PATCH 8/9] Adding link to serialized data

---
 readthedocs/search/api.py            | 28 ++++++++++++++++++++++++++++
 readthedocs/search/filters.py        |  7 ++-----
 readthedocs/search/serializers.py    | 10 ++++++++++
 readthedocs/search/tests/test_api.py |  3 +++
 4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index 82f735b7f76..c60dd1c040d 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -2,6 +2,7 @@
 from rest_framework import exceptions
 from rest_framework.exceptions import ValidationError
 
+from readthedocs.projects.models import Project
 from readthedocs.search.documents import PageDocument
 from readthedocs.search.filters import SearchFilterBackend
 from readthedocs.search.pagination import SearchPagination
@@ -37,3 +38,30 @@ def validate_query_params(self):
                 errors[param] = ["This query param is required"]
 
             raise ValidationError(errors)
+
+    def get_serializer_context(self):
+        context = super(PageSearchAPIView, self).get_serializer_context()
+        context['projects_info'] = self.get_projects_info()
+        return context
+
+    def _get_all_projects(self):
+        """Return list of project and its subprojects."""
+        project_slug = self.request.query_params.get('project')
+        queryset = Project.objects.api(self.request.user).only('slug')
+
+        project = generics.get_object_or_404(queryset, slug=project_slug)
+        subprojects = queryset.filter(superprojects__parent_id=project.id)
+
+        project_list = list(subprojects) + [project]
+        return project_list
+
+    def get_projects_info(self):
+        version_slug = self.request.query_params.get('version')
+        all_projects = self._get_all_projects()
+        projects_info = {}
+
+        for project in all_projects:
+            data = {'docs_url': project.get_docs_url(version_slug=version_slug)}
+            projects_info[project.slug] = data
+
+        return projects_info
diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py
index 8b7dc9c1b20..9c1d3cdd560 100644
--- a/readthedocs/search/filters.py
+++ b/readthedocs/search/filters.py
@@ -1,7 +1,5 @@
 from rest_framework import filters
 
-from readthedocs.search.utils import get_project_slug_list_or_404
-
 
 class SearchFilterBackend(filters.BaseFilterBackend):
 
@@ -12,10 +10,9 @@ def filter_queryset(self, request, queryset, view):
         # ``queryset`` is actually a Elasticsearch DSL ``Search`` object.
         # So change the variable name
         es_search = queryset
-        project_slug = request.query_params.get('project')
         version_slug = request.query_params.get('version')
-        project_slug_list = get_project_slug_list_or_404(project_slug=project_slug,
-                                                         user=request.user)
+        projects_info = view.get_projects_info()
+        project_slug_list = projects_info.keys()
         # Elasticsearch ``terms`` query can take multiple values as list,
         # while ``term`` query takes single value.
         filtered_es_search = (es_search.filter('terms', project=project_slug_list)
diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py
index dd3ad346eef..efbd9d510de 100644
--- a/readthedocs/search/serializers.py
+++ b/readthedocs/search/serializers.py
@@ -1,13 +1,23 @@
 from rest_framework import serializers
 
+from readthedocs.projects.models import Project
+
 
 class PageSearchSerializer(serializers.Serializer):
     project = serializers.CharField()
     version = serializers.CharField()
     title = serializers.CharField()
     path = serializers.CharField()
+    link = serializers.SerializerMethodField()
     highlight = serializers.SerializerMethodField()
 
+    def get_link(self, obj):
+        projects_info = self.context.get('projects_info')
+        if projects_info:
+            project_data = projects_info[obj.project]
+            docs_url = project_data['docs_url']
+            return docs_url + obj.path
+
     def get_highlight(self, obj):
         highlight = getattr(obj.meta, 'highlight', None)
         if highlight:
diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py
index ca951b509b5..23fc879cf2b 100644
--- a/readthedocs/search/tests/test_api.py
+++ b/readthedocs/search/tests/test_api.py
@@ -124,3 +124,6 @@ def test_doc_search_subprojects(self, api_client, all_projects):
         data = resp.data['results']
         assert len(data) == 1
         assert data[0]['project'] == subproject.slug
+        # Check the link is the subproject document link
+        document_link = subproject.get_docs_url(version_slug=version.slug)
+        assert document_link in data[0]['link']

From 733b0309d5a7d733434e7921f553823793ef95ca Mon Sep 17 00:00:00 2001
From: Safwan Rahman <safwan.rahman15@gmail.com>
Date: Thu, 5 Jul 2018 19:50:02 +0600
Subject: [PATCH 9/9] fixing python3 compatibility

---
 readthedocs/search/api.py         | 26 ++++++++------------------
 readthedocs/search/filters.py     |  4 ++--
 readthedocs/search/serializers.py |  7 +++----
 readthedocs/search/utils.py       | 17 ++++++-----------
 readthedocs/search/views.py       |  7 ++++---
 5 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
index c60dd1c040d..2602bdabcf4 100644
--- a/readthedocs/search/api.py
+++ b/readthedocs/search/api.py
@@ -7,6 +7,7 @@
 from readthedocs.search.filters import SearchFilterBackend
 from readthedocs.search.pagination import SearchPagination
 from readthedocs.search.serializers import PageSearchSerializer
+from readthedocs.search.utils import get_project_list_or_404
 
 
 class PageSearchAPIView(generics.ListAPIView):
@@ -41,27 +42,16 @@ def validate_query_params(self):
 
     def get_serializer_context(self):
         context = super(PageSearchAPIView, self).get_serializer_context()
-        context['projects_info'] = self.get_projects_info()
+        context['projects_url'] = self.get_all_projects_url()
         return context
 
-    def _get_all_projects(self):
-        """Return list of project and its subprojects."""
-        project_slug = self.request.query_params.get('project')
-        queryset = Project.objects.api(self.request.user).only('slug')
-
-        project = generics.get_object_or_404(queryset, slug=project_slug)
-        subprojects = queryset.filter(superprojects__parent_id=project.id)
-
-        project_list = list(subprojects) + [project]
-        return project_list
-
-    def get_projects_info(self):
+    def get_all_projects_url(self):
         version_slug = self.request.query_params.get('version')
-        all_projects = self._get_all_projects()
-        projects_info = {}
+        project_slug = self.request.query_params.get('project')
+        all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user)
+        projects_url = {}
 
         for project in all_projects:
-            data = {'docs_url': project.get_docs_url(version_slug=version_slug)}
-            projects_info[project.slug] = data
+            projects_url[project.slug] = project.get_docs_url(version_slug=version_slug)
 
-        return projects_info
+        return projects_url
diff --git a/readthedocs/search/filters.py b/readthedocs/search/filters.py
index 9c1d3cdd560..ba0154def93 100644
--- a/readthedocs/search/filters.py
+++ b/readthedocs/search/filters.py
@@ -11,8 +11,8 @@ def filter_queryset(self, request, queryset, view):
         # So change the variable name
         es_search = queryset
         version_slug = request.query_params.get('version')
-        projects_info = view.get_projects_info()
-        project_slug_list = projects_info.keys()
+        projects_info = view.get_all_projects_url()
+        project_slug_list = list(projects_info.keys())
         # Elasticsearch ``terms`` query can take multiple values as list,
         # while ``term`` query takes single value.
         filtered_es_search = (es_search.filter('terms', project=project_slug_list)
diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py
index efbd9d510de..7aa8f01c93f 100644
--- a/readthedocs/search/serializers.py
+++ b/readthedocs/search/serializers.py
@@ -12,10 +12,9 @@ class PageSearchSerializer(serializers.Serializer):
     highlight = serializers.SerializerMethodField()
 
     def get_link(self, obj):
-        projects_info = self.context.get('projects_info')
-        if projects_info:
-            project_data = projects_info[obj.project]
-            docs_url = project_data['docs_url']
+        projects_url = self.context.get('projects_url')
+        if projects_url:
+            docs_url = projects_url[obj.project]
             return docs_url + obj.path
 
     def get_highlight(self, obj):
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 09ca5c0f1f6..659effb1544 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -312,17 +312,12 @@ def parse_sections(documentation_type, content):
 
 # TODO: Rewrite all the views using this in Class Based View,
 # and move this function to a mixin
-def get_project_slug_list_or_404(project_slug, user):
-    """
-    Return list of subproject's slug including own slug.
-
-    If the project is not available to user, redirect to 404
-    """
+def get_project_list_or_404(project_slug, user):
+    """Return list of project and its subprojects."""
     queryset = Project.objects.api(user).only('slug')
-    project = get_object_or_404(queryset, slug=project_slug)
 
-    subprojects_slug = (queryset.filter(superprojects__parent_id=project.id)
-                        .values_list('slug', flat=True))
+    project = get_object_or_404(queryset, slug=project_slug)
+    subprojects = queryset.filter(superprojects__parent_id=project.id)
 
-    slug_list = [project.slug] + list(subprojects_slug)
-    return slug_list
+    project_list = list(subprojects) + [project]
+    return project_list
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index e7989023109..062ee1fed50 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -14,7 +14,7 @@
 from readthedocs.projects.models import Project
 from readthedocs.search import lib as search_lib
 from readthedocs.search.documents import ProjectDocument, PageDocument
-from readthedocs.search.utils import get_project_slug_list_or_404
+from readthedocs.search.utils import get_project_list_or_404
 
 log = logging.getLogger(__name__)
 LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}'
@@ -55,8 +55,9 @@ def elastic_search(request):
         elif user_input.type == 'file':
             kwargs = {}
             if user_input.project:
-                project_slug_list = get_project_slug_list_or_404(project_slug=user_input.project,
-                                                                 user=request.user)
+                projects_list = get_project_list_or_404(project_slug=user_input.project,
+                                                        user=request.user)
+                project_slug_list = [project.slug for project in projects_list]
                 kwargs['projects_list'] = project_slug_list
             if user_input.version:
                 kwargs['versions_list'] = user_input.version