forked from readthedocs/readthedocs.org
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request readthedocs#4309 from safwanrahman/search_api
[fix readthedocs#4265] Port Document search API for Elasticsearch 6.x
- Loading branch information
Showing
12 changed files
with
310 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from rest_framework import generics | ||
from rest_framework import exceptions | ||
from rest_framework.exceptions import ValidationError | ||
|
||
from readthedocs.projects.models import Project | ||
from readthedocs.search.documents import PageDocument | ||
from readthedocs.search.filters import SearchFilterBackend | ||
from readthedocs.search.pagination import SearchPagination | ||
from readthedocs.search.serializers import PageSearchSerializer | ||
from readthedocs.search.utils import get_project_list_or_404 | ||
|
||
|
||
class PageSearchAPIView(generics.ListAPIView): | ||
pagination_class = SearchPagination | ||
filter_backends = [SearchFilterBackend] | ||
serializer_class = PageSearchSerializer | ||
|
||
def get_queryset(self): | ||
""" | ||
Return Elasticsearch DSL Search object instead of Django Queryset. | ||
Django Queryset and elasticsearch-dsl ``Search`` object is similar pattern. | ||
So for searching, its possible to return ``Search`` object instead of queryset. | ||
The ``filter_backends`` and ``pagination_class`` is compatible with ``Search`` | ||
""" | ||
# Validate all the required params are there | ||
self.validate_query_params() | ||
query = self.request.query_params.get('query', '') | ||
queryset = PageDocument.simple_search(query=query) | ||
return queryset | ||
|
||
def validate_query_params(self): | ||
required_query_params = {'query', 'project', 'version'} # python `set` literal is `{}` | ||
request_params = set(self.request.query_params.keys()) | ||
missing_params = required_query_params - request_params | ||
if missing_params: | ||
errors = {} | ||
for param in missing_params: | ||
errors[param] = ["This query param is required"] | ||
|
||
raise ValidationError(errors) | ||
|
||
def get_serializer_context(self): | ||
context = super(PageSearchAPIView, self).get_serializer_context() | ||
context['projects_url'] = self.get_all_projects_url() | ||
return context | ||
|
||
def get_all_projects_url(self): | ||
version_slug = self.request.query_params.get('version') | ||
project_slug = self.request.query_params.get('project') | ||
all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user) | ||
projects_url = {} | ||
|
||
for project in all_projects: | ||
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug) | ||
|
||
return projects_url |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from rest_framework import filters | ||
|
||
|
||
class SearchFilterBackend(filters.BaseFilterBackend): | ||
|
||
"""Filter search result with project""" | ||
|
||
def filter_queryset(self, request, queryset, view): | ||
"""Overwrite the method to compatible with Elasticsearch DSL Search object.""" | ||
# ``queryset`` is actually a Elasticsearch DSL ``Search`` object. | ||
# So change the variable name | ||
es_search = queryset | ||
version_slug = request.query_params.get('version') | ||
projects_info = view.get_all_projects_url() | ||
project_slug_list = list(projects_info.keys()) | ||
# Elasticsearch ``terms`` query can take multiple values as list, | ||
# while ``term`` query takes single value. | ||
filtered_es_search = (es_search.filter('terms', project=project_slug_list) | ||
.filter('term', version=version_slug)) | ||
return filtered_es_search |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from rest_framework.pagination import PageNumberPagination | ||
|
||
|
||
class SearchPagination(PageNumberPagination): | ||
page_size = 10 | ||
page_size_query_param = 'page_size' | ||
max_page_size = 100 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from rest_framework import serializers | ||
|
||
from readthedocs.projects.models import Project | ||
|
||
|
||
class PageSearchSerializer(serializers.Serializer): | ||
project = serializers.CharField() | ||
version = serializers.CharField() | ||
title = serializers.CharField() | ||
path = serializers.CharField() | ||
link = serializers.SerializerMethodField() | ||
highlight = serializers.SerializerMethodField() | ||
|
||
def get_link(self, obj): | ||
projects_url = self.context.get('projects_url') | ||
if projects_url: | ||
docs_url = projects_url[obj.project] | ||
return docs_url + obj.path | ||
|
||
def get_highlight(self, obj): | ||
highlight = getattr(obj.meta, 'highlight', None) | ||
if highlight: | ||
return highlight.to_dict() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import pytest | ||
from django.core.urlresolvers import reverse | ||
from django_dynamic_fixture import G | ||
|
||
from readthedocs.builds.models import Version | ||
from readthedocs.projects.models import HTMLFile | ||
from readthedocs.search.tests.utils import get_search_query_from_project_file | ||
|
||
|
||
@pytest.mark.django_db | ||
@pytest.mark.search | ||
class TestDocumentSearch(object): | ||
url = reverse('doc_search') | ||
|
||
@pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) | ||
@pytest.mark.parametrize('page_num', [0, 1]) | ||
def test_search_works(self, api_client, project, data_type, page_num): | ||
query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num, | ||
data_type=data_type) | ||
|
||
version = project.versions.all()[0] | ||
search_params = {'project': project.slug, 'version': version.slug, 'query': query} | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
data = resp.data['results'] | ||
assert len(data) == 1 | ||
project_data = data[0] | ||
assert project_data['project'] == project.slug | ||
|
||
# Check highlight return correct object | ||
all_highlights = project_data['highlight'][data_type] | ||
for highlight in all_highlights: | ||
# Make it lower because our search is case insensitive | ||
assert query.lower() in highlight.lower() | ||
|
||
def test_doc_search_filter_by_project(self, api_client): | ||
"""Test Doc search result are filtered according to project""" | ||
|
||
# `Github` word is present both in `kuma` and `pipeline` files | ||
# so search with this phrase but filter through `kuma` project | ||
search_params = {'query': 'GitHub', 'project': 'kuma', 'version': 'latest'} | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
data = resp.data['results'] | ||
assert len(data) == 1 | ||
assert data[0]['project'] == 'kuma' | ||
|
||
def test_doc_search_filter_by_version(self, api_client, project): | ||
"""Test Doc search result are filtered according to version""" | ||
query = get_search_query_from_project_file(project_slug=project.slug) | ||
latest_version = project.versions.all()[0] | ||
# Create another version | ||
dummy_version = G(Version, project=project) | ||
# Create HTMLFile same as the latest version | ||
latest_version_files = HTMLFile.objects.all().filter(version=latest_version) | ||
for f in latest_version_files: | ||
f.version = dummy_version | ||
# Make primary key to None, so django will create new object | ||
f.pk = None | ||
f.save() | ||
|
||
search_params = {'query': query, 'project': project.slug, 'version': dummy_version.slug} | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
data = resp.data['results'] | ||
assert len(data) == 1 | ||
assert data[0]['project'] == project.slug | ||
|
||
def test_doc_search_pagination(self, api_client, project): | ||
"""Test Doc search result can be paginated""" | ||
latest_version = project.versions.all()[0] | ||
html_file = HTMLFile.objects.filter(version=latest_version)[0] | ||
title = html_file.processed_json['title'] | ||
query = title.split()[0] | ||
|
||
# Create 15 more same html file | ||
for _ in range(15): | ||
# Make primary key to None, so django will create new object | ||
html_file.pk = None | ||
html_file.save() | ||
|
||
search_params = {'query': query, 'project': project.slug, 'version': latest_version.slug} | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
# Check the count is 16 (1 existing and 15 new created) | ||
assert resp.data['count'] == 16 | ||
# Check there are next url | ||
assert resp.data['next'] is not None | ||
# There should be only 10 data as the pagination is 10 by default | ||
assert len(resp.data['results']) == 10 | ||
|
||
# Add `page_size` parameter and check the data is paginated accordingly | ||
search_params['page_size'] = 5 | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
assert len(resp.data['results']) == 5 | ||
|
||
def test_doc_search_without_parameters(self, api_client, project): | ||
"""Hitting Document Search endpoint without query parameters should return error""" | ||
resp = api_client.get(self.url) | ||
assert resp.status_code == 400 | ||
# Check error message is there | ||
assert sorted(['query', 'project', 'version']) == sorted(resp.data.keys()) | ||
|
||
def test_doc_search_subprojects(self, api_client, all_projects): | ||
"""Test Document search return results from subprojects also""" | ||
project = all_projects[0] | ||
subproject = all_projects[1] | ||
version = project.versions.all()[0] | ||
# Add another project as subproject of the project | ||
project.add_subproject(subproject) | ||
|
||
# Now search with subproject content but explicitly filter by the parent project | ||
query = get_search_query_from_project_file(project_slug=subproject.slug) | ||
search_params = {'query': query, 'project': project.slug, 'version': version.slug} | ||
resp = api_client.get(self.url, search_params) | ||
assert resp.status_code == 200 | ||
|
||
data = resp.data['results'] | ||
assert len(data) == 1 | ||
assert data[0]['project'] == subproject.slug | ||
# Check the link is the subproject document link | ||
document_link = subproject.get_docs_url(version_slug=version.slug) | ||
assert document_link in data[0]['link'] |
Oops, something went wrong.