Skip to content

Commit

Permalink
Merge pull request readthedocs#4292 from safwanrahman/exact_match
Browse files Browse the repository at this point in the history
[Fix readthedocs#2457] Implement exact match search
  • Loading branch information
ericholscher authored and safwanrahman committed Jul 16, 2018
2 parents 24f1404 + 4ac0993 commit 38143e3
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 5 deletions.
19 changes: 19 additions & 0 deletions readthedocs/search/faceted_search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from elasticsearch_dsl import FacetedSearch, TermsFacet
from elasticsearch_dsl.query import SimpleQueryString, Bool


class RTDFacetedSearch(FacetedSearch):
Expand Down Expand Up @@ -29,3 +30,21 @@ class FileSearch(RTDFacetedSearch):
'project': TermsFacet(field='project'),
'version': TermsFacet(field='version')
}

def query(self, search, query):
"""Add query part to ``search``"""
if query:
all_queries = []

# Need to search for both 'AND' and 'OR' operations
# The score of AND should be higher as it comes first
for operator in ['AND', 'OR']:
query_string = SimpleQueryString(query=query, fields=self.fields,
default_operator=operator)
all_queries.append(query_string)

# Run bool query with should, so it returns result where either of the query matches
bool_query = Bool(should=all_queries)
search = search.query(bool_query)

return search
2 changes: 1 addition & 1 deletion readthedocs/search/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def es_index():


@pytest.fixture(autouse=True)
def all_projects(es_index, mock_processed_json):
def all_projects(es_index, mock_processed_json, db):
projects_list = []
for project_slug in ALL_PROJECTS:
project = G(Project, slug=project_slug, name=project_slug)
Expand Down
3 changes: 1 addition & 2 deletions readthedocs/search/tests/data/docs/story.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial Support\nThe time of the core developers of Read the Docs is limited. We provide official support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
"content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial website Support\nThe time of the core developers of Read the Docs is limited. We provide official developers support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.",
"headers": [
"Official Support",
"Unsupported",
"Rationale"
],
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/search/tests/data/pipeline/installation.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"content": "PipelineInstallation Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
"content": "PipelineInstallation Official Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.",
"headers": [
"Installation",
"Upgrading from 1.3",
Expand Down
48 changes: 48 additions & 0 deletions readthedocs/search/tests/test_faceted_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest

from readthedocs.search.documents import PageDocument


class TestFileSearch(object):

@pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
def test_search_exact_match(self, client, project, case):
"""Check quoted query match exact phrase with case insensitively
Making a query with quoted text like ``"foo bar"`` should match
exactly ``foo bar`` or ``Foo Bar`` etc
"""
# `Github` word is present both in `kuma` and `pipeline` files
# But the phrase Github can is available only in kuma docs.
# So search with this phrase to check
query_text = r'"GitHub can"'
cased_query = getattr(query_text, case)
query = cased_query()

page_search = PageDocument.faceted_search(query=query)
results = page_search.execute()

assert len(results) == 1
assert results[0]['project'] == 'kuma'
assert results[0]['path'] == 'documentation'

def test_search_combined_result(self, client, project):
"""Check search result are combined of both `AND` and `OR` operator
If query is `Foo Bar` then the result should be as following order:
- Where both `Foo Bar` is present
- Where `Foo` or `Bar` is present
"""
query = 'Official Support'
page_search = PageDocument.faceted_search(query=query)
results = page_search.execute()
assert len(results) == 3

result_paths = [r.path for r in results]
# ``open-source-philosophy`` page has both ``Official Support`` words
# ``docker`` page has ``Support`` word
# ``installation`` page has ``Official`` word
expected_paths = ['open-source-philosophy', 'docker', 'installation']

assert result_paths == expected_paths
17 changes: 17 additions & 0 deletions readthedocs/search/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,23 @@ def test_file_search_case_insensitive(self, client, project, case):
# Check the actual text is in the result, not the cased one
assert query_text in result.text()

def test_file_search_exact_match(self, client, project):
"""Check quoted query match exact phrase
Making a query with quoted text like ``"foo bar"`` should match
exactly ``foo bar`` phrase.
"""

# `Github` word is present both in `kuma` and `pipeline` files
# But the phrase Github can is available only in kuma docs.
# So search with this phrase to check
query = r'"GitHub can"'

result, _ = self._get_search_result(url=self.url, client=client,
search_params={'q': query, 'type': 'file'})

assert len(result) == 1

def test_page_search_not_return_removed_page(self, client, project):
"""Check removed page are not in the search index"""
query = get_search_query_from_project_file(project_slug=project.slug)
Expand Down
1 change: 0 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ setenv =
DJANGO_SETTINGS_MODULE=readthedocs.settings.test
LANG=C
LC_CTYPE=C.UTF-8
DJANGO_SETTINGS_SKIP_LOCAL=True
deps = -r{toxinidir}/requirements/testing.txt
changedir = {toxinidir}/readthedocs
commands =
Expand Down

0 comments on commit 38143e3

Please sign in to comment.