From f31aa5b0cc976642d43b917eab701dd8450a8fdb Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Fri, 22 Jun 2018 22:12:05 +0600 Subject: [PATCH 1/3] Implement exact match search and rewrite for operator ordering --- readthedocs/search/faceted_search.py | 22 ++++++++++++++++++++++ readthedocs/search/tests/test_views.py | 17 +++++++++++++++++ tox.ini | 1 - 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 56a1d3dec18..95a074ef4fb 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -1,4 +1,5 @@ from elasticsearch_dsl import FacetedSearch, TermsFacet +from elasticsearch_dsl.query import SimpleQueryString, Bool class RTDFacetedSearch(FacetedSearch): @@ -29,3 +30,24 @@ class FileSearch(RTDFacetedSearch): 'project': TermsFacet(field='project'), 'version': TermsFacet(field='version') } + + def query(self, search, query): + """ + Add query part to ``search``. 
+ """ + + if query: + all_queries = [] + + # Need to search for both 'AND' and 'OR' operations + # Documents matching the AND query also match the OR query, so their summed score ranks them first + for operator in ['AND', 'OR']: + query_string = SimpleQueryString(query=query, fields=self.fields, + default_operator=operator) + all_queries.append(query_string) + + # Run a bool query with ``should`` so it returns results where either query matches + bool_query = Bool(should=all_queries) + search = search.query(bool_query) + + return search diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index 4e0f3614543..253ad077efb 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -101,6 +101,23 @@ def test_file_search_case_insensitive(self, client, project, case): # Check the actual text is in the result, not the cased one assert query_text in result.text() + def test_file_search_exact_match(self, client, project): + """Check quoted query match exact phrase + + Making a query with quoted text like *"foo bar"* should match + exactly *foo bar* phrase. + """ + + # `Github` word is present both in `kuma` and `pipeline` files + # But the phrase Github can is available only in kuma docs. 
+ # So search with this phrase to check + query = r'"GitHub can"' + + result, _ = self._get_search_result(url=self.url, client=client, + search_params={'q': query, 'type': 'file'}) + + assert len(result) == 1 + def test_page_search_not_return_removed_page(self, client, project): """Check removed page are not in the search index""" query = get_search_query_from_project_file(project_slug=project.slug) diff --git a/tox.ini b/tox.ini index c7e5b74878b..821fded07d6 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,6 @@ setenv = DJANGO_SETTINGS_MODULE=readthedocs.settings.test LANG=C LC_CTYPE=C.UTF-8 - DJANGO_SETTINGS_SKIP_LOCAL=True deps = -r{toxinidir}/requirements/testing.txt changedir = {toxinidir}/readthedocs commands = From a567551cd9d6b5c1a63f38c5e351ab5bc670f80f Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Sat, 23 Jun 2018 01:26:06 +0600 Subject: [PATCH 2/3] adding test for faceted search --- readthedocs/search/faceted_search.py | 4 +- readthedocs/search/tests/conftest.py | 2 +- readthedocs/search/tests/data/docs/story.json | 3 +- .../tests/data/pipeline/installation.json | 2 +- .../search/tests/test_faceted_search.py | 50 +++++++++++++++++++ readthedocs/search/tests/test_views.py | 4 +- 6 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 readthedocs/search/tests/test_faceted_search.py diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 95a074ef4fb..55a38ab0b03 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -32,9 +32,7 @@ class FileSearch(RTDFacetedSearch): } def query(self, search, query): - """ - Add query part to ``search``. 
- """ + """Add query part to ``search``.""" if query: all_queries = [] diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py index b6aa7799799..52c4811342e 100644 --- a/readthedocs/search/tests/conftest.py +++ b/readthedocs/search/tests/conftest.py @@ -20,7 +20,7 @@ def es_index(): @pytest.fixture(autouse=True) -def all_projects(es_index, mock_processed_json): +def all_projects(es_index, mock_processed_json, db): projects_list = [] for project_slug in ALL_PROJECTS: project = G(Project, slug=project_slug, name=project_slug) diff --git a/readthedocs/search/tests/data/docs/story.json b/readthedocs/search/tests/data/docs/story.json index 05b1b614a7b..fc191d5e0ea 100644 --- a/readthedocs/search/tests/data/docs/story.json +++ b/readthedocs/search/tests/data/docs/story.json @@ -1,7 +1,6 @@ { - "content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial Support\nThe time of the core developers of Read the Docs is limited. 
We provide official support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.", + "content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want. We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial website Support\nThe time of the core developers of Read the Docs is limited. 
We provide official developers support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. 
Please open an issue with a proposal if you want to take on this task.", "headers": [ - "Official Support", "Unsupported", "Rationale" ], diff --git a/readthedocs/search/tests/data/pipeline/installation.json b/readthedocs/search/tests/data/pipeline/installation.json index 37bf0170c1b..c3a1bb7a5f1 100644 --- a/readthedocs/search/tests/data/pipeline/installation.json +++ b/readthedocs/search/tests/data/pipeline/installation.json @@ -1,5 +1,5 @@ { - "content": "PipelineInstallation Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. 
It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.", + "content": "PipelineInstallation Official Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. 
It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.", "headers": [ "Installation", "Upgrading from 1.3", diff --git a/readthedocs/search/tests/test_faceted_search.py b/readthedocs/search/tests/test_faceted_search.py new file mode 100644 index 00000000000..7156f91abbe --- /dev/null +++ b/readthedocs/search/tests/test_faceted_search.py @@ -0,0 +1,50 @@ +import pytest + +from readthedocs.search.documents import PageDocument + + +class TestFileSearch(object): + + @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) + def test_search_exact_match(self, client, project, case): + """Check quoted query match exact phrase with case insensitively + + Making a query with quoted text like ``"foo bar"`` should match + exactly ``foo bar`` or ``Foo Bar`` etc + """ + + # `Github` word is present both in `kuma` and `pipeline` files + # But the phrase Github can is available only in kuma docs. + # So search with this phrase to check + query_text = r'"GitHub can"' + cased_query = getattr(query_text, case) + query = cased_query() + + page_search = PageDocument.faceted_search(query=query) + results = page_search.execute() + + assert len(results) == 1 + assert results[0]['project'] == 'kuma' + assert results[0]['path'] == 'documentation' + + def test_search_combined_result(self, client, project): + """Check search result are combined of both `AND` and `OR` operator + + If query is `Foo Bar` then the result should be as following order: + + - Where both `Foo Bar` is present + - Where `Foo` or `Bar` is present + """ + + query = 'Official Support' + page_search = PageDocument.faceted_search(query=query) + results = page_search.execute() + assert len(results) == 3 + + result_paths = [r.path for r in results] + # ``open-source-philosophy`` page has both ``Official Support`` words + # ``docker`` page has ``Support`` word + # ``installation`` page has ``Official`` word + expected_paths = 
['open-source-philosophy', 'docker', 'installation'] + + assert result_paths == expected_paths diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index 253ad077efb..27567b97d5b 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -104,8 +104,8 @@ def test_file_search_case_insensitive(self, client, project, case): def test_file_search_exact_match(self, client, project): """Check quoted query match exact phrase - Making a query with quoted text like *"foo bar"* should match - exactly *foo bar* phrase. + Making a query with quoted text like ``"foo bar"`` should match + exactly ``foo bar`` phrase. """ # `Github` word is present both in `kuma` and `pipeline` files From 4ac0993925db006513dc968d232d0543780b353f Mon Sep 17 00:00:00 2001 From: Safwan Rahman Date: Sat, 23 Jun 2018 01:34:31 +0600 Subject: [PATCH 3/3] fixing linter --- readthedocs/search/faceted_search.py | 3 +-- readthedocs/search/tests/test_faceted_search.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 55a38ab0b03..77d9f13cc76 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -32,8 +32,7 @@ class FileSearch(RTDFacetedSearch): } def query(self, search, query): - """Add query part to ``search``.""" - + """Add query part to ``search``""" if query: all_queries = [] diff --git a/readthedocs/search/tests/test_faceted_search.py b/readthedocs/search/tests/test_faceted_search.py index 7156f91abbe..52ff2e81ed8 100644 --- a/readthedocs/search/tests/test_faceted_search.py +++ b/readthedocs/search/tests/test_faceted_search.py @@ -12,7 +12,6 @@ def test_search_exact_match(self, client, project, case): Making a query with quoted text like ``"foo bar"`` should match exactly ``foo bar`` or ``Foo Bar`` etc """ - # `Github` word is present both in `kuma` and `pipeline` files # But the phrase 
Github can is available only in kuma docs. # So search with this phrase to check @@ -35,7 +34,6 @@ def test_search_combined_result(self, client, project): - Where both `Foo Bar` is present - Where `Foo` or `Bar` is present """ - query = 'Official Support' page_search = PageDocument.faceted_search(query=query) results = page_search.execute()