Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Mkdocs search #6937

Merged
merged 18 commits into from
Apr 29, 2020
Merged
6 changes: 6 additions & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
MEDIA_TYPES,
PRIVACY_CHOICES,
SPHINX,
SPHINX_HTMLDIR,
SPHINX_SINGLEHTML,
)
from readthedocs.projects.models import APIProject, Project
from readthedocs.projects.version_handling import determine_stable_version
Expand Down Expand Up @@ -361,6 +363,10 @@ def supports_wipe(self):
"""Return True if version is not external."""
return not self.type == EXTERNAL

@property
def is_sphinx_type(self):
    """Return True when this version was built with any Sphinx doctype."""
    sphinx_doctypes = (SPHINX, SPHINX_HTMLDIR, SPHINX_SINGLEHTML)
    return self.documentation_type in sphinx_doctypes

def get_subdomain_url(self):
external = self.type == EXTERNAL
return self.project.get_docs_url(
Expand Down
134 changes: 131 additions & 3 deletions readthedocs/core/static-src/core/js/doc-embed/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function append_html_to_contents(contents, template, data) {
* Sphinx indexer. This will fall back to the standard indexer on an API
* failure,
*/
function attach_elastic_search_query(data) {
function attach_elastic_search_query_sphinx(data) {
var project = data.project;
var version = data.version;
var language = data.language || 'en';
Expand All @@ -56,7 +56,6 @@ function attach_elastic_search_query(data) {
search_def
.then(function (data) {
var hit_list = data.results || [];
var total_count = data.count || 0;

if (hit_list.length) {
for (var i = 0; i < hit_list.length; i += 1) {
Expand Down Expand Up @@ -288,9 +287,138 @@ function attach_elastic_search_query(data) {
}


/*
 * MkDocs indexer. Overrides the built-in MkDocs search with results from
 * the Read the Docs search API, falling back to the original MkDocs
 * search on an API failure or when the API returns no results.
 */
function attach_elastic_search_query_mkdocs(data) {
    var project = data.project;
    var version = data.version;
    var language = data.language || 'en';

    // Delegate to the original MkDocs search when our API can't serve.
    var fallbackSearch = function () {
        if (typeof window.doSearchFallback !== 'undefined') {
            window.doSearchFallback();
        } else {
            console.log('Unable to fallback to original MkDocs search.');
        }
    };

    var doSearch = function () {
        var query = document.getElementById('mkdocs-search-query').value;

        var search_def = $.Deferred();

        // Build the API URL with an <a> element so the browser handles
        // relative/absolute resolution for us.
        var search_url = document.createElement('a');
        search_url.href = data.proxied_api_host + '/api/v2/docsearch/';
        search_url.search = '?q=' + encodeURIComponent(query) + '&project=' + project +
                            '&version=' + version + '&language=' + language;

        search_def
            .then(function (data) {
                var hit_list = data.results || [];

                if (hit_list.length) {
                    var searchResults = $('#mkdocs-search-results');
                    searchResults.empty();

                    for (var i = 0; i < hit_list.length; i += 1) {
                        var doc = hit_list[i];
                        var inner_hits = doc.inner_hits || [];

                        var result = $('<article>');
                        result.append(
                            $('<h3>').append($('<a>', {'href': doc.link, 'text': doc.title}))
                        );

                        // Results may come from subprojects; label them.
                        if (doc.project !== project) {
                            var text = '(from project ' + doc.project + ')';
                            result.append($('<span>', {'text': text}));
                        }

                        for (var j = 0; j < inner_hits.length; j += 1) {
                            var section = inner_hits[j];

                            if (section.type === 'sections') {
                                var section_link = doc.link + '#' + section._source.id;
                                var section_title = section._source.title;
                                var section_content = section._source.content.substr(0, MAX_SUBSTRING_LIMIT) + " ...";

                                result.append(
                                    $('<h4>').append($('<a>', {'href': section_link, 'text': section_title}))
                                );
                                result.append(
                                    $('<p>', {'text': section_content})
                                );
                                searchResults.append(result);
                            }
                        }
                    }
                } else {
                    console.log('Read the Docs search returned 0 result. Falling back to MkDocs search.');
                    fallbackSearch();
                }
            })
            .fail(function (error) {
                console.log('Read the Docs search failed. Falling back to MkDocs search.');
                fallbackSearch();
            });

        $.ajax({
            url: search_url.href,
            crossDomain: true,
            xhrFields: {
                withCredentials: true,
            },
            complete: function (resp, status_code) {
                if (
                    status_code !== 'success' ||
                    typeof (resp.responseJSON) === 'undefined' ||
                    resp.responseJSON.count === 0
                ) {
                    return search_def.reject();
                }
                return search_def.resolve(resp.responseJSON);
            }
        })
        .fail(function (resp, status_code, error) {
            return search_def.reject();
        });
    };

    var initSearch = function () {
        var search_input = document.getElementById('mkdocs-search-query');
        if (search_input) {
            search_input.addEventListener('keyup', doSearch);
        }

        var term = window.getSearchTermFromLocation();
        // Bug fix: also guard on `search_input` here — previously a ?q=
        // term on a page without the search input threw a TypeError when
        // assigning to `search_input.value` on null.
        if (term && search_input) {
            search_input.value = term;
            doSearch();
        }
    };

    $(document).ready(function () {
        // We can't override the search completely,
        // because we can't delete the original event listener,
        // and MkDocs includes its search functions after ours.
        // If MkDocs is loaded before, this will trigger a double search
        // (but ours will have precedence).

        // Note: this function is only available on MkDocs >=1.x
        window.doSearchFallback = window.doSearch;

        window.doSearch = doSearch;
        window.initSearch = initSearch;
        initSearch();
    });
}


/*
 * Attach the Read the Docs search override matching the builder that
 * produced this page: MkDocs pages get the MkDocs override, every other
 * builder gets the Sphinx variant.
 */
function init() {
    var data = rtddata.get();
    if (data.builder === 'mkdocs') {
        attach_elastic_search_query_mkdocs(data);
    } else {
        attach_elastic_search_query_sphinx(data);
    }
}

module.exports = {
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/core/static/core/js/readthedocs-doc-embed.js

Large diffs are not rendered by default.

27 changes: 26 additions & 1 deletion readthedocs/doc_builder/backends/mkdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
import json
import logging
import os
import shutil
from pathlib import Path

import yaml
from django.conf import settings
from django.template import loader as template_loader
from readthedocs.projects.constants import MKDOCS_HTML, MKDOCS

from readthedocs.doc_builder.base import BaseBuilder
from readthedocs.doc_builder.exceptions import MkDocsYAMLParseError
from readthedocs.projects.constants import MKDOCS, MKDOCS_HTML
from readthedocs.projects.models import Feature


Expand Down Expand Up @@ -314,10 +316,33 @@ def get_theme_name(self, mkdocs_config):


class MkdocsHTML(BaseMkdocs):

    """MkDocs HTML builder: renders the site and stages the search index."""

    type = 'mkdocs'
    builder = 'build'
    build_dir = '_build/html'

    def move(self, **__):
        """Move the built HTML, then copy the search index to its own dir."""
        super().move()
        # The MkDocs build drops its search index next to the HTML output.
        index_file = (
            Path(self.old_artifact_path) / 'search/search_index.json'
        ).resolve()
        target_dir = Path(
            self.project.artifact_path(
                version=self.version.slug,
                type_='mkdocs_search',
            )
        )
        if not index_file.exists():
            log.warning('Not moving json because the build dir is unknown.',)
            return
        # Recreate the target directory from scratch so no stale index
        # from a previous build lingers.
        if target_dir.exists():
            shutil.rmtree(target_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        log.info('Copying json on the local filesystem')
        shutil.copy(index_file, target_dir)


class MkdocsJSON(BaseMkdocs):
type = 'mkdocs_json'
Expand Down
50 changes: 48 additions & 2 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
validate_repository_url,
)
from readthedocs.projects.version_handling import determine_stable_version
from readthedocs.search.parse_json import process_file
from readthedocs.search.parse_json import process_file, process_mkdocs_index_file
from readthedocs.vcs_support.backends import backend_cls
from readthedocs.vcs_support.utils import Lock, NonBlockingLock

Expand Down Expand Up @@ -1329,7 +1329,7 @@ class Meta:

objects = HTMLFileManager.from_queryset(HTMLFileQuerySet)()

def get_processed_json(self):
def get_processed_json_sphinx(self):
"""
Get the parsed JSON for search indexing.

Expand Down Expand Up @@ -1373,6 +1373,52 @@ def get_processed_json(self):
'domain_data': {},
}

def get_processed_json_mkdocs(self):
    """
    Parse the MkDocs ``search_index.json`` entry for this page.

    Looks up the index file in the build media storage and extracts this
    page's entry. Returns an empty search structure when the index file
    is missing or any step of the processing fails.
    """
    log.debug('Processing mkdocs index')
    storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
    storage_path = self.project.get_storage_path(
        type_='json', version_slug=self.version.slug, include_file=False
    )
    # Bug fix: initialize before the try block so the except handler can
    # always log it -- `storage.join` may raise before the assignment,
    # which previously caused a NameError inside the handler.
    file_path = None
    try:
        file_path = storage.join(storage_path, 'search_index.json')
        if storage.exists(file_path):
            index_data = process_mkdocs_index_file(file_path, page=self.path)
            if index_data:
                return index_data
    except Exception:
        log.warning(
            'Unhandled exception during search processing file: %s',
            file_path,
        )
    return {
        'path': self.path,
        'title': '',
        'sections': [],
        'domain_data': {},
    }

def get_processed_json(self):
    """
    Get the parsed JSON for search indexing.

    Dispatches to the Sphinx or MkDocs parser depending on the doctype
    this version was built with.

    Returns a dictionary with the following structure.
    {
        'path': 'file path',
        'title': 'Title',
        'sections': [
            {
                'id': 'section-anchor',
                'title': 'Section title',
                'content': 'Section content',
            },
        ],
        'domain_data': {},
    }
    """
    if not self.version.is_sphinx_type:
        return self.get_processed_json_mkdocs()
    return self.get_processed_json_sphinx()

@cached_property
def processed_json(self):
return self.get_processed_json()
Expand Down
16 changes: 10 additions & 6 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -989,7 +989,10 @@ def store_build_artifacts(

# Search media (JSON)
if search:
types_to_copy.append(('json', 'sphinx_search'))
if self.config.doctype == MKDOCS:
types_to_copy.append(('json', 'mkdocs_search'))
else:
types_to_copy.append(('json', 'sphinx_search'))

if localmedia:
types_to_copy.append(('htmlzip', 'sphinx_localmedia'))
Expand Down Expand Up @@ -1219,11 +1222,9 @@ def get_final_doctype(self):

def build_docs_search(self):
    """
    Return whether search data should be built for this version.

    Search runs for every internal version: MkDocs always produces an
    index, and Sphinx builds one through the rtd-sphinx-extension.
    External (pull request) versions are never indexed.
    """
    is_external_version = self.version.type == EXTERNAL
    return not is_external_version

def build_docs_localmedia(self):
"""Get local media files with separate build."""
Expand Down Expand Up @@ -1577,6 +1578,9 @@ def _create_intersphinx_data(version, commit, build):
:param commit: Commit that updated path
:param build: Build id
"""
if not version.is_sphinx_type:
return

storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

html_storage_path = version.project.get_storage_path(
Expand Down
11 changes: 5 additions & 6 deletions readthedocs/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,10 @@ class Meta:

def prepare_domains(self, html_file):
"""Prepares and returns the values for domains field."""
all_domains = []
if not html_file.version.is_sphinx_type:
return []

all_domains = []
try:
domains_qs = html_file.sphinx_domains.exclude(
domain='std',
Expand Down Expand Up @@ -172,11 +174,8 @@ def get_queryset(self):
"""Overwrite default queryset to filter certain files to index."""
queryset = super().get_queryset()

# Do not index files that belong to non sphinx project
# Also do not index certain files
queryset = queryset.internal().filter(
project__documentation_type__contains='sphinx'
)
# Do not index files from external versions
queryset = queryset.internal().all()

# TODO: Make this smarter
# This was causing issues excluding some valid user documentation pages
Expand Down
Loading