Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Index path with original path name #5785

Merged
merged 12 commits into from
Jun 18, 2019
5 changes: 2 additions & 3 deletions readthedocs/core/static-src/core/js/doc-embed/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ function attach_elastic_search_query(data) {
var list_item = $('<li style="display: none;"></li>');

// Creating the result from elements
var link = doc.link + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
'?highlight=' + $.urlencode(query);
var url = doc.url + '?highlight=' + $.urlencode(query);

var item = $('<a>', {'href': link});
var item = $('<a>', {'href': url});
item.html(doc.title);
list_item.append(item);

Expand Down
14 changes: 7 additions & 7 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,21 +1231,21 @@ def get_processed_json(self):
Both lead to `foo/index.html`
https://github.com/rtfd/readthedocs.org/issues/5368
"""
paths = []
fjson_paths = []
basename = os.path.splitext(self.path)[0]
paths.append(basename + '.fjson')
fjson_paths.append(basename + '.fjson')
if basename.endswith('/index'):
new_basename = re.sub(r'\/index$', '', basename)
paths.append(new_basename + '.fjson')
fjson_paths.append(new_basename + '.fjson')

full_json_path = self.project.get_production_media_path(
type_='json', version_slug=self.version.slug, include_file=False
)
try:
for path in paths:
file_path = os.path.join(full_json_path, path)
for fjson_path in fjson_paths:
file_path = os.path.join(full_json_path, fjson_path)
if os.path.exists(file_path):
return process_file(file_path)
return process_file(file_path, self.path)
except Exception:
log.warning(
'Unhandled exception during search processing file: %s',
Expand All @@ -1254,7 +1254,7 @@ def get_processed_json(self):
return {
'headers': [],
'content': '',
'path': file_path,
'path': self.path,
'title': '',
'sections': [],
}
Expand Down
1 change: 1 addition & 0 deletions readthedocs/rtd_tests/tests/test_search_json_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_h2_parsing(self):
base_dir,
'files/api.fjson',
),
'files/api.html',
)
self.assertEqual(data['sections'][1]['id'], 'a-basic-api-client-using-slumber')
# Only capture h2's after the first section
Expand Down
24 changes: 22 additions & 2 deletions readthedocs/search/api.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import logging
import os
from pprint import pformat

from rest_framework import generics
from rest_framework import serializers
from rest_framework import generics, serializers
from rest_framework.exceptions import ValidationError
from rest_framework.pagination import PageNumberPagination

from readthedocs.search.faceted_search import PageSearch
from readthedocs.search.utils import get_project_list_or_404


log = logging.getLogger(__name__)


Expand All @@ -23,10 +24,29 @@ class PageSearchSerializer(serializers.Serializer):
version = serializers.CharField()
title = serializers.CharField()
path = serializers.CharField()
# Doc url without extension
link = serializers.SerializerMethodField()
# Doc url with extension
url = serializers.SerializerMethodField()
highlight = serializers.SerializerMethodField()

def get_link(self, obj):
    """
    Return the document URL without its file extension.

    .. warning::
        This is only used to keep compatibility with
        the previous search implementation.
        Use `url` instead.
    """
    projects_url = self.context.get('projects_url')
    if not projects_url:
        # No URL mapping available in the serializer context; mirror the
        # implicit-None behavior of the original implementation.
        return None
    base_url = projects_url[obj.project]
    path_without_ext = os.path.splitext(obj.path)[0]
    return base_url + path_without_ext

def get_url(self, obj):
"""Gets the full url."""
projects_url = self.context.get('projects_url')
if projects_url:
docs_url = projects_url[obj.project]
Expand Down
27 changes: 12 additions & 15 deletions readthedocs/search/parse_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,40 +59,37 @@ def generate_sections_from_pyquery(body):
}


def process_file(filename):
"""Read a file from disk and parse it into a structured dict."""
def process_file(fjson_filename, filename):
"""Read the fjson file from disk and parse it into a structured dict."""
try:
with codecs.open(filename, encoding='utf-8', mode='r') as f:
with codecs.open(fjson_filename, encoding='utf-8', mode='r') as f:
file_contents = f.read()
except IOError:
log.info('Unable to read file: %s', filename)
return None
log.info('Unable to read file: %s', fjson_filename)
raise
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this cause all indexing to fail if a single file is missing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see we're catching it at a higher level, 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were returning None, but we expect to always have a dict.

This is caught by https://github.com/stsewd/readthedocs.org/blob/96a85fa8af3cac8b139bdf99598d119eae0e0163/readthedocs/projects/models.py#L1244-L1260

Which returns a default dict

data = json.loads(file_contents)
sections = []
title = ''
body_content = ''
if 'current_page_name' in data:
path = data['current_page_name']
else:
log.info('Unable to index file due to no name %s', filename)
return None
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we return None from this function, get_processed_json is going to return None too, and we always expect a dict from here. get_processed_json has a default: https://github.com/stsewd/readthedocs.org/blob/a2e0e3f5e442b0072a4b7cbac9efadbe2a41c224/readthedocs/projects/models.py#L1254-L1261

if 'body' in data and data['body']:

if data.get('body'):
body = PyQuery(data['body'])
body_content = body.text().replace('¶', '')
sections.extend(generate_sections_from_pyquery(body))
else:
log.info('Unable to index content for: %s', filename)
log.info('Unable to index content for: %s', fjson_filename)

if 'title' in data:
title = data['title']
if title.startswith('<'):
title = PyQuery(data['title']).text()
else:
log.info('Unable to index title for: %s', filename)
log.info('Unable to index title for: %s', fjson_filename)

return {
'headers': process_headers(data, filename),
'headers': process_headers(data, fjson_filename),
'content': body_content,
'path': path,
'path': filename,
'title': title,
'sections': sections,
}
Expand Down