-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Index path with original path name #5785
Changes from 7 commits
8cd3e7e
abf4a75
0874fba
2fcd502
a4edbad
28fb13f
ec839cd
c971410
a2e0e3f
0e55e48
f55b971
96a85fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,40 +59,37 @@ def generate_sections_from_pyquery(body): | |
} | ||
|
||
|
||
def process_file(filename): | ||
"""Read a file from disk and parse it into a structured dict.""" | ||
def process_file(fjson_filename, filename): | ||
"""Read the fjson file from disk and parse it into a structured dict.""" | ||
try: | ||
with codecs.open(filename, encoding='utf-8', mode='r') as f: | ||
with codecs.open(fjson_filename, encoding='utf-8', mode='r') as f: | ||
file_contents = f.read() | ||
except IOError: | ||
log.info('Unable to read file: %s', filename) | ||
return None | ||
log.info('Unable to read file: %s', fjson_filename) | ||
raise | ||
data = json.loads(file_contents) | ||
sections = [] | ||
title = '' | ||
body_content = '' | ||
if 'current_page_name' in data: | ||
path = data['current_page_name'] | ||
else: | ||
log.info('Unable to index file due to no name %s', filename) | ||
return None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we return None from this function, |
||
if 'body' in data and data['body']: | ||
|
||
if data.get('body'): | ||
body = PyQuery(data['body']) | ||
body_content = body.text().replace('¶', '') | ||
sections.extend(generate_sections_from_pyquery(body)) | ||
else: | ||
log.info('Unable to index content for: %s', filename) | ||
log.info('Unable to index content for: %s', fjson_filename) | ||
|
||
if 'title' in data: | ||
title = data['title'] | ||
if title.startswith('<'): | ||
title = PyQuery(data['title']).text() | ||
else: | ||
log.info('Unable to index title for: %s', filename) | ||
log.info('Unable to index title for: %s', fjson_filename) | ||
|
||
return { | ||
'headers': process_headers(data, filename), | ||
'headers': process_headers(data, fjson_filename), | ||
'content': body_content, | ||
'path': path, | ||
'path': filename, | ||
'title': title, | ||
'sections': sections, | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't this cause all indexing to fail if a single file is missing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I see we're catching it at a higher level, 👍
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We were returning None, but we expect to always have a dict.
The re-raised exception is caught by https://github.com/stsewd/readthedocs.org/blob/96a85fa8af3cac8b139bdf99598d119eae0e0163/readthedocs/projects/models.py#L1244-L1260
which returns a default dict.