Skip to content

Commit

Permalink
Make tools/getJSONPathFromKey.py leverage HTML parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Benjamin-Loison committed Mar 31, 2024
1 parent 1767408 commit d24a69a
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions tools/getJSONPathFromKey.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import sys
import json
from lxml import html

def treatKey(obj, path, key):
objKey = obj[key]
Expand Down Expand Up @@ -76,9 +77,16 @@ def _finditem(obj, key, path = ''):
if not isJSON:
with open(filePath) as f:
content = f.read()
# Should use an HTML and JavaScript parser instead of relying on string splitting.

# Should use a JavaScript parser instead of splitting on the variable declaration.
# The same applies to `getJSONStringFromHTMLScriptPrefix`; both parsing methods should be identical.
newContent = content.split(ytVariableName + ' = ')[1].split(';<')[0]
tree = html.fromstring(content)
ytVariableDeclaration = ytVariableName + ' = '
for script in tree.xpath('//script'):
scriptContent = script.text_content()
if ytVariableDeclaration in scriptContent:
newContent = scriptContent.split(ytVariableDeclaration)[1][:-1]
break
with open(filePath, 'w') as f:
f.write(newContent)

Expand Down

0 comments on commit d24a69a

Please sign in to comment.