Skip to content

Commit

Permalink
update cloudflare bypass (wip)
Browse files Browse the repository at this point in the history
This commit adds support for the two new JS expressions embedded in the
overall challenge code.

It does compute the correct 'jschl_answer' value, but the HTTP request to
/cdn-cgi/l/chk_jschl to get the 'cf_clearance' cookie always results in
a 403 response with a CAPTCHA inside (hence 'wip').

All attempts to make this HTTP request indistinguishable from one sent by a
regular web browser (which passes the test) have had no effect. These include:
- using the exact same HTTP headers as a web browser
- following the web browser's query argument order
- trying different wait times
  • Loading branch information
mikf committed Apr 1, 2019
1 parent 0f02e85 commit 9ebd29f
Showing 1 changed file with 44 additions and 12 deletions.
56 changes: 44 additions & 12 deletions gallery_dl/cloudflare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import time
import operator
import urllib.parse
from . import text
from . import text, exception
from .cache import memcache


Expand All @@ -22,6 +22,11 @@ def is_challenge(response):
b"jschl-answer" in response.content)


def is_captcha(response):
    """Return True if 'response' is a Cloudflare CAPTCHA page.

    A response counts as a CAPTCHA page when its status code is 403
    and its body contains the marker 'name="captcha-bypass"'.
    """
    return (response.status_code == 403 and
            b'name="captcha-bypass"' in response.content)


def solve_challenge(session, response, kwargs):
"""Solve Cloudflare challenge and get cfclearance cookie"""
parsed = urllib.parse.urlsplit(response.url)
Expand All @@ -35,8 +40,8 @@ def solve_challenge(session, response, kwargs):

page = response.text
params["s"] = text.extract(page, 'name="s" value="', '"')[0]
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
params["jschl_vc"] = text.extract(page, 'name="jschl_vc" value="', '"')[0]
params["pass"] = text.extract(page, 'name="pass" value="', '"')[0]
params["jschl_answer"] = solve_js_challenge(page, parsed.netloc)
headers["Referer"] = response.url

Expand All @@ -46,7 +51,15 @@ def solve_challenge(session, response, kwargs):
cf_kwargs["allow_redirects"] = False
cf_response = session.request(response.request.method, url, **cf_kwargs)

location = cf_response.headers["Location"]
location = cf_response.headers.get("Location")
if not location:
import logging
log = logging.getLogger("cloudflare")
rtype = "CAPTCHA" if is_captcha(cf_response) else "Unexpected"
log.error("%s response", rtype)
log.debug("Headers:\n%s", cf_response.headers)
log.debug("Content:\n%s", cf_response.text)
raise exception.StopExtraction()
if location[0] == "/":
location = root + location

Expand All @@ -73,7 +86,7 @@ def solve_js_challenge(page, netloc):
vlength = len(variable)

# evaluate the initial expression
solution = evaluate_expression(data["expr"])
solution = evaluate_expression(data["expr"], page, netloc)

# iterate over all remaining expressions
# and combine their values in 'solution'
Expand All @@ -85,37 +98,55 @@ def solve_js_challenge(page, netloc):
# select arithmetic function based on operator (+/-/*)
func = OPERATORS[expr[vlength]]
# evaluate the rest of the expression
value = evaluate_expression(expr[vlength+2:])
value = evaluate_expression(expr[vlength+2:], page, netloc)
# combine expression value with our current solution
solution = func(solution, value)

elif expr.startswith("a.value"):
# add length of hostname
solution += len(netloc)

if "t.length)" in expr:
# add length of hostname
solution += len(netloc)
if ".toFixed(" in expr:
# trim solution to 10 decimal places
# and strip trailing zeros
solution = "{:.10f}".format(solution).rstrip("0")

return solution


def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")):
def evaluate_expression(expr, page, netloc, *,
split_re=re.compile(r"[(+]+([^)]*)\)")):
"""Evaluate a single Javascript expression for the challenge"""

if expr.startswith("function(p)"):
# get HTML element with ID k and evaluate the expression inside
# 'eval(eval("document.getElementById(k).innerHTML"))'
k, pos = text.extract(page, "k = '", "'")
e, pos = text.extract(page, 'id="'+k+'"', '<')
return evaluate_expression(e.partition(">")[2], page, netloc)

if "/" in expr:
# split the expression in numerator and denominator subexpressions,
# evaluate them separately,
# and return their fraction-result
num, _, denom = expr.partition("/")
return evaluate_expression(num) / evaluate_expression(denom)
num = evaluate_expression(num, page, netloc)
denom = evaluate_expression(denom, page, netloc)
return num / denom

if "function(p)" in expr:
# split initial expression and function code
initial, _, func = expr.partition("function(p)")
# evaluate said expression
initial = evaluate_expression(initial, page, netloc)
# get function argument and use it as index into 'netloc'
index = evaluate_expression(func[func.index("}")+1:], page, netloc)
return initial + ord(netloc[int(index)])

# iterate over all subexpressions,
# evaluate them,
# and accumulate their values in 'result'
result = ""
for subexpr in split_re.findall(expr):
for subexpr in split_re.findall(expr) or (expr,):
result += str(sum(
VALUES[part]
for part in subexpr.split("[]")
Expand All @@ -133,6 +164,7 @@ def evaluate_expression(expr, split_re=re.compile(r"\(+([^)]*)\)")):
"": 0,
"+": 0,
"!+": 1,
"!!": 1,
"+!!": 1,
}

Expand Down

0 comments on commit 9ebd29f

Please sign in to comment.