Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace www-authenticate #495

Merged
merged 1 commit into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog.d/20231023_064405_michael.hanke_www_auth.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### 🏠 Internal

- The `www-authenticate` dependency is dropped. The functionality is
replaced by a `requests`-based implementation of an alternative parser.
This trims the dependency footprint and facilitates Debian-packaging.
The previous test cases are kept and further extended.
Fixes https://github.com/datalad/datalad-next/issues/493 via
https://github.com/datalad/datalad-next/pull/495 (by @mih)
8 changes: 5 additions & 3 deletions datalad_next/url_operations/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
from typing import Dict
import requests
from requests_toolbelt import user_agent
import www_authenticate

import datalad

from datalad_next.utils.requests_auth import DataladAuth
from datalad_next.utils.requests_auth import (
DataladAuth,
parse_www_authenticate,
)
from . import (
UrlOperations,
UrlOperationsRemoteError,
Expand Down Expand Up @@ -233,7 +235,7 @@ def probe_url(self, url, timeout=10.0, headers=None):
headers=headers,
)
if 'www-authenticate' in req.headers:
props['auth'] = www_authenticate.parse(
props['auth'] = parse_www_authenticate(
req.headers['www-authenticate'])
props['is_redirect'] = True if req.history else False
props['status_code'] = req.status_code
Expand Down
75 changes: 72 additions & 3 deletions datalad_next/utils/requests_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Dict
from urllib.parse import urlparse
import requests
import www_authenticate

from datalad_next.config import ConfigManager
from datalad_next.utils import CredentialManager
Expand All @@ -16,7 +15,77 @@
lgr = logging.getLogger('datalad.ext.next.utils.requests_auth')


__all__ = ['DataladAuth', 'HTTPBearerTokenAuth']
__all__ = ['DataladAuth', 'HTTPBearerTokenAuth', 'parse_www_authenticate']


def parse_www_authenticate(hdr: str) -> dict:
    """Parse HTTP www-authenticate header

    This helper uses ``requests`` utilities to parse the ``www-authenticate``
    header as represented in a ``requests.Response`` instance. The header may
    contain any number of challenge specifications.

    The implementation follows RFC7235, where a challenge parameters set is
    specified as: either a comma-separated list of parameters, or a single
    sequence of characters capable of holding base64-encoded information,
    and parameters are name=value pairs, where the name token is matched
    case-insensitively, and each parameter name MUST only occur once
    per challenge.

    Empty list elements (e.g., a doubled comma) are ignored. If the same
    challenge label occurs more than once, the last occurrence wins.

    Parameters
    ----------
    hdr: str
      Value of the ``www-authenticate`` header.

    Returns
    -------
    dict
      Keys are casefolded challenge labels (e.g., 'basic', 'digest').
      Values are: ``None`` (no parameter), ``str`` (a token68), or
      ``dict`` (name/value mapping of challenge parameters)

    Raises
    ------
    ValueError
      If a ``name=value`` parameter is found that is not preceded by a
      challenge it could belong to (malformed header).
    """
    plh = requests.utils.parse_list_header
    pdh = requests.utils.parse_dict_header
    challenges = {}
    challenge = None
    # challenges as well as their properties are in a single
    # comma-separated list
    for item in plh(hdr):
        # parse the item into a key/value set
        # the value will be `None` if this item was no mapping
        parsed = pdh(item)
        if not parsed:
            # an empty list element yields an empty mapping; HTTP list
            # syntax asks recipients to ignore such elements, hence skip
            # it rather than fail on `popitem()`
            continue
        k, v = parsed.popitem()
        # split the key to check for a challenge spec start
        key_split = k.split(' ', maxsplit=1)
        if len(key_split) > 1 or v is None:
            # a new challenge starts here; whatever follows the label
            # (and its separator) is the first parameter or a token68
            item_suffix = item[len(key_split[0]) + 1:]
            challenge = [item_suffix] if item_suffix else None
            challenges[key_split[0].casefold()] = challenge
        else:
            # a plain name=value pair must belong to a challenge that was
            # started before and that takes parameters. This depends on
            # the header content sent by a server, so report it with a
            # proper exception rather than an `assert`, which would be
            # stripped when running with `-O`
            if challenge is None:
                raise ValueError(
                    f'www-authenticate parameter {item!r} is not preceded '
                    'by a challenge that accepts parameters')
            challenge.append(item)

    return {
        challenge: _convert_www_authenticate_items(items)
        for challenge, items in challenges.items()
    }


def _convert_www_authenticate_items(items: list) -> None | str | dict:
pdh = requests.utils.parse_dict_header
# according to RFC7235, items can be:
# either a comma-separated list of parameters
# or a single sequence of characters capable of holding base64-encoded
# information.
# parameters are name=value pairs, where the name token is matched
# case-insensitively, and each parameter name MUST only occur once
# per challenge.
if items is None:
return None
elif len(items) == 1 and pdh(items[0].rstrip('=')).popitem()[1] is None:
# this items matches the token68 appearance (no name value
# pair after potential base64 padding its removed
return items[0]
else:
return {
k.casefold(): v for i in items for k, v in pdh(i).items()
}


class DataladAuth(requests.auth.AuthBase):
Expand Down Expand Up @@ -201,7 +270,7 @@ def handle_401(self, r, **kwargs):
# www-authenticate with e.g. 403s
return r
# which auth schemes does the server support?
auth_schemes = www_authenticate.parse(r.headers['www-authenticate'])
auth_schemes = parse_www_authenticate(r.headers['www-authenticate'])
ascheme, credname, cred = self._get_credential(r.url, auth_schemes)

if cred is None or 'secret' not in cred:
Expand Down
45 changes: 45 additions & 0 deletions datalad_next/utils/tests/test_parse_www_authenticate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

from ..requests_auth import parse_www_authenticate


# Test vectors: each entry pairs a raw header value with the list of
# (casefolded challenge label, expected parsed value) tuples that must be
# present in the parser output.
challenges = (
    # a challenge label without anything else
    ('Negotiate',
     [('negotiate', None)]),
    # a challenge label plus a bare token; any amount of base64 padding
    # must be tolerated and preserved
    ('Negotiate abcdef',
     [('negotiate', 'abcdef')]),
    ('Negotiate abcdef=',
     [('negotiate', 'abcdef=')]),
    ('Negotiate abcdef==',
     [('negotiate', 'abcdef==')]),
    # plain bearer challenge with an unquoted parameter
    ('Bearer realm=example.com',
     [('bearer', {'realm': 'example.com'})]),
    # plain digest challenge; a comma inside a quoted value must not split
    ('Digest realm="example.com", qop="auth,auth-int", nonce="abcdef", '
     'opaque="ghijkl"',
     [('digest', {'realm': 'example.com', 'qop': 'auth,auth-int',
                  'nonce': 'abcdef', 'opaque': 'ghijkl'})]),
    # several challenges in one header; parameter names are casefolded,
    # values keep their case
    # NOTE(review): the digest realm looks like a scrubbed placeholder;
    # header and expectation use the same literal, so the comparison
    # is unaffected
    ('Basic speCial="paf ram", realm="basIC", '
     'Bearer, '
     'Digest realm="[email protected]", qop="auth, auth-int", '
     'algorithm=MD5',
     [('basic', {'special': 'paf ram', 'realm': 'basIC'}),
      ('bearer', None),
      ('digest', {'realm': "[email protected]", 'qop': "auth, auth-int",
                  'algorithm': 'MD5'})]),
    # one challenge label given more than once: the last one wins
    ('Basic realm="basIC", '
     'Basic realm="complex"',
     [('basic', {'realm': 'complex'})]),
)


def test_parse_www_authenticate():
    """Check the parser output against every entry in ``challenges``

    For each header, every expected challenge label must be present in the
    parsed result and map to the expected value.
    """
    for header, expected in challenges:
        parsed = parse_www_authenticate(header)
        for label, value in expected:
            assert label in parsed
            assert parsed[label] == value
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ python_requires = >= 3.8
install_requires =
annexremote
datalad >= 0.18.4
www-authenticate
humanize
packages = find_namespace:
include_package_data = True
Expand Down
Loading