Skip to content

Commit

Permalink
[CVE-2024-11168] ensure that bracketed hosts found by urlsplit are of …
Browse files Browse the repository at this point in the history
…IPv6 or IPvFuture format

Fix urlparse incorrectly retrieving IPv4 and regular-name hosts from inside brackets.

Reproducer is

    python3 -c \
    'from urllib.parse import urlparse; print(urlparse("https://user:some]password[@host.com"))'

This command should fail with the error "ValueError: '@host.com'
does not appear to be an IPv4 or IPv6 address". If it doesn’t and produces

    ParseResult(scheme='https', netloc='user:some]password[@host.com',
    path='', params='', query='', fragment='')

it is this bug.

Fixes: bsc#1233307 (CVE-2024-11168)
Fixes: gh#python#103848
Co-authored-by: JohnJamesUtley <[email protected]>
From-PR: gh#python/cpython!103849
Patch: CVE-2024-11168-validation-IPv6-addrs.patch
  • Loading branch information
mcepl and JamesJohnUtley committed Dec 2, 2024
1 parent c9571a5 commit 4f2496b
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 2 deletions.
26 changes: 25 additions & 1 deletion Lib/ipaddress.py
Original file line number Diff line number Diff line change
Expand Up @@ -1886,12 +1886,32 @@ def max_prefixlen(self):
def version(self):
return self._version

@staticmethod
def _split_scope_id(ip_str):
"""Helper function to parse IPv6 string address with scope id.
See RFC 4007 for details.
Args:
ip_str: A string, the IPv6 address.
Returns:
(addr, scope_id) tuple.
"""
addr, sep, scope_id = ip_str.partition('%')
if not sep:
scope_id = None
elif not scope_id or '%' in scope_id:
raise AddressValueError('Invalid IPv6 address: "%r"' % ip_str)
return addr, scope_id


class IPv6Address(_BaseV6, _BaseAddress):

"""Represent and manipulate single IPv6 Addresses."""

__slots__ = ('_ip', '__weakref__')
__slots__ = ('_ip', '_scope_id', '__weakref__')

def __init__(self, address):
"""Instantiate a new IPv6 address object.
Expand All @@ -1914,19 +1934,23 @@ def __init__(self, address):
if isinstance(address, int):
self._check_int_address(address)
self._ip = address
self._scope_id = None
return

# Constructing from a packed address
if isinstance(address, bytes):
self._check_packed_address(address, 16)
self._ip = int.from_bytes(address, 'big')
self._scope_id = None
return

# Assume input argument to be string or any object representation
# which converts into a formatted IP string.
addr_str = str(address)
if '/' in addr_str:
raise AddressValueError("Unexpected '/' in %r" % address)
addr_str, self._scope_id = self._split_scope_id(addr_str)

self._ip = self._ip_int_from_string(addr_str)

@property
Expand Down
26 changes: 26 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,6 +1035,32 @@ def test_issue14072(self):
self.assertEqual(p2.scheme, 'tel')
self.assertEqual(p2.path, '+31641044153')

def test_invalid_bracketed_hosts(self):
    # Each URL below carries a bracketed host that urlsplit is expected
    # to reject with ValueError.
    rejected_urls = (
        # IPv4 address inside brackets
        'Scheme://user@[192.0.2.146]/Path?Query',
        # regular host name (with port) inside brackets
        'Scheme://user@[important.com:8000]/Path?Query',
        # malformed "v..." (IPvFuture-style) literals
        'Scheme://user@[v123r.IP]/Path?Query',
        'Scheme://user@[v12ae]/Path?Query',
        'Scheme://user@[v.IP]/Path?Query',
        'Scheme://user@[v123.]/Path?Query',
        'Scheme://user@[v]/Path?Query',
        # malformed IPv6 literals
        'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query',
        'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query',
        # brackets in the wrong order
        'Scheme://user@]v6a.ip[/Path',
    )
    for bad_url in rejected_urls:
        with self.assertRaises(ValueError):
            urllib.parse.urlsplit(bad_url)

def test_splitting_bracketed_hosts(self):
    # (url, expected hostname) pairs; every URL shares the same
    # username ('user') and path ('/path').
    accepted = (
        ('scheme://user@[v6a.ip]/path?query',
         'v6a.ip'),
        ('scheme://user@[0439:23af:2309::fae7%test]/path?query',
         '0439:23af:2309::fae7%test'),
        ('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query',
         '0439:23af:2309::fae7:1234:192.0.2.146%test'),
    )
    for url, expected_hostname in accepted:
        parts = urllib.parse.urlsplit(url)
        self.assertEqual(parts.hostname, expected_hostname)
        self.assertEqual(parts.username, 'user')
        self.assertEqual(parts.path, '/path')

def test_telurl_params(self):
p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
self.assertEqual(p1.scheme, 'tel')
Expand Down
16 changes: 15 additions & 1 deletion Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import re
import sys
import collections
import ipaddress

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
Expand Down Expand Up @@ -417,6 +418,17 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")

# Valid bracketed hosts are defined in
# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
def _check_bracketed_host(hostname):
    """Validate the text found between '[' and ']' in a netloc.

    A hostname starting with 'v' must match the pattern
    v<hex digits>.<one or more characters>; any other hostname must be
    accepted by ipaddress.ip_address() and must not be an IPv4 address.

    Returns None when the hostname is acceptable.

    Raises:
        ValueError: if the hostname fails the checks above (including
            the ValueError raised by ipaddress.ip_address() itself).
    """
    if not hostname.startswith('v'):
        # ip_address() raises ValueError if not IPv6 or IPv4.
        parsed = ipaddress.ip_address(hostname)
        if isinstance(parsed, ipaddress.IPv4Address):
            raise ValueError("An IPv4 address cannot be in brackets")
        return

    if re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname) is None:
        raise ValueError("IPvFuture address is invalid")

def _remove_unsafe_bytes_from_url(url):
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
Expand Down Expand Up @@ -467,12 +479,14 @@ def urlsplit(url, scheme='', allow_fragments=True):
if not rest or any(c not in '0123456789' for c in rest):
# not a port number
scheme, url = url[:i].lower(), rest

if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if '[' in netloc and ']' in netloc:
bracketed_host = netloc.partition('[')[2].partition(']')[0]
_check_bracketed_host(bracketed_host)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add checks to ensure that ``[`` bracketed ``]`` hosts found by
:func:`urllib.parse.urlsplit` are of IPv6 or IPvFuture format.

0 comments on commit 4f2496b

Please sign in to comment.