-
-
Notifications
You must be signed in to change notification settings - Fork 861
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Differentiate between 'url.host' and 'url.raw_host' (#1590)
* Differentiate between 'url.host' and 'url.raw_host'
- Loading branch information
1 parent
d98e9e7
commit 39d8ee6
Showing
2 changed files
with
114 additions
and
40 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from http.cookiejar import Cookie, CookieJar | ||
from urllib.parse import parse_qsl, quote, unquote, urlencode | ||
|
||
import idna | ||
import rfc3986 | ||
import rfc3986.exceptions | ||
|
||
|
@@ -60,33 +61,45 @@ | |
|
||
class URL: | ||
""" | ||
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink") | ||
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink") | ||
assert url.scheme == "https" | ||
assert url.username == "jo@email.com" | ||
assert url.password == "a secret" | ||
assert url.userinfo == b"jo%40email.com:a%20secret" | ||
assert url.host == "example.com" | ||
assert url.host == "müller.de" | ||
assert url.raw_host == b"xn--mller-kva.de" | ||
assert url.port == 1234 | ||
assert url.netloc == "example.com:1234" | ||
assert url.netloc == b"xn--mller-kva.de:1234" | ||
assert url.path == "/pa th" | ||
assert url.query == b"?search=ab" | ||
assert url.raw_path == b"/pa%20th?search=ab" | ||
assert url.fragment == "anchorlink" | ||
The components of a URL are broken down like this: | ||
  https://jo%40email.com:a%20secret@example.com:1234/pa%20th?search=ab#anchorlink | ||
[scheme]  [  username  ] [password] [  host  ][port][ path ] [ query ] [fragment] | ||
          [       userinfo        ] [    netloc    ][    raw_path    ] | ||
  https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink | ||
[scheme]  [  username  ] [password] [ host ][port][ path ] [ query ] [fragment] | ||
          [       userinfo        ] [   netloc   ][    raw_path    ] | ||
Note that: | ||
* `url.scheme` is normalized to always be lowercased. | ||
* `url.host` is normalized to always be lowercased, and is IDNA encoded. For instance: | ||
url = httpx.URL("http://中国.icom.museum") | ||
assert url.host == "xn--fiqs8s.icom.museum" | ||
* `url.host` is normalized to always be lowercased. Internationalized domain | ||
names are represented in unicode, without IDNA encoding applied. For instance: | ||
url = httpx.URL("http://中国.icom.museum") | ||
assert url.host == "中国.icom.museum" | ||
url = httpx.URL("http://xn--fiqs8s.icom.museum") | ||
assert url.host == "中国.icom.museum" | ||
* `url.raw_host` is normalized to always be lowercased, and is IDNA encoded. For instance: | ||
url = httpx.URL("http://中国.icom.museum") | ||
assert url.raw_host == b"xn--fiqs8s.icom.museum" | ||
url = httpx.URL("http://xn--fiqs8s.icom.museum") | ||
assert url.raw_host == b"xn--fiqs8s.icom.museum" | ||
* `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work with | ||
`url.username` and `url.password` instead, which handle the URL escaping. | ||
|
@@ -150,6 +163,14 @@ def scheme(self) -> str: | |
""" | ||
return self._uri_reference.scheme or "" | ||
|
||
@property | ||
def raw_scheme(self) -> bytes: | ||
""" | ||
The raw bytes representation of the URL scheme, such as b"http", b"https". | ||
Always normalised to lowercase. | ||
""" | ||
return self.scheme.encode("ascii") | ||
|
||
@property | ||
def userinfo(self) -> bytes: | ||
""" | ||
|
@@ -181,26 +202,60 @@ def password(self) -> str: | |
def host(self) -> str: | ||
""" | ||
The URL host as a string. | ||
Always normlized to lowercase, and IDNA encoded. | ||
Always normalized to lowercase, with IDNA hosts decoded into unicode. | ||
Examples: | ||
url = httpx.URL("http://www.EXAMPLE.org") | ||
assert url.host == "www.example.org" | ||
url = httpx.URL("http://中国.icom.museum") | ||
assert url.host == "xn--fiqs8s.icom.museum" | ||
assert url.host == "中国.icom.museum" | ||
url = httpx.URL("http://xn--fiqs8s.icom.museum") | ||
assert url.host == "中国.icom.museum" | ||
url = httpx.URL("https://[::ffff:192.168.0.1]") | ||
assert url.host == "::ffff:192.168.0.1" | ||
""" | ||
host: str = self._uri_reference.host | ||
host: str = self._uri_reference.host or "" | ||
|
||
if host and ":" in host and host[0] == "[": | ||
# it's an IPv6 address | ||
host = host.lstrip("[").rstrip("]") | ||
|
||
if host.startswith("xn--"): | ||
host = idna.decode(host) | ||
|
||
return host | ||
|
||
@property | ||
def raw_host(self) -> bytes: | ||
""" | ||
The raw bytes representation of the URL host. | ||
Always normalized to lowercase, and IDNA encoded. | ||
Examples: | ||
url = httpx.URL("http://www.EXAMPLE.org") | ||
assert url.raw_host == b"www.example.org" | ||
url = httpx.URL("http://中国.icom.museum") | ||
assert url.raw_host == b"xn--fiqs8s.icom.museum" | ||
url = httpx.URL("http://xn--fiqs8s.icom.museum") | ||
assert url.raw_host == b"xn--fiqs8s.icom.museum" | ||
url = httpx.URL("https://[::ffff:192.168.0.1]") | ||
assert url.raw_host == b"::ffff:192.168.0.1" | ||
""" | ||
host: str = self._uri_reference.host or "" | ||
|
||
if host and ":" in host and host[0] == "[": | ||
# it's an IPv6 address | ||
host = host.lstrip("[").rstrip("]") | ||
|
||
return host or "" | ||
return host.encode("ascii") | ||
|
||
@property | ||
def port(self) -> typing.Optional[int]: | ||
|
@@ -211,14 +266,17 @@ def port(self) -> typing.Optional[int]: | |
return int(port) if port else None | ||
|
||
@property | ||
def netloc(self) -> str: | ||
def netloc(self) -> bytes: | ||
""" | ||
Either `<host>` or `<host>:<port>` as a string. | ||
Always normlized to lowercase, and IDNA encoded. | ||
Either `<host>` or `<host>:<port>` as bytes. | ||
Always normalized to lowercase, and IDNA encoded. | ||
""" | ||
host = self._uri_reference.host or "" | ||
port = self._uri_reference.port | ||
return host if port is None else f"{host}:{port}" | ||
netloc = host.encode("ascii") | ||
if port: | ||
netloc = netloc + b":" + str(port).encode("ascii") | ||
return netloc | ||
|
||
@property | ||
def path(self) -> str: | ||
|
@@ -277,8 +335,8 @@ def raw(self) -> RawURL: | |
Provides the (scheme, host, port, target) for the outgoing request. | ||
""" | ||
return ( | ||
self.scheme.encode("ascii"), | ||
self.host.encode("ascii"), | ||
self.raw_scheme, | ||
self.raw_host, | ||
self.port, | ||
self.raw_path, | ||
) | ||
|
@@ -293,7 +351,7 @@ def is_absolute_url(self) -> bool: | |
# URLs with a fragment portion as not absolute. | ||
# What we actually care about is if the URL provides | ||
# a scheme and hostname to which connections should be made. | ||
return bool(self.scheme and self.host) | ||
return bool(self._uri_reference.scheme and self._uri_reference.host) | ||
|
||
@property | ||
def is_relative_url(self) -> bool: | ||
|
@@ -321,7 +379,7 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": | |
"userinfo": bytes, | ||
"host": str, | ||
"port": int, | ||
"netloc": str, | ||
"netloc": bytes, | ||
"path": str, | ||
"query": bytes, | ||
"raw_path": bytes, | ||
|
@@ -354,12 +412,16 @@ def copy_with(self, **kwargs: typing.Any) -> "URL": | |
# it's an IPv6 address, so it should be hidden under bracket | ||
host = f"[{host}]" | ||
|
||
kwargs["netloc"] = f"{host}:{port}" if port is not None else host | ||
kwargs["netloc"] = ( | ||
f"{host}:{port}".encode("ascii") | ||
if port is not None | ||
else host.encode("ascii") | ||
) | ||
|
||
if "userinfo" in kwargs or "netloc" in kwargs: | ||
# Consolidate userinfo and netloc into authority. | ||
userinfo = (kwargs.pop("userinfo", self.userinfo) or b"").decode("ascii") | ||
netloc = kwargs.pop("netloc", self.netloc) or "" | ||
netloc = (kwargs.pop("netloc", self.netloc) or b"").decode("ascii") | ||
authority = f"{userinfo}@{netloc}" if userinfo else netloc | ||
kwargs["authority"] = authority | ||
|
||
|
@@ -848,11 +910,10 @@ def _prepare(self, default_headers: typing.Dict[str, str]) -> None: | |
) | ||
|
||
if not has_host and self.url.host: | ||
default_port = {"http": 80, "https": 443}.get(self.url.scheme) | ||
if self.url.port is None or self.url.port == default_port: | ||
host_header = self.url.host.encode("ascii") | ||
else: | ||
host_header = self.url.netloc.encode("ascii") | ||
default_port = {"http": b":80", "https": b":443"}.get(self.url.scheme, b"") | ||
host_header = self.url.netloc | ||
if host_header.endswith(default_port): | ||
host_header = host_header[: -len(default_port)] | ||
auto_headers.append((b"Host", host_header)) | ||
if not has_content_length and self.method in ("POST", "PUT", "PATCH"): | ||
auto_headers.append((b"Content-Length", b"0")) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters