Skip to content

Commit

Permalink
Migrate ExtractResult from namedtuple to dataclass (#306)
Browse files Browse the repository at this point in the history
  • Loading branch information
john-kurkowski authored Oct 11, 2023
1 parent 4067dea commit e739ff8
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 70 deletions.
29 changes: 6 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
```

`ExtractResult` is a namedtuple, so it's simple to access the parts you want.

```python
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
>>> (ext.subdomain, ext.domain, ext.suffix)
('forums', 'bbc', 'co.uk')
>>> # rejoin subdomain and domain
>>> '.'.join(ext[:2])
'forums.bbc'
>>> # a common alias
>>> ext.registered_domain
'bbc.co.uk'
```

Note subdomain and suffix are _optional_. Not all URL-like inputs have a
subdomain or a valid suffix.

Expand All @@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
```

If you want to rejoin the whole namedtuple, regardless of whether a subdomain
or suffix was found:
To rejoin the original hostname, if it was indeed a valid, registered hostname:

```python
>>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
>>> # this has unwanted dots
>>> '.'.join(ext[:3])
'.127.0.0.1.'
>>> # join each part only if it's truthy
>>> '.'.join(part for part in ext[:3] if part)
'127.0.0.1'
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
>>> ext.registered_domain
'bbc.co.uk'
>>> ext.fqdn
'forums.bbc.co.uk'
```

By default, this package supports the public ICANN TLDs and their exceptions.
Expand Down
5 changes: 3 additions & 2 deletions tests/custom_suffix_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile

import tldextract
from tldextract.tldextract import ExtractResult

FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
Expand All @@ -27,8 +28,8 @@ def test_private_extraction() -> None:
"""Test this library's uncached, offline, private domain extraction."""
tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])

assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False)
assert tld("foo.blogspot.com", include_psl_private_domains=True) == (
assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
"",
"foo",
"blogspot.com",
Expand Down
14 changes: 0 additions & 14 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,20 +412,6 @@ def test_ipv4_lookalike() -> None:
)


def test_result_as_dict() -> None:
    """Test that the result converts to a dict keyed by its field names."""
    # The URL carries userinfo, a port, a path, and a query string; only the
    # hostname parts are expected to show up in the extracted fields.
    result = extract(
        "http://admin:password1@www.google.com:666/secret/admin/interface?param1=42"
    )
    expected_dict = {
        "subdomain": "www",
        "domain": "google",
        "suffix": "com",
        "is_private": False,
    }
    assert result._asdict() == expected_dict


def test_cache_permission(
mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
Expand Down
4 changes: 2 additions & 2 deletions tldextract/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,5 @@ def main() -> None:
sys.exit(1)

for i in args.input:
subdomain, domain, suffix, _ = tld_extract(i)
print(f"{subdomain} {domain} {suffix}")
ext = tld_extract(i)
print(f"{ext.subdomain} {ext.domain} {ext.suffix}")
45 changes: 16 additions & 29 deletions tldextract/tldextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,6 @@
>>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
>>> (ext.subdomain, ext.domain, ext.suffix)
('forums', 'bbc', 'co.uk')
>>> # rejoin subdomain and domain
>>> '.'.join(ext[:2])
'forums.bbc'
>>> # a common alias
>>> ext.registered_domain
'bbc.co.uk'
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
subdomain or a valid suffix.
Expand All @@ -37,16 +25,13 @@
>>> tldextract.extract('http://127.0.0.1:8080/deployed/')
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
If you want to rejoin the whole namedtuple, regardless of whether a subdomain
or suffix were found:
To rejoin the original hostname, if it was indeed a valid, registered hostname:
>>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
>>> # this has unwanted dots
>>> '.'.join(part for part in ext[:3])
'.127.0.0.1.'
>>> # join part only if truthy
>>> '.'.join(part for part in ext[:3] if part)
'127.0.0.1'
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
>>> ext.registered_domain
'bbc.co.uk'
>>> ext.fqdn
'forums.bbc.co.uk'
"""

from __future__ import annotations
Expand All @@ -55,10 +40,8 @@
import os
import urllib.parse
from collections.abc import Collection, Sequence
from dataclasses import dataclass
from functools import wraps
from typing import (
NamedTuple,
)

import idna

Expand All @@ -77,13 +60,17 @@
)


class ExtractResult(NamedTuple):
"""namedtuple of a URL's subdomain, domain, suffix, and flag that indicates if URL has private suffix."""
@dataclass(order=True)
class ExtractResult:
"""A URL's extracted subdomain, domain, and suffix.
Also contains metadata, like a flag that indicates if the URL has a private suffix.
"""

subdomain: str
domain: str
suffix: str
is_private: bool = False
is_private: bool

@property
def registered_domain(self) -> str:
Expand All @@ -110,7 +97,7 @@ def fqdn(self) -> str:
''
"""
if self.suffix and (self.domain or self.is_private):
return ".".join(i for i in self[:3] if i)
return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i)
return ""

@property
Expand Down Expand Up @@ -291,7 +278,7 @@ def _extract_netloc(
and netloc_with_ascii_dots[-1] == "]"
):
if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
return ExtractResult("", netloc_with_ascii_dots, "")
return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)

labels = netloc_with_ascii_dots.split(".")

Expand Down

0 comments on commit e739ff8

Please sign in to comment.