Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reject IPv4 addresses with trailing whitespaces + non-whitespaces #293

Merged
merged 12 commits into from
Jun 8, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ tldextract.egg-info
tldextract/.suffix_cache/*
.tox
.pytest_cache
.coverage
elliotwutingfeng marked this conversation as resolved.
Show resolved Hide resolved
22 changes: 22 additions & 0 deletions tests/cli_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from tldextract.cli import main
from tldextract.tldextract import PUBLIC_SUFFIX_LIST_URLS


def test_cli_no_input(monkeypatch):
Expand Down Expand Up @@ -33,3 +34,24 @@ def test_cli_posargs(capsys, monkeypatch):
stdout, stderr = capsys.readouterr()
assert not stderr
assert stdout == " example com\n bbc co.uk\nforums bbc co.uk\n"


def test_cli_namedargs(capsys, monkeypatch):
    """Run the CLI with ``--suffix_list_url`` followed by three hostnames.

    ``sys.argv`` is patched so that ``main()`` sees the named option (set to
    the first entry of ``PUBLIC_SUFFIX_LIST_URLS``) and the positional
    hostnames; the test then checks that nothing was written to stderr and
    that stdout matches the expected text exactly.
    """
    hostnames = ["example.com", "bbc.co.uk", "forums.bbc.co.uk"]
    cli_args = [
        "tldextract",
        "--suffix_list_url",
        PUBLIC_SUFFIX_LIST_URLS[0],
        *hostnames,
    ]
    monkeypatch.setattr(sys, "argv", cli_args)

    main()

    captured = capsys.readouterr()
    assert not captured.err
    assert captured.out == " example com\n bbc co.uk\nforums bbc co.uk\n"
12 changes: 11 additions & 1 deletion tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,22 @@ def test_ip():
@pytest.mark.skipif(not inet_pton, reason="inet_pton unavailable")
def test_looks_like_ip_with_inet_pton():
    """Check ``looks_like_ip`` verdicts when ``inet_pton`` is supplied.

    Skipped when ``inet_pton`` is falsy. Each candidate string is paired with
    the exact boolean the call must return.
    """
    cases = (
        ("1.1.1.1", True),
        ("a.1.1.1", False),
        # Trailing newline after an otherwise valid address must be rejected.
        ("1.1.1.1\n", False),
        ("256.256.256.256", False),
    )
    for candidate, expected in cases:
        assert looks_like_ip(candidate, inet_pton) is expected


def test_looks_like_ip_without_inet_pton():
    """Check ``looks_like_ip`` verdicts when ``None`` is passed for ``inet_pton``.

    Each candidate string is paired with the exact boolean the call must
    return.
    """
    cases = (
        ("1.1.1.1", True),
        ("a.1.1.1", False),
        # Trailing newline after an otherwise valid address must be rejected.
        ("1.1.1.1\n", False),
        ("256.256.256.256", False),
    )
    for candidate, expected in cases:
        assert looks_like_ip(candidate, None) is expected


def test_similar_to_ip():
    """Check extraction results for inputs that resemble IPv4 addresses.

    Each input is paired with the expected tuple handed to ``assert_extract``;
    in every case the last element (presumably the suffix; confirm against
    ``assert_extract``) is empty.
    """
    lookalikes = (
        ("1\xe9", ("", "", "1\xe9", "")),
        ("1.1.1.1\ncom", ("", "1.1.1", "1\ncom", "")),
        ("1.1.1.1\rcom", ("", "1.1.1", "1\rcom", "")),
    )
    for url, expected in lookalikes:
        assert_extract(url, expected)


def test_punycode():
Expand All @@ -172,7 +178,7 @@ def test_punycode():
"com",
),
)
# This subdomain generates UnicodeError 'incomplete punicode string'
# This subdomain generates UnicodeError 'incomplete punycode string'
assert_extract(
"xn--tub-1m9d15sfkkhsifsbqygyujjrw60.google.com",
(
Expand All @@ -199,6 +205,10 @@ def test_invalid_puny_with_puny():
)


def test_invalid_puny_with_nonpuny():
    """Check extraction of a host whose ``xn--`` label holds non-ASCII text.

    The expected tuple keeps the input unchanged as its first element, has an
    empty second element, the ``xn--`` label as the third and ``com`` as the
    fourth (presumably fqdn, subdomain, domain, suffix; confirm against
    ``assert_extract``).
    """
    # NOTE: the literals contain invisible/combining Unicode characters;
    # edit with care so they are not normalised away.
    assert_extract("xn--ß‌꫶ᢥ.com", ("xn--ß‌꫶ᢥ.com", "", "xn--ß‌꫶ᢥ", "com"))


def test_puny_with_non_puny():
assert_extract(
"http://xn--zckzap6140b352by.blog.so-net.教育.hk",
Expand Down
5 changes: 1 addition & 4 deletions tldextract/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from __future__ import annotations

import errno
import hashlib
import json
import logging
import os
Expand Down Expand Up @@ -41,9 +40,7 @@ def get_pkg_unique_identifier() -> str:
tldextract_version = "tldextract-" + version
python_env_name = os.path.basename(sys.prefix)
# just to handle the edge case of two identically named python environments
python_binary_path_short_hash = hashlib.md5(sys.prefix.encode("utf-8")).hexdigest()[
:6
]
python_binary_path_short_hash = md5(sys.prefix.encode("utf-8")).hexdigest()[:6]
python_version = ".".join([str(v) for v in sys.version_info[:-1]])
identifier_parts = [
python_version,
Expand Down
2 changes: 1 addition & 1 deletion tldextract/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ def looks_like_ip(
return True
except OSError:
return False
return IP_RE.match(maybe_ip) is not None
return IP_RE.fullmatch(maybe_ip) is not None
8 changes: 6 additions & 2 deletions tldextract/tldextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
import idna

from .cache import DiskCache, get_cache_dir
from .remote import IP_RE, lenient_netloc, looks_like_ip
from .remote import lenient_netloc, looks_like_ip
from .suffix_list import get_suffix_lists

LOG = logging.getLogger("tldextract")
Expand Down Expand Up @@ -126,7 +126,11 @@ def ipv4(self) -> str:
>>> extract('http://256.1.1.1').ipv4
''
"""
if not (self.suffix or self.subdomain) and IP_RE.match(self.domain):
if (
self.domain
and not (self.suffix or self.subdomain)
and looks_like_ip(self.domain)
):
return self.domain
return ""

Expand Down