Skip to content

Commit

Permalink
Reject IPv4 addresses followed by trailing whitespace and non-whitespace characters (#293)
Browse files Browse the repository at this point in the history
Reject edge cases where inputs like `1.1.1.1 com` or `1.1.1.1\rsomething` are parsed as IPv4 addresses. 

**fullmatch** is about 10% slower than **match**, but this only affects systems that do not support **inet_pton** (i.e. platforms other than Unix and Windows), where the regex check is used instead.
  • Loading branch information
elliotwutingfeng authored Jun 8, 2023
1 parent 366112c commit 570060d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
12 changes: 11 additions & 1 deletion tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,22 @@ def test_ip():
@pytest.mark.skipif(not inet_pton, reason="inet_pton unavailable")
def test_looks_like_ip_with_inet_pton():
    """Check ``looks_like_ip`` when an ``inet_pton`` callable is passed in.

    Skipped when ``inet_pton`` is falsy. Each candidate string is paired
    with the exact boolean the call must return.
    """
    expectations = (
        ("1.1.1.1", True),
        ("a.1.1.1", False),
        # A trailing newline must not be accepted as part of an address.
        ("1.1.1.1\n", False),
        ("256.256.256.256", False),
    )
    for candidate, expected in expectations:
        assert looks_like_ip(candidate, inet_pton) is expected


def test_looks_like_ip_without_inet_pton():
    """Check ``looks_like_ip`` when ``None`` is passed in place of ``inet_pton``.

    Each candidate string is paired with the exact boolean the call must
    return.
    """
    expectations = (
        ("1.1.1.1", True),
        ("a.1.1.1", False),
        # A trailing newline must not be accepted as part of an address.
        ("1.1.1.1\n", False),
        ("256.256.256.256", False),
    )
    for candidate, expected in expectations:
        assert looks_like_ip(candidate, None) is expected


def test_similar_to_ip():
    """Pin the extraction results for inputs that only resemble IPv4 addresses."""
    cases = (
        ("1\xe9", ("", "", "1\xe9", "")),
        # Dotted quads followed by a line break and more text.
        ("1.1.1.1\ncom", ("", "1.1.1", "1\ncom", "")),
        ("1.1.1.1\rcom", ("", "1.1.1", "1\rcom", "")),
    )
    for url, expected in cases:
        assert_extract(url, expected)


def test_punycode():
Expand All @@ -172,7 +178,7 @@ def test_punycode():
"com",
),
)
# This subdomain generates UnicodeError 'incomplete punicode string'
# This subdomain generates UnicodeError 'incomplete punycode string'
assert_extract(
"xn--tub-1m9d15sfkkhsifsbqygyujjrw60.google.com",
(
Expand All @@ -199,6 +205,10 @@ def test_invalid_puny_with_puny():
)


def test_invalid_puny_with_nonpuny():
    """Pin the extraction result for an ``xn--`` label holding non-ASCII text."""
    # The literals below contain non-ASCII (and possibly invisible)
    # characters; they are kept exactly as written.
    url = "xn--ß‌꫶ᢥ.com"
    expected = ("xn--ß‌꫶ᢥ.com", "", "xn--ß‌꫶ᢥ", "com")
    assert_extract(url, expected)


def test_puny_with_non_puny():
assert_extract(
"http://xn--zckzap6140b352by.blog.so-net.教育.hk",
Expand Down
2 changes: 1 addition & 1 deletion tldextract/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ def looks_like_ip(
return True
except OSError:
return False
return IP_RE.match(maybe_ip) is not None
return IP_RE.fullmatch(maybe_ip) is not None
8 changes: 6 additions & 2 deletions tldextract/tldextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
import idna

from .cache import DiskCache, get_cache_dir
from .remote import IP_RE, lenient_netloc, looks_like_ip
from .remote import lenient_netloc, looks_like_ip
from .suffix_list import get_suffix_lists

LOG = logging.getLogger("tldextract")
Expand Down Expand Up @@ -126,7 +126,11 @@ def ipv4(self) -> str:
>>> extract('http://256.1.1.1').ipv4
''
"""
if not (self.suffix or self.subdomain) and IP_RE.match(self.domain):
if (
self.domain
and not (self.suffix or self.subdomain)
and looks_like_ip(self.domain)
):
return self.domain
return ""

Expand Down

0 comments on commit 570060d

Please sign in to comment.