From 8fd7d0257bcd121a86cd587f27524a399322a271 Mon Sep 17 00:00:00 2001 From: Wu Tingfeng Date: Thu, 25 May 2023 09:36:29 +0800 Subject: [PATCH 1/7] Fix IPv4 parsing. Support IPv4 with unicode dots. --- tests/main_test.py | 11 +++++++++++ tldextract/remote.py | 19 ++++++++++--------- tldextract/tldextract.py | 8 ++++---- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 6bfbda27..31e1778c 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -328,6 +328,11 @@ def test_ipv4(): ("", "", "127.0.0.1", ""), expected_ip_data="127.0.0.1", ) + assert_extract( + "http://127\u30020\uff0e0\uff611/foo/bar", + ("", "", "127.0.0.1", ""), + expected_ip_data="127.0.0.1", + ) def test_ipv4_bad(): @@ -339,6 +344,12 @@ def test_ipv4_bad(): def test_ipv4_lookalike(): + assert_extract( + "http://127.0.0/foo/bar", ("", "127.0", "0", ""), expected_ip_data="" + ) + assert_extract( + "http://127.0.0.0x1/foo/bar", ("", "127.0.0", "0x1", ""), expected_ip_data="" + ) assert_extract( "http://127.0.0.1.9/foo/bar", ("", "127.0.0.1", "9", ""), expected_ip_data="" ) diff --git a/tldextract/remote.py b/tldextract/remote.py index 60b86294..b8808a5b 100644 --- a/tldextract/remote.py +++ b/tldextract/remote.py @@ -1,7 +1,10 @@ """tldextract helpers for testing and fetching remote resources.""" +try: + from socket import inet_pton, AF_INET # Availability: Unix, Windows. +except ImportError: + inet_pton = None # type: ignore import re -import socket from urllib.parse import scheme_chars IP_RE = re.compile( @@ -49,13 +52,11 @@ def looks_like_ip(maybe_ip: str) -> bool: if not maybe_ip[0].isdigit(): return False - try: - socket.inet_aton(maybe_ip) - return True - except (AttributeError, UnicodeError): - if IP_RE.match(maybe_ip): + if inet_pton is not None: + try: + inet_pton(AF_INET, maybe_ip) return True - except OSError: - pass + except OSError: + return False - return False + return IP_RE.match(maybe_ip) is not None diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index 41ab2290..6dafcb17 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -251,19 +251,19 @@ def extract_urllib( def _extract_netloc( self, netloc: str, include_psl_private_domains: bool | None ) -> ExtractResult: - labels = ( + netloc_with_ascii_dots = ( netloc.replace("\u3002", "\u002e") .replace("\uff0e", "\u002e") .replace("\uff61", "\u002e") - .split(".") ) + labels = netloc_with_ascii_dots.split(".") suffix_index = self._get_tld_extractor().suffix_index( labels, include_psl_private_domains=include_psl_private_domains ) - if suffix_index == len(labels) and netloc and looks_like_ip(netloc): - return ExtractResult("", netloc, "") + if suffix_index == len(labels) == 4 and looks_like_ip(netloc_with_ascii_dots): + return ExtractResult("", netloc_with_ascii_dots, "") suffix = ".".join(labels[suffix_index:]) if suffix_index != len(labels) else "" subdomain = ".".join(labels[: suffix_index - 1]) if suffix_index >= 2 else "" From 5307d2f5f733c0c7120f3d2fc83011d257bec71c Mon Sep 17 00:00:00 2001 From: John Kurkowski Date: Fri, 26 May 2023 13:17:40 -0700 Subject: [PATCH 2/7] Fix type: ignore --- tldextract/remote.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tldextract/remote.py b/tldextract/remote.py index b8808a5b..1c6cad2a 100644 --- a/tldextract/remote.py +++ b/tldextract/remote.py @@ -1,12 +1,17 @@ """tldextract helpers for testing and fetching remote resources.""" -try: - from socket import inet_pton, AF_INET # Availability: Unix, Windows. -except ImportError: - inet_pton = None # type: ignore +from __future__ import annotations + import re +from collections.abc import Callable from urllib.parse import scheme_chars +inet_pton: Callable[[int, str], bytes] | None +try: + from socket import AF_INET, inet_pton # Availability: Unix, Windows. +except ImportError: + inet_pton = None + IP_RE = re.compile( r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.)" r"{3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" From b402518988237cad42877408979598a40a0c7323 Mon Sep 17 00:00:00 2001 From: Wu Tingfeng Date: Sat, 27 May 2023 05:21:45 +0800 Subject: [PATCH 3/7] Add assert --- tldextract/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tldextract/remote.py b/tldextract/remote.py index 1c6cad2a..1f0136e4 100644 --- a/tldextract/remote.py +++ b/tldextract/remote.py @@ -63,5 +63,5 @@ def looks_like_ip(maybe_ip: str) -> bool: return True except OSError: return False - + assert inet_pton is None return IP_RE.match(maybe_ip) is not None From 3f7c66f135e8e162a09d9e2bce68d3855f0e8e1c Mon Sep 17 00:00:00 2001 From: Wu Tingfeng Date: Sat, 27 May 2023 05:47:45 +0800 Subject: [PATCH 4/7] Test both inet_pton and regex --- tests/main_test.py | 11 +++++++++++ tldextract/remote.py | 9 +++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 31e1778c..df4e5eef 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -6,6 +6,7 @@ import os import tempfile from collections.abc import Sequence +from socket import inet_pton import pytest import responses @@ -13,6 +14,7 @@ import tldextract import tldextract.suffix_list from tldextract.cache import DiskCache +from tldextract.remote import looks_like_ip from tldextract.suffix_list import SuffixListNotFound from tldextract.tldextract import ExtractResult @@ -133,6 +135,15 @@ def test_ip(): def test_looks_like_ip(): + assert inet_pton is not None and looks_like_ip("1.1.1.1", inet_pton) is True + assert looks_like_ip("1.1.1.1", None) is True + assert ( + inet_pton is not None and looks_like_ip("256.256.256.256", inet_pton) is False + ) + assert looks_like_ip("256.256.256.256", None) is False + + +def test_similar_to_ip(): assert_extract("1\xe9", ("", "", "1\xe9", "")) diff --git a/tldextract/remote.py b/tldextract/remote.py index 1f0136e4..ea11b467 100644 --- a/tldextract/remote.py +++ b/tldextract/remote.py @@ -52,16 +52,17 @@ def _schemeless_url(url: str) -> str: return url[double_slashes_start + 2 :] -def looks_like_ip(maybe_ip: str) -> bool: +def looks_like_ip( + maybe_ip: str, pton: Callable[[int, str], bytes] | None = inet_pton +) -> bool: """Check whether the given str looks like an IP address.""" if not maybe_ip[0].isdigit(): return False - if inet_pton is not None: + if pton is not None: try: - inet_pton(AF_INET, maybe_ip) + pton(AF_INET, maybe_ip) return True except OSError: return False - assert inet_pton is None return IP_RE.match(maybe_ip) is not None From 9242a58d5d548e7dbd6e2a0dc1638fcbb47cc9b9 Mon Sep 17 00:00:00 2001 From: Wu Tingfeng Date: Sat, 27 May 2023 05:53:52 +0800 Subject: [PATCH 5/7] Use inet_pton from remote.py --- tests/main_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index df4e5eef..39ab4ee7 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -6,7 +6,6 @@ import os import tempfile from collections.abc import Sequence -from socket import inet_pton import pytest import responses @@ -14,7 +13,7 @@ import tldextract import tldextract.suffix_list from tldextract.cache import DiskCache -from tldextract.remote import looks_like_ip +from tldextract.remote import inet_pton, looks_like_ip from tldextract.suffix_list import SuffixListNotFound from tldextract.tldextract import ExtractResult From 718b88cfd7fe5389727639274db612e93a46bcd1 Mon Sep 17 00:00:00 2001 From: Wu Tingfeng Date: Sat, 27 May 2023 06:13:43 +0800 Subject: [PATCH 6/7] Assert callable --- tests/main_test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 39ab4ee7..553f6473 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -134,11 +134,10 @@ def test_ip(): def test_looks_like_ip(): - assert inet_pton is not None and looks_like_ip("1.1.1.1", inet_pton) is True + assert callable(inet_pton) + assert looks_like_ip("1.1.1.1", inet_pton) is True + assert looks_like_ip("256.256.256.256", inet_pton) is False assert looks_like_ip("1.1.1.1", None) is True - assert ( - inet_pton is not None and looks_like_ip("256.256.256.256", inet_pton) is False - ) assert looks_like_ip("256.256.256.256", None) is False From 4014c84aaffc5f755339268dc560c57daee42fd6 Mon Sep 17 00:00:00 2001 From: John Kurkowski Date: Fri, 26 May 2023 15:39:31 -0700 Subject: [PATCH 7/7] Skip test on platforms it would fail on --- tests/main_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 553f6473..347711c3 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -133,10 +133,13 @@ def test_ip(): ) -def test_looks_like_ip(): - assert callable(inet_pton) +@pytest.mark.skipif(not inet_pton, reason="inet_pton unavailable") +def test_looks_like_ip_with_inet_pton(): assert looks_like_ip("1.1.1.1", inet_pton) is True assert looks_like_ip("256.256.256.256", inet_pton) is False + + +def test_looks_like_ip_without_inet_pton(): assert looks_like_ip("1.1.1.1", None) is True assert looks_like_ip("256.256.256.256", None) is False