From 2683d23dc4a635ba599df68a960c29b8e210dc11 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Thu, 19 Jan 2017 15:06:26 +0100 Subject: [PATCH 1/2] Test whether non-UTF-8 encoded paths in URLs are left intact --- tests/test_surt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_surt.py b/tests/test_surt.py index be552be..6fa8517 100644 --- a/tests/test_surt.py +++ b/tests/test_surt.py @@ -376,6 +376,12 @@ def test_surt_return_type(burl): u = surt.surt(uurl) assert type(u) is type(uurl) +def test_unescape_encoding(): + # percent encoding, utf-8 encoded characters + assert surt.surt("http://example.com/city-of-M%C3%BCnchen.html") == 'com,example)/city-of-m%c3%bcnchen.html' + # percent encoding, latin-1 encoded characters (pre-RFC3986) + assert surt.surt("http://example.com/city-of-M%FCnchen.html") == 'com,example)/city-of-m%fcnchen.html' + def test_options(): assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y')).getURLString() == 'http://example.com/foo?x=y' assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y'), query_lowercase=False).getURLString() == 'http://example.com/foo?X=Y' From 9a6717a64e374c4690cf6e28ae7d3504b3bda6cc Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Sun, 27 Aug 2023 14:10:34 -0400 Subject: [PATCH 2/2] Add a couple more tests from the Java implementation --- tests/test_surt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_surt.py b/tests/test_surt.py index 6fa8517..d2dc3e5 100644 --- a/tests/test_surt.py +++ b/tests/test_surt.py @@ -376,14 +376,20 @@ def test_surt_return_type(burl): u = surt.surt(uurl) assert type(u) is type(uurl) + def test_unescape_encoding(): # percent encoding, utf-8 encoded characters assert surt.surt("http://example.com/city-of-M%C3%BCnchen.html") == 'com,example)/city-of-m%c3%bcnchen.html' # percent encoding, latin-1 encoded characters (pre-RFC3986) assert surt.surt("http://example.com/city-of-M%FCnchen.html") == 'com,example)/city-of-m%fcnchen.html' + assert surt.surt("http://www.aluroba.com/tags/%C3%CE%CA%C7%D1%E5%C7.htm") == 'com,aluroba)/tags/%c3%ce%ca%c7%d1%e5%c7.htm' + assert surt.surt("https://1kr.ua/newslist.html?tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5") == 'ua,1kr)/newslist.html?tag=%e4%ee%f8%ea%ee%eb%fc%ed%ee%e5' + def test_options(): assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y')).getURLString() == 'http://example.com/foo?x=y' assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y'), query_lowercase=False).getURLString() == 'http://example.com/foo?X=Y' assert surt.DefaultIAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y')).getURLString() == 'http://example.com/foo?x=y' assert surt.DefaultIAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y'), query_lowercase=False).getURLString() == 'http://example.com/foo?X=Y' + +