From 2d4bde5e73e04e1af086a90c06e835f5be8ccdce Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Thu, 19 Jan 2017 15:06:26 +0100
Subject: [PATCH] Test whether non-UTF-8 encoded paths in URLs are left intact

---
 tests/test_surt.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_surt.py b/tests/test_surt.py
index cd694b6..b16f035 100644
--- a/tests/test_surt.py
+++ b/tests/test_surt.py
@@ -320,6 +320,12 @@ def test_surt():
     assert surt.surt("warcinfo:foo.warc.gz", with_scheme=True) == 'warcinfo:foo.warc.gz'
     assert surt.surt("warcinfo:foo.warc.gz", with_scheme=True, trailing_comma=True) == 'warcinfo:foo.warc.gz'
 
+def test_unescape_encoding():
+    # percent-encoded UTF-8 bytes (%C3%BC is U+00FC, u-umlaut): escapes are kept, only lowercased
+    assert surt.surt("http://example.com/city-of-M%C3%BCnchen.html") == 'com,example)/city-of-m%c3%bcnchen.html'
+    # percent-encoded Latin-1 byte (%FC, pre-RFC 3986 usage): not valid UTF-8, yet kept intact, only lowercased
+    assert surt.surt("http://example.com/city-of-M%FCnchen.html") == 'com,example)/city-of-m%fcnchen.html'
+
 def test_options():
     assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y')).getURLString() == 'http://example.com/foo?x=y'
     assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y'), query_lowercase=False).getURLString() == 'http://example.com/foo?X=Y'