Skip to content

Commit

Permalink
Merge branch 'no-reverse-ip-addresses'
Browse files Browse the repository at this point in the history
(includes PR #13 from ldko).
  • Loading branch information
kngenie committed May 28, 2017
2 parents 7746f59 + a2d5bd3 commit 7aaf758
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 9 deletions.
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ Usage:
>>> from surt import surt
>>> surt("http://archive.org/goo/?a=2&b&a=1")
'org,archive)/goo?a=1&a=2&b'
>>> surt("http://archive.org/goo/?a=2&b&a=1", trailing_comma=True)
'org,archive,)/goo?a=1&a=2&b'
>>> surt("http://123.456.78.910/goo/?a=2&b&a=1", reverse_ipaddr=False)
'123.456.78.910)/goo?a=1&a=2&b'

Installation:

Expand Down
9 changes: 6 additions & 3 deletions surt/URLRegexTransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,12 @@ def stripQuerySessionID(query):

# hostToSURT
#_______________________________________________________________________________
def hostToSURT(host):
# TODO: ensure we DONT reverse IP addresses!
# Matches a host made of exactly four dot-separated groups of 1-3 ASCII
# digits. Group values are not range-checked, so e.g. "990.991.992.993"
# also counts as an IP address. [0-9] is used instead of \d so that
# non-ASCII Unicode digits do not match, and \Z instead of $ so that a
# trailing newline does not match.
_RE_IP_ADDRESS = re.compile(r"(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})\Z")

def hostToSURT(host, reverse_ipaddr=True):
    """Return `host` with its dot-separated labels reversed and comma-joined.

    For example ``"www.archive.org"`` becomes ``"org,archive,www"``.

    :param host: host name (or dotted-quad address) as a string.
    :param reverse_ipaddr: when False, a host that looks like a dotted-quad
        IP address (see ``_RE_IP_ADDRESS``) is returned unchanged instead
        of being reversed. Defaults to True, i.e. everything is reversed.
    :returns: the transformed host string.
    """
    # match() anchors at the start and the pattern's \Z anchors at the end,
    # so the whole host must look like an IP address to be left as-is.
    if not reverse_ipaddr and _RE_IP_ADDRESS.match(host):
        return host

    return ','.join(reversed(host.split('.')))

4 changes: 2 additions & 2 deletions surt/handyurl.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def getURLString(self,
surt=False,
public_suffix=False,
trailing_comma=False,
reverse_ipaddr=True,
**options):

s = self.scheme + ':'
Expand All @@ -168,7 +169,7 @@ def getURLString(self,
if public_suffix:
hostSrc = self.getPublicSuffix()
if surt:
hostSrc = hostToSURT(hostSrc)
hostSrc = hostToSURT(hostSrc, reverse_ipaddr)

if hostSrc:
if self.scheme != 'dns':
Expand Down Expand Up @@ -231,4 +232,3 @@ def getPublicPrefix(self):
# "__repr__ returning unicode doesn't work when called implicitly"
#def __repr__(self):
# return u"""handyurl(scheme=%s, authUser=%s, authPass=%s, host=%s, port=%s, path=%s, query=%s, hash=%s)""".encode('utf-8') % (self.scheme, self.authUser, self.authPass, self.host, self.port, self.path, self.query, self.hash)

32 changes: 28 additions & 4 deletions tests/test_surt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,20 @@
import surt
from surt import handyurl

import pytest

def test_handyurl_parse():
# These tests come from URLParserTest.java
assert handyurl.parse("http://www.archive.org/index.html#foo").geturl() == 'http://www.archive.org/index.html#foo'
assert handyurl.parse("http://www.archive.org/").geturl() == 'http://www.archive.org/'
assert handyurl.parse("http://www.archive.org").geturl() == 'http://www.archive.org'
assert handyurl.parse("http://www.archive.org?").geturl() == 'http://www.archive.org?'
assert handyurl.parse("http://www.archive.org?").geturl() == 'http://www.archive.org?'
assert handyurl.parse("http://www.archive.org:8080/index.html?query#foo").geturl() == 'http://www.archive.org:8080/index.html?query#foo'
assert handyurl.parse("http://www.archive.org:8080/index.html?#foo").geturl() == 'http://www.archive.org:8080/index.html#foo'
assert handyurl.parse("http://www.archive.org:8080?#foo").geturl() == 'http://www.archive.org:8080/#foo'
assert handyurl.parse(u"http://bücher.ch:8080?#foo").geturl() == u'http://bücher.ch:8080/#foo'
assert handyurl.parse(u"dns:bücher.ch").geturl() == u'dns:bücher.ch'
# XXX assert print(handyurl.parse(u"http://bücher.ch:8080?#foo").geturl()) == http://b\xfccher.ch:8080/#foo
# XXX assert print(handyurl.parse(u"http://bücher.ch:8080?#foo").geturl()) == http://b\xfccher.ch:8080/#foo
# XXX assert print(handyurl.parse(u"dns:bücher.ch").geturl()) == dns:b\xfccher.ch
assert handyurl.parse(u"http://bücher.ch:8080?#foo").geturl() == u"http://b\xfccher.ch:8080/#foo"
assert handyurl.parse(u"dns:bücher.ch").geturl() == u"dns:b\xfccher.ch"
Expand Down Expand Up @@ -254,8 +256,21 @@ def test_stripQuerySessionID():
url = "?CFID=4308017&CFTOKEN=63914124&requestID=200608200458360%2E39414378"
assert surt.URLRegexTransformer.stripQuerySessionID(url) == '?requestID=200608200458360%2E39414378'

def test_hostToSURT():
assert surt.URLRegexTransformer.hostToSURT("www.archive.org") == 'org,archive,www'
# Each case pairs an input host with a two-element list of expected results:
#   host_out[0] - result with the default / reverse_ipaddr=True
#   host_out[1] - result with reverse_ipaddr=False
@pytest.mark.parametrize("host_in,host_out", [
("www.archive.org", ["org,archive,www", "org,archive,www"]),
("123.123.net", ["net,123,123", "net,123,123"]),
("100.100.100.100.org", ["org,100,100,100,100", "org,100,100,100,100"]),
("123.45.167.89", ["89,167,45,123", "123.45.167.89"]),
# a group with more than three digits is not treated as an IP address
("10.162.1024.3", ["3,1024,162,10", "3,1024,162,10"]),
# NOTE: currently any four period-delimited groups of 1-3 digits are
# interpreted as an IP address, even when a group is greater than 255
("990.991.992.993", ["993,992,991,990", "990.991.992.993"])
])
def test_hostToSURT(host_in, host_out):
# Check hostToSURT against the expectations for each reverse_ipaddr setting.
h = surt.URLRegexTransformer.hostToSURT

# omitting reverse_ipaddr must behave the same as passing True
assert h(host_in) == host_out[0]
assert h(host_in, reverse_ipaddr=True) == host_out[0]
assert h(host_in, reverse_ipaddr=False) == host_out[1]

def test_surt():
# These tests are from WaybackURLKeyMakerTest.java
Expand Down Expand Up @@ -320,6 +335,15 @@ def test_surt():
assert surt.surt("warcinfo:foo.warc.gz", with_scheme=True) == 'warcinfo:foo.warc.gz'
assert surt.surt("warcinfo:foo.warc.gz", with_scheme=True, trailing_comma=True) == 'warcinfo:foo.warc.gz'

# Each case is (input URL, keyword options passed to surt.surt, expected output).
@pytest.mark.parametrize("url,opts,out", [
# a non-IP host is reversed even with reverse_ipaddr=False
("http://www.example.com/", dict(reverse_ipaddr=False), "com,example)/"),
# an IP address host is reversed by default and with reverse_ipaddr=True
("http://192.168.1.254/info/", {}, "254,1,168,192)/info"),
("http://192.168.1.254/info/", dict(reverse_ipaddr=True), "254,1,168,192)/info"),
# with reverse_ipaddr=False the IP address host is kept in its original order
("http://192.168.1.254/info/", dict(reverse_ipaddr=False), "192.168.1.254)/info")
])
def test_surt_ipaddress(url, opts, out):
# Check that surt.surt honours the reverse_ipaddr option for each case.
assert surt.surt(url, **opts) == out

def test_options():
assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y')).getURLString() == 'http://example.com/foo?x=y'
assert surt.IAURLCanonicalizer.canonicalize(handyurl.parse('http://example.com/foo?X=Y'), query_lowercase=False).getURLString() == 'http://example.com/foo?X=Y'
Expand Down

0 comments on commit 7aaf758

Please sign in to comment.