Skip to content

Commit

Permalink
[url] Remove unneeded unicode en/decoding
Browse files Browse the repository at this point in the history
embolalia committed May 19, 2013
1 parent e9d9ccc commit 3d3e941
Showing 1 changed file with 0 additions and 26 deletions.
26 changes: 0 additions & 26 deletions url.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,6 @@
from htmlentitydefs import name2codepoint
import willie.web as web
import urllib2
import unicodedata
import urlparse

url_finder = None
@@ -130,8 +129,6 @@ def process_urls(willie, trigger, urls):
for url in urls:
if not url.startswith(exclusion_char):
# Magic stuff to account for international domain names
url = uni_encode(url)
url = uni_decode(url)
url = iri_to_uri(url)
# First, check that the URL we got doesn't match
matched = check_callbacks(willie, trigger, url, False)
@@ -210,7 +207,6 @@ def get_unicode_entity(match):
return unichr(cp)

title = r_entity.sub(get_unicode_entity, title)
title = uni_decode(title)

title = ' '.join(title.split()) # cleanly remove multiple spaces

@@ -236,28 +232,6 @@ def getTLD(url):
# Functions for international domain name magic


def uni_decode(bytes):
    """Decode a byte string to text, guessing among common web encodings.

    The encodings are tried in order: UTF-8, then Windows-1252 (cp1252),
    then ISO-8859-1 as the last resort.

    ISO-8859-1 maps every possible byte value to a code point, so it can
    never raise ``UnicodeDecodeError``; it therefore has to come last,
    otherwise anything listed after it is unreachable. cp1252 is tried
    before it because it assigns printable characters (curly quotes, the
    euro sign, dashes, ...) to most of the 0x80-0x9F range, and only
    rejects the five byte values it leaves undefined.

    :param bytes: the byte string to decode. A value that is already text
        is returned unchanged instead of being decoded a second time.
    :return: the decoded text.
    """
    # NOTE: the parameter name shadows the ``bytes`` builtin; it is kept
    # as-is so that existing keyword callers keep working.
    if isinstance(bytes, type(u'')):
        # Already text: calling .decode() on it would either fail outright
        # or go through an implicit ASCII round-trip.
        return bytes
    for encoding in ('utf-8', 'cp1252'):
        try:
            return bytes.decode(encoding)
        except UnicodeDecodeError:
            continue
    # Cannot fail: ISO-8859-1 defines all 256 byte values.
    return bytes.decode('iso-8859-1')


def uni_encode(bytes):
    """Encode text to a UTF-8 byte string.

    No fallback to ISO-8859-1 or cp1252 is attempted: UTF-8 can represent
    every character those two encodings can, so whenever UTF-8 encoding
    fails they would fail as well.

    :param bytes: the text to encode (despite the parameter name). A value
        that is already a byte string is returned unchanged instead of
        being encoded a second time.
    :return: the UTF-8 encoded byte string.
    :raises UnicodeEncodeError: if the text cannot be encoded as UTF-8.
    """
    # NOTE: the parameter name shadows the ``bytes`` builtin, so the byte
    # string type is obtained from a literal instead.
    if isinstance(bytes, type(b'')):
        return bytes
    return bytes.encode('utf-8')


def urlEncodeNonAscii(b):
    """Percent-escape every character of *b* in the range \\x80-\\xFF.

    Each matching character is replaced by ``%`` followed by its ordinal
    as two lowercase hexadecimal digits; all other characters are left
    untouched.
    """
    def _percent_escape(match):
        # The pattern matches exactly one character at a time.
        return '%%%02x' % ord(match.group(0))

    return re.sub('[\x80-\xFF]', _percent_escape, b)

0 comments on commit 3d3e941

Please sign in to comment.