Skip to content

Commit

Permalink
[url] Handle when a page does not specify its encoding
Browse files — browse the repository at this point in the history
  • Loading branch information
embolalia committed Jun 8, 2013
1 parent bfa7c69 commit 4211d63
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions url.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -190,15 +190,15 @@ def find_title(url):
content = web.get(url, headers={'Accept-Charset': 'utf-8'})
headers = web.head(url, headers={'Accept-Charset': 'utf-8'})
content_type = headers.get('Content-Type')
encoding = re.match('.*?charset *= *(\S+)', content_type).group(1)
encoding_match = re.match('.*?charset *= *(\S+)', content_type)
# If they gave us something else instead, try that
if encoding:
if encoding_match:
try:
content = content.decode(encoding)
content = content.decode(encoding_match.group(1))
except:
encoding = None
encoding_match = None
# They didn't tell us what they gave us, so go with UTF-8 or fail silently.
if not encoding:
if not encoding_match:
try:
content = content.decode('utf-8')
except:
Expand Down

0 comments on commit 4211d63

Please sign in to comment.