From 4211d63b49b3779e41e2841308cbcedc4a3bfed7 Mon Sep 17 00:00:00 2001 From: Edward Powell Date: Sat, 8 Jun 2013 17:33:30 -0400 Subject: [PATCH] [url] Handle when a page does not specify its encoding --- url.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/url.py b/url.py index 1407f49c40..388450a1bb 100644 --- a/url.py +++ b/url.py @@ -190,15 +190,15 @@ def find_title(url): content = web.get(url, headers={'Accept-Charset': 'utf-8'}) headers = web.head(url, headers={'Accept-Charset': 'utf-8'}) content_type = headers.get('Content-Type') - encoding = re.match('.*?charset *= *(\S+)', content_type).group(1) + encoding_match = re.match('.*?charset *= *(\S+)', content_type) # If they gave us something else instead, try that - if encoding: + if encoding_match: try: - content = content.decode(encoding) + content = content.decode(encoding_match.group(1)) except: - encoding = None + encoding_match = None # They didn't tell us what they gave us, so go with UTF-8 or fail silently. - if not encoding: + if not encoding_match: try: content = content.decode('utf-8') except: