From f8c7c97b5a1d63269d477503cf5cd3ca88eae7b3 Mon Sep 17 00:00:00 2001 From: Filip Balos Date: Wed, 22 Jun 2016 19:58:15 -0400 Subject: [PATCH] Make greg work with broken enclosure links The approach is taken from http://stackoverflow.com/questions/120951/how-can-i-normalize-a-url-in-python, which refers to http://bugs.python.org/issue918368. Enclosure links with spaces in them were causing greg to fail when attempting to download them, or to store them in the list of seen enclosures. This fix uses 'quote' from 'urllib.parse' to percent-encode characters in the URL that are outside the listed safe set; for example, " " becomes "%20". --- greg/greg.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/greg/greg.py b/greg/greg.py index a63472f..8ab5ce0 100755 --- a/greg/greg.py +++ b/greg/greg.py @@ -27,6 +27,7 @@ from itertools import filterfalse from urllib.request import urlretrieve from urllib.parse import urlparse +from urllib.parse import quote from urllib.error import URLError from lxml import etree as ET @@ -291,8 +292,10 @@ def download_entry(self, entry): downloaded = False ignoreenclosures = self.retrieve_config('ignoreenclosures', 'no') notype = self.retrieve_config('notype', 'no') + # Clean up urls if ignoreenclosures == 'no': for enclosure in entry.enclosures: + enclosure["href"] = quote(enclosure["href"], safe="%/:=&?~#+!$,;'@()*[]") #Clean up url if notype == 'yes': downloadlinks[urlparse(enclosure["href"]).path.split( "/")[-1]] = enclosure["href"] @@ -313,6 +316,7 @@ def download_entry(self, entry): "option in your greg.conf", file=sys.stderr, flush=True) else: + entry.link = quote(entry.link, safe="%/:=&?~#+!$,;'@()*[]") downloadlinks[urlparse(entry.link).query.split( "/")[-1]] = entry.link for podname in downloadlinks: