From 89df8902147815bce0c51f6c4c85fb771def2006 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Odd=20Str=C3=A5b=C3=B8?= <oddstr13@openshell.no> Date: Sat, 5 Jan 2019 00:58:27 +0100 Subject: [PATCH 1/2] [YouPorn] Make title regex more specific. Safe to drop _search_regex? _og_search_title also finds the correct title. --- youtube_dl/extractor/youporn.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index ea0bce784c5..5d1ae215da5 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -68,13 +68,13 @@ def _real_extract(self, url): request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) - title = self._search_regex( - [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'], - webpage, 'title', group='title', - default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'title', webpage, fatal=True) + title = self._search_regex([ + r'[=:]\s*(["\'])video[\._-]titles?\1[^>]*>\s*<\s*h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<', + r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + #r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<', + ], webpage, 'title', group='title', default=None) \ + or self._og_search_title(webpage, default=None) \ + or self._html_search_meta('title', webpage, fatal=True) links = [] From 6754a8cb0398b86da64e49cacf19a6479a22c11e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Odd=20Str=C3=A5b=C3=B8?= <oddstr13@openshell.no> Date: Tue, 8 Jan 2019 09:21:36 +0100 Subject: [PATCH 2/2] Update youporn.py --- youtube_dl/extractor/youporn.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 5d1ae215da5..e7f8e9610b6 100644 --- a/youtube_dl/extractor/youporn.py +++ 
b/youtube_dl/extractor/youporn.py @@ -68,13 +68,7 @@ def _real_extract(self, url): request.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(request, display_id) - title = self._search_regex([ - r'[=:]\s*(["\'])video[\._-]titles?\1[^>]*>\s*<\s*h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<', - r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - #r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<', - ], webpage, 'title', group='title', default=None) \ - or self._og_search_title(webpage, default=None) \ - or self._html_search_meta('title', webpage, fatal=True) + title = self._og_search_title(webpage, default=None) or self._html_search_meta('title', webpage, fatal=True) links = []