From c7d0ab0678d4b6257326a982bf3a0222fcfdd09d Mon Sep 17 00:00:00 2001 From: GDR! Date: Fri, 19 Jul 2019 16:49:11 +0200 Subject: [PATCH 1/3] New regex for extracting series title Closes #21833 --- youtube_dl/extractor/tvn24.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 6590e1fd018..82faee1875d 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -39,7 +39,10 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = self._html_search_regex( + r']+class="standardHeader1[^"]*"[^>]*>\s*(.+?)\s*', + webpage, 'title', default=None) + def extract_json(attr, name, fatal=True): return self._parse_json( From ae19aa945d092694625d95787e763ed9d0fb8848 Mon Sep 17 00:00:00 2001 From: Andrzej Godziuk Date: Mon, 22 Jul 2019 19:12:59 +0200 Subject: [PATCH 2/3] tvn24.py extractor: clean up --- youtube_dl/extractor/tvn24.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 82faee1875d..24dd202cdec 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -41,8 +41,7 @@ def _real_extract(self, url): title = self._html_search_regex( r']+class="standardHeader1[^"]*"[^>]*>\s*(.+?)\s*', - webpage, 'title', default=None) - + webpage, 'title') def extract_json(attr, name, fatal=True): return self._parse_json( From d0e9650461952cd6d1cae8bc49c0a8f556b44d0b Mon Sep 17 00:00:00 2001 From: Andrzej Godziuk Date: Mon, 22 Jul 2019 19:18:32 +0200 Subject: [PATCH 3/3] Make the title extraction regex more specific --- youtube_dl/extractor/tvn24.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py index 24dd202cdec..7aa681e0cd7 100644 --- a/youtube_dl/extractor/tvn24.py +++ b/youtube_dl/extractor/tvn24.py @@ -40,7 +40,7 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r']+class="standardHeader1[^"]*"[^>]*>\s*(.+?)\s*', + r']+class="standardHeader1 headerPadding header-span headerMargin"[^>]*>\s*(.+?)\s*', webpage, 'title') def extract_json(attr, name, fatal=True):