Skip to content

Commit

Permalink
[YoutubeDL] Add negation support for string comparisons in format sel…
Browse files Browse the repository at this point in the history
…ection expressions (closes #18600, closes #18805)
  • Loading branch information
dstftw committed Jan 20, 2019
1 parent 379306e commit 2cc779f
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 4 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -667,13 +667,14 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate

Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).

Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.

Expand Down
46 changes: 46 additions & 0 deletions test/test_YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,52 @@ def test_format_selection_video(self):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')

def test_format_selection_string_ops(self):
formats = [
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
]
info_dict = _make_result(formats)

# equals (=)
ydl = YDL({'format': '[format_id=abc-cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')

# does not equal (!=)
ydl = YDL({'format': '[format_id!=abc-cba]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

# starts with (^=)
ydl = YDL({'format': '[format_id^=abc]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')

# does not start with (!^=)
ydl = YDL({'format': '[format_id!^=abc-cba]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

# ends with ($=)
ydl = YDL({'format': '[format_id$=cba]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')

# does not end with (!$=)
ydl = YDL({'format': '[format_id!$=abc-cba]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

# contains (*=)
ydl = YDL({'format': '[format_id*=-]'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'abc-cba')

# does not contain (!*=)
ydl = YDL({'format': '[format_id!*=-]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())

def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
Expand Down
9 changes: 6 additions & 3 deletions youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,21 +1063,24 @@ def _build_format_filter(self, filter_spec):
if not m:
STR_OPERATORS = {
'=': operator.eq,
'!=': operator.ne,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(filter_spec)
if m:
comparison_value = m.group('value')
op = STR_OPERATORS[m.group('op')]
str_op = STR_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not str_op
else:
op = str_op

if not m:
raise ValueError('Invalid filter specification %r' % filter_spec)
Expand Down

0 comments on commit 2cc779f

Please sign in to comment.