diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cccd3a8..412f37d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
 - New `enabled` option for all jobs. Set to false to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah)
 - New option `ignore_incomplete_reads` (Requested in #725 by wschoot, contributed in #787 by wfrisch)
 - New option `wait_for` in browser jobs (Requested in #763 by yuis-ice, contributed in #810 by jamstah)
+- Added tags to jobs and the ability to select them at the command line (#789 by jamstah)
 
 ### Changed
 
diff --git a/docs/source/jobs.rst b/docs/source/jobs.rst
index 5b091416..61860374 100644
--- a/docs/source/jobs.rst
+++ b/docs/source/jobs.rst
@@ -172,6 +172,7 @@ Optional keys for all job types
 -------------------------------
 
 - ``name``: Human-readable name/label of the job
+- ``tags``: Array of tags
 - ``filter``: :doc:`filters` (if any) to apply to the output (can be tested with ``--test-filter``)
 - ``max_tries``: After this many sequential failed runs, the error will be reported rather than ignored
 - ``diff_tool``: Command to a custom tool for generating diff text
diff --git a/docs/source/manpage.rst b/docs/source/manpage.rst
index e23ee52c..fa3b41b1 100644
--- a/docs/source/manpage.rst
+++ b/docs/source/manpage.rst
@@ -23,13 +23,17 @@ This manpage describes the CLI tool.
 positional arguments:
   JOB
-      index of job(s) to run, as numbered according to the --list command.
-      If none are specified, then all jobs will be run.
+      indexes or tags of job(s) to run.
+      If --tags is set, each JOB is a tag;
+      if not, each JOB is an index numbered according to the --list command.
 
 optional arguments:
   -h, --help show this help message and exit
+
+  --tags
+      use tags instead of indexes to select jobs to run
+
   --version show program's version number and exit
diff --git a/lib/urlwatch/command.py b/lib/urlwatch/command.py
index 7fe8091f..01f09a97 100644
--- a/lib/urlwatch/command.py
+++ b/lib/urlwatch/command.py
@@ -95,16 +95,16 @@ def show_features(self):
         return 0
 
     def list_urls(self):
-        for idx, job in enumerate(self.urlwatcher.jobs):
+        for idx, job in enumerate(self.urlwatcher.jobs, 1):
             if self.urlwatch_config.verbose:
-                print('%d: %s' % (idx + 1, repr(job)))
+                print('%d: %s' % (idx, repr(job)))
             else:
                 pretty_name = job.pretty_name()
                 location = job.get_location()
                 if pretty_name != location:
-                    print('%d: %s ( %s )' % (idx + 1, pretty_name, location))
+                    print('%d: %s ( %s )' % (idx, pretty_name, location))
                 else:
-                    print('%d: %s' % (idx + 1, pretty_name))
+                    print('%d: %s' % (idx, pretty_name))
         return 0
 
     def _find_job(self, query):
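For context, a usage sketch of the documented `tags` key and `--tags` flag (illustrative only, not part of the patch; the YAML mirrors the test fixture added below, and the job-list file name is whatever `--urls` points at):

    # job list (YAML), one job with two tags
    name: UTC
    command: date -u
    tags:
      - arg
      - utc

    # select jobs by tag (a job runs if any of its tags is listed):
    urlwatch --tags utc local
    # select jobs by index, as before:
    urlwatch 1 3
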
diff --git a/lib/urlwatch/config.py b/lib/urlwatch/config.py
index 40d206ec..f5d1ec5c 100644
--- a/lib/urlwatch/config.py
+++ b/lib/urlwatch/config.py
@@ -64,12 +64,13 @@ def __init__(self, args, pkgname, urlwatch_dir, prefix, config, urls, hooks, cac
         self.parse_args(args)
 
     def parse_args(self, cmdline_args):
         parser = argparse.ArgumentParser(description=urlwatch.__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
-        parser.add_argument('joblist', metavar='JOB', type=int, nargs="*", help='index of job(s) to run, as numbered according to the --list command. If none specified, then all jobs will be run.')
+        parser.add_argument('joblist', metavar='JOB', type=str, nargs="*", help='indexes or tags of job(s) to run, depending on --tags. If using indexes, they are as numbered according to the --list command. If none are specified, then all jobs will be run.')
+        parser.add_argument('--tags', action='store_true', help='Use tags instead of indexes to select jobs to run')
         parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
         parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')
+
         group = parser.add_argument_group('files and directories')
         group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE', default=self.urls)
@@ -95,10 +96,12 @@ def parse_args(self, cmdline_args):
         group.add_argument('--test-diff-filter', metavar='JOB', help='test diff filter output of job by location or index (needs at least 2 snapshots)')
         group.add_argument('--dump-history', metavar='JOB', help='dump historical cached data for a job')
+
         group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
         group.add_argument('--edit', action='store_true', help='edit URL/job list')
         group.add_argument('--edit-config', action='store_true', help='edit configuration file')
         group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')
+
         group = parser.add_argument_group('miscellaneous')
         group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
         group.add_argument('--gc-cache', metavar='RETAIN_LIMIT', type=int, help='remove old cache entries, keeping the latest RETAIN_LIMIT (default: 1)',
@@ -106,6 +109,16 @@ def parse_args(self, cmdline_args):
 
         args = parser.parse_args(cmdline_args)
 
-        for i, arg in enumerate(vars(args)):
+        if args.tags:
+            if not args.joblist:
+                raise SystemExit("No tags specified")
+            self.tag_set = frozenset(args.joblist)
+        else:
+            try:
+                self.idx_set = frozenset(int(s) for s in args.joblist)
+            except ValueError as e:
+                parser.error(e)
+
+        for arg in vars(args):
             argval = getattr(args, arg)
             setattr(self, arg, argval)
diff --git a/lib/urlwatch/jobs.py b/lib/urlwatch/jobs.py
index 966aab85..1262443c 100644
--- a/lib/urlwatch/jobs.py
+++ b/lib/urlwatch/jobs.py
@@ -35,6 +35,7 @@
 import re
 import subprocess
 import textwrap
+from typing import Iterable, Optional, Set, FrozenSet
 
 import requests
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
@@ -196,7 +197,10 @@ def ignore_error(self, exception):
 
 class Job(JobBase):
     __required__ = ()
-    __optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url')
+    __optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url', 'tags')
+
+    def matching_tags(self, tags: Set[str]) -> Set[str]:
+        return self.tags & tags
 
     # determine if hyperlink "a" tag is used in HtmlReporter
     def location_is_url(self):
@@ -208,6 +212,17 @@ def pretty_name(self):
     def is_enabled(self):
         return self.enabled is None or self.enabled
 
+    @property
+    def tags(self) -> Optional[FrozenSet[str]]:
+        return self._tags
+
+    @tags.setter
+    def tags(self, value: Optional[Iterable[str]]):
+        if value is None:
+            self._tags = None
+        else:
+            self._tags = frozenset(value)
+
 
 class ShellJob(Job):
     """Run a shell command and get its standard output"""
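The tag machinery above stores `Job.tags` as a frozenset, and `matching_tags` returns its intersection with the requested tags; the `should_run` check added below in main.py then treats a non-empty intersection as "run this job". A minimal standalone sketch of that set logic (plain frozensets rather than real Job objects, for illustration only):

    job_tags = frozenset({'arg', 'utc'})     # what a job's YAML 'tags' list becomes
    requested = frozenset({'utc', 'local'})  # tags given on the command line

    matching = job_tags & requested          # frozenset({'utc'})
    selected = bool(matching)                # True: at least one tag matches
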
diff --git a/lib/urlwatch/main.py b/lib/urlwatch/main.py
index cbf5eb5c..2c27c921 100644
--- a/lib/urlwatch/main.py
+++ b/lib/urlwatch/main.py
@@ -68,6 +68,21 @@ def __init__(self, urlwatch_config, config_storage, cache_storage, urls_storage)
         if hasattr(self.urlwatch_config, 'migrate_urls'):
             self.urlwatch_config.migrate_cache(self)
 
+    def should_run(self, idx, job):
+        if not job.is_enabled():
+            return False
+
+        # Tag mode and tag(s) were specified
+        if self.urlwatch_config.tags and self.urlwatch_config.tag_set:
+            return job.matching_tags(self.urlwatch_config.tag_set)
+
+        # Index mode and index(es) were specified
+        if not self.urlwatch_config.tags and self.urlwatch_config.idx_set:
+            return idx in self.urlwatch_config.idx_set
+
+        # Either mode, and no jobs were specified
+        return True
+
     def check_directories(self):
         if not os.path.exists(self.urlwatch_config.config):
             self.config_storage.write_default_config(self.urlwatch_config.config)
diff --git a/lib/urlwatch/reporters.py b/lib/urlwatch/reporters.py
index 2df73771..2fc67a33 100644
--- a/lib/urlwatch/reporters.py
+++ b/lib/urlwatch/reporters.py
@@ -313,7 +313,7 @@ def submit(self):
         sep = (line_length * '=') or None
         yield from (part for part in itertools.chain(
             (sep,),
-            ('%02d. %s' % (idx + 1, line) for idx, line in enumerate(summary)),
+            ('%02d. %s' % (idx, line) for idx, line in enumerate(summary, 1)),
             (sep, ''),
         ) if part is not None)
 
@@ -860,7 +860,7 @@ def _render(cls, max_length, summary=None, details=None, footer=None):
         # The footer/summary lengths are the sum of the length of their parts
         # plus the space taken up by newlines.
         if summary:
-            summary = ['%d. %s' % (idx + 1, line) for idx, line in enumerate(summary)]
+            summary = ['%d. %s' % (idx, line) for idx, line in enumerate(summary, 1)]
             summary_len = sum(len(part) for part in summary) + len(summary) - 1
         else:
             summary_len = 0
diff --git a/lib/urlwatch/tests/data/jobs-with-tags.yaml b/lib/urlwatch/tests/data/jobs-with-tags.yaml
new file mode 100644
index 00000000..20af17e7
--- /dev/null
+++ b/lib/urlwatch/tests/data/jobs-with-tags.yaml
@@ -0,0 +1,17 @@
+---
+name: UTC
+command: date -u
+tags:
+  - arg
+  - utc
+---
+name: RFC
+command: date -R
+tags:
+  - arg
+  - rfc
+---
+name: Local
+command: date
+tags:
+  - local
diff --git a/lib/urlwatch/tests/test_handler.py b/lib/urlwatch/tests/test_handler.py
index d193d378..13acb4cb 100644
--- a/lib/urlwatch/tests/test_handler.py
+++ b/lib/urlwatch/tests/test_handler.py
@@ -78,8 +78,8 @@ def test_load_hooks_py():
 
 
 class ConfigForTest(CommandConfig):
-    def __init__(self, config, urls, cache, hooks, verbose):
-        super().__init__([], 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)
+    def __init__(self, config, urls, cache, hooks, verbose, args=()):
+        super().__init__(args, 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)
 
 
 @contextlib.contextmanager
@@ -112,6 +112,110 @@ def test_run_watcher():
         cache_storage.close()
 
 
+def prepare_tags_test(args):
+    urls = os.path.join(here, 'data', 'jobs-with-tags.yaml')
+    config = os.path.join(here, 'data', 'urlwatch.yaml')
+    cache = os.path.join(here, 'data', 'cache.db')
+    hooks = ''
+
+    config_storage = YamlConfigStorage(config)
+    urls_storage = UrlsYaml(urls)
+    cache_storage = CacheMiniDBStorage(cache)
+
+    urlwatch_config = ConfigForTest(config, urls, cache, hooks, True, args=args)
+    urlwatcher = Urlwatch(urlwatch_config, config_storage, cache_storage, urls_storage)
+
+    return urlwatcher, cache_storage
+
+
+def test_idxs_none():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test([])
+        try:
+            urlwatcher.run_jobs()
+
+            assert len(urlwatcher.report.job_states) == 3
+        finally:
+            cache_storage.close()
+
+
+def test_idxs_zero():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['0'])
+        try:
+            with pytest.raises(ValueError):
+                urlwatcher.run_jobs()
+        finally:
+            cache_storage.close()
+
+
+def test_idxs_massive():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['99999'])
+        try:
+            with pytest.raises(ValueError):
+                urlwatcher.run_jobs()
+        finally:
+            cache_storage.close()
+
+
+def test_idxs_nan():
+    with teardown_func():
+        with pytest.raises(SystemExit):
+            ConfigForTest('', '', '', '', True, ['NaN'])
+
+
+def test_idxs_one():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['1'])
+        try:
+            urlwatcher.run_jobs()
+
+            assert len(urlwatcher.report.job_states) == 1
+            assert urlwatcher.report.job_states[0].job.name == "UTC"
+        finally:
+            cache_storage.close()
+
+
+def test_tags_empty():
+    with teardown_func():
+        with pytest.raises(SystemExit):
+            ConfigForTest('', '', '', '', True, ['--tags'])
+
+
+def test_tags_no_match():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'foo'])
+        try:
+            urlwatcher.run_jobs()
+
+            assert len(urlwatcher.report.job_states) == 0
+        finally:
+            cache_storage.close()
+
+
+def test_tags_single():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'arg'])
+        try:
+            urlwatcher.run_jobs()
+
+            assert len(urlwatcher.report.job_states) == 2
+        finally:
+            cache_storage.close()
+
+
+def test_tags_multiple():
+    with teardown_func():
+        urlwatcher, cache_storage = prepare_tags_test(['--tags', 'utc', 'local'])
+        try:
+            urlwatcher.run_jobs()
+
+            assert len(urlwatcher.report.job_states) == 2
+        finally:
+            cache_storage.close()
+
+
 def test_disabled_job():
     with teardown_func():
         urls = os.path.join(here, 'data', 'disabled-job.yaml')
diff --git a/lib/urlwatch/worker.py b/lib/urlwatch/worker.py
index 23e710b7..d9b2f526 100644
--- a/lib/urlwatch/worker.py
+++ b/lib/urlwatch/worker.py
@@ -51,11 +51,13 @@ def run_parallel(func, items):
 
 
 def run_jobs(urlwatcher):
-    if not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.joblist):
+    if not urlwatcher.urlwatch_config.tags and not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.idx_set):
         raise ValueError(f'All job indices must be between 1 and {len(urlwatcher.jobs)}: {urlwatcher.urlwatch_config.joblist}')
     cache_storage = urlwatcher.cache_storage
     jobs = [job.with_defaults(urlwatcher.config_storage.config)
-            for (idx, job) in enumerate(urlwatcher.jobs) if job.is_enabled() and ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))]
+            for (idx, job) in enumerate(urlwatcher.jobs, 1)
+            if urlwatcher.should_run(idx, job)
+            ]
     report = urlwatcher.report
 
     logger.debug('Processing %d jobs (out of %d)', len(jobs), len(urlwatcher.jobs))
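Putting the pieces together, the selection behaviour exercised by the tests reads roughly as follows (an illustrative summary, assuming the three-job fixture jobs-with-tags.yaml above is the active job list):

    urlwatch                      # no JOB arguments: run all enabled jobs
    urlwatch 1                    # index mode: run only job 1 ('UTC')
    urlwatch 0                    # out of range: run_jobs raises ValueError (indices must be 1..3)
    urlwatch NaN                  # not an integer: argparse error (SystemExit)
    urlwatch --tags               # no tags given: SystemExit("No tags specified")
    urlwatch --tags arg           # runs 'UTC' and 'RFC'
    urlwatch --tags utc local     # runs 'UTC' and 'Local'
    urlwatch --tags foo           # matches nothing: zero jobs run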