Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tags to jobs and a command line arg to select them #789

Merged
merged 5 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/
- New `enabled` option for all jobs. Set to false to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah)
- New option `ignore_incomplete_reads` (Requested in #725 by wschoot, contributed in #787 by wfrisch)
- New option `wait_for` in browser jobs (Requested in #763 by yuis-ice, contributed in #810 by jamstah)
- Added tags to jobs and the ability to select them at the command line (#789 by jamstah)

### Changed

Expand Down
1 change: 1 addition & 0 deletions docs/source/jobs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ Optional keys for all job types
-------------------------------

- ``name``: Human-readable name/label of the job
- ``tags``: Array of tags; jobs can be selected by tag on the command line with ``--tags``
- ``filter``: :doc:`filters` (if any) to apply to the output (can be tested with ``--test-filter``)
- ``max_tries``: After this many sequential failed runs, the error will be reported rather than ignored
- ``diff_tool``: Command to a custom tool for generating diff text
Expand Down
8 changes: 6 additions & 2 deletions docs/source/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ This manpage describes the CLI tool.

positional arguments:
JOB
index of job(s) to run, as numbered according to the --list command.
If none are specified, then all jobs will be run.
indexes or tags of job(s) to run.
If --tags is set, each JOB is a tag;
otherwise, each JOB is an index as numbered by the --list command.

optional arguments:
-h, --help
show this help message and exit

--tags
use tags instead of indexes to select jobs to run

--version
show program's version number and exit

Expand Down
8 changes: 4 additions & 4 deletions lib/urlwatch/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,16 @@ def show_features(self):
return 0

def list_urls(self):
for idx, job in enumerate(self.urlwatcher.jobs):
for idx, job in enumerate(self.urlwatcher.jobs, 1):
if self.urlwatch_config.verbose:
print('%d: %s' % (idx + 1, repr(job)))
print('%d: %s' % (idx, repr(job)))
else:
pretty_name = job.pretty_name()
location = job.get_location()
if pretty_name != location:
print('%d: %s ( %s )' % (idx + 1, pretty_name, location))
print('%d: %s ( %s )' % (idx, pretty_name, location))
else:
print('%d: %s' % (idx + 1, pretty_name))
print('%d: %s' % (idx, pretty_name))
return 0

def _find_job(self, query):
Expand Down
19 changes: 16 additions & 3 deletions lib/urlwatch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,13 @@ def __init__(self, args, pkgname, urlwatch_dir, prefix, config, urls, hooks, cac
self.parse_args(args)

def parse_args(self, cmdline_args):

parser = argparse.ArgumentParser(description=urlwatch.__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('joblist', metavar='JOB', type=int, nargs="*", help='index of job(s) to run, as numbered according to the --list command. If none specified, then all jobs will be run.')
parser.add_argument('joblist', metavar='JOB', type=str, nargs="*", help='indexes or tags of job(s) to run, depending on --tags. If using indexes, they are as numbered according to the --list command. If none are specified, then all jobs will be run.')
parser.add_argument('--tags', action='store_true', help='Use tags instead of indexes to select jobs to run')
parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')

group = parser.add_argument_group('files and directories')
group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE',
default=self.urls)
Expand All @@ -95,17 +96,29 @@ def parse_args(self, cmdline_args):
group.add_argument('--test-diff-filter', metavar='JOB',
help='test diff filter output of job by location or index (needs at least 2 snapshots)')
group.add_argument('--dump-history', metavar='JOB', help='dump historical cached data for a job')

group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
group.add_argument('--edit', action='store_true', help='edit URL/job list')
group.add_argument('--edit-config', action='store_true', help='edit configuration file')
group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')

group = parser.add_argument_group('miscellaneous')
group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
group.add_argument('--gc-cache', metavar='RETAIN_LIMIT', type=int, help='remove old cache entries, keeping the latest RETAIN_LIMIT (default: 1)',
nargs='?', const=1)

args = parser.parse_args(cmdline_args)

for i, arg in enumerate(vars(args)):
if args.tags:
if not args.joblist:
raise SystemExit("No tags specified")
self.tag_set = frozenset(args.joblist)
thp marked this conversation as resolved.
Show resolved Hide resolved
else:
try:
self.idx_set = frozenset(int(s) for s in args.joblist)
except ValueError as e:
parser.error(e)

for arg in vars(args):
argval = getattr(args, arg)
setattr(self, arg, argval)
17 changes: 16 additions & 1 deletion lib/urlwatch/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import re
import subprocess
import textwrap
from typing import Iterable, Optional, Set, FrozenSet

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
Expand Down Expand Up @@ -196,7 +197,10 @@ def ignore_error(self, exception):

class Job(JobBase):
__required__ = ()
__optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url')
__optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url', 'tags')

def matching_tags(self, tags: Set[str]) -> FrozenSet[str]:
    """Return the subset of ``tags`` that this job is tagged with.

    A job that has no tags (``self.tags`` is ``None`` or empty) matches
    nothing, so an empty frozenset is returned rather than letting
    ``None & tags`` raise ``TypeError``.

    :param tags: the tags to look for
    :returns: the tags shared by this job and ``tags``; empty (falsy) when
        there is no overlap
    """
    own_tags = self.tags
    if not own_tags:
        return frozenset()
    # intersection() accepts any iterable, whereas ``&`` requires a set operand
    return own_tags.intersection(tags)

# determine if hyperlink "a" tag is used in HtmlReporter
def location_is_url(self):
Expand All @@ -208,6 +212,17 @@ def pretty_name(self):
def is_enabled(self):
return self.enabled is None or self.enabled

@property
def tags(self) -> Optional[FrozenSet[str]]:
    """Tags assigned to this job, or ``None`` when none have been set."""
    return self._tags

@tags.setter
def tags(self, value: Optional[Iterable[str]]):
    # ``None`` is kept as-is so "no tags configured" stays distinguishable
    # from an explicitly empty collection.
    if value is None:
        self._tags = None
    elif isinstance(value, str):
        # A bare string is itself iterable; frozenset("abc") would yield the
        # single-character tags {"a", "b", "c"}. Treat it as one tag instead.
        self._tags = frozenset((value,))
    else:
        # Stored immutably; duplicates collapse.
        self._tags = frozenset(value)
thp marked this conversation as resolved.
Show resolved Hide resolved


class ShellJob(Job):
"""Run a shell command and get its standard output"""
Expand Down
15 changes: 15 additions & 0 deletions lib/urlwatch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ def __init__(self, urlwatch_config, config_storage, cache_storage, urls_storage)
if hasattr(self.urlwatch_config, 'migrate_urls'):
self.urlwatch_config.migrate_cache(self)

def should_run(self, idx, job):
    """Decide whether ``job`` is selected for this run.

    Disabled jobs never run. Otherwise the selection depends on the mode
    chosen in ``self.urlwatch_config``:

    * tag mode (``tags`` is true): the job runs if it shares at least one
      tag with ``tag_set``;
    * index mode: the job runs if ``idx`` is contained in ``idx_set``.

    In either mode an empty selection means every enabled job runs.

    :param idx: index of the job, compared against ``idx_set``
    :param job: the job to check; must provide ``is_enabled()`` and
        ``matching_tags()``
    :returns: ``True`` if the job should run, ``False`` otherwise
    """
    if not job.is_enabled():
        return False

    config = self.urlwatch_config
    if config.tags:
        # Only ``tag_set`` is consulted in tag mode and only ``idx_set`` in
        # index mode, so the attribute for the other mode is never read.
        if config.tag_set:
            # matching_tags() returns a set; collapse it to a real bool so
            # every path of this predicate has the same return type.
            return bool(job.matching_tags(config.tag_set))
    elif config.idx_set:
        return idx in config.idx_set

    # Either mode, and no jobs were specified
    return True

def check_directories(self):
if not os.path.exists(self.urlwatch_config.config):
self.config_storage.write_default_config(self.urlwatch_config.config)
Expand Down
4 changes: 2 additions & 2 deletions lib/urlwatch/reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def submit(self):
sep = (line_length * '=') or None
yield from (part for part in itertools.chain(
(sep,),
('%02d. %s' % (idx + 1, line) for idx, line in enumerate(summary)),
('%02d. %s' % (idx, line) for idx, line in enumerate(summary, 1)),
(sep, ''),
) if part is not None)

Expand Down Expand Up @@ -860,7 +860,7 @@ def _render(cls, max_length, summary=None, details=None, footer=None):
# The footer/summary lengths are the sum of the length of their parts
# plus the space taken up by newlines.
if summary:
summary = ['%d. %s' % (idx + 1, line) for idx, line in enumerate(summary)]
summary = ['%d. %s' % (idx, line) for idx, line in enumerate(summary, 1)]
summary_len = sum(len(part) for part in summary) + len(summary) - 1
else:
summary_len = 0
Expand Down
17 changes: 17 additions & 0 deletions lib/urlwatch/tests/data/jobs-with-tags.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
name: UTC
command: date -u
tags:
- arg
- utc
---
name: RFC
command: date -R
tags:
- arg
- rfc
---
name: Local
command: date
tags:
- local
108 changes: 106 additions & 2 deletions lib/urlwatch/tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def test_load_hooks_py():


class ConfigForTest(CommandConfig):
def __init__(self, config, urls, cache, hooks, verbose):
super().__init__([], 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)
def __init__(self, config, urls, cache, hooks, verbose, args=()):
super().__init__(args, 'urlwatch', os.path.dirname(__file__), root, config, urls, hooks, cache, verbose)


@contextlib.contextmanager
Expand Down Expand Up @@ -112,6 +112,110 @@ def test_run_watcher():
cache_storage.close()


def prepare_tags_test(args):
    """Build an ``Urlwatch`` instance over the ``jobs-with-tags.yaml`` fixture.

    ``args`` is passed to ``ConfigForTest`` as its ``args`` argument.
    Returns the watcher together with its cache storage so the caller can
    close the storage when done.
    """
    data_dir = os.path.join(here, 'data')
    jobs_path = os.path.join(data_dir, 'jobs-with-tags.yaml')
    config_path = os.path.join(data_dir, 'urlwatch.yaml')
    cache_path = os.path.join(data_dir, 'cache.db')

    config_storage = YamlConfigStorage(config_path)
    urls_storage = UrlsYaml(jobs_path)
    cache_storage = CacheMiniDBStorage(cache_path)

    # No hooks file is used for these tests, hence the empty hooks path.
    test_config = ConfigForTest(config_path, jobs_path, cache_path, '', True, args=args)
    watcher = Urlwatch(test_config, config_storage, cache_storage, urls_storage)
    return watcher, cache_storage


def test_idxs_none():
    """With no JOB arguments, all three jobs of the fixture are run."""
    with teardown_func():
        watcher, storage = prepare_tags_test([])
        # closing() guarantees storage.close() even if an assertion fails
        with contextlib.closing(storage):
            watcher.run_jobs()
            assert len(watcher.report.job_states) == 3


def test_idxs_zero():
    """Selecting index 0 makes ``run_jobs`` raise ``ValueError``."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['0'])
        with contextlib.closing(storage), pytest.raises(ValueError):
            watcher.run_jobs()


def test_idxs_massive():
    """Selecting index 99999 makes ``run_jobs`` raise ``ValueError``."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['99999'])
        with contextlib.closing(storage), pytest.raises(ValueError):
            watcher.run_jobs()


def test_idxs_nan():
    """A non-numeric JOB argument in index mode exits while parsing arguments."""
    cmdline = ['NaN']
    with teardown_func(), pytest.raises(SystemExit):
        ConfigForTest('', '', '', '', True, cmdline)


def test_idxs_one():
    """Selecting index 1 runs only the first job of the fixture ("UTC")."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['1'])
        with contextlib.closing(storage):
            watcher.run_jobs()

            states = watcher.report.job_states
            assert len(states) == 1
            assert states[0].job.name == "UTC"


def test_tags_empty():
    """``--tags`` without any tag exits while parsing arguments."""
    cmdline = ['--tags']
    with teardown_func(), pytest.raises(SystemExit):
        ConfigForTest('', '', '', '', True, cmdline)


def test_tags_no_match():
    """A tag that no job in the fixture carries selects no jobs."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['--tags', 'foo'])
        with contextlib.closing(storage):
            watcher.run_jobs()
            assert len(watcher.report.job_states) == 0


def test_tags_single():
    """The ``arg`` tag selects the two fixture jobs tagged with it."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['--tags', 'arg'])
        with contextlib.closing(storage):
            watcher.run_jobs()
            assert len(watcher.report.job_states) == 2


def test_tags_multiple():
    """Several tags select every job carrying at least one of them."""
    with teardown_func():
        watcher, storage = prepare_tags_test(['--tags', 'utc', 'local'])
        with contextlib.closing(storage):
            watcher.run_jobs()
            assert len(watcher.report.job_states) == 2


def test_disabled_job():
with teardown_func():
urls = os.path.join(here, 'data', 'disabled-job.yaml')
Expand Down
6 changes: 4 additions & 2 deletions lib/urlwatch/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def run_parallel(func, items):


def run_jobs(urlwatcher):
if not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.joblist):
if not urlwatcher.urlwatch_config.tags and not all(1 <= idx <= len(urlwatcher.jobs) for idx in urlwatcher.urlwatch_config.idx_set):
raise ValueError(f'All job indices must be between 1 and {len(urlwatcher.jobs)}: {urlwatcher.urlwatch_config.joblist}')
cache_storage = urlwatcher.cache_storage
jobs = [job.with_defaults(urlwatcher.config_storage.config)
for (idx, job) in enumerate(urlwatcher.jobs) if job.is_enabled() and ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))]
for (idx, job) in enumerate(urlwatcher.jobs, 1)
if urlwatcher.should_run(idx, job)
thp marked this conversation as resolved.
Show resolved Hide resolved
]
report = urlwatcher.report

logger.debug('Processing %d jobs (out of %d)', len(jobs), len(urlwatcher.jobs))
Expand Down
Loading