From 123de66d019aef7fc18fab6d56cc2a54d81fea3f Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Mon, 12 Feb 2024 18:12:49 +0000 Subject: [PATCH 1/3] Update CSS Selector to use new style (#786) New style of calling the CSSSelector directly instead of using the evaluate function. This has been supported since lxml 1.1 [1] and the evaluate method has been deprecated since lxml 2.1 [2]. [1] https://github.com/lxml/lxml/blob/lxml-1.1/src/lxml/xpath.pxi#L66 [2] https://github.com/lxml/lxml/blob/lxml-2.1/src/lxml/xpath.pxi#L143 Signed-off-by: James Hewitt --- CHANGELOG.md | 1 + lib/urlwatch/filters.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbf24410..cdfce836 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format mostly follows [Keep a Changelog](http://keepachangelog.com/en/1.0.0/ - Fix documentation for watching Github tags and releases, again (#723) - Fix `--test-reporter` command-line option so `separate` configuration option is no longer ignored when sending test notifications (#772, by marunjar) - Fix line height and dark mode regression (#774 reported by kongomongo, PRs #777 and #778 by trevorshannon) +- Fix compatibility with lxml >= 5 which caused the CSS Selector filter to fail (#783 reported by jamesquilty, PR #786 by jamstah) ## [2.28] -- 2023-05-03 diff --git a/lib/urlwatch/filters.py b/lib/urlwatch/filters.py index 7b7c95b1..ed21b4c0 100644 --- a/lib/urlwatch/filters.py +++ b/lib/urlwatch/filters.py @@ -761,9 +761,9 @@ def _get_filtered_elements(self): excluded_elems = None if self.filter_kind == 'css': selected_elems = CSSSelector(self.expression, - namespaces=self.namespaces).evaluate(root) + namespaces=self.namespaces)(root) excluded_elems = CSSSelector(self.exclude, - namespaces=self.namespaces).evaluate(root) if self.exclude else None + namespaces=self.namespaces)(root) if self.exclude else None elif self.filter_kind == 'xpath': selected_elems = 
root.xpath(self.expression, namespaces=self.namespaces) excluded_elems = root.xpath(self.exclude, namespaces=self.namespaces) if self.exclude else None From 4143e3400b9e3be657cdd4f2c7afd29523dfe60f Mon Sep 17 00:00:00 2001 From: trevorshannon Date: Mon, 12 Feb 2024 10:16:29 -0800 Subject: [PATCH 2/3] Updates max_tries documentation. (#792) --- docs/source/jobs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/jobs.rst b/docs/source/jobs.rst index 8c55d585..67273a64 100644 --- a/docs/source/jobs.rst +++ b/docs/source/jobs.rst @@ -169,7 +169,7 @@ Optional keys for all job types - ``name``: Human-readable name/label of the job - ``filter``: :doc:`filters` (if any) to apply to the output (can be tested with ``--test-filter``) -- ``max_tries``: Number of times to retry fetching the resource +- ``max_tries``: After this many sequential failed runs, the error will be reported rather than ignored - ``diff_tool``: Command to a custom tool for generating diff text - ``diff_filter``: :doc:`filters` (if any) to apply to the diff result (can be tested with ``--test-diff-filter``) - ``treat_new_as_changed``: Will treat jobs that don't have any historic data as ``CHANGED`` instead of ``NEW`` (and create a diff for new jobs) From cb930d79423ba84b69aab903d68b9997c74a63ea Mon Sep 17 00:00:00 2001 From: James Hewitt Date: Mon, 12 Feb 2024 18:17:33 +0000 Subject: [PATCH 3/3] Add option to disable individual jobs (#785) Signed-off-by: James Hewitt --- CHANGELOG.md | 4 ++++ docs/source/jobs.rst | 1 + lib/urlwatch/jobs.py | 5 ++++- lib/urlwatch/tests/data/disabled-job.yaml | 6 ++++++ lib/urlwatch/tests/test_handler.py | 21 +++++++++++++++++++++ lib/urlwatch/worker.py | 2 +- 6 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 lib/urlwatch/tests/data/disabled-job.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index cdfce836..9e96ff90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format mostly follows [Keep a 
Changelog](http://keepachangelog.com/en/1.0.0/ ## UNRELEASED +### Added + +- New `enabled` option for all jobs. Set to `false` to disable a job without needing to remove it or comment it out (Requested in #625 by snowman, contributed in #785 by jamstah) + ### Changed - Remove EOL'd Python 3.7 (new minimum requirement is Python 3.8), add Python 3.12 testing diff --git a/docs/source/jobs.rst b/docs/source/jobs.rst index 67273a64..e2c51f52 100644 --- a/docs/source/jobs.rst +++ b/docs/source/jobs.rst @@ -176,6 +176,7 @@ Optional keys for all job types - ``compared_versions``: Number of versions to compare for similarity - ``kind`` (redundant): Either ``url``, ``shell`` or ``browser``. Automatically derived from the unique key (``url``, ``command`` or ``navigate``) of the job type - ``user_visible_url``: Different URL to show in reports (e.g. when watched URL is a REST API URL, and you want to show a webpage) +- ``enabled``: Can be set to ``false`` to disable an individual job (default is ``true``) Setting keys for all jobs at once diff --git a/lib/urlwatch/jobs.py b/lib/urlwatch/jobs.py index f4db8217..d89f41f1 100644 --- a/lib/urlwatch/jobs.py +++ b/lib/urlwatch/jobs.py @@ -196,7 +196,7 @@ def ignore_error(self, exception): class Job(JobBase): __required__ = () - __optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'treat_new_as_changed', 'user_visible_url') + __optional__ = ('name', 'filter', 'max_tries', 'diff_tool', 'compared_versions', 'diff_filter', 'enabled', 'treat_new_as_changed', 'user_visible_url') # determine if hyperlink "a" tag is used in HtmlReporter def location_is_url(self): @@ -205,6 +205,9 @@ def location_is_url(self): def pretty_name(self): return self.name if self.name else self.get_location() + def is_enabled(self): + return self.enabled is None or self.enabled + class ShellJob(Job): """Run a shell command and get its standard output""" diff --git a/lib/urlwatch/tests/data/disabled-job.yaml 
b/lib/urlwatch/tests/data/disabled-job.yaml new file mode 100644 index 00000000..8b550c3a --- /dev/null +++ b/lib/urlwatch/tests/data/disabled-job.yaml @@ -0,0 +1,6 @@ +name: "1" +url: "|echo job 1" +enabled: false +--- +name: "2" +url: "|echo job 2" diff --git a/lib/urlwatch/tests/test_handler.py b/lib/urlwatch/tests/test_handler.py index 7886acc9..8d90cbdd 100644 --- a/lib/urlwatch/tests/test_handler.py +++ b/lib/urlwatch/tests/test_handler.py @@ -122,6 +122,27 @@ def test_run_watcher(): cache_storage.close() +def test_disabled_job(): + with teardown_func(): + urls = os.path.join(here, 'data', 'disabled-job.yaml') + config = os.path.join(here, 'data', 'urlwatch.yaml') + cache = os.path.join(here, 'data', 'cache.db') + hooks = '' + + config_storage = YamlConfigStorage(config) + urls_storage = UrlsYaml(urls) + cache_storage = CacheMiniDBStorage(cache) + try: + urlwatch_config = ConfigForTest(config, urls, cache, hooks, True) + + urlwatcher = Urlwatch(urlwatch_config, config_storage, cache_storage, urls_storage) + urlwatcher.run_jobs() + + assert len(urlwatcher.report.job_states) == 1 + finally: + cache_storage.close() + + def test_unserialize_shell_job_without_kind(): job = JobBase.unserialize({ 'name': 'hoho', diff --git a/lib/urlwatch/worker.py b/lib/urlwatch/worker.py index 8a7ea8c4..23e710b7 100644 --- a/lib/urlwatch/worker.py +++ b/lib/urlwatch/worker.py @@ -55,7 +55,7 @@ def run_jobs(urlwatcher): raise ValueError(f'All job indices must be between 1 and {len(urlwatcher.jobs)}: {urlwatcher.urlwatch_config.joblist}') cache_storage = urlwatcher.cache_storage jobs = [job.with_defaults(urlwatcher.config_storage.config) - for (idx, job) in enumerate(urlwatcher.jobs) if ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))] + for (idx, job) in enumerate(urlwatcher.jobs) if job.is_enabled() and ((idx + 1) in urlwatcher.urlwatch_config.joblist or (not urlwatcher.urlwatch_config.joblist))] report = urlwatcher.report 
logger.debug('Processing %d jobs (out of %d)', len(jobs), len(urlwatcher.jobs))