From 27dba711efdeeaeb9f1b06bf47334ec501a1eb6d Mon Sep 17 00:00:00 2001
From: Jan Willhaus
Date: Mon, 1 Apr 2024 14:46:35 +0200
Subject: [PATCH] refactor: Improve config file parsing (#124)

---
 podcast_archiver/cli.py       | 127 +++++++++++++++++++---------------
 podcast_archiver/config.py    |  34 ++++-----
 podcast_archiver/constants.py |   5 ++
 podcast_archiver/download.py  |  20 +++---
 podcast_archiver/processor.py |   9 ++-
 tests/test_download.py        |  22 +++---
 tests/test_main.py            |   4 +-
 7 files changed, 124 insertions(+), 97 deletions(-)

diff --git a/podcast_archiver/cli.py b/podcast_archiver/cli.py
index bbaaf62..fbda238 100644
--- a/podcast_archiver/cli.py
+++ b/podcast_archiver/cli.py
@@ -1,23 +1,30 @@
+from __future__ import annotations
+
+import os
 import pathlib
-from os import PathLike, getenv
-from typing import Any, cast
+import stat
+from os import getenv
+from typing import TYPE_CHECKING, Any
 
 import rich_click as click
-from click.core import Context, Parameter
 
 from podcast_archiver import __version__ as version
+from podcast_archiver import constants
 from podcast_archiver.base import PodcastArchiver
-from podcast_archiver.config import DEFAULT_SETTINGS, Settings
+from podcast_archiver.config import Settings
 from podcast_archiver.console import console
-from podcast_archiver.constants import ENVVAR_PREFIX, PROG_NAME
 from podcast_archiver.exceptions import InvalidSettings
 from podcast_archiver.logging import configure_logging
 
+if TYPE_CHECKING:
+    from click.shell_completion import CompletionItem
+
+
 click.rich_click.USE_RICH_MARKUP = True
 click.rich_click.USE_MARKDOWN = True
 click.rich_click.OPTIONS_PANEL_TITLE = "Miscellaneous Options"
 click.rich_click.OPTION_GROUPS = {
-    PROG_NAME: [
+    constants.PROG_NAME: [
         {
             "name": "Basic parameters",
             "options": [
@@ -46,54 +53,53 @@
 }
 
 
-class ConfigPath(click.Path):
-    def __init__(self) -> None:
-        return super().__init__(
-            exists=True,
-            readable=True,
-            file_okay=True,
-            dir_okay=False,
-            resolve_path=True,
-            path_type=pathlib.Path,
-        )
+class ConfigFile(click.ParamType):
+    name = "file"
 
-    def convert(  # type: ignore[override]
-        self, value: str | PathLike[str], param: Parameter | None, ctx: Context | None
-    ) -> str | bytes | PathLike[str] | None:
-        if value is None:
-            return None
-        if (
-            ctx
-            and param
-            and isinstance(value, pathlib.Path)
-            and value == param.get_default(ctx, call=True)
-            and not value.exists()
-        ):
-            try:
+    def _check_existence(self, value: pathlib.Path, param: click.Parameter | None, ctx: click.Context | None) -> None:
+        try:
+            st = value.stat()
+        except OSError:
+            if value == get_default_config_path():
                 value.parent.mkdir(exist_ok=True, parents=True)
                 with value.open("w") as fp:
                     Settings.generate_default_config(file=fp)
-            except (OSError, FileNotFoundError):
-                return None
+                return
 
-        filepath = cast(pathlib.Path, super().convert(value, param, ctx))
-        if not ctx or ctx.resilient_parsing:
-            return filepath
+            self.fail(f"{self.name.title()} {click.format_filename(value)!r} does not exist.", param, ctx)
 
-        try:
-            ctx.default_map = ctx.default_map or {}
-            settings = Settings.load_from_yaml(filepath)
-            ctx.default_map.update(settings.model_dump(exclude_unset=True, exclude_none=True, by_alias=True))
-        except InvalidSettings as exc:
-            self.fail(f"{self.name.title()} {click.format_filename(filepath)!r} is invalid: {exc}", param, ctx)
+        if not stat.S_ISREG(st.st_mode):
+            self.fail(f"{self.name.title()} {click.format_filename(value)!r} is not a file.", param, ctx)
+
+        if not os.access(value, os.R_OK):
+            self.fail(f"{self.name.title()} {click.format_filename(value)!r} is not readable.", param, ctx)
+
+    def convert(
+        self, value: str | pathlib.Path, param: click.Parameter | None, ctx: click.Context | None
+    ) -> pathlib.Path:
+        if isinstance(value, str):
+            value = pathlib.Path(value)
+        value = value.resolve()
+        self._check_existence(value, param, ctx)
 
-        return filepath
+        if ctx:
+            try:
+                settings = Settings.load_from_yaml(value)
+                ctx.default_map = settings.model_dump(exclude_unset=True, exclude_none=True)
+            except InvalidSettings as exc:
+                self.fail(f"{self.name.title()} {click.format_filename(value)!r} is invalid: {exc}", param, ctx)
+        return value
+
+    def shell_complete(self, ctx: click.Context, param: click.Parameter, incomplete: str) -> list[CompletionItem]:
+        from click.shell_completion import CompletionItem
+
+        return [CompletionItem(incomplete, type="file")]
 
 
 def get_default_config_path() -> pathlib.Path | None:
     if getenv("TESTING", "0").lower() in ("1", "true"):
         return None
-    return pathlib.Path(click.get_app_dir(PROG_NAME)) / "config.yaml"  # pragma: no cover
+    return (pathlib.Path(click.get_app_dir(constants.PROG_NAME)) / "config.yaml").resolve()  # pragma: no cover
 
 
 def generate_default_config(ctx: click.Context, param: click.Parameter, value: bool) -> None:
@@ -106,7 +112,7 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b
 
 @click.command(
     context_settings={
-        "auto_envvar_prefix": ENVVAR_PREFIX,
+        "auto_envvar_prefix": constants.ENVVAR_PREFIX,
     },
     help="Archive all of your favorite podcasts",
 )
@@ -114,6 +120,7 @@ def generate_default_config(ctx: click.Context, param: click.Parameter, value: b
 @click.option(
     "-f",
     "--feed",
+    "feeds",
     multiple=True,
     show_envvar=True,
     help=Settings.model_fields["feeds"].description + " Use repeatedly for multiple feeds.",  # type: ignore[operator]
 )
 @click.option(
     "-o",
     "--opml",
+    "opml_files",
+    type=click.Path(
+        exists=True,
+        readable=True,
+        dir_okay=False,
+        resolve_path=True,
+        path_type=pathlib.Path,
+    ),
     multiple=True,
     show_envvar=True,
     help=(
@@ -131,6 +146,7 @@
 @click.option(
     "-d",
     "--dir",
+    "archive_directory",
     type=click.Path(
         exists=False,
         writable=True,
@@ -141,7 +157,7 @@
     ),
     show_default=True,
     required=False,
-    default=DEFAULT_SETTINGS.archive_directory,
+    default=pathlib.Path("."),
     show_envvar=True,
     help=Settings.model_fields["archive_directory"].description,
 )
@@ -151,15 +167,15 @@
     type=str,
     show_default=True,
     required=False,
-    default=DEFAULT_SETTINGS.filename_template,
+    default=constants.DEFAULT_FILENAME_TEMPLATE,
    show_envvar=True,
     help=Settings.model_fields["filename_template"].description,
 )
 @click.option(
     "-u",
     "--update",
+    "update_archive",
     type=bool,
-    default=DEFAULT_SETTINGS.update_archive,
     is_flag=True,
     show_envvar=True,
     help=Settings.model_fields["update_archive"].description,
 )
@@ -167,7 +183,6 @@
 @click.option(
     "--write-info-json",
     type=bool,
-    default=DEFAULT_SETTINGS.write_info_json,
     is_flag=True,
     show_envvar=True,
     help=Settings.model_fields["write_info_json"].description,
@@ -176,7 +191,6 @@
     "-q",
     "--quiet",
     type=bool,
-    default=DEFAULT_SETTINGS.quiet,
     is_flag=True,
     show_envvar=True,
     help=Settings.model_fields["quiet"].description,
 )
@@ -185,14 +199,13 @@
     "-C",
     "--concurrency",
     type=int,
-    default=DEFAULT_SETTINGS.concurrency,
+    default=constants.DEFAULT_CONCURRENCY,
     show_envvar=True,
     help=Settings.model_fields["concurrency"].description,
 )
 @click.option(
     "--debug-partial",
     type=bool,
-    default=DEFAULT_SETTINGS.debug_partial,
     is_flag=True,
     show_envvar=True,
     help=Settings.model_fields["debug_partial"].description,
 )
@@ -202,14 +215,13 @@
     "--verbose",
     count=True,
     show_envvar=True,
-    default=DEFAULT_SETTINGS.verbose,
     help=Settings.model_fields["verbose"].description,
 )
 @click.option(
     "-S",
     "--slugify",
+    "slugify_paths",
     type=bool,
-    default=DEFAULT_SETTINGS.slugify_paths,
     is_flag=True,
     show_envvar=True,
     help=Settings.model_fields["slugify_paths"].description,
 )
@@ -217,15 +229,16 @@
 @click.option(
     "-m",
     "--max-episodes",
+    "maximum_episode_count",
     type=int,
-    default=DEFAULT_SETTINGS.maximum_episode_count,
+    default=0,
     help=Settings.model_fields["maximum_episode_count"].description,
 )
 @click.version_option(
     version,
     "-V",
     "--version",
-    prog_name=PROG_NAME,
+    prog_name=constants.PROG_NAME,
 )
 @click.option(
     "--config-generate",
@@ -239,8 +252,8 @@
 @click.option(
     "-c",
     "--config",
-    type=ConfigPath(),
-    expose_value=False,
+    "config_path",
+    type=ConfigFile(),
     default=get_default_config_path,
     show_default=False,
     is_eager=True,
@@ -272,4 +285,4 @@ def main(ctx: click.RichContext, /, **kwargs: Any) -> int:
 
 
 if __name__ == "__main__":
-    main.main(prog_name=PROG_NAME)
+    main.main(prog_name=constants.PROG_NAME)
diff --git a/podcast_archiver/config.py b/podcast_archiver/config.py
index df4ee2f..3336632 100644
--- a/podcast_archiver/config.py
+++ b/podcast_archiver/config.py
@@ -36,13 +36,11 @@ class Settings(BaseModel):
 
     feeds: list[AnyHttpUrl] = Field(
         default_factory=list,
-        alias="feed",
         description="Feed URLs to archive.",
     )
 
     opml_files: list[UserExpandedFile] = Field(
         default_factory=list,
-        alias="opml",
         description=(
            "OPML files containing feed URLs to archive. OPML files can be exported from a variety of podcatchers."
         ),
     )
@@ -50,7 +48,6 @@
 
     archive_directory: UserExpandedDir = Field(
         default=UserExpandedDir("."),
-        alias="dir",
         description=(
             "Directory to which to download the podcast archive. "
             "By default, the archive will be created in the current working directory ('.')."
         ),
     )
@@ -59,7 +56,6 @@
 
     update_archive: bool = Field(
         default=False,
-        alias="update",
         description=(
             "Update the feeds with newly added episodes only. "
             "Adding episodes ends with the first episode already present in the download directory."
         ),
     )
@@ -68,31 +64,26 @@
 
     write_info_json: bool = Field(
         default=False,
-        alias="write_info_json",
         description="Write episode metadata to a .info.json file next to the media file itself.",
     )
 
     quiet: bool = Field(
         default=False,
-        alias="quiet",
         description="Print only minimal progress information. Errors will always be emitted.",
     )
 
     verbose: int = Field(
         default=0,
-        alias="verbose",
         description="Increase the level of verbosity while downloading.",
     )
 
     slugify_paths: bool = Field(
         default=False,
-        alias="slugify",
         description="Format filenames in the most compatible way, replacing all special characters.",
     )
 
     filename_template: str = Field(
-        alias="filename_template",
-        default="{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}",
+        default=constants.DEFAULT_FILENAME_TEMPLATE,
         description=(
             "Template to be used when generating filenames. Available template variables are: "
             f"{ALL_FIELD_TITLES_STR}, and 'ext' (the filename extension)"
         ),
     )
@@ -101,7 +92,6 @@
 
     maximum_episode_count: int = Field(
         default=0,
-        alias="max_episodes",
         description=(
             "Only download the given number of episodes per podcast feed. "
             "Useful if you don't really need the entire backlog."
         ),
     )
@@ -110,16 +100,19 @@
 
     concurrency: int = Field(
         default=4,
-        alias="concurrency",
         description="Maximum number of simultaneous downloads.",
     )
 
     debug_partial: bool = Field(
         default=False,
-        alias="debug_partial",
         description=f"Download only the first {constants.DEBUG_PARTIAL_SIZE} bytes of episodes for debugging purposes.",
     )
 
+    config_path: FilePath | None = Field(
+        default=None,
+        exclude=True,
+    )
+
     @classmethod
     def load_from_dict(cls, value: dict[str, Any]) -> Settings:
         try:
@@ -135,9 +128,13 @@ def load_from_yaml(cls, path: pathlib.Path) -> Settings:
         except YAMLError as exc:
             raise InvalidSettings("Not a valid YAML document") from exc
 
-        if content:
-            return cls.load_from_dict(content)
-        return cls()
+        content = content or {}
+
+        if not isinstance(content, dict):
+            raise InvalidSettings("Not a valid YAML document")
+
+        content.update(config_path=path)
+        return cls.load_from_dict(content)
 
     @classmethod
     def generate_default_config(cls, file: IO[Text] | None = None) -> None:
@@ -150,6 +147,8 @@ def generate_default_config(cls, file: IO[Text] | None = None) -> None:
         ]
 
         for name, field in cls.model_fields.items():
+            if name in ("config_path",):
+                continue
             cli_opt = (
                 wrapper.wrap(f"Equivalent command line option: --{field.alias.replace('_', '-')}")
                 if field.alias
@@ -175,6 +174,3 @@ def generate_default_config(cls, file: IO[Text] | None = None) -> None:
 
     @cached_property
     def filename_formatter(self) -> FilenameFormatter:
         return FilenameFormatter(self)
-
-
-DEFAULT_SETTINGS = Settings()
diff --git a/podcast_archiver/constants.py b/podcast_archiver/constants.py
index 8458d9e..5a07b32 100644
--- a/podcast_archiver/constants.py
+++ b/podcast_archiver/constants.py
@@ -1,3 +1,4 @@
+import pathlib
 import re
 
 from podcast_archiver import __version__
@@ -13,3 +14,7 @@
 DEBUG_PARTIAL_SIZE = DOWNLOAD_CHUNK_SIZE * 4
 
 MAX_TITLE_LENGTH = 96
+
+DEFAULT_ARCHIVE_DIRECTORY = pathlib.Path(".")
+DEFAULT_FILENAME_TEMPLATE = "{show.title}/{episode.published_time:%Y-%m-%d} - {episode.title}.{ext}"
+DEFAULT_CONCURRENCY = 4
diff --git a/podcast_archiver/download.py b/podcast_archiver/download.py
index ac952eb..d4265d5 100644
--- a/podcast_archiver/download.py
+++ b/podcast_archiver/download.py
@@ -4,7 +4,6 @@
 from typing import IO, TYPE_CHECKING, Any
 
 from podcast_archiver import constants
-from podcast_archiver.config import DEFAULT_SETTINGS, Settings
 from podcast_archiver.enums import DownloadResult
 from podcast_archiver.logging import logger
 from podcast_archiver.session import session
@@ -16,6 +15,7 @@
     from requests import Response
     from rich import progress as rich_progress
 
+    from podcast_archiver.config import Settings
     from podcast_archiver.models import Episode, FeedInfo
 
@@ -26,6 +26,9 @@ class DownloadJob:
     target: Path
     stop_event: Event
 
+    _debug_partial: bool
+    _write_info_json: bool
+
     _progress: rich_progress.Progress | None = None
     _task_id: rich_progress.TaskID | None = None
 
@@ -33,18 +36,19 @@
     def __init__(
         self,
         episode: Episode,
         *,
+        target: Path,
         feed_info: FeedInfo,
-        settings: Settings = DEFAULT_SETTINGS,
+        debug_partial: bool = False,
+        write_info_json: bool = False,
         progress: rich_progress.Progress | None = None,
         stop_event: Event | None = None,
     ) -> None:
         self.episode = episode
+        self.target = target
         self.feed_info = feed_info
-        self.settings = settings
+        self._debug_partial = debug_partial
+        self._write_info_json = write_info_json
         self._progress = progress
-        self.target = self.settings.filename_formatter.format(episode=self.episode, feed_info=self.feed_info)
-        if settings.debug_partial:
-            self.target = self.target.with_suffix(".partial" + self.target.suffix)
         self.stop_event = stop_event or Event()
 
         self.init_progress()
@@ -124,7 +128,7 @@ def receive_data(self, fp: IO[str], response: Response) -> bool:
             total_written += fp.write(chunk)
             self.update_progress(completed=total_written)
 
-            if self.settings.debug_partial and total_written >= constants.DEBUG_PARTIAL_SIZE:
+            if self._debug_partial and total_written >= constants.DEBUG_PARTIAL_SIZE:
                 logger.debug("Partial download completed.")
                 return True
             if self.stop_event.is_set():
@@ -134,7 +138,7 @@ def receive_data(self, fp: IO[str], response: Response) -> bool:
         return True
 
     def write_info_json(self) -> None:
-        if not self.settings.write_info_json:
+        if not self._write_info_json:
             return
         logger.info("Writing episode metadata to %s", self.infojsonfile.name)
         with atomic_write(self.infojsonfile) as fp:
diff --git a/podcast_archiver/processor.py b/podcast_archiver/processor.py
index 1caf526..1957fb7 100644
--- a/podcast_archiver/processor.py
+++ b/podcast_archiver/processor.py
@@ -14,6 +14,7 @@
 from podcast_archiver.enums import DownloadResult, QueueCompletionType
 from podcast_archiver.logging import logger
 from podcast_archiver.models import Feed
+from podcast_archiver.utils import FilenameFormatter
 
 if TYPE_CHECKING:
     from podcast_archiver.config import Settings
@@ -40,12 +41,15 @@ class ProcessingResult:
 
 class FeedProcessor:
     settings: Settings
+    filename_formatter: FilenameFormatter
+
     pool_executor: ThreadPoolExecutor
     progress: rich_progress.Progress
     stop_event: Event
 
     def __init__(self, settings: Settings) -> None:
         self.settings = settings
+        self.filename_formatter = FilenameFormatter(settings)
         self.pool_executor = ThreadPoolExecutor(max_workers=self.settings.concurrency)
         self.progress = rich_progress.Progress(
             *PROGRESS_COLUMNS,
@@ -82,10 +86,13 @@ def process(self, url: AnyHttpUrl) -> ProcessingResult:
     def _process_episodes(self, feed: Feed) -> tuple[list[Future[DownloadResult]], QueueCompletionType]:
         futures: list[Future[DownloadResult]] = []
         for idx, episode in enumerate(feed.episode_iter(self.settings.maximum_episode_count), 1):
+            target = self.filename_formatter.format(episode=episode, feed_info=feed.info)
             download_job = DownloadJob(
                 episode,
+                target=target,
                 feed_info=feed.info,
-                settings=self.settings,
+                debug_partial=self.settings.debug_partial,
+                write_info_json=self.settings.write_info_json,
                 progress=self.progress,
                 stop_event=self.stop_event,
             )
diff --git a/tests/test_download.py b/tests/test_download.py
index 3158408..09f7916 100644
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -2,6 +2,7 @@
 import logging
 from functools import partial
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, Protocol
 from unittest import mock
 
@@ -9,14 +10,11 @@
 from requests import HTTPError
 
 from podcast_archiver import download, utils
-from podcast_archiver.config import Settings
 from podcast_archiver.enums import DownloadResult
 from podcast_archiver.models import FeedPage
 from tests.conftest import MEDIA_URL
 
 if TYPE_CHECKING:
-    from pathlib import Path
-
     from responses import RequestsMock
 
@@ -29,7 +27,7 @@ def test_download_job(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any]) -
             "update.return_value": None,
         }
     )
-    job = download.DownloadJob(episode=episode, feed_info=feed.feed, progress=mock_progress)
+    job = download.DownloadJob(episode=episode, feed_info=feed.feed, target=Path("file.mp3"), progress=mock_progress)
     result = job()
 
     assert result == DownloadResult.COMPLETED_SUCCESSFULLY
@@ -41,8 +39,8 @@ def test_download_already_exists(tmp_path_cd: Path, feedobj_lautsprecher_notcons
     feed = FeedPage.model_validate(feedobj_lautsprecher_notconsumed)
     episode = feed.episodes[0]
 
-    job = download.DownloadJob(episode=episode, feed_info=feed.feed)
-    job.target.parent.mkdir()
+    job = download.DownloadJob(episode=episode, feed_info=feed.feed, target=Path("file.mp3"))
+    job.target.parent.mkdir(exist_ok=True)
     job.target.touch()
     result = job()
 
@@ -53,7 +51,7 @@ def test_download_aborted(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any
     feed = FeedPage.model_validate(feedobj_lautsprecher)
     episode = feed.episodes[0]
 
-    job = download.DownloadJob(episode=episode, feed_info=feed.feed)
+    job = download.DownloadJob(episode=episode, feed_info=feed.feed, target=Path("file.mp3"))
     job.stop_event.set()
     result = job()
 
@@ -86,7 +84,7 @@ def test_download_failed(
     if should_download:
         responses.add(responses.GET, MEDIA_URL, b"BLOB")
 
-    job = download.DownloadJob(episode=episode, feed_info=feed.feed)
+    job = download.DownloadJob(episode=episode, feed_info=feed.feed, target=Path("file.mp3"))
     with failure_mode(side_effect=side_effect), caplog.at_level(logging.ERROR):
         result = job()
 
@@ -107,8 +105,12 @@ def test_download_failed(
 def test_download_info_json(tmp_path_cd: Path, feedobj_lautsprecher: dict[str, Any], write_info_json: bool) -> None:
     feed = FeedPage.model_validate(feedobj_lautsprecher)
     episode = feed.episodes[0]
-    settings = Settings(write_info_json=write_info_json)
-    job = download.DownloadJob(episode=episode, feed_info=feed.feed, settings=settings)
+    job = download.DownloadJob(
+        episode=episode,
+        feed_info=feed.feed,
+        target=tmp_path_cd / "file.mp3",
+        write_info_json=write_info_json,
+    )
     result = job()
 
     assert result == DownloadResult.COMPLETED_SUCCESSFULLY
diff --git a/tests/test_main.py b/tests/test_main.py
index 2f63c1d..4418d2b 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -30,7 +30,7 @@ def test_main_nonexistent_dir(feed_lautsprecher_notconsumed: Url) -> None:
 
 def test_main_nonexistent_opml(tmp_path_cd: Path, feed_lautsprecher_notconsumed: Url) -> None:
-    with pytest.raises(click.BadParameter, match="Field 'opml.0': Path does not point to a file"):
+    with pytest.raises(click.BadParameter, match="File '/nonexistent.xml' does not exist."):
         cli.main(["--opml", "/nonexistent.xml"], standalone_mode=False)
 
@@ -76,7 +76,7 @@ def test_main_config_file_invalid(tmp_path_cd: Path, feed_lautsprecher_notconsum
     configfile = tmp_path_cd / "configtmp.yaml"
     configfile.write_text("asdf bla")
 
-    with pytest.raises(click.BadParameter, match="File '.+/configtmp.yaml' is invalid"):
+    with pytest.raises(click.UsageError, match="Not a valid YAML document"):
         cli.main(["--config", str(configfile)], standalone_mode=False)
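
Taken together, these changes decouple DownloadJob from the global Settings object: the processor now resolves the target filename via FilenameFormatter and passes plain flags, while --config fills Click's default_map from the YAML file and records config_path on the model. Below is a minimal, hypothetical wiring sketch based on the signatures introduced here; the helper name download_one and its inputs (a config file path plus an already-parsed Episode and FeedInfo) are illustrative assumptions, not code from the repository.

    # Hypothetical usage sketch of the refactored pieces; not part of the patch.
    # Episode/FeedInfo values would normally come from a parsed feed (FeedPage),
    # and the YAML path from --config or the default config location.
    from __future__ import annotations

    from pathlib import Path

    from podcast_archiver.config import Settings
    from podcast_archiver.download import DownloadJob
    from podcast_archiver.enums import DownloadResult
    from podcast_archiver.models import Episode, FeedInfo
    from podcast_archiver.utils import FilenameFormatter


    def download_one(config_file: Path, episode: Episode, feed_info: FeedInfo) -> DownloadResult:
        # load_from_yaml() validates the document and stores config_path on the model.
        settings = Settings.load_from_yaml(config_file)

        # After this refactor the caller (FeedProcessor), not DownloadJob, derives the target path.
        formatter = FilenameFormatter(settings)
        target = formatter.format(episode=episode, feed_info=feed_info)

        # DownloadJob now takes the precomputed target plus plain flags instead of a Settings object.
        job = DownloadJob(
            episode,
            target=target,
            feed_info=feed_info,
            debug_partial=settings.debug_partial,
            write_info_json=settings.write_info_json,
        )
        return job()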