From ca57aac71ccc3eac17d61741fed7173c06e656cd Mon Sep 17 00:00:00 2001
From: Tomas Bajer
Date: Sun, 29 Oct 2023 21:10:40 +0100
Subject: [PATCH] tbajer-retry-functionality-for-podman

Retry pulling the container image several times before giving up, to
cope with temporary network issues. The number of attempts and the
delay between them are configurable with the new `--pull-attempts` and
`--pull-interval` options of the podman provision plugin.

A generic `retry()` helper and a `RetryError` exception are added to
`tmt.utils` for this purpose.

---
Review notes (this section is ignored by `git am`):

 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch. Verify the context lines against the target tree
   before applying, `git apply --ignore-whitespace` may be needed.
 * `retry()` reports the real number of remaining attempts, does not
   sleep after the final failed attempt, and chains the last exception
   as the cause of `RetryError`.
 * The last hunk of tmt/utils.py was edited by hand, the post-image blob
   hash on its `index` line is therefore stale.

 tmt/steps/provision/podman.py | 48 +++++++++++++++++++++++++++++++++++++++++++-----
 tmt/utils.py                  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/tmt/steps/provision/podman.py b/tmt/steps/provision/podman.py
index 0ff59af0cc..6a8a032fe9 100644
--- a/tmt/steps/provision/podman.py
+++ b/tmt/steps/provision/podman.py
@@ -9,7 +9,7 @@ import tmt.steps
 import tmt.steps.provision
 import tmt.utils
-from tmt.utils import Command, Path, ShellScript, field
+from tmt.utils import Command, Path, ShellScript, field, retry
 
 # Timeout in seconds of waiting for a connection
 CONNECTION_TIMEOUT = 60
@@ -17,6 +17,8 @@
 # Defaults
 DEFAULT_IMAGE = "fedora"
 DEFAULT_USER = "root"
+DEFAULT_PULL_ATTEMPTS = 5
+DEFAULT_PULL_INTERVAL = 5
 
 
 @dataclasses.dataclass
@@ -44,6 +46,26 @@ class PodmanGuestData(tmt.steps.provision.GuestData):
         metavar='NAME',
         help='Name or id of an existing container to be used.')
 
+    pull_attempts: Optional[int] = field(
+        default=DEFAULT_PULL_ATTEMPTS,
+        option='--pull-attempts',
+        metavar='ATTEMPT',
+        help=f"""
+            How many times to try pulling the image,
+            {DEFAULT_PULL_ATTEMPTS} attempts by default.
+            """,
+        normalize=tmt.utils.normalize_int)
+
+    pull_interval: Optional[int] = field(
+        default=DEFAULT_PULL_INTERVAL,
+        option='--pull-interval',
+        metavar='SECONDS',
+        help=f"""
+            How long to wait before a new pull attempt,
+            {DEFAULT_PULL_INTERVAL} seconds by default.
+            """,
+        normalize=tmt.utils.normalize_int)
+
 
 @dataclasses.dataclass
 class ProvisionPodmanData(PodmanGuestData, tmt.steps.provision.ProvisionStepData):
@@ -60,6 +82,9 @@ class GuestContainer(tmt.Guest):
     user: str
     force_pull: bool
     parent: tmt.steps.Step
+    pull_attempts: int
+    pull_interval: int
+    logger: tmt.log.Logger
 
     @property
     def is_ready(self) -> bool:
@@ -79,6 +104,15 @@ def wake(self) -> None:
         self.debug(
             f"Waking up container '{self.container}'.", level=2, shift=0)
 
+    def pull_image(self) -> None:
+        """ Pull image if not available or pull forced """
+        assert self.image is not None  # narrow type
+
+        self.podman(
+            Command('pull', '-q', self.image),
+            message=f"Pull image '{self.image}'."
+            )
+
     def start(self) -> None:
         """ Start provisioned guest """
         if self.is_dry_run:
@@ -95,11 +129,15 @@ def start(self) -> None:
         except tmt.utils.RunError:
             needs_pull = True
 
-        # Pull image if not available or pull forced
+        # Retry pulling the image in case of network issues
+        # Temporary solution until configurable in podman itself
         if needs_pull or self.force_pull:
-            self.podman(
-                Command('pull', '-q', self.image),
-                message=f"Pull image '{self.image}'."
+            retry(
+                self.pull_image,
+                self.pull_attempts,
+                self.pull_interval,
+                f"Pulling '{self.image}' image",
+                self._logger
                 )
 
         # Mount the whole plan directory in the container
diff --git a/tmt/utils.py b/tmt/utils.py
index 8bacd2f805..b7ccd04e59 100644
--- a/tmt/utils.py
+++ b/tmt/utils.py
@@ -1695,6 +1695,13 @@ def __init__(
         self.check_success = check_success
 
 
+class RetryError(GeneralError):
+    """ Retries unsuccessful """
+
+    def __init__(self, label: str, exc: Optional[Exception]) -> None:
+        super().__init__(f"Retries of {label} unsuccessful. {exc}")
+
+
 # Step exceptions
 
 
@@ -6183,3 +6190,40 @@ def __exit__(self, *args: Any) -> None:
     @property
     def duration(self) -> datetime.timedelta:
         return self.end_time - self.start_time
+
+
+def retry(
+        func: Callable[..., T],
+        retries: int,
+        delay: int,
+        label: str,
+        logger: tmt.log.Logger,
+        *args: Optional[Any],
+        **kwargs: Optional[Any]
+        ) -> T:
+    """ Call a function repeatedly until it succeeds or attempts run out.
+
+    :param func: function to call, ``args`` and ``kwargs`` are passed to it.
+    :param retries: maximum number of attempts to call the function.
+    :param delay: amount of seconds to wait before a new attempt.
+    :param label: action to retry, used in log and error messages.
+    :param logger: logger used to report failed attempts.
+    :returns: value returned by the first successful call of ``func``.
+    :raises RetryError: when every attempt failed, or ``retries`` allowed none.
+    """
+    exc: Optional[Exception] = None
+    for attempt in range(1, retries + 1):
+        try:
+            return func(*args, **kwargs)
+        except Exception as error:
+            exc = error
+            remaining = retries - attempt
+            # No attempt left: skip the pointless sleep and report the failure.
+            if remaining == 0:
+                break
+            logger.debug(
+                'retry',
+                f"{label} failed, {remaining} retries left,"
+                f" trying again in {delay:.2f} seconds")
+            time.sleep(delay)
+    raise RetryError(label, exc) from exc