diff --git a/cou/apps/auxiliary.py b/cou/apps/auxiliary.py index 8c0f9290c..5a9a57cb9 100644 --- a/cou/apps/auxiliary.py +++ b/cou/apps/auxiliary.py @@ -15,7 +15,7 @@ import logging from typing import Optional -from cou.apps.base import OpenStackApplication +from cou.apps.base import LONG_IDLE_TIMEOUT, OpenStackApplication from cou.apps.factory import AppFactory from cou.exceptions import ApplicationError from cou.steps import PreUpgradeStep @@ -128,7 +128,7 @@ class RabbitMQServer(OpenStackAuxiliaryApplication): RabbitMQ must wait for the entire model to be idle before declaring the upgrade complete. """ - wait_timeout = 30 * 60 # 30 min + wait_timeout = LONG_IDLE_TIMEOUT wait_for_model = True @@ -136,7 +136,7 @@ class RabbitMQServer(OpenStackAuxiliaryApplication): class CephMonApplication(OpenStackAuxiliaryApplication): """Application for Ceph Monitor charm.""" - wait_timeout = 30 * 60 # 30 min + wait_timeout = LONG_IDLE_TIMEOUT wait_for_model = True def pre_upgrade_steps(self, target: OpenStackRelease) -> list[PreUpgradeStep]: @@ -189,7 +189,7 @@ class MysqlInnodbClusterApplication(OpenStackAuxiliaryApplication): # NOTE(agileshaw): holding 'mysql-server-core-8.0' package prevents undesired # mysqld processes from restarting, which lead to outages packages_to_hold: Optional[list] = ["mysql-server-core-8.0"] - wait_timeout = 30 * 60 # 30 min + wait_timeout = LONG_IDLE_TIMEOUT # NOTE (gabrielcocenza): Although CephOSD class is empty now, it will be diff --git a/cou/apps/base.py b/cou/apps/base.py index 06a883e64..0ff40d4db 100644 --- a/cou/apps/base.py +++ b/cou/apps/base.py @@ -16,6 +16,7 @@ from __future__ import annotations import logging +import os from collections import defaultdict from dataclasses import dataclass, field from io import StringIO @@ -46,7 +47,10 @@ logger = logging.getLogger(__name__) -DEFAULT_WAITING_TIMEOUT = 5 * 60 # 5 min +STANDARD_IDLE_TIMEOUT: int = int( + os.environ.get("COU_STANDARD_IDLE_TIMEOUT", 5 * 60) +) # default of 5 min 
+LONG_IDLE_TIMEOUT: int = int(os.environ.get("COU_LONG_IDLE_TIMEOUT", 30 * 60)) # default of 30 min @dataclass @@ -105,7 +109,7 @@ class OpenStackApplication: origin_setting: Optional[str] = None units: list[ApplicationUnit] = field(default_factory=lambda: []) packages_to_hold: Optional[list] = field(default=None, init=False) - wait_timeout: int = field(default=DEFAULT_WAITING_TIMEOUT, init=False) + wait_timeout: int = field(default=STANDARD_IDLE_TIMEOUT, init=False) wait_for_model: bool = field(default=False, init=False) # waiting only for application itself def __post_init__(self) -> None: diff --git a/cou/apps/core.py b/cou/apps/core.py index 1acf9e71c..c153f3597 100644 --- a/cou/apps/core.py +++ b/cou/apps/core.py @@ -15,7 +15,7 @@ """Core application class.""" import logging -from cou.apps.base import OpenStackApplication +from cou.apps.base import LONG_IDLE_TIMEOUT, OpenStackApplication from cou.apps.factory import AppFactory logger = logging.getLogger(__name__) @@ -28,7 +28,7 @@ class Keystone(OpenStackApplication): Keystone must wait for the entire model to be idle before declaring the upgrade complete. """ - wait_timeout = 30 * 60 # 30 min + wait_timeout = LONG_IDLE_TIMEOUT wait_for_model = True @@ -39,4 +39,4 @@ class Octavia(OpenStackApplication): Octavia required more time to settle before COU can continue. """ - wait_timeout = 30 * 60 # 30 min + wait_timeout = LONG_IDLE_TIMEOUT diff --git a/docs/how-to/different-model.rst b/docs/how-to/different-model.rst index ac05386f5..29a9be63d 100644 --- a/docs/how-to/different-model.rst +++ b/docs/how-to/different-model.rst @@ -29,5 +29,5 @@ configuration files. .. LINKS .. _python-libjuju: https://github.com/juju/python-libjuju -.. _Juju environment variables: https://juju.is/docs/juju/environment-variables#heading--jujudata +.. _Juju environment variables: https://juju.is/docs/juju/environment-variables .. 
_JUJU_DATA: https://juju.is/docs/juju/environment-variables#heading--jujudata diff --git a/docs/reference/environment-variables.rst b/docs/reference/environment-variables.rst index 9dba5d55d..7601dc7dc 100644 --- a/docs/reference/environment-variables.rst +++ b/docs/reference/environment-variables.rst @@ -6,3 +6,5 @@ Environment Variables * **COU_TIMEOUT** - define timeout for **COU** retry policy. Default value is 10 seconds. * **COU_MODEL_RETRIES** - define how many times to retry the connection to Juju model before giving up. Default value is 5 times. * **COU_MODEL_RETRY_BACKOFF** - define number of seconds to increase the wait between connection to the Juju model retry attempts. Default value is 2 seconds. +* **COU_STANDARD_IDLE_TIMEOUT** - how long COU will wait for an application to settle to active/idle and declare the upgrade complete. The default value is 300 seconds. +* **COU_LONG_IDLE_TIMEOUT** - a longer version of COU_STANDARD_IDLE_TIMEOUT for applications that are known to need more time than usual to upgrade, such as Keystone and Octavia. The default value is 1800 seconds.