From 8fd74a94749c998a6895c7824f5737e49b956bbf Mon Sep 17 00:00:00 2001 From: Guillaume Mulocher Date: Thu, 4 Apr 2024 14:50:01 +0200 Subject: [PATCH] doc: Add troubleshooting section (#620) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * doc: Add troubleshooting section * doc: add FAQ about timeouts --------- Co-authored-by: Matthieu Tâche --- anta/decorators.py | 8 ++-- anta/device.py | 2 +- anta/inventory/__init__.py | 36 ++++++++--------- anta/logger.py | 6 +-- anta/models.py | 12 +++--- anta/tests/routing/bgp.py | 22 +++++------ anta/tests/routing/ospf.py | 6 +-- docs/faq.md | 19 ++++++++- docs/troubleshooting.md | 79 ++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 7 +++- 10 files changed, 149 insertions(+), 48 deletions(-) create mode 100644 docs/troubleshooting.md diff --git a/anta/decorators.py b/anta/decorators.py index 76e16f56c..dc57e13ec 100644 --- a/anta/decorators.py +++ b/anta/decorators.py @@ -22,7 +22,7 @@ def deprecated_test(new_tests: list[str] | None = None) -> Callable[[F], F]: Args: ---- - new_tests (Optional[list[str]]): A list of new test classes that should replace the deprecated test. + new_tests: A list of new test classes that should replace the deprecated test. Returns ------- @@ -35,7 +35,7 @@ def decorator(function: F) -> F: Args: ---- - function (F): The test function to be decorated. + function: The test function to be decorated. Returns ------- @@ -66,7 +66,7 @@ def skip_on_platforms(platforms: list[str]) -> Callable[[F], F]: Args: ---- - platforms (list[str]): List of hardware models on which the test should be skipped. + platforms: List of hardware models on which the test should be skipped. Returns ------- @@ -79,7 +79,7 @@ def decorator(function: F) -> F: Args: ---- - function (F): The test function to be decorated. + function: The test function to be decorated. 
Returns ------- diff --git a/anta/device.py b/anta/device.py index 1fd5b5ca0..1e5fe47b6 100644 --- a/anta/device.py +++ b/anta/device.py @@ -251,7 +251,7 @@ def __init__( port: eAPI port. Defaults to 80 is proto is 'http' or 443 if proto is 'https'. ssh_port: SSH port. tags: Tags for this device. - timeout: Timeout value in seconds for outgoing connections. Default to 10 secs. + timeout: Timeout value in seconds for outgoing connections. insecure: Disable SSH Host Key validation. proto: eAPI protocol. Value can be 'http' or 'https'. disable_cache: Disable caching for all commands for this device. diff --git a/anta/inventory/__init__.py b/anta/inventory/__init__.py index e0462fdd4..2efba99a6 100644 --- a/anta/inventory/__init__.py +++ b/anta/inventory/__init__.py @@ -46,7 +46,7 @@ def _update_disable_cache(kwargs: dict[str, Any], *, inventory_disable_cache: bo Args: ---- - inventory_disable_cache (bool): The value of disable_cache in the inventory + inventory_disable_cache: The value of disable_cache in the inventory kwargs: The kwargs to instantiate the device """ @@ -64,9 +64,9 @@ def _parse_hosts( Args: ---- - inventory_input (AntaInventoryInput): AntaInventoryInput used to parse the devices - inventory (AntaInventory): AntaInventory to add the parsed devices to - **kwargs (dict[str, Any]): Additional keyword arguments to pass to the device constructor + inventory_input: AntaInventoryInput used to parse the devices + inventory: AntaInventory to add the parsed devices to + **kwargs: Additional keyword arguments to pass to the device constructor """ if inventory_input.hosts is None: @@ -93,9 +93,9 @@ def _parse_networks( Args: ---- - inventory_input (AntaInventoryInput): AntaInventoryInput used to parse the devices - inventory (AntaInventory): AntaInventory to add the parsed devices to - **kwargs (dict[str, Any]): Additional keyword arguments to pass to the device constructor + inventory_input: AntaInventoryInput used to parse the devices + inventory: AntaInventory 
to add the parsed devices to + **kwargs: Additional keyword arguments to pass to the device constructor Raises ------ @@ -126,9 +126,9 @@ def _parse_ranges( Args: ---- - inventory_input (AntaInventoryInput): AntaInventoryInput used to parse the devices - inventory (AntaInventory): AntaInventory to add the parsed devices to - **kwargs (dict[str, Any]): Additional keyword arguments to pass to the device constructor + inventory_input: AntaInventoryInput used to parse the devices + inventory: AntaInventory to add the parsed devices to + **kwargs: Additional keyword arguments to pass to the device constructor Raises ------ @@ -177,14 +177,14 @@ def parse( Args: ---- - filename (str): Path to device inventory YAML file - username (str): Username to use to connect to devices - password (str): Password to use to connect to devices - enable (bool): Whether or not the commands need to be run in enable mode towards the devices - enable_password (str, optional): Enable password to use if required - timeout (float, optional): timeout in seconds for every API call. - insecure (bool): Disable SSH Host Key validation - disable_cache (bool): Disable cache globally + filename: Path to device inventory YAML file + username: Username to use to connect to devices + password: Password to use to connect to devices + enable: Whether or not the commands need to be run in enable mode towards the devices + enable_password: Enable password to use if required + timeout: timeout in seconds for every API call. 
+ insecure: Disable SSH Host Key validation + disable_cache: Disable cache globally Raises ------ diff --git a/anta/logger.py b/anta/logger.py index a9e59608b..07c7afdea 100644 --- a/anta/logger.py +++ b/anta/logger.py @@ -99,9 +99,9 @@ def anta_log_exception(exception: BaseException, message: str | None = None, cal Args: ---- - exception (BaseException): The Exception being logged - message (str): An optional message - calling_logger (logging.Logger): A logger to which the exception should be logged. If not present, the logger in this file is used. + exception: The Exception being logged + message: An optional message + calling_logger: A logger to which the exception should be logged. If not present, the logger in this file is used. """ if calling_logger is None: diff --git a/anta/models.py b/anta/models.py index 9733b72d7..4cb75c7f1 100644 --- a/anta/models.py +++ b/anta/models.py @@ -60,10 +60,10 @@ class AntaTemplate(BaseModel): Attributes ---------- template: Python f-string. Example: 'show vlan {vlan_id}' - version: eAPI version - valid values are 1 or "latest" - default is "latest" + version: eAPI version - valid values are 1 or "latest". revision: Revision of the command. Valid values are 1 to 99. Revision has precedence over version. - ofmt: eAPI output - json or text - default is json - use_cache: Enable or disable caching for this AntaTemplate if the AntaDevice supports it - default is True + ofmt: eAPI output - json or text. + use_cache: Enable or disable caching for this AntaTemplate if the AntaDevice supports it. """ @@ -120,14 +120,14 @@ class AntaCommand(BaseModel): Attributes ---------- command: Device command - version: eAPI version - valid values are 1 or "latest" - default is "latest" + version: eAPI version - valid values are 1 or "latest". revision: eAPI revision of the command. Valid values are 1 to 99. Revision has precedence over version. - ofmt: eAPI output - json or text - default is json + ofmt: eAPI output - json or text. 
output: Output of the command populated by the collect() function template: AntaTemplate object used to render this command params: Dictionary of variables with string values to render the template errors: If the command execution fails, eAPI returns a list of strings detailing the error - use_cache: Enable or disable caching for this AntaCommand if the AntaDevice supports it - default is True + use_cache: Enable or disable caching for this AntaCommand if the AntaDevice supports it. """ diff --git a/anta/tests/routing/bgp.py b/anta/tests/routing/bgp.py index 140f99567..571e86925 100644 --- a/anta/tests/routing/bgp.py +++ b/anta/tests/routing/bgp.py @@ -26,11 +26,11 @@ def _add_bgp_failures(failures: dict[tuple[str, str | None], dict[str, Any]], af Args: ---- - failures (dict): The dictionary to which the failure will be added. - afi (Afi): The address family identifier. - vrf (str): The VRF name. - safi (Safi, optional): The subsequent address family identifier. - issue (Any): A description of the issue. Can be of any type. + failures: The dictionary to which the failure will be added. + afi: The address family identifier. + vrf: The VRF name. + safi: The subsequent address family identifier. + issue: A description of the issue. Can be of any type. Example: ------- @@ -65,7 +65,7 @@ def _check_peer_issues(peer_data: dict[str, Any] | None) -> dict[str, Any]: Args: ---- - peer_data (dict, optional): The BGP peer data dictionary nested in the `show bgp summary` command. + peer_data: The BGP peer data dictionary nested in the `show bgp summary` command. Returns ------- @@ -110,11 +110,11 @@ def _add_bgp_routes_failure( Args: ---- - bgp_routes (list[str]): The list of expected routes. - bgp_output (dict[str, Any]): The BGP output from the device. - peer (str): The IP address of the BGP peer. - vrf (str): The name of the VRF for which the routes need to be verified. - route_type (str, optional): The type of BGP routes. Defaults to 'advertised_routes'. 
+ bgp_routes: The list of expected routes. + bgp_output: The BGP output from the device. + peer: The IP address of the BGP peer. + vrf: The name of the VRF for which the routes need to be verified. + route_type: The type of BGP routes. Defaults to 'advertised_routes'. Returns ------- diff --git a/anta/tests/routing/ospf.py b/anta/tests/routing/ospf.py index d8155a32b..5910bf04e 100644 --- a/anta/tests/routing/ospf.py +++ b/anta/tests/routing/ospf.py @@ -20,7 +20,7 @@ def _count_ospf_neighbor(ospf_neighbor_json: dict[str, Any]) -> int: Args: ---- - ospf_neighbor_json (dict[str, Any]): The JSON output of the `show ip ospf neighbor` command. + ospf_neighbor_json: The JSON output of the `show ip ospf neighbor` command. Returns ------- @@ -39,7 +39,7 @@ def _get_not_full_ospf_neighbors(ospf_neighbor_json: dict[str, Any]) -> list[dic Args: ---- - ospf_neighbor_json (dict[str, Any]): The JSON output of the `show ip ospf neighbor` command. + ospf_neighbor_json: The JSON output of the `show ip ospf neighbor` command. Returns ------- @@ -65,7 +65,7 @@ def _get_ospf_max_lsa_info(ospf_process_json: dict[str, Any]) -> list[dict[str, Args: ---- - ospf_process_json (dict[str, Any]): OSPF process information in JSON format. + ospf_process_json: OSPF process information in JSON format. Returns ------- diff --git a/docs/faq.md b/docs/faq.md index 04db24d58..7d254612a 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -23,6 +23,23 @@ toc_depth: 4 # Frequently Asked Questions (FAQ) +## `Timeout` error in the logs +???+ faq "`Timeout` error in the logs" + + When running ANTA, you can receive `Timeout` errors in the logs (could be ReadTimeout, WriteTimeout, ConnectTimeout, PoolTimeout). More details on the timeouts of the underlying library are available here: https://www.python-httpx.org/advanced/timeouts/ + + This might be due to the time the host on which ANTA is run takes to reach the target devices (for instance if going through firewalls, NATs, ...) 
or when a lot of tests are being run at the same time on a device (eAPI has a queue mechanism to avoid exhausting EOS resources because of a high number of simultaneous eAPI requests). + + ### Solution + + Use the `timeout` option. As an example for the `nrfu` command: + + ```bash + anta nrfu --enable --username username --password arista --inventory inventory.yml -c nrfu.yml --timeout 50 text + ``` + + The previous command sets a couple of options for ANTA NRFU, one of them being the `timeout` option. By default, when running ANTA from the CLI, it is set to 30s. + The timeout is increased to 50s to allow ANTA to wait for API calls a little longer. ## `ImportError` related to `urllib3` ???+ faq "`ImportError` related to `urllib3` when running ANTA" @@ -88,4 +105,4 @@ toc_depth: 4 # Still facing issues? -If you've tried the above solutions and continue to experience problems, please report the issue in our [GitHub repository](https://github.com/arista-netdevops-community/anta). +If you've tried the above solutions and continue to experience problems, please follow the [troubleshooting](../troubleshooting) instructions and report the issue in our [GitHub repository](https://github.com/arista-netdevops-community/anta). diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 000000000..596aad67c --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,79 @@ + + +# Troubleshooting ANTA + +A couple of things to check when hitting an issue with ANTA: + +```mermaid +flowchart LR + A>Hitting an issue with ANTA] --> B{Is my issue
listed in the FAQ?} + B -- Yes --> C{Does the FAQ solution<br />work for me?} + C -- Yes --> V(((Victory))) + B -->|No| E{Is my problem<br />mentioned in one<br />of the open issues?} + C -->|No| E + E -- Yes --> F{Has the issue been<br />fixed in a newer<br />release or in main?} + F -- Yes --> U[Upgrade] + E -- No ---> H((Follow the steps below<br />and open a GitHub issue)) + U --> I{Did it fix<br />your problem?} + I -- Yes --> V + I -- No --> H + F -- No ----> G((Add a comment on the<br />issue indicating you<br />are hitting this and<br />describing your setup
and adding your logs.)) + + click B "../faq" "FAQ" + click E "https://github.com/arista-netdevops-community/anta/issues" + click H "https://github.com/arista-netdevops-community/anta/issues" + style A stroke:#f00,stroke-width:2px +``` + +## Capturing logs + +To help document the issue in GitHub, it is important to capture some logs so the developers can understand what is affecting your system. No logs mean that the first question asked on the issue will probably be _"Can you share some logs please?"_. + +ANTA provides very verbose logs when using the `DEBUG` level. When using DEBUG log level with a log file, the DEBUG logging level is not sent to stdout, but only to the file. + +!!! danger + + On real deployments, do not use DEBUG logging level without setting a log file at the same time. + +To save the logs to a file called `anta.log`, use the following flags: + +```bash +# Where ANTA_COMMAND is one of nrfu, debug, get, exec, check +anta -l DEBUG --log-file anta.log <ANTA_COMMAND> +``` + +See `anta --help` for more information. These options have to precede the `nrfu` command. + +!!! tip + + Remember that in ANTA, each level of command has its own options and they can only be set at this level. + So the `-l` and `--log-file` MUST be between `anta` and the `ANTA_COMMAND`. + Similarly, all the `nrfu` options MUST be set between the `nrfu` and the `ANTA_NRFU_SUBCOMMAND` (`json`, `text`, `table` or `tpl-report`). + + +As an example, for the `nrfu` command, it would look like: + +```bash +anta -l DEBUG --log-file anta.log nrfu --enable --username username --password arista --inventory inventory.yml -c nrfu.yml text +``` + + +### `ANTA_DEBUG` environment variable + +??? warning + + Do not use this if you do not know why. This produces a lot of logs and can create confusion if you do not know what to look for. + +The environment variable `ANTA_DEBUG=true` enables ANTA Debug Mode. + +This flag is used by various functions in ANTA: when set to true, the function will display or log more information. 
In particular, when an Exception occurs in the code and this variable is set, the logging function used by ANTA is different to also produce the Python traceback for debugging. This typically needs to be done when opening a GitHub issue and an Exception is seen at runtime. + +Example: + +```bash +ANTA_DEBUG=true anta -l DEBUG --log-file anta.log nrfu --enable --username username --password arista --inventory inventory.yml -c nrfu.yml text +``` diff --git a/mkdocs.yml b/mkdocs.yml index eda1d1fe8..784ade5a2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -129,7 +129,11 @@ markdown_extensions: - pymdownx.magiclink - pymdownx.mark - pymdownx.smartsymbols - - pymdownx.superfences + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format - pymdownx.tasklist: custom_checkbox: true - pymdownx.tilde @@ -212,5 +216,6 @@ nav: - Result Manager module: api/result_manager.md - Result Manager models: api/result_manager_models.md - Report Manager: api/report_manager.md + - Troubleshooting: troubleshooting.md - Contributions: contribution.md - FAQ: faq.md