From 8865f32c02d812b64409c26c0d0aae4a6145c611 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 29 Aug 2022 12:47:35 +1200 Subject: [PATCH 1/2] fix: retry up to 3 times when InvalidIdentityTokenException happens InvalidIdentityTokenException seems to intermittently cause workflows to fail when trying to get_credentials. Catch the failure and attempt to retry. --- scripts/aws/aws_helper.py | 13 +++++++++++++ scripts/gdal/gdal_helper.py | 5 ++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/scripts/aws/aws_helper.py b/scripts/aws/aws_helper.py index 534f64013..2c9346286 100644 --- a/scripts/aws/aws_helper.py +++ b/scripts/aws/aws_helper.py @@ -1,5 +1,6 @@ import json from os import environ +from time import sleep from typing import Any, Dict, List, NamedTuple, Optional from urllib.parse import urlparse @@ -21,6 +22,7 @@ client_sts = session.client("sts") bucket_config_path = environ.get("AWS_ROLE_CONFIG_PATH", "s3://linz-bucket-config/config.json") +role_retry_count = int(environ.get("AWS_ROLE_RETRY_COUNT", "3")) # Load bucket to roleArn mapping for LINZ internal buckets from SSM def _init_roles() -> None: @@ -78,6 +80,17 @@ def get_session(prefix: str) -> boto3.Session: get_log().info("role_assume", prefix=prefix, bucket=cfg.bucket, role_arn=cfg.roleArn) return current_session +def get_session_credentials(prefix: str, retry_count=3) -> boto3.Credentials: + """ + Attempt to get cretentials for a prefix, retrying upto retry_count amount of times + """ + for retry in range(retry_count): + try: + get_session(prefix).get_credentials() + except botocore.errorfactory.InvalidIdentityTokenException: + get_log().warn("bucket_load_retry", retry_count=retry) + sleep(0.5 * (retry + 1)) + def _get_credential_config(prefix: str) -> Optional[CredentialSource]: get_log().debug("get_credentials_bucket_name", prefix=prefix) diff --git a/scripts/gdal/gdal_helper.py b/scripts/gdal/gdal_helper.py index 5cfd5666b..35f4031e7 100644 --- a/scripts/gdal/gdal_helper.py +++ 
b/scripts/gdal/gdal_helper.py @@ -4,7 +4,7 @@ from linz_logger import get_log -from scripts.aws.aws_helper import get_session, is_s3 +from scripts.aws.aws_helper import is_s3 from scripts.logging.time_helper import time_in_ms @@ -60,8 +60,7 @@ def run_gdal( if input_file: if is_s3(input_file): # Set the credentials for GDAL to be able to read the input file - session = get_session(input_file) - credentials = session.get_credentials() + credentials = get_session_credentials(input_file) gdal_env["AWS_ACCESS_KEY_ID"] = credentials.access_key gdal_env["AWS_SECRET_ACCESS_KEY"] = credentials.secret_key gdal_env["AWS_SESSION_TOKEN"] = credentials.token From dcb9e466e045509ef13a9ad5e9c9a7d97a13a7e8 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 29 Aug 2022 15:45:48 +1200 Subject: [PATCH 2/2] fix: retry fetching credentials on failure --- scripts/aws/aws_helper.py | 32 +++++++++++++++++++++++++------- scripts/gdal/gdal_helper.py | 2 +- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/scripts/aws/aws_helper.py b/scripts/aws/aws_helper.py index 2c9346286..854361b66 100644 --- a/scripts/aws/aws_helper.py +++ b/scripts/aws/aws_helper.py @@ -1,4 +1,5 @@ import json +from dataclasses import dataclass from os import environ from time import sleep from typing import Any, Dict, List, NamedTuple, Optional @@ -22,7 +23,6 @@ client_sts = session.client("sts") bucket_config_path = environ.get("AWS_ROLE_CONFIG_PATH", "s3://linz-bucket-config/config.json") -role_retry_count = int(environ.get("AWS_ROLE_RETRY_COUNT", "3")) # Load bucket to roleArn mapping for LINZ internal buckets from SSM def _init_roles() -> None: @@ -80,16 +80,34 @@ def get_session(prefix: str) -> boto3.Session: get_log().info("role_assume", prefix=prefix, bucket=cfg.bucket, role_arn=cfg.roleArn) return current_session -def get_session_credentials(prefix: str, retry_count=3) -> boto3.Credentials: + +@dataclass +class AwsFrozenCredentials: """ - Attempt to get cretentials for a prefix, retrying upto 
retry_count amount of times + work around as I couldn't find the type for get_frozen_credentials() """ - for retry in range(retry_count): + + access_key: str + secret_key: str + token: str + + +def get_session_credentials(prefix: str, retry_count: int = 3) -> AwsFrozenCredentials: + """ + Attempt to get credentials for a prefix, retrying up to retry_count times + """ + last_error: Exception = Exception(f"Invalid retry count: {retry_count}") + for retry in range(1, retry_count + 1): try: - get_session(prefix).get_credentials() - except botocore.errorfactory.InvalidIdentityTokenException: + # Get credentials may give differing access_key and secret_key + credentials: AwsFrozenCredentials = get_session(prefix).get_frozen_credentials() + return credentials + except client_sts.exceptions.InvalidIdentityTokenException as e: get_log().warn("bucket_load_retry", retry_count=retry) - sleep(0.5 * (retry + 1)) + sleep(0.5 * retry) + last_error = e + + raise last_error def _get_credential_config(prefix: str) -> Optional[CredentialSource]: diff --git a/scripts/gdal/gdal_helper.py b/scripts/gdal/gdal_helper.py index 35f4031e7..277b0d78a 100644 --- a/scripts/gdal/gdal_helper.py +++ b/scripts/gdal/gdal_helper.py @@ -4,7 +4,7 @@ from linz_logger import get_log -from scripts.aws.aws_helper import is_s3 +from scripts.aws.aws_helper import get_session_credentials, is_s3 from scripts.logging.time_helper import time_in_ms