From 2ab2ae8849bf6d80a700b1b74cef37eb187161ad Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 22 Dec 2021 17:52:20 +0100 Subject: [PATCH] Fix backwards compatibility issue in AWS provider's _get_credentials (#20463) The #19815 change introduced backwards incompatibility for the _get_credentials method - which is a centerpiece of AWS provider and is likely to be overwritten by the user who want for example inject auditing or other credentials-related custom beheviours when interfacing with AWS even if the method is protected. The change added default for region, which caused signature incompatibility with such derived classes. Unfortunately, we already released 2.5.0 provider with this change. We had to yank it and in order to avoid adding backwards-incompatible 3.0.0 release we are going to release 2.5.1 with this change included. Fixes: #20457 --- airflow/providers/amazon/aws/hooks/base_aws.py | 18 ++++++++---------- airflow/providers/amazon/aws/hooks/glue.py | 2 +- airflow/providers/amazon/aws/hooks/s3.py | 8 ++++++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/airflow/providers/amazon/aws/hooks/base_aws.py b/airflow/providers/amazon/aws/hooks/base_aws.py index c11f0f5c92f9d..8f9db323c9440 100644 --- a/airflow/providers/amazon/aws/hooks/base_aws.py +++ b/airflow/providers/amazon/aws/hooks/base_aws.py @@ -406,10 +406,7 @@ def __init__( if not (self.client_type or self.resource_type): raise AirflowException('Either client_type or resource_type must be provided.') - def _get_credentials( - self, - region_name: Optional[str] = None, - ) -> Tuple[boto3.session.Session, Optional[str]]: + def _get_credentials(self, region_name: Optional[str]) -> Tuple[boto3.session.Session, Optional[str]]: if not self.aws_conn_id: session = boto3.session.Session(region_name=region_name) @@ -456,7 +453,7 @@ def get_client_type( config: Optional[Config] = None, ) -> boto3.client: """Get the underlying boto3 client using boto3 session""" - session, endpoint_url = self._get_credentials(region_name) + session, endpoint_url = self._get_credentials(region_name=region_name) if client_type: warnings.warn( @@ -481,7 +478,7 @@ def get_resource_type( config: Optional[Config] = None, ) -> boto3.resource: """Get the underlying boto3 resource using boto3 session""" - session, endpoint_url = self._get_credentials(region_name) + session, endpoint_url = self._get_credentials(region_name=region_name) if resource_type: warnings.warn( @@ -530,7 +527,7 @@ def get_conn(self) -> Union[boto3.client, boto3.resource]: def get_session(self, region_name: Optional[str] = None) -> boto3.session.Session: """Get the underlying boto3.session.""" - session, _ = self._get_credentials(region_name) + session, _ = self._get_credentials(region_name=region_name) return session def get_credentials(self, region_name: Optional[str] = None) -> ReadOnlyCredentials: @@ -539,24 +536,25 @@ def get_credentials(self, region_name: Optional[str] = None) -> ReadOnlyCredenti This contains the following authentication attributes: access_key, secret_key and token. """ - session, _ = self._get_credentials(region_name) + session, _ = self._get_credentials(region_name=region_name) # Credentials are refreshable, so accessing your access key and # secret key separately can lead to a race condition. # See https://stackoverflow.com/a/36291428/8283373 return session.get_credentials().get_frozen_credentials() - def expand_role(self, role: str) -> str: + def expand_role(self, role: str, region_name: Optional[str] = None) -> str: """ If the IAM role is a role name, get the Amazon Resource Name (ARN) for the role. If IAM role is already an IAM role ARN, no change is made. :param role: IAM role name or ARN + :param region_name: Optional region name to get credentials for :return: IAM role ARN """ if "/" in role: return role else: - session, endpoint_url = self._get_credentials() + session, endpoint_url = self._get_credentials(region_name=region_name) _client = session.client('iam', endpoint_url=endpoint_url, config=self.config, verify=self.verify) return _client.get_role(RoleName=role)["Role"]["Arn"] diff --git a/airflow/providers/amazon/aws/hooks/glue.py b/airflow/providers/amazon/aws/hooks/glue.py index 5a2c4b101f590..3fbb1fa7555b9 100644 --- a/airflow/providers/amazon/aws/hooks/glue.py +++ b/airflow/providers/amazon/aws/hooks/glue.py @@ -101,7 +101,7 @@ def list_jobs(self) -> List: def get_iam_execution_role(self) -> Dict: """:return: iam role for job execution""" - session, endpoint_url = self._get_credentials(self.region_name) + session, endpoint_url = self._get_credentials(region_name=self.region_name) iam_client = session.client('iam', endpoint_url=endpoint_url, config=self.config, verify=self.verify) try: diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 6ca4b41f66085..141487042dfb8 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -173,7 +173,9 @@ def get_bucket(self, bucket_name: Optional[str] = None) -> str: :return: the bucket object to the bucket name. :rtype: boto3.S3.Bucket """ - session, endpoint_url = self._get_credentials() + # Buckets have no regions, and we cannot remove the region name from _get_credentials as we would + # break compatibility, so we set it explicitly to None. + session, endpoint_url = self._get_credentials(region_name=None) s3_resource = session.resource( "s3", endpoint_url=endpoint_url, @@ -346,7 +348,9 @@ def get_key(self, key: str, bucket_name: Optional[str] = None) -> S3Transfer: :return: the key object from the bucket :rtype: boto3.s3.Object """ - session, endpoint_url = self._get_credentials() + # Buckets have no regions, and we cannot remove the region name from _get_credentials as we would + # break compatibility, so we set it explicitly to None. + session, endpoint_url = self._get_credentials(region_name=None) s3_resource = session.resource( "s3", endpoint_url=endpoint_url,