diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 12bb35181b..bfe1e62fac 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -218,6 +218,19 @@ catalog: table-name: iceberg ``` +If you prefer to pass the credentials explicitly to the client instead of relying on environment variables, + +```yaml +catalog: + default: + type: dynamodb + table-name: iceberg + aws_access_key_id: + aws_secret_access_key: + aws_session_token: + region_name: +``` + # Concurrency PyIceberg uses multiple threads to parallelize operations. The number of workers can be configured by supplying a `max-workers` entry in the configuration file, or by setting the `PYICEBERG_MAX_WORKERS` environment variable. The default value depends on the system hardware and Python version. See [the Python documentation](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) for more details. diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index 3973b763a0..8ec6dcb2d2 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -30,10 +30,12 @@ For the development, Poetry is used for packing and dependency management. You c pip install poetry ``` -If you have an older version of pip and virtualenv you need to update these: +Make sure you're using an up-to-date environment from venv ```bash pip install --upgrade virtualenv pip +python -m venv ./venv +source ./venv/bin/activate ``` To get started, you can run `make install`, which installs Poetry and all the dependencies of the Iceberg library. This also installs the development dependencies. If you don't want to install the development dependencies, you need to install using `poetry install --no-dev`. diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index 3eee95da3c..6c3f931bd8 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -80,7 +80,15 @@ class DynamoDbCatalog(Catalog): def __init__(self, name: str, **properties: str): super().__init__(name, **properties) - self.dynamodb = boto3.client(DYNAMODB_CLIENT) + session = boto3.Session( + profile_name=properties.get("profile_name"), + region_name=properties.get("region_name"), + botocore_session=properties.get("botocore_session"), + aws_access_key_id=properties.get("aws_access_key_id"), + aws_secret_access_key=properties.get("aws_secret_access_key"), + aws_session_token=properties.get("aws_session_token"), + ) + self.dynamodb = session.client(DYNAMODB_CLIENT) self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT) self._ensure_catalog_table_exists_or_create() diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py index 917a5d2937..fb4eaaa575 100644 --- a/tests/catalog/test_dynamodb.py +++ b/tests/catalog/test_dynamodb.py @@ -14,11 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import List +from typing import Any, Dict, List import boto3 import pytest from moto import mock_dynamodb +from unittest import mock from pyiceberg.catalog import METADATA_LOCATION, TABLE_TYPE from pyiceberg.catalog.dynamodb import ( @@ -26,6 +27,7 @@ DYNAMODB_COL_IDENTIFIER, DYNAMODB_COL_NAMESPACE, DYNAMODB_TABLE_NAME_DEFAULT, + ACTIVE, DynamoDbCatalog, _add_property_prefix, ) @@ -47,12 +49,13 @@ def test_create_dynamodb_catalog_with_table_name(_dynamodb, _bucket_initialize: DynamoDbCatalog("test_ddb_catalog") response = _dynamodb.describe_table(TableName=DYNAMODB_TABLE_NAME_DEFAULT) assert response["Table"]["TableName"] == DYNAMODB_TABLE_NAME_DEFAULT + assert response["Table"]["TableStatus"] == ACTIVE custom_table_name = "custom_table_name" DynamoDbCatalog("test_ddb_catalog", **{"table-name": custom_table_name}) response = _dynamodb.describe_table(TableName=custom_table_name) assert response["Table"]["TableName"] == custom_table_name - + assert response["Table"]["TableStatus"] == ACTIVE @mock_dynamodb def test_create_table_with_database_location( @@ -506,3 +509,24 @@ def test_update_namespace_properties_overlap_update_removal(_bucket_initialize: test_catalog.update_namespace_properties(database_name, removals, updates) # should not modify the properties assert test_catalog.load_namespace_properties(database_name) == test_properties + +def test_passing_provided_profile() -> None: + catalog_name = "test_ddb_catalog" + session_props = { + "aws_access_key_id": "abc", + "aws_secret_access_key": "def", + "aws_session_token": "ghi", + "region_name": "eu-central-1", + "botocore_session": None, + "profile_name": None + } + props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} + props.update(session_props) + with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session: + mock_client = mock.Mock() + mock_session.return_value.client.return_value = mock_client + mock_client.describe_table.return_value = {'Table': {'TableStatus': 'ACTIVE'}} + test_catalog = DynamoDbCatalog(catalog_name, **props) + assert test_catalog.dynamodb is mock_client + mock_session.assert_called_with(**session_props) + assert test_catalog.dynamodb is mock_session().client()