apache · HonahX · Jul 12, 2024 · Mar 18, 2024 · Mar 18, 2024 · Jul 12, 2024
diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
@@ -288,6 +288,16 @@ catalog:
     region_name: <REGION_NAME>
 ```
 
+<!-- markdown-link-check-disable -->
+
+| Key               | Example                              | Description                                                                     |
+| ----------------- | ------------------------------------ | ------------------------------------------------------------------------------- |
+| glue.id           | 111111111111                         | Configure the 12-digit ID of the Glue Catalog                                   |
+| glue.skip-archive | true                                 | Configure whether to skip the archival of older table versions. Default is true |
+| glue.endpoint     | https://glue.us-east-1.amazonaws.com | Configure an alternative endpoint of the Glue service for GlueCatalog to access |
+
+<!-- markdown-link-check-enable -->
+
 ## DynamoDB Catalog
 
 If you want to use AWS DynamoDB as the catalog, you can use the last two ways to configure the pyiceberg and refer

diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
@@ -109,6 +109,10 @@
 GLUE_SKIP_ARCHIVE = "glue.skip-archive"
 GLUE_SKIP_ARCHIVE_DEFAULT = True
 
+# Configure an alternative endpoint of the Glue service for GlueCatalog to access.
+# This allows GlueCatalog to be used with any Glue-compatible metastore service that has a different endpoint.
+GLUE_CATALOG_ENDPOINT = "glue.endpoint"
+
 ICEBERG_FIELD_ID = "iceberg.field.id"
 ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional"
 ICEBERG_FIELD_CURRENT = "iceberg.field.current"
@@ -289,7 +293,7 @@ def __init__(self, name: str, **properties: Any):
             aws_secret_access_key=properties.get("aws_secret_access_key"),
             aws_session_token=properties.get("aws_session_token"),
         )
-        self.glue: GlueClient = session.client("glue")
+        self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT))
 
         if glue_catalog_id := properties.get(GLUE_ID):
             _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id)

diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py
@@ -25,7 +25,7 @@
 from botocore.exceptions import ClientError
 
 from pyiceberg.catalog import Catalog, MetastoreCatalog
-from pyiceberg.catalog.glue import GlueCatalog
+from pyiceberg.catalog.glue import GLUE_CATALOG_ENDPOINT, GlueCatalog
 from pyiceberg.exceptions import (
     NamespaceAlreadyExistsError,
     NamespaceNotEmptyError,
@@ -36,7 +36,7 @@
 from pyiceberg.io.pyarrow import _dataframe_to_data_files, schema_to_pyarrow
 from pyiceberg.schema import Schema
 from pyiceberg.types import IntegerType
-from tests.conftest import clean_up, get_bucket_name, get_s3_path
+from tests.conftest import clean_up, get_bucket_name, get_glue_endpoint, get_s3_path
 
 # The number of tables/databases used in list_table/namespace test
 LIST_TEST_NUMBER = 2
@@ -51,7 +51,9 @@ def fixture_glue_client() -> boto3.client:
 @pytest.fixture(name="test_catalog", scope="module")
 def fixture_test_catalog() -> Generator[Catalog, None, None]:
     """Configure the pre- and post-setting of aws integration test."""
-    test_catalog = GlueCatalog(CATALOG_NAME, warehouse=get_s3_path(get_bucket_name()))
+    test_catalog = GlueCatalog(
+        CATALOG_NAME, **{"warehouse": get_s3_path(get_bucket_name()), GLUE_CATALOG_ENDPOINT: get_glue_endpoint()}
+    )
     yield test_catalog
     clean_up(test_catalog)
 

diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py
@@ -862,3 +862,13 @@ def test_register_table_with_given_location(
     table = test_catalog.register_table(identifier, location)
     assert table.identifier == (catalog_name,) + identifier
     assert test_catalog.table_exists(identifier) is True
+
+
+@mock_aws
+def test_glue_endpoint_override(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None:
+    catalog_name = "glue"
+    test_endpoint = "https://test-endpoint"
+    test_catalog = GlueCatalog(
+        catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": test_endpoint}
+    )
+    assert test_catalog.glue.meta.endpoint_url == test_endpoint
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -2043,6 +2043,11 @@ def get_bucket_name() -> str:
     return bucket_name
 
 
+def get_glue_endpoint() -> Optional[str]:
+    """Set the optional environment variable AWS_TEST_GLUE_ENDPOINT for a glue endpoint to test."""
+    return os.getenv("AWS_TEST_GLUE_ENDPOINT")
+
+
 def get_s3_path(bucket_name: str, database_name: Optional[str] = None, table_name: Optional[str] = None) -> str:
     result_path = f"s3://{bucket_name}"
     if database_name is not None: