From 32024801e2878a5e876ccbace1bfe59d4387c635 Mon Sep 17 00:00:00 2001 From: mmalhotra Date: Sat, 9 Mar 2024 01:12:52 -0800 Subject: [PATCH] add retry and jmx means for the Thrift's delegation token generation --- .../thrift/ThriftHttpMetastoreFactory.java | 4 +-- .../ThriftMetastoreAuthenticationModule.java | 3 ++ .../thrift/ThriftMetastoreConfig.java | 4 +-- .../thrift/ThriftMetastoreStats.java | 8 +++++ .../TokenFetchingMetastoreClientFactory.java | 32 ++++++++++++++++--- .../trino/plugin/hive/util/RetryDriver.java | 7 ++-- .../thrift/TestThriftMetastoreConfig.java | 2 +- .../product/deltalake/TestDeltaLakeJmx.java | 4 ++- 8 files changed, 51 insertions(+), 13 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHttpMetastoreFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHttpMetastoreFactory.java index 9c7d891956a723..bfc8e8c7820f83 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHttpMetastoreFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHttpMetastoreFactory.java @@ -63,8 +63,8 @@ public ThriftMetastore createMetastore(Optional identity) fileSystemFactory, metastoreClientFactory, RetryDriver.DEFAULT_SCALE_FACTOR, - RetryDriver.DEFAULT_SLEEP_TIME, - RetryDriver.DEFAULT_SLEEP_TIME, + RetryDriver.DEFAULT_MIN_BACKOFF_DELAY, + RetryDriver.DEFAULT_MAX_BACKOFF_DELAY, RetryDriver.DEFAULT_MAX_RETRY_TIME, new Duration(10, MINUTES), RetryDriver.DEFAULT_MAX_ATTEMPTS, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreAuthenticationModule.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreAuthenticationModule.java index 9aeacf6d8c827c..066c6628e01f08 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreAuthenticationModule.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreAuthenticationModule.java @@ -27,6 +27,7 @@ import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; import static io.airlift.configuration.ConfigBinder.configBinder; import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreAuthenticationConfig.ThriftMetastoreAuthenticationType.KERBEROS; +import static org.weakref.jmx.guice.ExportBinder.newExporter; public class ThriftMetastoreAuthenticationModule extends AbstractConfigurationAwareModule @@ -36,6 +37,8 @@ protected void setup(Binder binder) { newOptionalBinder(binder, IdentityAwareMetastoreClientFactory.class) .setDefault().to(UgiBasedMetastoreClientFactory.class).in(SINGLETON); + newExporter(binder).export(IdentityAwareMetastoreClientFactory.class) + .as(generator -> generator.generatedNameOf(ThriftMetastoreStats.class)); newOptionalBinder(binder, HiveMetastoreAuthentication.class) .setDefault().to(NoHiveMetastoreAuthentication.class).in(SINGLETON); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreConfig.java index 807d9da084c25f..b4511e45c17b91 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreConfig.java @@ -39,8 +39,8 @@ public class ThriftMetastoreConfig private HostAndPort socksProxy; private int maxRetries = RetryDriver.DEFAULT_MAX_ATTEMPTS - 1; private double backoffScaleFactor = RetryDriver.DEFAULT_SCALE_FACTOR; - private Duration minBackoffDelay = RetryDriver.DEFAULT_SLEEP_TIME; - private Duration maxBackoffDelay = RetryDriver.DEFAULT_SLEEP_TIME; + private Duration minBackoffDelay = RetryDriver.DEFAULT_MIN_BACKOFF_DELAY; + private Duration maxBackoffDelay = RetryDriver.DEFAULT_MAX_BACKOFF_DELAY; private Duration maxRetryTime = RetryDriver.DEFAULT_MAX_RETRY_TIME; private boolean impersonationEnabled; private boolean useSparkTableStatisticsFallback = true; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreStats.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreStats.java index afc03b53ef7116..d9cb9e743992ba 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreStats.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreStats.java @@ -69,6 +69,7 @@ public class ThriftMetastoreStats private final ThriftMetastoreApiStats createFunction = new ThriftMetastoreApiStats(); private final ThriftMetastoreApiStats alterFunction = new ThriftMetastoreApiStats(); private final ThriftMetastoreApiStats dropFunction = new ThriftMetastoreApiStats(); + private final ThriftMetastoreApiStats getThriftDelegationToken = new ThriftMetastoreApiStats(); @Managed @Nested @@ -426,4 +427,11 @@ public ThriftMetastoreApiStats getDropFunction() { return dropFunction; } + + @Managed + @Nested + public ThriftMetastoreApiStats getThriftDelegationToken() + { + return getThriftDelegationToken; + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/TokenFetchingMetastoreClientFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/TokenFetchingMetastoreClientFactory.java index 75628e34cd8ae1..3c9cafbb339c70 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/TokenFetchingMetastoreClientFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/TokenFetchingMetastoreClientFactory.java @@ -17,6 +17,10 @@ import com.google.common.cache.CacheLoader; import com.google.common.util.concurrent.UncheckedExecutionException; import com.google.inject.Inject; +import dev.failsafe.Failsafe; +import dev.failsafe.FailsafeException; +import dev.failsafe.RetryPolicy; +import dev.failsafe.function.CheckedSupplier; import io.trino.cache.NonEvictableLoadingCache; import io.trino.plugin.base.security.UserNameProvider; import io.trino.plugin.hive.ForHiveMetastore; @@ -28,8 +32,10 @@ import java.util.Optional; import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.google.common.base.Throwables.throwIfUnchecked; import static io.trino.cache.SafeCaches.buildNonEvictableCache; import static io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR; +import static java.time.temporal.ChronoUnit.MILLIS; import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.MILLISECONDS; @@ -41,6 +47,8 @@ public class TokenFetchingMetastoreClientFactory private final boolean impersonationEnabled; private final NonEvictableLoadingCache delegationTokenCache; private final long refreshPeriod; + private final ThriftMetastoreStats stats = new ThriftMetastoreStats(); + private final RetryPolicy retryPolicy; @Inject public TokenFetchingMetastoreClientFactory( @@ -58,6 +66,12 @@ public TokenFetchingMetastoreClientFactory( .maximumSize(thriftConfig.getDelegationTokenCacheMaximumSize()), CacheLoader.from(this::loadDelegationToken)); this.refreshPeriod = Duration.ofMinutes(1).toNanos(); + retryPolicy = RetryPolicy.builder() + .withMaxDuration(thriftConfig.getMaxRetryTime().toJavaTime()) + .withMaxAttempts(thriftConfig.getMaxRetries() + 1) + .withBackoff(thriftConfig.getMinBackoffDelay().toMillis(), thriftConfig.getMaxBackoffDelay().toMillis(), MILLIS, thriftConfig.getBackoffScaleFactor()) + .abortOn(TException.class) + .build(); } private ThriftMetastoreClient createMetastoreClient() @@ -106,11 +120,21 @@ private DelegationToken getDelegationToken(String username) private DelegationToken loadDelegationToken(String username) { - try (ThriftMetastoreClient client = createMetastoreClient()) { - return new DelegationToken(System.nanoTime(), client.getDelegationToken(username)); + try { + // added retry and stats for the thrift delegation token + return (DelegationToken) Failsafe.with(retryPolicy).get((CheckedSupplier) () -> + stats.getThriftDelegationToken().wrap(() -> { + try (ThriftMetastoreClient client = createMetastoreClient()) { + return new DelegationToken(System.nanoTime(), client.getDelegationToken(username)); + } + } + ).call()); } - catch (TException e) { - throw new TrinoException(HIVE_METASTORE_ERROR, e); + catch (FailsafeException e) { + if (e.getCause() instanceof TException) { + throw new TrinoException(HIVE_METASTORE_ERROR, e.getCause()); + } + throw e; } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/RetryDriver.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/RetryDriver.java index 441e893c6c66ce..10f31de8482dc4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/RetryDriver.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/RetryDriver.java @@ -31,7 +31,8 @@ public class RetryDriver { private static final Logger log = Logger.get(RetryDriver.class); public static final int DEFAULT_MAX_ATTEMPTS = 10; - public static final Duration DEFAULT_SLEEP_TIME = new Duration(1, SECONDS); + public static final Duration DEFAULT_MIN_BACKOFF_DELAY = new Duration(1, SECONDS); + public static final Duration DEFAULT_MAX_BACKOFF_DELAY = new Duration(2, SECONDS); public static final Duration DEFAULT_MAX_RETRY_TIME = new Duration(30, SECONDS); public static final double DEFAULT_SCALE_FACTOR = 2.0; @@ -61,8 +62,8 @@ private RetryDriver( private RetryDriver() { this(DEFAULT_MAX_ATTEMPTS, - DEFAULT_SLEEP_TIME, - DEFAULT_SLEEP_TIME, + DEFAULT_MIN_BACKOFF_DELAY, + DEFAULT_MAX_BACKOFF_DELAY, DEFAULT_SCALE_FACTOR, DEFAULT_MAX_RETRY_TIME, ImmutableList.of()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreConfig.java index b204f613a44359..39048aff4f135d 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/TestThriftMetastoreConfig.java @@ -44,7 +44,7 @@ public void testDefaults() .setMaxRetries(9) .setBackoffScaleFactor(2.0) .setMinBackoffDelay(new Duration(1, SECONDS)) - .setMaxBackoffDelay(new Duration(1, SECONDS)) + .setMaxBackoffDelay(new Duration(2, SECONDS)) .setMaxRetryTime(new Duration(30, SECONDS)) .setTlsEnabled(false) .setKeystorePath(null) diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java index d2b2286ebe55f0..3afef5bcdaef89 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java @@ -50,6 +50,8 @@ public void testJmxTablesExposedByDeltaLakeConnectorBackedByThriftMetastore() row("io.trino.plugin.hive.metastore.thrift:name=delta,type=thrifthivemetastore"), row("io.trino.plugin.hive:catalog=delta,name=delta,type=fileformatdatasourcestats"), row("trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler"), - row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess")); + row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess"), + row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess"), + row("io.trino.plugin.hive.metastore.thrift:name=delta,type=thriftmetastorestats")); } }