From f7a277b28032feae6fb86aacdc17662b3fdd22d9 Mon Sep 17 00:00:00 2001
From: Marton Nagy
Date: Sat, 10 Jul 2021 21:40:58 +0200
Subject: [PATCH 01/21] Removing previous Async commit features

Previous async commit features had
- async commit configured by config-param
- special treatments to still force sync commit for certain threadpools
- special treatment to still force sync commit on transaction level for certain transactions.

This is a preparation step to clean the path for adding a new approach for async commit treatment:
- only session/connection level async configuration
- no transaction level special treatments
- only enable async commit for specific Connection pools (where it is needed / is safe)

See next commits

changelog_begin
changelog_end
---
 .../scala/platform/indexer/JdbcIndexer.scala  |  1 -
 .../store/appendonlydao/JdbcLedgerDao.scala   |  8 --------
 .../store/backend/StorageBackend.scala        |  1 -
 .../store/backend/h2/H2StorageBackend.scala   |  2 --
 .../backend/oracle/OracleStorageBackend.scala |  2 --
 .../postgresql/PostgresStorageBackend.scala   | 13 +------------
 .../store/dao/JdbcLedgerDaoBackend.scala      |  1 -
 .../ledger/sql/SqlLedgerSpecAppendOnly.scala  | 18 ------------------
 8 files changed, 1 insertion(+), 45 deletions(-)

diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala
index a9c8ffd5f807..906461336486 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala
@@ -135,7 +135,6 @@ object JdbcIndexer {
         servicesExecutionContext,
         metrics,
         lfValueTranslationCache,
-        jdbcAsyncCommitMode = config.asyncCommitMode,
         enricher = None,
         participantId = config.participantId,
       )

diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala
index 123058922f3a..1dca3a0db08d 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala
@@ -124,7 +124,6 @@ private class JdbcLedgerDao(
   )(implicit loggingContext: LoggingContext): Future[Unit] =
     dbDispatcher.executeSql(metrics.daml.index.db.initializeLedgerParameters) {
       implicit connection =>
-        storageBackend.enforceSynchronousCommit(connection)
         storageBackend.updateLedgerId(ledgerId.unwrap)(connection)
     }

@@ -132,7 +131,6 @@ private class JdbcLedgerDao(
       participantId: ParticipantId
   )(implicit loggingContext: LoggingContext): Future[Unit] =
     dbDispatcher.executeSql(metrics.daml.index.db.initializeParticipantId) {
       implicit connection =>
-        storageBackend.enforceSynchronousCommit(connection)
         storageBackend.updateParticipantId(participantId.unwrap)(connection)
     }

@@ -387,7 +385,6 @@ private class JdbcLedgerDao(
     logger.info("Storing initial state")
     dbDispatcher.executeSql(metrics.daml.index.db.storeInitialStateFromScenario) {
       implicit connection =>
-        storageBackend.enforceSynchronousCommit(connection)
         ledgerEntries.foreach { case (offset, entry) =>
           entry match {
             case tx: LedgerEntry.Transaction =>
@@ -769,7 +766,6 @@ private[platform] object JdbcLedgerDao {
       servicesExecutionContext: ExecutionContext,
       metrics: Metrics,
       lfValueTranslationCache: LfValueTranslationCache.Cache,
-      jdbcAsyncCommitMode: 
DbType.AsyncCommitMode, enricher: Option[ValueEnricher], participantId: Ref.ParticipantId, )(implicit loggingContext: LoggingContext): ResourceOwner[LedgerDao] = { @@ -785,8 +781,6 @@ private[platform] object JdbcLedgerDao { servicesExecutionContext, metrics, lfValueTranslationCache, - jdbcAsyncCommitMode = - if (dbType.supportsAsynchronousCommits) jdbcAsyncCommitMode else DbType.SynchronousCommit, enricher = enricher, participantId = participantId, compressionStrategy = CompressionStrategy.none(metrics), // not needed @@ -860,7 +854,6 @@ private[platform] object JdbcLedgerDao { metrics: Metrics, lfValueTranslationCache: LfValueTranslationCache.Cache, validatePartyAllocation: Boolean = false, - jdbcAsyncCommitMode: DbType.AsyncCommitMode = DbType.SynchronousCommit, enricher: Option[ValueEnricher], participantId: Ref.ParticipantId, compressionStrategy: CompressionStrategy, @@ -872,7 +865,6 @@ private[platform] object JdbcLedgerDao { connectionPoolSize, connectionTimeout, metrics, - jdbcAsyncCommitMode, ) dbType = DbType.jdbcType(jdbcUrl) storageBackend = StorageBackend.of(dbType) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala index d074a4b019e1..061964f1e9fb 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala @@ -46,7 +46,6 @@ trait StorageBackend[DB_BATCH] with ContractStorageBackend with EventStorageBackend { def reset(connection: Connection): Unit - def enforceSynchronousCommit(connection: Connection): Unit def duplicateKeyError: String // TODO: Avoid brittleness of error message checks } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala index f0cdf2c335d5..082812cfaa1e 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala @@ -45,8 +45,6 @@ private[backend] object H2StorageBackend () } - override def enforceSynchronousCommit(connection: Connection): Unit = () // Not supported - override def duplicateKeyError: String = "Unique index or primary key violation" val SQL_INSERT_COMMAND: String = diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index f92e2bacf0fa..674b857cd01b 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -42,8 +42,6 @@ private[backend] object OracleStorageBackend "truncate table party_entries cascade", ).map(SQL(_)).foreach(_.execute()(connection)) - override def enforceSynchronousCommit(connection: Connection): Unit = () // Not supported - override def duplicateKeyError: String = "unique constraint" val SQL_INSERT_COMMAND: String = diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala 
b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 8060fdf03122..43fd8e4830e0 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -74,18 +74,7 @@ private[backend] object PostgresStorageBackend () } - override def enforceSynchronousCommit(connnection: Connection): Unit = { - val statement = - connnection.prepareStatement("SET LOCAL synchronous_commit = 'on'") - try { - statement.execute() - () - } finally { - statement.close() - } - } - - override val duplicateKeyError: String = "duplicate key" + val duplicateKeyError: String = "duplicate key" override def commandCompletions( startExclusive: Offset, diff --git a/ledger/participant-integration-api/src/test/lib/scala/platform/store/dao/JdbcLedgerDaoBackend.scala b/ledger/participant-integration-api/src/test/lib/scala/platform/store/dao/JdbcLedgerDaoBackend.scala index 680c6840604e..ccabfa56c753 100644 --- a/ledger/participant-integration-api/src/test/lib/scala/platform/store/dao/JdbcLedgerDaoBackend.scala +++ b/ledger/participant-integration-api/src/test/lib/scala/platform/store/dao/JdbcLedgerDaoBackend.scala @@ -76,7 +76,6 @@ private[dao] trait JdbcLedgerDaoBackend extends AkkaBeforeAndAfterAll { servicesExecutionContext = executionContext, metrics = new Metrics(new MetricRegistry), lfValueTranslationCache = LfValueTranslationCache.Cache.none, - jdbcAsyncCommitMode = DbType.AsynchronousCommit, enricher = Some(new ValueEnricher(new Engine())), participantId = JdbcLedgerDaoBackend.TestParticipantIdRef, ) diff --git a/ledger/sandbox-classic/src/test/suite/scala/platform/sandbox/stores/ledger/sql/SqlLedgerSpecAppendOnly.scala b/ledger/sandbox-classic/src/test/suite/scala/platform/sandbox/stores/ledger/sql/SqlLedgerSpecAppendOnly.scala index 2511adc40350..19b042fa5c46 100644 --- a/ledger/sandbox-classic/src/test/suite/scala/platform/sandbox/stores/ledger/sql/SqlLedgerSpecAppendOnly.scala +++ b/ledger/sandbox-classic/src/test/suite/scala/platform/sandbox/stores/ledger/sql/SqlLedgerSpecAppendOnly.scala @@ -6,7 +6,6 @@ package com.daml.platform.sandbox.stores.ledger.sql import java.io.File import java.time.Instant -import ch.qos.logback.classic.Level import com.daml.api.util.TimeProvider import com.daml.bazeltools.BazelRunfiles.rlocation import com.daml.daml_lf_dev.DamlLf @@ -211,23 +210,6 @@ final class SqlLedgerSpecAppendOnly ledger.currentHealth() should be(Healthy) } } - - /** Workaround test for asserting that PostgreSQL asynchronous commits are disabled in - * [[com.daml.platform.store.dao.JdbcLedgerDao]] transactions when used from [[SqlLedger]]. - * - * NOTE: This is needed for ensuring durability guarantees of Daml-on-SQL. 
-   */
-  "does not use async commit when building JdbcLedgerDao" in {
-    for {
-      _ <- createSqlLedger(validatePartyAllocation = false)
-    } yield {
-      val hikariDataSourceLogs =
-        LogCollector.read[this.type]("com.daml.platform.store.appendonlydao.HikariConnection")
-      hikariDataSourceLogs should contain(
-        Level.INFO -> "Creating Hikari connections with synchronous commit ON"
-      )
-    }
-  }
 }

 private def createSqlLedger(validatePartyAllocation: Boolean): Future[Ledger] =

From 4116e825340b69c7305f8c5b2897e3a2b6661f20 Mon Sep 17 00:00:00 2001
From: Marton Nagy
Date: Sat, 10 Jul 2021 21:53:22 +0200
Subject: [PATCH 02/21] Add DataSourceStorageBackend

- to spawn DataSources in a controlled fashion; these will be needed in upcoming commits for the HikariCP
- DataSources can have Connection init hooks defined with the help of the InitHookDataSourceProxy (this is needed for HA implementation)
- added DataSourceConfig to capture the needed level of fine-tuning for DataSource creation

changelog_begin
changelog_end
---
 .../participant-integration-api/BUILD.bazel   |   6 +-
 .../main/scala/platform/store/DbType.scala    |   1 +
 .../store/backend/StorageBackend.scala        |  34 +++
 .../backend/common/CommonStorageBackend.scala |  10 +
 .../common/InitHookDataSourceProxy.scala      | 213 ++++++++++++++++++
 .../store/backend/h2/H2StorageBackend.scala   |  21 +-
 .../backend/oracle/OracleStorageBackend.scala |  15 +-
 .../postgresql/PostgresStorageBackend.scala   |  22 +-
 8 files changed, 316 insertions(+), 6 deletions(-)
 create mode 100644 ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala

diff --git a/ledger/participant-integration-api/BUILD.bazel b/ledger/participant-integration-api/BUILD.bazel
index e10916cb5204..cf6577c1166c 100644
--- a/ledger/participant-integration-api/BUILD.bazel
+++ b/ledger/participant-integration-api/BUILD.bazel
@@ -63,6 +63,9 @@ compile_deps = [
     "@maven//:io_opentelemetry_opentelemetry_context",
     "@maven//:org_slf4j_slf4j_api",
     # this oracle import is problematic for daml assistant build
+    "@maven//:com_h2database_h2",
+    "@maven//:org_postgresql_postgresql",
+    "@maven//:com_oracle_database_jdbc_ojdbc8",
 ]

 scala_compile_deps = [
@@ -79,9 +82,6 @@ scala_compile_deps = [

 runtime_deps = [
     "@maven//:ch_qos_logback_logback_classic",
-    "@maven//:com_h2database_h2",
-    "@maven//:org_postgresql_postgresql",
-    "@maven//:com_oracle_database_jdbc_ojdbc8",
 ]

 da_scala_library(
diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/DbType.scala b/ledger/participant-integration-api/src/main/scala/platform/store/DbType.scala
index 27c1c8985cf1..93bd1a2395d2 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/store/DbType.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/DbType.scala
@@ -51,6 +51,7 @@ private[platform] object DbType {
       sys.error(s"JDBC URL doesn't match any supported databases (h2, pg, oracle)")
   }

+  // TODO append-only: adapt AsyncCommit related configuration here
   sealed trait AsyncCommitMode {
     def setting: String
   }
diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala
index 061964f1e9fb..07dc24659a60 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala
@@ -14,6 +14,7 @@ import 
com.daml.ledger.offset.Offset
 import com.daml.ledger.participant.state.index.v2.PackageDetails
 import com.daml.lf.data.Ref
 import com.daml.lf.ledger.EventId
+import com.daml.logging.LoggingContext
 import com.daml.platform
 import com.daml.platform.store.DbType
 import com.daml.platform.store.appendonlydao.events.{ContractId, EventsTable, Key, Raw}
@@ -25,6 +26,7 @@ import com.daml.platform.store.backend.postgresql.PostgresStorageBackend
 import com.daml.platform.store.entries.{ConfigurationEntry, PackageLedgerEntry, PartyLedgerEntry}
 import com.daml.platform.store.interfaces.LedgerDaoContractsReader.KeyState
 import com.daml.scalautil.NeverEqualsOverride
+import javax.sql.DataSource

 import scala.util.Try

@@ -235,6 +237,38 @@ object EventStorageBackend {
   )
 }

+trait DataSourceStorageBackend {
+  def createDataSource(
+      jdbcUrl: String,
+      dataSourceConfig: DataSourceStorageBackend.DataSourceConfig =
+        DataSourceStorageBackend.DataSourceConfig(),
+      connectionInitHook: Option[Connection => Unit] = None,
+  )(implicit loggingContext: LoggingContext): DataSource
+}
+
+object DataSourceStorageBackend {
+  case class DataSourceConfig(
+      pgSynchronousCommit: Option[PgSynchronousCommitValue] = None
+  )
+
+  sealed abstract class PgSynchronousCommitValue(val pgSqlName: String)
+  object PgSynchronousCommitValue {
+    case object On extends PgSynchronousCommitValue("on")
+    case object Off extends PgSynchronousCommitValue("off")
+    case object RemoteWrite extends PgSynchronousCommitValue("remote_write")
+    case object RemoteApply extends PgSynchronousCommitValue("remote_apply")
+    case object Local extends PgSynchronousCommitValue("local")
+
+    def apply(s: String): PgSynchronousCommitValue = s.toLowerCase match {
+      case On.`pgSqlName` => On
+      case Off.`pgSqlName` => Off
+      case RemoteWrite.`pgSqlName` => RemoteWrite
+      case RemoteApply.`pgSqlName` => RemoteApply
+      case Local.`pgSqlName` => Local
+      case _ => throw new IllegalArgumentException(s"Unsupported synchronous_commit value: $s")
+    }
+  }
+}
+
 object StorageBackend {
   case class Params(ledgerEnd: Offset, eventSeqId: Long)

diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/CommonStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/CommonStorageBackend.scala
index fb60644b7906..8f0ee597dc72 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/CommonStorageBackend.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/CommonStorageBackend.scala
@@ -957,4 +957,14 @@ private[backend] trait CommonStorageBackend[DB_BATCH] extends StorageBackend[DB_
       and event_kind != 0
       ORDER BY event_sequential_id ASC"""
       .asVectorOf(rawTransactionEventParser)(connection)
+
+  protected def exe(statement: String): Connection => Unit = { connection =>
+    val stmnt = connection.createStatement()
+    try {
+      stmnt.execute(statement)
+      ()
+    } finally {
+      stmnt.close()
+    }
+  }
 }
diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala
new file mode 100644
index 000000000000..81ac987fec10
--- /dev/null
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala
@@ -0,0 +1,213 @@
+// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+package com.daml.platform.store.backend.common
+
+import java.io.PrintWriter
+import java.sql.{
+  Blob,
+  CallableStatement,
+  Clob,
+  Connection,
+  DatabaseMetaData,
+  NClob,
+  PreparedStatement,
+  SQLWarning,
+  SQLXML,
+  Savepoint,
+  Statement,
+  Struct,
+}
+import java.util.Properties
+import java.util.concurrent.Executor
+import java.util.logging.Logger
+import java.{sql, util}
+
+import com.daml.logging.{ContextualizedLogger, LoggingContext}
+import javax.sql.DataSource
+
+private[backend] object InitHookDataSourceProxy {
+  val logger: ContextualizedLogger = ContextualizedLogger.get(this.getClass)
+
+  def apply(
+      delegate: DataSource,
+      initHooks: List[Connection => Unit],
+  )(implicit loggingContext: LoggingContext): DataSource =
+    if (initHooks.isEmpty) delegate
+    else InitHookDataSourceProxy(delegate, c => initHooks.foreach(_(c)))
+}
+
+import com.daml.platform.store.backend.common.InitHookDataSourceProxy._
+
+private[backend] case class InitHookDataSourceProxy(
+    delegate: DataSource,
+    initHook: Connection => Unit,
+)(implicit loggingContext: LoggingContext)
+    extends DataSource {
+  override def getConnection: Connection = {
+    val connectionId =
+      List.fill(8)(scala.util.Random.nextPrintableChar()).mkString // TODO FIXME maybe not needed
+    logger.debug(s"$connectionId Creating new connection")
+    val connection = delegate.getConnection
+    try {
+      logger.debug(s"$connectionId Applying connection init hook")
+      initHook(connection)
+    } catch {
+      case t: Throwable =>
+        logger.info(s"$connectionId Init hook execution failed: ${t.getMessage}")
+        throw t
+    }
+    logger.info(s"$connectionId Init hook execution finished successfully, connection ready")
+    new LoggingConnectionProxy(connection, connectionId)
+  }
+
+  override def getConnection(s: String, s1: String): Connection = {
+    val connection = delegate.getConnection(s, s1)
+    initHook(connection)
+    connection
+  }
+
+  override def getLogWriter: PrintWriter = delegate.getLogWriter
+
+  override def setLogWriter(printWriter: PrintWriter): Unit = delegate.setLogWriter(printWriter)
+
+  override def setLoginTimeout(i: Int): Unit = delegate.setLoginTimeout(i)
+
+  override def getLoginTimeout: Int = delegate.getLoginTimeout
+
+  override def getParentLogger: Logger = delegate.getParentLogger
+
+  override def unwrap[T](aClass: Class[T]): T = delegate.unwrap(aClass)
+
+  override def isWrapperFor(aClass: Class[_]): Boolean = delegate.isWrapperFor(aClass)
+}
+
+// TODO consider removing this; it is only used for logging the closing of connections
+private[backend] class LoggingConnectionProxy(
+    delegate: Connection,
+    connectionId: String,
+)(implicit loggingContext: LoggingContext)
+    extends Connection {
+  override def createStatement(): Statement = delegate.createStatement()
+
+  override def prepareStatement(s: String): PreparedStatement = delegate.prepareStatement(s)
+
+  override def prepareCall(s: String): CallableStatement = delegate.prepareCall(s)
+
+  override def nativeSQL(s: String): String = delegate.nativeSQL(s)
+
+  override def setAutoCommit(b: Boolean): Unit = delegate.setAutoCommit(b)
+
+  override def getAutoCommit: Boolean = delegate.getAutoCommit
+
+  override def commit(): Unit = delegate.commit()
+
+  override def rollback(): Unit = delegate.rollback()
+
+  override def close(): Unit = {
+    logger.info(s"$connectionId Connection is closing")
+    delegate.close()
+    logger.info(s"$connectionId Connection is closed")
+  }
+
+  override def isClosed: Boolean = delegate.isClosed
+
+  override def getMetaData: 
DatabaseMetaData = delegate.getMetaData + + override def setReadOnly(b: Boolean): Unit = delegate.setReadOnly(b) + + override def isReadOnly: Boolean = delegate.isReadOnly + + override def setCatalog(s: String): Unit = delegate.setCatalog(s) + + override def getCatalog: String = delegate.getCatalog + + override def setTransactionIsolation(i: Int): Unit = delegate.setTransactionIsolation(i) + + override def getTransactionIsolation: Int = delegate.getTransactionIsolation + + override def getWarnings: SQLWarning = delegate.getWarnings + + override def clearWarnings(): Unit = delegate.clearWarnings() + + override def createStatement(i: Int, i1: Int): Statement = delegate.createStatement(i, i1) + + override def prepareStatement(s: String, i: Int, i1: Int): PreparedStatement = + delegate.prepareStatement(s, i, i1) + + override def prepareCall(s: String, i: Int, i1: Int): CallableStatement = + delegate.prepareCall(s, i, i1) + + override def getTypeMap: util.Map[String, Class[_]] = delegate.getTypeMap + + override def setTypeMap(map: util.Map[String, Class[_]]): Unit = delegate.setTypeMap(map) + + override def setHoldability(i: Int): Unit = delegate.setHoldability(i) + + override def getHoldability: Int = delegate.getHoldability + + override def setSavepoint(): Savepoint = delegate.setSavepoint() + + override def setSavepoint(s: String): Savepoint = delegate.setSavepoint(s) + + override def rollback(savepoint: Savepoint): Unit = delegate.rollback(savepoint) + + override def releaseSavepoint(savepoint: Savepoint): Unit = delegate.releaseSavepoint(savepoint) + + override def createStatement(i: Int, i1: Int, i2: Int): Statement = + delegate.createStatement(i, i1, i2) + + override def prepareStatement(s: String, i: Int, i1: Int, i2: Int): PreparedStatement = + delegate.prepareStatement(s, i, i1, i2) + + override def prepareCall(s: String, i: Int, i1: Int, i2: Int): CallableStatement = + delegate.prepareCall(s, i, i1, i2) + + override def prepareStatement(s: String, i: Int): PreparedStatement = + delegate.prepareStatement(s, i) + + override def prepareStatement(s: String, ints: Array[Int]): PreparedStatement = + delegate.prepareStatement(s, ints) + + override def prepareStatement(s: String, strings: Array[String]): PreparedStatement = + delegate.prepareStatement(s, strings) + + override def createClob(): Clob = delegate.createClob() + + override def createBlob(): Blob = delegate.createBlob() + + override def createNClob(): NClob = delegate.createNClob() + + override def createSQLXML(): SQLXML = delegate.createSQLXML() + + override def isValid(i: Int): Boolean = delegate.isValid(i) + + override def setClientInfo(s: String, s1: String): Unit = delegate.setClientInfo(s, s1) + + override def setClientInfo(properties: Properties): Unit = delegate.setClientInfo(properties) + + override def getClientInfo(s: String): String = delegate.getClientInfo(s) + + override def getClientInfo: Properties = delegate.getClientInfo + + override def createArrayOf(s: String, objects: Array[AnyRef]): sql.Array = + delegate.createArrayOf(s, objects) + + override def createStruct(s: String, objects: Array[AnyRef]): Struct = + delegate.createStruct(s, objects) + + override def setSchema(s: String): Unit = delegate.setSchema(s) + + override def getSchema: String = delegate.getSchema + + override def abort(executor: Executor): Unit = delegate.abort(executor) + + override def setNetworkTimeout(executor: Executor, i: Int): Unit = + delegate.setNetworkTimeout(executor, i) + + override def getNetworkTimeout: Int = 
delegate.getNetworkTimeout + + override def unwrap[T](aClass: Class[T]): T = delegate.unwrap(aClass) + + override def isWrapperFor(aClass: Class[_]): Boolean = delegate.isWrapperFor(aClass) +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala index 082812cfaa1e..3070d276abb8 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala @@ -11,6 +11,7 @@ import anorm.SqlParser.get import com.daml.ledger.api.v1.command_completion_service.CompletionStreamResponse import com.daml.ledger.offset.Offset import com.daml.lf.data.Ref +import com.daml.logging.LoggingContext import com.daml.platform.store.appendonlydao.events.{ContractId, Key} import com.daml.platform.store.backend.EventStorageBackend.FilterParams import com.daml.platform.store.backend.common.{ @@ -18,9 +19,16 @@ import com.daml.platform.store.backend.common.{ CommonStorageBackend, EventStorageBackendTemplate, EventStrategy, + InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{ + DataSourceStorageBackend, + DbDto, + StorageBackend, + common, +} +import javax.sql.DataSource private[backend] object H2StorageBackend extends StorageBackend[AppendOnlySchema.Batch] @@ -181,4 +189,15 @@ private[backend] object H2StorageBackend "false" else parties.view.map(p => s"array_contains($arrayColumn, '$p')").mkString("(", " or ", ")") + + override def createDataSource( + jdbcUrl: String, + dataSourceConfig: DataSourceStorageBackend.DataSourceConfig, + connectionInitHook: Option[Connection => Unit], + )(implicit loggingContext: LoggingContext): DataSource = { + val h2DataSource = new org.h2.jdbcx.JdbcDataSource() + h2DataSource.setUrl(jdbcUrl) + InitHookDataSourceProxy(h2DataSource, connectionInitHook.toList) + } + } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index 674b857cd01b..d603b6293bc1 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -13,15 +13,19 @@ import com.daml.platform.store.backend.common.{ CommonStorageBackend, EventStorageBackendTemplate, EventStrategy, + InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{DataSourceStorageBackend, DbDto, StorageBackend, common} import java.sql.Connection import java.time.Instant import com.daml.ledger.offset.Offset import com.daml.platform.store.backend.EventStorageBackend.FilterParams +import com.daml.logging.LoggingContext +import javax.sql.DataSource + private[backend] object OracleStorageBackend extends StorageBackend[AppendOnlySchema.Batch] with CommonStorageBackend[AppendOnlySchema.Batch] @@ -234,4 +238,13 @@ private[backend] object OracleStorageBackend .mkString(" OR ") + ")" } + override def createDataSource( + jdbcUrl: String, + dataSourceConfig: DataSourceStorageBackend.DataSourceConfig, + 
connectionInitHook: Option[Connection => Unit], + )(implicit loggingContext: LoggingContext): DataSource = { + val oracleDataSource = new oracle.jdbc.pool.OracleDataSource + oracleDataSource.setURL(jdbcUrl) + InitHookDataSourceProxy(oracleDataSource, connectionInitHook.toList) + } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 43fd8e4830e0..200e9757a78f 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -11,6 +11,7 @@ import anorm.SqlParser.get import com.daml.ledger.api.v1.command_completion_service.CompletionStreamResponse import com.daml.ledger.offset.Offset import com.daml.lf.data.Ref +import com.daml.logging.LoggingContext import com.daml.platform.store.appendonlydao.events.{ContractId, Key, Party} import com.daml.platform.store.backend.EventStorageBackend.FilterParams import com.daml.platform.store.backend.common.{ @@ -18,9 +19,12 @@ import com.daml.platform.store.backend.common.{ CommonStorageBackend, EventStorageBackendTemplate, EventStrategy, + InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{DataSourceStorageBackend, DbDto, StorageBackend, common} +import javax.sql.DataSource +import org.postgresql.ds.PGSimpleDataSource private[backend] object PostgresStorageBackend extends StorageBackend[AppendOnlySchema.Batch] @@ -190,4 +194,20 @@ private[backend] object PostgresStorageBackend // TODO append-only: remove as part of ContractStorageBackend consolidation private def arrayIntersectionWhereClause(arrayColumn: String, parties: Set[Ref.Party]): String = s"$arrayColumn::text[] && array[${format(parties)}]::text[]" + + override def createDataSource( + jdbcUrl: String, + dataSourceConfig: DataSourceStorageBackend.DataSourceConfig, + connectionInitHook: Option[Connection => Unit], + )(implicit loggingContext: LoggingContext): DataSource = { + val pgSimpleDataSource = new PGSimpleDataSource() + pgSimpleDataSource.setUrl(jdbcUrl) + val hookFunctions = List( + dataSourceConfig.pgSynchronousCommit.toList + .map(synchCommitValue => exe(s"SET synchronous_commit TO ${synchCommitValue.pgSqlName}")), + connectionInitHook.toList, + ).flatten + InitHookDataSourceProxy(pgSimpleDataSource, hookFunctions) + } + } From 3d684e7d6c24d5468033c63ecae0cad141b7020b Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Sat, 10 Jul 2021 21:59:30 +0200 Subject: [PATCH 03/21] Switches to DataSource wrapping in HikariCP instantiation changelog_begin changelog_end --- .../store/appendonlydao/DbDispatcher.scala | 16 +- .../HikariJdbcConnectionProvider.scala | 188 ++++++------------ .../store/appendonlydao/JdbcLedgerDao.scala | 43 +--- 3 files changed, 74 insertions(+), 173 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala index 89312d6e1134..53d5e7178782 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala +++ 
b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala @@ -13,16 +13,15 @@ import com.daml.logging.LoggingContext.withEnrichedLoggingContext import com.daml.logging.{ContextualizedLogger, LoggingContext} import com.daml.metrics.{DatabaseMetrics, Metrics} import com.daml.platform.configuration.ServerRole -import com.daml.platform.store.DbType import com.google.common.util.concurrent.ThreadFactoryBuilder +import javax.sql.DataSource import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.FiniteDuration import scala.util.control.NonFatal private[platform] final class DbDispatcher private ( - val maxConnections: Int, - connectionProvider: HikariJdbcConnectionProvider, + connectionProvider: JdbcConnectionProvider, executor: Executor, overallWaitTimer: Timer, overallExecutionTimer: Timer, @@ -83,22 +82,24 @@ private[platform] object DbDispatcher { private val logger = ContextualizedLogger.get(this.getClass) def owner( + dataSource: DataSource, serverRole: ServerRole, jdbcUrl: String, connectionPoolSize: Int, connectionTimeout: FiniteDuration, metrics: Metrics, - connectionAsyncCommitMode: DbType.AsyncCommitMode, )(implicit loggingContext: LoggingContext): ResourceOwner[DbDispatcher] = for { - connectionProvider <- HikariJdbcConnectionProvider.owner( + hikariConnectionPool <- new HikariDataSourceOwner( + dataSource, serverRole, jdbcUrl, connectionPoolSize, + connectionPoolSize, connectionTimeout, - metrics.registry, - connectionAsyncCommitMode, + Some(metrics.registry), ) + connectionProvider <- DataSourceConnectionProvider.owner(hikariConnectionPool) threadPoolName = s"daml.index.db.threadpool.connection.${serverRole.threadPoolSuffix}" executor <- ResourceOwner.forExecutorService(() => new InstrumentedExecutorService( @@ -116,7 +117,6 @@ private[platform] object DbDispatcher { ) ) } yield new DbDispatcher( - maxConnections = connectionPoolSize, connectionProvider = connectionProvider, executor = executor, overallWaitTimer = metrics.daml.index.db.waitAll, diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala index 6b3deff3d125..7e1564e7ec77 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala @@ -13,39 +13,33 @@ import com.daml.ledger.resources.{Resource, ResourceContext, ResourceOwner} import com.daml.logging.{ContextualizedLogger, LoggingContext} import com.daml.metrics.{DatabaseMetrics, Timed} import com.daml.platform.configuration.ServerRole -import com.daml.platform.store.DbType -import com.daml.platform.store.appendonlydao.HikariJdbcConnectionProvider._ import com.daml.timer.RetryStrategy import com.zaxxer.hikari.{HikariConfig, HikariDataSource} +import javax.sql.DataSource import scala.concurrent.Future import scala.concurrent.duration.{DurationInt, FiniteDuration} import scala.util.control.NonFatal -private[platform] final class HikariConnection( +private[platform] final class HikariDataSourceOwner( + dataSource: DataSource, serverRole: ServerRole, jdbcUrl: String, minimumIdle: Int, maxPoolSize: Int, connectionTimeout: FiniteDuration, metrics: Option[MetricRegistry], - connectionPoolPrefix: String, - maxInitialConnectRetryAttempts: Int, - 
connectionAsyncCommitMode: DbType.AsyncCommitMode, + connectionPoolPrefix: String = "daml.index.db.connection", + maxInitialConnectRetryAttempts: Int = 600, )(implicit loggingContext: LoggingContext) - extends ResourceOwner[HikariDataSource] { + extends ResourceOwner[DataSource] { private val logger = ContextualizedLogger.get(this.getClass) override def acquire()(implicit context: ResourceContext): Resource[HikariDataSource] = { val config = new HikariConfig - val dbType = DbType.jdbcType(jdbcUrl) - + config.setDataSource(dataSource) config.setJdbcUrl(jdbcUrl) - config.setDriverClassName(dbType.driver) - config.addDataSourceProperty("cachePrepStmts", "true") - config.addDataSourceProperty("prepStmtCacheSize", "128") - config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048") config.setAutoCommit(false) config.setMaximumPoolSize(maxPoolSize) config.setMinimumIdle(minimumIdle) @@ -53,8 +47,6 @@ private[platform] final class HikariConnection( config.setPoolName(s"$connectionPoolPrefix.${serverRole.threadPoolSuffix}") metrics.foreach(config.setMetricRegistry) - configureAsyncCommit(config, dbType) - // Hikari dies if a database connection could not be opened almost immediately // regardless of any connection timeout settings. We retry connections so that // Postgres and Sandbox can be started in any order. @@ -72,124 +64,66 @@ private[platform] final class HikariConnection( } )(conn => Future { conn.close() }) } - - private def configureAsyncCommit(config: HikariConfig, dbType: DbType): Unit = - if (dbType.supportsAsynchronousCommits) { - logger.info( - s"Creating Hikari connections with synchronous commit ${connectionAsyncCommitMode.setting}" - ) - config.setConnectionInitSql(s"SET synchronous_commit=${connectionAsyncCommitMode.setting}") - } else if (connectionAsyncCommitMode != DbType.SynchronousCommit) { - logger.warn( - s"Asynchronous commit setting ${connectionAsyncCommitMode.setting} is not compatible with ${dbType.name} database backend" - ) - } -} - -private[platform] object HikariConnection { - private val MaxInitialConnectRetryAttempts = 600 - private val ConnectionPoolPrefix: String = "daml.index.db.connection" - - def owner( - serverRole: ServerRole, - jdbcUrl: String, - minimumIdle: Int, - maxPoolSize: Int, - connectionTimeout: FiniteDuration, - metrics: Option[MetricRegistry], - connectionAsyncCommitMode: DbType.AsyncCommitMode, - )(implicit loggingContext: LoggingContext): HikariConnection = - new HikariConnection( - serverRole, - jdbcUrl, - minimumIdle, - maxPoolSize, - connectionTimeout, - metrics, - ConnectionPoolPrefix, - MaxInitialConnectRetryAttempts, - connectionAsyncCommitMode, - ) -} - -private[platform] class HikariJdbcConnectionProvider( - dataSource: HikariDataSource, - healthPoller: Timer, -) extends JdbcConnectionProvider { - private val transientFailureCount = new AtomicInteger(0) - - private val checkHealth = new TimerTask { - override def run(): Unit = { - try { - dataSource.getConnection().close() - transientFailureCount.set(0) - } catch { - case _: SQLTransientConnectionException => - val _ = transientFailureCount.incrementAndGet() - } - } - } - - healthPoller.schedule(checkHealth, 0, HealthPollingSchedule.toMillis) - - override def currentHealth(): HealthStatus = - if (transientFailureCount.get() < MaxTransientFailureCount) - Healthy - else - Unhealthy - - override def runSQL[T](databaseMetrics: DatabaseMetrics)(block: Connection => T): T = { - val conn = dataSource.getConnection() - conn.setAutoCommit(false) - try { - val res = Timed.value( - 
databaseMetrics.queryTimer, - block(conn), - ) - Timed.value( - databaseMetrics.commitTimer, - conn.commit(), - ) - res - } catch { - case e: SQLTransientConnectionException => - transientFailureCount.incrementAndGet() - throw e - case NonFatal(t) => - // Log the error in the caller with access to more logging context (such as the sql statement description) - conn.rollback() - throw t - } finally { - conn.close() - } - } } -private[platform] object HikariJdbcConnectionProvider { +object DataSourceConnectionProvider { private val MaxTransientFailureCount: Int = 5 private val HealthPollingSchedule: FiniteDuration = 1.second - def owner( - serverRole: ServerRole, - jdbcUrl: String, - maxConnections: Int, - connectionTimeout: FiniteDuration, - metrics: MetricRegistry, - connectionAsyncCommitMode: DbType.AsyncCommitMode = DbType.SynchronousCommit, - )(implicit loggingContext: LoggingContext): ResourceOwner[HikariJdbcConnectionProvider] = + def owner(dataSource: DataSource): ResourceOwner[JdbcConnectionProvider] = for { - // these connections should never time out as we have the same number of threads as connections - dataSource <- HikariConnection.owner( - serverRole, - jdbcUrl, - maxConnections, - maxConnections, - connectionTimeout, - Some(metrics), - connectionAsyncCommitMode, - ) healthPoller <- ResourceOwner.forTimer(() => - new Timer(s"${classOf[HikariJdbcConnectionProvider].getName}#healthPoller") + new Timer("DataSourceConnectionProvider#healthPoller") ) - } yield new HikariJdbcConnectionProvider(dataSource, healthPoller) + } yield { + val transientFailureCount = new AtomicInteger(0) + + val checkHealth = new TimerTask { + override def run(): Unit = { + try { + dataSource.getConnection().close() + transientFailureCount.set(0) + } catch { + case _: SQLTransientConnectionException => + val _ = transientFailureCount.incrementAndGet() + } + } + } + + healthPoller.schedule(checkHealth, 0, HealthPollingSchedule.toMillis) + + new JdbcConnectionProvider { + override def runSQL[T](databaseMetrics: DatabaseMetrics)(block: Connection => T): T = { + val conn = dataSource.getConnection() + conn.setAutoCommit(false) + try { + val res = Timed.value( + databaseMetrics.queryTimer, + block(conn), + ) + Timed.value( + databaseMetrics.commitTimer, + conn.commit(), + ) + res + } catch { + case e: SQLTransientConnectionException => + transientFailureCount.incrementAndGet() + throw e + case NonFatal(t) => + // Log the error in the caller with access to more logging context (such as the sql statement description) + conn.rollback() + throw t + } finally { + conn.close() + } + } + + override def currentHealth(): HealthStatus = + if (transientFailureCount.get() < MaxTransientFailureCount) + Healthy + else + Unhealthy + } + } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala index 1dca3a0db08d..8d30a4bf127b 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala @@ -857,17 +857,18 @@ private[platform] object JdbcLedgerDao { enricher: Option[ValueEnricher], participantId: Ref.ParticipantId, compressionStrategy: CompressionStrategy, - )(implicit loggingContext: LoggingContext): ResourceOwner[LedgerDao] = + )(implicit loggingContext: LoggingContext): ResourceOwner[LedgerDao] = { + val dbType = 
DbType.jdbcType(jdbcUrl) + val storageBackend = StorageBackend.of(dbType) for { dbDispatcher <- DbDispatcher.owner( + storageBackend.createDataSource(jdbcUrl), serverRole, jdbcUrl, connectionPoolSize, connectionTimeout, metrics, ) - dbType = DbType.jdbcType(jdbcUrl) - storageBackend = StorageBackend.of(dbType) } yield new JdbcLedgerDao( dbDispatcher, servicesExecutionContext, @@ -888,41 +889,7 @@ private[platform] object JdbcLedgerDao { participantId, storageBackend, ) - - // TODO H2 support -// object H2DatabaseQueries extends Queries { -// override protected[JdbcLedgerDao] val SQL_INSERT_COMMAND: String = -// """merge into participant_command_submissions pcs -// |using dual on deduplication_key = {deduplicationKey} -// |when not matched then -// | insert (deduplication_key, deduplicate_until) -// | values ({deduplicationKey}, {deduplicateUntil}) -// |when matched and pcs.deduplicate_until < {submittedAt} then -// | update set deduplicate_until={deduplicateUntil}""".stripMargin -// -// override protected[JdbcLedgerDao] val DUPLICATE_KEY_ERROR: String = -// "Unique index or primary key violation" -// -// override protected[JdbcLedgerDao] val SQL_TRUNCATE_TABLES: String = -// """set referential_integrity false; -// |truncate table configuration_entries; -// |truncate table package_entries; -// |truncate table parameters; -// |truncate table participant_command_completions; -// |truncate table participant_command_submissions; -// |truncate table participant_events; -// |truncate table participant_contracts; -// |truncate table participant_contract_witnesses; -// |truncate table parties; -// |truncate table party_entries; -// |set referential_integrity true; -// """.stripMargin -// -// /** H2 does not support asynchronous commits */ -// override protected[JdbcLedgerDao] def enforceSynchronousCommit(implicit -// conn: Connection -// ): Unit = () -// } + } val acceptType = "accept" val rejectType = "reject" From f9a6db5a5ee2727d4a178fb59330923ef14bc41c Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Sat, 10 Jul 2021 22:01:54 +0200 Subject: [PATCH 04/21] Adds DBLockStorageBackend - this is the abstraction and the implementation of database level locking - with support for Oracle and Postgres changelog_begin changelog_end --- .../store/backend/StorageBackend.scala | 29 ++++++++- .../store/backend/h2/H2StorageBackend.scala | 14 +++++ .../backend/oracle/OracleStorageBackend.scala | 60 ++++++++++++++++++- .../postgresql/PostgresStorageBackend.scala | 40 ++++++++++++- .../scala/testing/oracle/OracleAround.scala | 7 ++- 5 files changed, 146 insertions(+), 4 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala index 07dc24659a60..26802869dc7a 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala @@ -46,7 +46,9 @@ trait StorageBackend[DB_BATCH] with DeduplicationStorageBackend with CompletionStorageBackend with ContractStorageBackend - with EventStorageBackend { + with EventStorageBackend + with DataSourceStorageBackend + with DBLockStorageBackend { def reset(connection: Connection): Unit def duplicateKeyError: String // TODO: Avoid brittleness of error message checks } @@ -269,6 +271,31 @@ object DataSourceStorageBackend { } } +trait DBLockStorageBackend { + def aquireImmediately( + lockId: 
DBLockStorageBackend.LockId, + lockMode: DBLockStorageBackend.LockMode, + )(connection: Connection): Option[DBLockStorageBackend.Lock] + + def release(lock: DBLockStorageBackend.Lock)(connection: Connection): Boolean + + def lock(id: Int): DBLockStorageBackend.LockId + + def dbLockSupported: Boolean +} + +object DBLockStorageBackend { + case class Lock(lockId: LockId, lockMode: LockMode) + + trait LockId + + trait LockMode + object LockMode { + case object Exclusive extends LockMode + case object Shared extends LockMode + } +} + object StorageBackend { case class Params(ledgerEnd: Offset, eventSeqId: Long) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala index 3070d276abb8..656ee36ad08c 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala @@ -23,6 +23,7 @@ import com.daml.platform.store.backend.common.{ TemplatedStorageBackend, } import com.daml.platform.store.backend.{ + DBLockStorageBackend, DataSourceStorageBackend, DbDto, StorageBackend, @@ -200,4 +201,17 @@ private[backend] object H2StorageBackend InitHookDataSourceProxy(h2DataSource, connectionInitHook.toList) } + override def aquireImmediately( + lockId: DBLockStorageBackend.LockId, + lockMode: DBLockStorageBackend.LockMode, + )(connection: Connection): Option[DBLockStorageBackend.Lock] = + throw new UnsupportedOperationException("db level locks are not supported for H2") + + override def release(lock: DBLockStorageBackend.Lock)(connection: Connection): Boolean = + throw new UnsupportedOperationException("db level locks are not supported for H2") + + override def lock(id: Int): DBLockStorageBackend.LockId = + throw new UnsupportedOperationException("db level locks are not supported for H2") + + override def dbLockSupported: Boolean = false } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index d603b6293bc1..5f9cb7b26708 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -16,7 +16,7 @@ import com.daml.platform.store.backend.common.{ InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DataSourceStorageBackend, DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{DBLockStorageBackend, DataSourceStorageBackend, DbDto, StorageBackend, common} import java.sql.Connection import java.time.Instant @@ -247,4 +247,62 @@ private[backend] object OracleStorageBackend oracleDataSource.setURL(jdbcUrl) InitHookDataSourceProxy(oracleDataSource, connectionInitHook.toList) } + + override def aquireImmediately( + lockId: DBLockStorageBackend.LockId, + lockMode: DBLockStorageBackend.LockMode, + )(connection: Connection): Option[DBLockStorageBackend.Lock] = { + val oracleLockMode = lockMode match { + case DBLockStorageBackend.LockMode.Exclusive => "6" // "DBMS_LOCK.x_mode" + case DBLockStorageBackend.LockMode.Shared => "4" // "DBMS_LOCK.s_mode" + } + SQL""" + SELECT DBMS_LOCK.REQUEST( + id => 
${oracleIntLockId(lockId)},
+          lockmode => #$oracleLockMode,
+          timeout => 0
+        ) FROM DUAL"""
+      .as(get[Int](1).single)(connection) match {
+      case 0 => Some(DBLockStorageBackend.Lock(lockId, lockMode))
+      case 1 => None
+      case 2 => throw new Exception("Acquiring lock caused a deadlock!")
+      case 3 => throw new Exception("Parameter error")
+      case 4 => Some(DBLockStorageBackend.Lock(lockId, lockMode))
+      case 5 => throw new Exception("Illegal lock handle")
+      case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.REQUEST: $unknown")
+    }
+  }
+
+  override def release(lock: DBLockStorageBackend.Lock)(connection: Connection): Boolean = {
+    SQL"""
+        SELECT DBMS_LOCK.RELEASE(
+          id => ${oracleIntLockId(lock.lockId)}
+        ) FROM DUAL"""
+      .as(get[Int](1).single)(connection) match {
+      case 0 => true
+      case 3 => throw new Exception("Parameter error")
+      case 4 => throw new Exception("Trying to release not-owned lock")
+      case 5 => throw new Exception("Illegal lock handle")
+      case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.RELEASE: $unknown")
+    }
+  }
+
+  case class OracleLockId(id: Int) extends DBLockStorageBackend.LockId {
+    // respecting Oracle limitations: https://docs.oracle.com/cd/B19306_01/appdev.102/b14258/d_lock.htm#ARPLS021
+    assert(id >= 0)
+    assert(id <= 1073741823)
+  }
+
+  private def oracleIntLockId(lockId: DBLockStorageBackend.LockId): Int =
+    lockId match {
+      case OracleLockId(id) => id
+      case unknown =>
+        throw new Exception(
+          s"LockId $unknown not supported. Probable cause: LockId was created by a different StorageBackend"
+        )
+    }
+
+  override def lock(id: Int): DBLockStorageBackend.LockId = OracleLockId(id)
+
+  override def dbLockSupported: Boolean = true
 }
diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala
index 200e9757a78f..00ac91db62fc 100644
--- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala
+++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala
@@ -22,7 +22,7 @@ import com.daml.platform.store.backend.common.{
   InitHookDataSourceProxy,
   TemplatedStorageBackend,
 }
-import com.daml.platform.store.backend.{DataSourceStorageBackend, DbDto, StorageBackend, common}
+import com.daml.platform.store.backend.{DBLockStorageBackend, DataSourceStorageBackend, DbDto, StorageBackend, common}
 import javax.sql.DataSource
 import org.postgresql.ds.PGSimpleDataSource

@@ -210,4 +210,42 @@ private[backend] object PostgresStorageBackend
     InitHookDataSourceProxy(pgSimpleDataSource, hookFunctions)
   }

+  override def aquireImmediately(
+      lockId: DBLockStorageBackend.LockId,
+      lockMode: DBLockStorageBackend.LockMode,
+  )(connection: Connection): Option[DBLockStorageBackend.Lock] = {
+    val lockFunction = lockMode match {
+      case DBLockStorageBackend.LockMode.Exclusive => "pg_try_advisory_lock"
+      case DBLockStorageBackend.LockMode.Shared => "pg_try_advisory_lock_shared"
+    }
+    SQL"SELECT #$lockFunction(${pgBigintLockId(lockId)})"
+      .as(get[Boolean](1).single)(connection) match {
+      case true => Some(DBLockStorageBackend.Lock(lockId, lockMode))
+      case false => None
+    }
+  }
+
+  override def release(lock: DBLockStorageBackend.Lock)(connection: Connection): Boolean = {
+    val lockFunction = lock.lockMode match {
+      case DBLockStorageBackend.LockMode.Exclusive => "pg_advisory_unlock"
case DBLockStorageBackend.LockMode.Shared => "pg_advisory_unlock_shared"
+    }
+    SQL"SELECT #$lockFunction(${pgBigintLockId(lock.lockId)})"
+      .as(get[Boolean](1).single)(connection)
+  }
+
+  case class PGLockId(id: Long) extends DBLockStorageBackend.LockId
+
+  private def pgBigintLockId(lockId: DBLockStorageBackend.LockId): Long =
+    lockId match {
+      case PGLockId(id) => id
+      case unknown =>
+        throw new Exception(
+          s"LockId $unknown not supported. Probable cause: LockId was created by a different StorageBackend"
+        )
+    }
+
+  override def lock(id: Int): DBLockStorageBackend.LockId = PGLockId(id.toLong)
+
+  override def dbLockSupported: Boolean = true
 }
diff --git a/libs-scala/oracle-testing/src/main/scala/testing/oracle/OracleAround.scala b/libs-scala/oracle-testing/src/main/scala/testing/oracle/OracleAround.scala
index 1c3c5bebb716..6bd684fccc6c 100644
--- a/libs-scala/oracle-testing/src/main/scala/testing/oracle/OracleAround.scala
+++ b/libs-scala/oracle-testing/src/main/scala/testing/oracle/OracleAround.scala
@@ -40,7 +40,7 @@ trait OracleAround {
     val con = use(
       DriverManager.getConnection(
         s"jdbc:oracle:thin:@localhost:$port/ORCLPDB1",
-        systemUser,
+        "sys as sysdba", // TODO this is needed to be able to grant execute access on sys.dbms_lock below. Consider making this configurable
         systemPwd,
       )
     )
@@ -51,6 +51,11 @@ trait OracleAround {
         s"""grant create table, create materialized view, create view, create procedure, create sequence, create type to $name"""
       )
       stmt.execute(s"""alter user $name quota unlimited on users""")
+
+      // for DBMS_LOCK access
+      stmt.execute(s"""GRANT EXECUTE ON SYS.DBMS_LOCK TO $name""")
+      stmt.execute(s"""GRANT SELECT ON V_$$MYSTAT TO $name""")
+      stmt.execute(s"""GRANT SELECT ON V_$$LOCK TO $name""")
     }.get
     User(name, pwd)
   }

From 11301164f38d8a05d53d7c9172bca3f2d8f57a60 Mon Sep 17 00:00:00 2001
From: Marton Nagy
Date: Sat, 10 Jul 2021 22:03:37 +0200
Subject: [PATCH 05/21] Adds HaCoordinator and implementation

- this is the core implementation of the Participant HA feature

changelog_begin
changelog_end
---
 .../platform/indexer/ha/HaCoordinator.scala   | 167 +++++++++++++
 .../platform/indexer/ha/PollingChecker.scala  |  74 ++++++
 .../indexer/ha/PreemptableSequence.scala      | 233 ++++++++++++++++++
 3 files changed, 474 insertions(+)
 create mode 100644 ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala
 create mode 100644 ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala
 create mode 100644 ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala

diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala
new file mode 100644
index 000000000000..86affcdef836
--- /dev/null
+++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala
@@ -0,0 +1,167 @@
+// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+package com.daml.platform.indexer.ha
+
+import java.sql.Connection
+
+import akka.actor.Scheduler
+import akka.stream.KillSwitch
+import com.daml.logging.{ContextualizedLogger, LoggingContext}
+import com.daml.platform.store.backend.DBLockStorageBackend.{Lock, LockId, LockMode}
+import com.daml.platform.store.backend.DBLockStorageBackend
+import javax.sql.DataSource
+
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContext, Future}
+
+/** A handle of a running program
+  * @param completed will complete as the program completes
+  *                  - if no KillSwitch is used,
+  *                    - it will complete successfully if the program successfully ends
+  *                    - it will complete with a failure if the program fails
+  *                  - if the KillSwitch is aborted, this completes with the same Throwable
+  *                  - if the KillSwitch is shut down, this completes successfully
+  *                  Once this completes, the program has finished its execution and all resources have been released as well.
+  * @param killSwitch to signal abort or shutdown
+  */
+case class Handle(completed: Future[Unit], killSwitch: KillSwitch)
+
+/** This functionality signs off a worker Connection and clears it for further usage.
+  * This only needs to be done once at the beginning of the Connection life-cycle.
+  * On any error an exception will be thrown.
+  */
+trait SignConnection {
+  def sign(connection: Connection): Unit
+}
+
+/** To add High Availability related features to a program
+  */
+trait HaCoordinator {
+
+  /** Execute block in High Availability mode.
+    * Wraps around the Handle of the block.
+    *
+    * @param block HaCoordinator provides a SignConnection which needs to be used for all database connections doing work in the block.
+    *              Future[Handle] embodies the asynchronous initialisation of the block
+    *              (i.e. not the actual work; that asynchronous execution completes with the completed Future of the Handle).
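+    *
+    *              An illustrative sketch of a call site (haCoordinator and startWorkers are
+    *              hypothetical names, used for illustration only):
+    *              {{{
+    *              val handle = haCoordinator.protectedBlock { sign =>
+    *                startWorkers(sign) // Future[Handle]; sign every worker connection before use
+    *              }
+    *              handle.completed // completes once the protected program has finished
+    *              }}}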
That asynchronous execution completes with the completed Future of the Handle) + * @return the new Handle, which is available immediately to observe and interact with the complete program here + */ + def protectedBlock(block: SignConnection => Future[Handle]): Handle +} + +case class HaConfig( + mainLockAquireRetryMillis: Long = 500, + workerLockAquireRetryMillis: Long = 500, + workerLockAquireMaxRetry: Long = 1000, + mainLockCheckerPeriodMillis: Long = 1000, + indexerLockId: Int = 100, + indexerWorkerLockId: Int = 101, +) + +object HaCoordinator { + + private val logger = ContextualizedLogger.get(this.getClass) + + /** This implementation of the HaCoordinator + * - provides a database lock based isolation of the protected blocks + * - will run the block at-most once during the entire lifecycle + * - will wait infinitely to acquire the lock needed to start the protected block + * - provides a SignConnection function which is mandatory to execute on all worker connections inside of the block + * - will spawn a polling-daemon to observe continuous presence of the main lock + * + * @param dataSource to spawn the main connection which keeps the Indexer Lock + * @param storageBackend is the database-independent abstraction of session/connection level database locking + * @param executionContext which is use to execute initialisation, will do blocking/IO work, so dedicated execution context is recommended + */ + def databaseLockBasedHaCoordinator( + dataSource: DataSource, + storageBackend: DBLockStorageBackend, + executionContext: ExecutionContext, + scheduler: Scheduler, + haConfig: HaConfig, + )(implicit loggingContext: LoggingContext): HaCoordinator = { + implicit val ec: ExecutionContext = executionContext + + val indexerLockId = storageBackend.lock(haConfig.indexerLockId) + val indexerWorkerLockId = storageBackend.lock(haConfig.indexerWorkerLockId) + val preemptableSequence = PreemptableSequence(scheduler) + + asyncHandle => + def acquireLock(connection: Connection, lockId: LockId, lockMode: LockMode): Lock = { + logger.debug(s"Acquiring lock $lockId $lockMode") + storageBackend + .aquireImmediately(lockId, lockMode)(connection) + .getOrElse( + throw new Exception(s"Cannot acquire lock $lockId in lock-mode $lockMode: lock busy") + ) + } + + def acquireMainLock(connection: Connection): Unit = { + acquireLock(connection, indexerLockId, LockMode.Exclusive) + () + } + + preemptableSequence.executeSequence { sequenceHelper => + import sequenceHelper._ + logger.info("Starting databaseLockBasedHaCoordinator") + for { + mainConnection <- go[Connection](dataSource.getConnection) + _ = logger.info("Step 1: creating main-connection - DONE") + _ = registerRelease { + logger.info("Releasing main connection...") + mainConnection.close() + logger.info("Released main connection") + } + _ <- retry(haConfig.mainLockAquireRetryMillis)(acquireMainLock(mainConnection)) + _ = logger.info("Step 2: acquire exclusive Indexer Lock on main-connection - DONE") + exclusiveWorkerLock <- retry[Lock]( + haConfig.workerLockAquireRetryMillis, + haConfig.workerLockAquireMaxRetry, + )( + acquireLock(mainConnection, indexerWorkerLockId, LockMode.Exclusive) + ) + _ = logger.info("Step 3: acquire exclusive Indexer Worker Lock on main-connection - DONE") + _ <- go(storageBackend.release(exclusiveWorkerLock)(mainConnection)) + _ = logger.info("Step 4: release exclusive Indexer Worker Lock on main-connection - DONE") + mainLockChecker <- go[PollingChecker]( + new PollingChecker( + periodMillis = 
haConfig.mainLockCheckerPeriodMillis, + checkBlock = acquireMainLock(mainConnection), + killSwitch = + handle.killSwitch, // meaning: this PollingChecker will shut down the main preemptableSequence + ) + ) + _ = logger.info( + "Step 5: activate periodic checker of the exclusive Indexer Lock on the main connection - DONE" + ) + _ = registerRelease { + logger.info( + "Releasing periodic checker of the exclusive Indexer Lock on the main connection..." + ) + mainLockChecker.close() + logger.info( + "Released periodic checker of the exclusive Indexer Lock on the main connection" + ) + } + protectedHandle <- goF(asyncHandle(workerConnection => { + // this is the checking routine on connection creation + // step 1: acquire shared worker-lock + logger.info(s"Preparing worker connection. Step 1: acquire lock.") + acquireLock(workerConnection, indexerWorkerLockId, LockMode.Shared) + // step 2: check if main connection still holds the lock + logger.info(s"Preparing worker connection. Step 2: checking main lock.") + mainLockChecker.check() + logger.info(s"Preparing worker connection DONE.") + })) + _ = logger.info("Step 6: initialize protected block - DONE") + _ <- merge(protectedHandle) + } yield () + } + } +} + +object NoopHaCoordinator extends HaCoordinator { + override def protectedBlock(block: SignConnection => Future[Handle]): Handle = + Await.result(block(_ => ()), Duration.Inf) +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala new file mode 100644 index 000000000000..7a7e98de11e7 --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala @@ -0,0 +1,74 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.indexer.ha + +import java.util.{Timer, TimerTask} +import java.util.concurrent.atomic.AtomicBoolean + +import akka.stream.KillSwitch +import com.daml.logging.{ContextualizedLogger, LoggingContext} + +import scala.util.{Failure, Success, Try} + +class PollingChecker( + periodMillis: Long, + checkBlock: => Unit, + killSwitch: KillSwitch, +)(implicit loggingContext: LoggingContext) { + private val logger = ContextualizedLogger.get(this.getClass) + + private val timer = new Timer(true) + + private val lostMainConnectionEmulation = new AtomicBoolean(false) + + timer.scheduleAtFixedRate( + new TimerTask { + override def run(): Unit = { + Try(check()) + () + } + }, + periodMillis, + periodMillis, + ) + + // TODO uncomment this for main-connection-lost simulation + // timer.schedule( + // new TimerTask { + // override def run(): Unit = lostMainConnectionEmulation.set(true) + // }, + // 20000, + // ) + + // This is a cruel approach for ensuring single threaded usage of the mainConnection. + // In theory this could have been made much more efficient: not enqueueing for a check of it's own, + // but collecting requests, and replying in batches. + // Although experiments show approx 1s until a full connection pool is initialized at first + // (the peek scenario) which should be enough, and which can leave this code very simple. 
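+ // A hedged usage sketch, mirroring how HaCoordinator wires this checker
+ // earlier in this patch (periodMillis, checkBlock and killSwitch as above):
+ //
+ //   val mainLockChecker = new PollingChecker(
+ //     periodMillis = haConfig.mainLockCheckerPeriodMillis,
+ //     checkBlock = acquireMainLock(mainConnection),
+ //     killSwitch = handle.killSwitch,
+ //   )
+ //   mainLockChecker.check() // on-demand check while preparing a worker connection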
+ def check(): Unit = synchronized { + logger.debug(s"Checking...") + Try(checkBlock) match { + case Success(_) if !lostMainConnectionEmulation.get => + logger.debug(s"Check successful.") + + case Success(_) => + logger.info( + s"Check failed due to lost-main-connection simulation. KillSwitch/abort called." + ) + killSwitch.abort( + new Exception( + "Check failed due to lost-main-connection simulation. KillSwitch/abort called." + ) + ) + throw new Exception("Check failed due to lost-main-connection simulation.") + + case Failure(ex) => + logger.info(s"Check failed (${ex.getMessage}). KillSwitch/abort called.") + killSwitch.abort(new Exception("check failed, killSwitch aborted", ex)) + throw ex + } + } + + def close(): Unit = timer.cancel() +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala new file mode 100644 index 000000000000..37621d536695 --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala @@ -0,0 +1,233 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.indexer.ha + +import java.util.concurrent.atomic.AtomicReference + +import akka.actor.Scheduler +import akka.stream.KillSwitch +import com.daml.logging.{ContextualizedLogger, LoggingContext} + +import scala.concurrent.duration.FiniteDuration +import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.util.{Failure, Success} + +/** PreemptableSequence is a helper to + * - facilitate a Future sequence, which can be stopped or aborted + * - provide a Handle for the client + * - manage the state to implement the above + */ +trait PreemptableSequence { + + /** Execute the preemptable sequence + * + * @param sequence This Future sequence needs to be constructed with the help of the SequenceHelper functions. + * @return the Handle, to observe and to interact with the sequence. + * - The completion future will only complete as soon the sequence and all registered release functionality finished as well + * - The Handle is available immediately + */ + def executeSequence(sequence: SequenceHelper => Future[_]): Handle +} + +/** A collection of helper functions to compose a preemptable-sequence + */ +trait SequenceHelper { + + /** Register at any point in time a synchronous release function, + * which will be ensured to run before completion future of the handle completes. + * + * @param block the release lambda + */ + def registerRelease(block: => Unit): Unit + + /** Wrap a CBN (lazy) Future, so it is only started if the PreemptableSequence is not yet aborted/shut down. + * + * @param f The lazy Future block + * @return the wrapped future + */ + def goF[T](f: => Future[T]): Future[T] + + /** Wrap a CBN (lazy) synchronous function in a Future, which is only started if the PreemptableSequence is not yet aborted/shut down. 
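+ * A hedged illustration, assuming a SequenceHelper is in scope inside an
+ * executeSequence block and mainConnection is a previously acquired resource:
+ * {{{
+ *   go(mainConnection.close()) // lazy body: only started if not yet preempted
+ * }}}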
+ * + * @param t The lazy synchronous block + * @return the wrapped future + */ + def go[T](t: => T): Future[T] + + /** Wrap a synchronous block into a Future sequence, which + * - will be preemptable + * - will retry to execute a block if Exception-s thrown + * + * @return the preemptable, retrying Future sequence + */ + def retry[T](waitMillisBetweenRetries: Long, maxAmountOfRetries: Long = -1)( + block: => T + ): Future[T] + + /** Delegate the preemptable-future sequence to another Handle + * - the completion Future future of the PreemptableSequence will only finish after this Hanlde finishes, + * and previously registered release functions all completed + * - KillSwitch events will be replayed to this handle + * - In case of abort/shutdown the PreemptableSequence's completion result will conform to the KillSwitch usage, + * not to the completion of this handle (although it will wait for it naturally) + * + * @param handle The handle to delegate to + * @return the completion of the Handle + */ + def merge(handle: Handle): Future[Unit] + + /** The handle of the PreemprableSequence. This handle is available for sequence construction as well. + * @return the Handle + */ + def handle: Handle +} + +// these family of KillSwitch-es enable the behavior of recording the usage of the KillSwitch +// - Shutdown always wins: in scenarios like multiple abort and then a shutdown will always capture a shutdown, +// even if additional aborts arrive after the shutdown. This is needed so that graceful-shutdown can stop possible +// recovery scenarios. +// - Always the last abort wins. +trait UsedKillSwitch extends KillSwitch { + override def shutdown(): Unit = () + override def abort(ex: Throwable): Unit = () +} +case object ShutDownKillSwitch extends UsedKillSwitch +case class AbortedKillSwitch(ex: Throwable, _myReference: AtomicReference[KillSwitch]) + extends CaptureKillSwitch(_myReference) +class CaptureKillSwitch(myReference: AtomicReference[KillSwitch]) extends KillSwitch { + override def shutdown(): Unit = myReference.set(ShutDownKillSwitch) + override def abort(ex: Throwable): Unit = myReference.set(AbortedKillSwitch(ex, myReference)) +} + +object PreemptableSequence { + private val logger = ContextualizedLogger.get(this.getClass) + + /** @param executionContext this execution context will be used to: + * - execute future transformations + * - and encapsulate synchronous work in futures (this could be possibly blocking) + * Because of the possible blocking nature a dedicated pool is recommended. + */ + def apply(scheduler: Scheduler)(implicit + executionContext: ExecutionContext, + loggingContext: LoggingContext, + ): PreemptableSequence = { sequence => + val delegateKillSwitch = new AtomicReference[Option[KillSwitch]](None) + val resultCompleted = Promise[Unit]() + val mutableKillSwitch = new AtomicReference[KillSwitch]() + mutableKillSwitch.set(new CaptureKillSwitch(mutableKillSwitch)) + val resultKillSwitch = new KillSwitch { + override def shutdown(): Unit = { + logger.info("Shutdown called for PreemptableSequence!") + mutableKillSwitch.get().shutdown() + delegateKillSwitch.get().foreach { ks => + logger.info("Shutdown call delegated!") + ks.shutdown() + } + } + + override def abort(ex: Throwable): Unit = { + logger.info(s"Abort called for PreemptableSequence! (${ex.getMessage})") + mutableKillSwitch.get().abort(ex) + delegateKillSwitch.get().foreach { ks => + logger.info(s"Abort call delegated! 
(${ex.getMessage})") + ks.abort(ex) + } + } + } + val resultHandle = Handle(resultCompleted.future, resultKillSwitch) + var releaseStack: List[() => Future[Unit]] = Nil + + val helper: SequenceHelper = new SequenceHelper { + private def waitFor(delayMillis: Long): Future[Unit] = + goF(akka.pattern.after(FiniteDuration(delayMillis, "millis"), scheduler)(Future.unit)) + + override def registerRelease(block: => Unit): Unit = synchronized { + logger.info(s"Registered release function") + releaseStack = (() => Future(block)) :: releaseStack + } + + override def goF[T](f: => Future[T]): Future[T] = + mutableKillSwitch.get() match { + case _: UsedKillSwitch => + // Failing Future here means we interrupt the Future sequencing. + // The failure itself is not important, since the returning Handle-s completion-future-s result is overridden in case KillSwitch was used. + logger.info(s"KillSwitch already used, interrupting sequence!") + Future.failed(new Exception("UsedKillSwitch")) + + case _ => + f + } + + override def go[T](t: => T): Future[T] = goF[T](Future(t)) + + override def retry[T](waitMillisBetweenRetries: Long, maxAmountOfRetries: Long)( + block: => T + ): Future[T] = + go(block).transformWith { + // since we check countdown to 0, starting from negative means unlimited retries + case Failure(ex) if maxAmountOfRetries == 0 => + logger.info( + s"Maximum amount of retries reached (${maxAmountOfRetries}) failing permanently. (${ex.getMessage})" + ) + Future.failed(ex) + case Success(t) => Future.successful(t) + case Failure(ex) => + logger.debug(s"Retrying (retires left: ${if (maxAmountOfRetries < 0) "unlimited" + else maxAmountOfRetries - 1}). Due to: ${ex.getMessage}") + waitFor(waitMillisBetweenRetries).flatMap(_ => + // Note: this recursion is out of stack + retry(waitMillisBetweenRetries, maxAmountOfRetries - 1)(block) + ) + } + + override def merge(handle: Handle): Future[Unit] = { + logger.info(s"Delegating KillSwitch upon merge.") + delegateKillSwitch.set(Some(handle.killSwitch)) + // for safety reasons. 
if between creation of that killSwitch and delegation there was a usage, we replay that after delegation (worst case multiple calls) + mutableKillSwitch.get() match { + case ShutDownKillSwitch => + logger.info(s"Replying ShutDown after merge.") + handle.killSwitch.shutdown() + case AbortedKillSwitch(ex, _) => + logger.info(s"Replaying abort (${ex.getMessage}) after merge.") + handle.killSwitch.abort(ex) + case _ => () + } + val result = handle.completed + // not strictly needed for this use case, but in theory multiple preemptable stages are possible after each other + // this is needed to remove the delegation of the killSwitch after stage is complete + result.onComplete(_ => delegateKillSwitch.set(None)) + result + } + + override def handle: Handle = resultHandle + } + + def release: Future[Unit] = synchronized { + releaseStack match { + case Nil => Future.unit + case x :: xs => + releaseStack = xs + x().transformWith(_ => release) + } + } + + sequence(helper).transformWith(fResult => release.transform(_ => fResult)).onComplete { + case Success(_) => + mutableKillSwitch.get() match { + case ShutDownKillSwitch => resultCompleted.success(()) + case AbortedKillSwitch(ex, _) => resultCompleted.failure(ex) + case _ => resultCompleted.success(()) + } + case Failure(ex) => + mutableKillSwitch.get() match { + case ShutDownKillSwitch => resultCompleted.success(()) + case AbortedKillSwitch(ex, _) => resultCompleted.failure(ex) + case _ => resultCompleted.failure(ex) + } + } + + resultHandle + } +} From b0259c85b0e1c9955f6fe0706890f764755f73e0 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Sat, 10 Jul 2021 22:04:38 +0200 Subject: [PATCH 06/21] Wiring of HaCoordinator in parallel indexer changelog_begin changelog_end --- .../platform/indexer/IndexerConfig.scala | 3 + .../scala/platform/indexer/JdbcIndexer.scala | 17 +++ .../parallel/ParallelIndexerFactory.scala | 135 +++++++++++------- 3 files changed, 106 insertions(+), 49 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala index c005f8d42e2a..d06d2c82eebc 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala @@ -6,6 +6,7 @@ package com.daml.platform.indexer import com.daml.lf.data.Ref import com.daml.platform.configuration.IndexConfiguration import com.daml.platform.indexer.IndexerConfig._ +import com.daml.platform.indexer.ha.HaConfig import com.daml.platform.store.DbType import scala.concurrent.duration.{DurationInt, FiniteDuration} @@ -23,6 +24,7 @@ case class IndexerConfig( allowExistingSchema: Boolean = false, // TODO append-only: remove after removing support for the current (mutating) schema enableAppendOnlySchema: Boolean = false, + // TODO append-only: this is now configuring only the append-only pool asyncCommitMode: DbType.AsyncCommitMode = DefaultAsyncCommitMode, maxInputBufferSize: Int = DefaultMaxInputBufferSize, inputMappingParallelism: Int = DefaultInputMappingParallelism, @@ -32,6 +34,7 @@ case class IndexerConfig( tailingRateLimitPerSecond: Int = DefaultTailingRateLimitPerSecond, batchWithinMillis: Long = DefaultBatchWithinMillis, enableCompression: Boolean = DefaultEnableCompression, + haConfig: HaConfig = HaConfig(), ) object IndexerConfig { diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala 
b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala index 906461336486..bad905e5100b 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala @@ -21,7 +21,16 @@ import com.daml.platform.common import com.daml.platform.common.MismatchException import com.daml.platform.configuration.ServerRole import com.daml.platform.indexer.parallel.ParallelIndexerFactory +import com.daml.platform.store.DbType.{ + AsynchronousCommit, + LocalSynchronousCommit, + SynchronousCommit, +} import com.daml.platform.store.appendonlydao.events.{CompressionStrategy, LfValueTranslation} +import com.daml.platform.store.backend.DataSourceStorageBackend.{ + DataSourceConfig, + PgSynchronousCommitValue, +} import com.daml.platform.store.backend.StorageBackend import com.daml.platform.store.dao.LedgerDao import com.daml.platform.store.{DbType, FlywayMigrations, LfValueTranslationCache} @@ -171,6 +180,14 @@ object JdbcIndexer { tailingRateLimitPerSecond = config.tailingRateLimitPerSecond, batchWithinMillis = config.batchWithinMillis, metrics = metrics, + dataSourceConfig = DataSourceConfig( + pgSynchronousCommit = Some(config.asyncCommitMode match { + case SynchronousCommit => PgSynchronousCommitValue.On + case AsynchronousCommit => PgSynchronousCommitValue.Off + case LocalSynchronousCommit => PgSynchronousCommitValue.Local + }) + ), + haConfig = config.haConfig, ) } diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index c219cbbc68cf..f450ea5ea9a3 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -4,7 +4,7 @@ package com.daml.platform.indexer.parallel import java.sql.Connection -import java.util.concurrent.TimeUnit +import java.util.concurrent.{Executors, TimeUnit} import akka.NotUsed import akka.stream.scaladsl.{Keep, Sink, Source} @@ -18,22 +18,24 @@ import com.daml.logging.LoggingContext.{withEnrichedLoggingContext, withEnriched import com.daml.logging.{ContextualizedLogger, LoggingContext} import com.daml.metrics.{InstrumentedSource, Metrics} import com.daml.platform.configuration.ServerRole +import com.daml.platform.indexer.ha.HaConfig +import com.daml.platform.indexer.ha.{HaCoordinator, Handle, NoopHaCoordinator} import com.daml.platform.indexer.parallel.AsyncSupport._ import com.daml.platform.indexer.{IndexFeedHandle, Indexer} import com.daml.platform.store.appendonlydao.DbDispatcher import com.daml.platform.store.appendonlydao.events.{CompressionStrategy, LfValueTranslation} +import com.daml.platform.store.backend +import com.daml.platform.store.backend.DataSourceStorageBackend.DataSourceConfig import com.daml.platform.store.backend.{DbDto, StorageBackend} -import com.daml.platform.store.{DbType, backend} import com.daml.resources +import com.google.common.util.concurrent.ThreadFactoryBuilder -import scala.concurrent.Future import scala.concurrent.duration.FiniteDuration +import scala.concurrent.{ExecutionContext, Future, Promise} import scala.util.control.NonFatal object ParallelIndexerFactory { - private val keepAliveMaxIdleDuration = FiniteDuration(200, "millis") - private val logger = 
ContextualizedLogger.get(this.getClass) def apply[DB_BATCH]( @@ -51,6 +53,8 @@ object ParallelIndexerFactory { tailingRateLimitPerSecond: Int, batchWithinMillis: Long, metrics: Metrics, + dataSourceConfig: DataSourceConfig, + haConfig: HaConfig, )(implicit loggingContext: LoggingContext): ResourceOwner[Indexer] = { for { inputMapperExecutor <- asyncPool( @@ -63,26 +67,37 @@ object ParallelIndexerFactory { "batching-pool", Some(metrics.daml.parallelIndexer.batching.executor -> metrics.registry), ) - dbDispatcher <- DbDispatcher - .owner( - serverRole = ServerRole.Indexer, - jdbcUrl = jdbcUrl, - connectionPoolSize = ingestionParallelism + 1, // + 1 for the tailing ledger_end updates - connectionTimeout = FiniteDuration( - 250, - "millis", - ), // 250 millis is the lowest possible value for this Hikari configuration (see HikariConfig JavaDoc) - metrics = metrics, - connectionAsyncCommitMode = DbType.AsynchronousCommit, - ) toDbDto = backend.UpdateToDbDto( participantId = participantId, translation = translation, compressionStrategy = compressionStrategy, ) + haCoordinator <- + if (storageBackend.dbLockSupported) // TODO feature flag comes here + ResourceOwner + .forExecutorService(() => + ExecutionContext.fromExecutorService( + Executors.newFixedThreadPool( + 1, + new ThreadFactoryBuilder().setNameFormat(s"ha-coordinator-%d").build, + ) + ) + ) + .map( + HaCoordinator.databaseLockBasedHaCoordinator( + dataSource = storageBackend.createDataSource(jdbcUrl), + storageBackend = storageBackend, + _, + scheduler = mat.system.scheduler, + haConfig = haConfig, + ) + ) + else + ResourceOwner.successful(NoopHaCoordinator) } yield { - val ingest: Long => Source[(Offset, Update), NotUsed] => Source[Unit, NotUsed] = - initialSeqId => + val ingest + : (Long, DbDispatcher) => Source[(Offset, Update), NotUsed] => Source[Unit, NotUsed] = + (initialSeqId, dbDispatcher) => source => BatchingParallelIngestionPipe( submissionBatchSize = submissionBatchSize, @@ -108,31 +123,56 @@ object ParallelIndexerFactory { .map(_ -> System.nanoTime()) ) .map(_ => ()) - .keepAlive( - keepAliveMaxIdleDuration, - () => - if (dbDispatcher.currentHealth() == HealthStatus.healthy) { - logger.debug("Indexer keep-alive: database connectivity OK") - () - } else { - logger - .warn("Indexer keep-alive: database connectivity lost. Stopping indexing.") - throw new Exception( - "Connectivity issue to the index-database detected. Stopping indexing." 
- ) - }, - ) - def subscribe(readService: ReadService): Future[Source[Unit, NotUsed]] = - dbDispatcher - .executeSql(metrics.daml.parallelIndexer.initialization)(storageBackend.initialize) - .map(initialized => - ingest(initialized.lastEventSeqId.getOrElse(0L))( - readService.stateUpdates(beginAfter = initialized.lastOffset) + def subscribe(resourceContext: ResourceContext)(readService: ReadService): Handle = { + implicit val rc: ResourceContext = resourceContext + implicit val ec: ExecutionContext = resourceContext.executionContext + implicit val matImplicit: Materializer = mat + haCoordinator.protectedBlock { signConnection => + val killSwitchCaptor = Promise[KillSwitch]() + + val completionFuture = DbDispatcher + .owner( + dataSource = storageBackend.createDataSource( + jdbcUrl = jdbcUrl, + dataSourceConfig = dataSourceConfig, + connectionInitHook = Some(signConnection.sign), + ), + serverRole = ServerRole.Indexer, + jdbcUrl = jdbcUrl, + connectionPoolSize = + ingestionParallelism + 1, // + 1 for the tailing ledger_end updates + connectionTimeout = FiniteDuration( + 250, + "millis", + ), // 250 millis is the lowest possible value for this Hikari configuration (see HikariConfig JavaDoc) + metrics = metrics, ) - )(scala.concurrent.ExecutionContext.global) + .use { dbDispatcher => + dbDispatcher + .executeSql(metrics.daml.parallelIndexer.initialization)(storageBackend.initialize) + .flatMap { initialized => + val (killSwitch, completionFuture) = + ingest(initialized.lastEventSeqId.getOrElse(0L), dbDispatcher)( + readService.stateUpdates(beginAfter = initialized.lastOffset) + ) + .viaMat(KillSwitches.single)(Keep.right[NotUsed, UniqueKillSwitch]) + .toMat(Sink.ignore)(Keep.both) + .run() + // the tricky bit: + // the future in the completion handler will be this one + // but the future for signaling for the HaCoordinator, that the protected block is initialized needs to complete precisely here + killSwitchCaptor.success(killSwitch) + completionFuture + } + } + + killSwitchCaptor.future + .map(Handle(completionFuture.map(_ => ()), _)) + } + } - toIndexer(subscribe)(mat) + toIndexer(subscribe) } } @@ -298,20 +338,17 @@ object ParallelIndexerFactory { } def toIndexer( - ingestionPipeOn: ReadService => Future[Source[Unit, NotUsed]] - )(implicit mat: Materializer): Indexer = + ingestionPipeOn: ResourceContext => ReadService => Handle + ): Indexer = readService => new ResourceOwner[IndexFeedHandle] { override def acquire()(implicit context: ResourceContext ): resources.Resource[ResourceContext, IndexFeedHandle] = { Resource { - ingestionPipeOn(readService).map { pipe => - val (killSwitch, completionFuture) = pipe - .viaMat(KillSwitches.single)(Keep.right[NotUsed, UniqueKillSwitch]) - .toMat(Sink.ignore)(Keep.both) - .run() - new SubscriptionIndexFeedHandle(killSwitch, completionFuture.map(_ => ())) + Future { + val handle = ingestionPipeOn(context)(readService) + new SubscriptionIndexFeedHandle(handle.killSwitch, handle.completed) } } { handle => handle.killSwitch.shutdown() From ae4dabcec6df3ae6d59a2ee93fc1c66bc844710d Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Sat, 10 Jul 2021 22:05:12 +0200 Subject: [PATCH 07/21] Temporal log-level settings to see HA related logs changelog_begin changelog_end --- .../ledger/participant/state/kvutils/app/logback.base.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml 
b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml index 204db12d08ee..8cdd44878a0d 100644 --- a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml +++ b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml @@ -20,6 +20,10 @@ + + + + From 1b340c36b8eb8943cbca9b31142a8cd3cf2528fb Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 01:04:14 +0200 Subject: [PATCH 08/21] Changes as per review * Pulling out and fixing concurrency issue with KillSwitchCaptor * Fixing typos, doc * Renames * Fixes logging changelog_begin changelog_end --- .../platform/indexer/ha/HaCoordinator.scala | 48 ++++++----- .../indexer/ha/KillSwitchCaptor.scala | 67 +++++++++++++++ .../platform/indexer/ha/PollingChecker.scala | 34 +++++--- .../indexer/ha/PreemptableSequence.scala | 85 +++++++------------ .../parallel/ParallelIndexerFactory.scala | 6 +- .../store/backend/StorageBackend.scala | 2 +- .../store/backend/h2/H2StorageBackend.scala | 2 +- .../backend/oracle/OracleStorageBackend.scala | 14 +-- .../postgresql/PostgresStorageBackend.scala | 2 +- .../state/kvutils/app/logback.base.xml | 2 +- 10 files changed, 163 insertions(+), 99 deletions(-) create mode 100644 ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala index 86affcdef836..c99d876e2f8b 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala @@ -16,38 +16,42 @@ import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} /** A handle of a running program - * @param completed will complete as the program completes + * @param completed will complete right after the program completed * - if no KillSwitch used, * - it will complete successfully as program successfully ends * - it will complete with a failure as program failed * - if KillSwitch aborted, this completes with the same Throwable * - if KillSwitch shut down, this completes successfully - * As this completes, the program finished it's execution, also all resources released as well. + * After signalling completion, the program finished it's execution and has released all resources it acquired. * @param killSwitch to signal abortion and shutdown */ case class Handle(completed: Future[Unit], killSwitch: KillSwitch) -/** This functionality sign off a worker Connection, anc clears it for further usage. +/** This functionality initializes a worker Connection, and clears it for further usage. * This only need to be done once at the beginning of the Connection life-cycle * On any error an exception will be thrown */ -trait SignConnection { - def sign(connection: Connection): Unit +trait ConnectionInitializer { + def initialize(connection: Connection): Unit } -/** To add High Availability related features to a program +/** To add High Availability related features to a program, which intends to use database-connections to do it's work. 
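+ * A hedged usage sketch (protectedExecution and ConnectionInitializer as
+ * introduced below; doWork is illustrative only):
+ * {{{
+ *   val handle = haCoordinator.protectedExecution { connectionInitializer =>
+ *     // run connectionInitializer.initialize on every worker connection,
+ *     // then start the real work, exposed as a Handle
+ *     Future(doWork(connectionInitializer))
+ *   }
+ * }}}
+ *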
+ * Features include: + * - Safety: mutual exclusion of these programs ensured by DB locking mechanisms + * - Availability: release of the exclusion is detected by idle programs, which start competing for the lock to do + * their work. */ trait HaCoordinator { - /** Execute block in High Availability mode. - * Wraps around the Handle of the block. + /** Execute in High Availability mode. + * Wraps around the Handle of the execution. * - * @param block HaCoordinator provides a SignConnection which need to be used for all database connections to do work in the block - * Future[Handle] embodies asynchronous initialisation of the block - * (e.g. not the actual work. That asynchronous execution completes with the completed Future of the Handle) + * @param initializeExecution HaCoordinator provides a ConnectionInitializer which need to be used for all database connections during execution + * Future[Handle] embodies asynchronous initialisation of the execution + * (e.g. not the actual work. That asynchronous execution completes with the completed Future of the Handle) * @return the new Handle, which is available immediately to observe and interact with the complete program here */ - def protectedBlock(block: SignConnection => Future[Handle]): Handle + def protectedExecution(initializeExecution: ConnectionInitializer => Future[Handle]): Handle } case class HaConfig( @@ -64,10 +68,10 @@ object HaCoordinator { private val logger = ContextualizedLogger.get(this.getClass) /** This implementation of the HaCoordinator - * - provides a database lock based isolation of the protected blocks - * - will run the block at-most once during the entire lifecycle - * - will wait infinitely to acquire the lock needed to start the protected block - * - provides a SignConnection function which is mandatory to execute on all worker connections inside of the block + * - provides a database lock based isolation of the protected executions + * - will run the execution at-most once during the entire lifecycle + * - will wait infinitely to acquire the lock needed to start the protected execution + * - provides a ConnectionInitializer function which is mandatory to execute on all worker connections during execution * - will spawn a polling-daemon to observe continuous presence of the main lock * * @param dataSource to spawn the main connection which keeps the Indexer Lock @@ -91,7 +95,7 @@ object HaCoordinator { def acquireLock(connection: Connection, lockId: LockId, lockMode: LockMode): Lock = { logger.debug(s"Acquiring lock $lockId $lockMode") storageBackend - .aquireImmediately(lockId, lockMode)(connection) + .tryAcquire(lockId, lockMode)(connection) .getOrElse( throw new Exception(s"Cannot acquire lock $lockId in lock-mode $lockMode: lock busy") ) @@ -127,7 +131,7 @@ object HaCoordinator { mainLockChecker <- go[PollingChecker]( new PollingChecker( periodMillis = haConfig.mainLockCheckerPeriodMillis, - checkBlock = acquireMainLock(mainConnection), + check = acquireMainLock(mainConnection), killSwitch = handle.killSwitch, // meaning: this PollingChecker will shut down the main preemptableSequence ) @@ -154,7 +158,7 @@ object HaCoordinator { mainLockChecker.check() logger.info(s"Preparing worker connection DONE.") })) - _ = logger.info("Step 6: initialize protected block - DONE") + _ = logger.info("Step 6: initialize protected execution - DONE") _ <- merge(protectedHandle) } yield () } @@ -162,6 +166,8 @@ object HaCoordinator { } object NoopHaCoordinator extends HaCoordinator { - override def protectedBlock(block: 
SignConnection => Future[Handle]): Handle = - Await.result(block(_ => ()), Duration.Inf) + override def protectedExecution( + initializeExecution: ConnectionInitializer => Future[Handle] + ): Handle = + Await.result(initializeExecution(_ => ()), Duration.Inf) } diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala new file mode 100644 index 000000000000..a2133d53f71b --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala @@ -0,0 +1,67 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.indexer.ha + +import java.util.concurrent.atomic.AtomicReference + +import akka.stream.KillSwitch +import com.daml.logging.{ContextualizedLogger, LoggingContext} + +/** A KillSwitch which captures it's usage + * - Shutdown always wins + * - From aborts, the last abort wins + * - With setDelegate() we can set a delegate KillSwitch, which to usage will be replayed + * - Captured state is available with state + */ +class KillSwitchCaptor(implicit loggingContext: LoggingContext) extends KillSwitch { + import KillSwitchCaptor._ + import State._ + + private val logger = ContextualizedLogger.get(this.getClass) + + private val _state = new AtomicReference[State](Unused) + private val _delegate = new AtomicReference[Option[KillSwitch]](None) + + private def updateState(newState: Used): Unit = { + _state.getAndAccumulate( + newState, + { + case (Shutdown, _) => Shutdown + case (_, used) => used + }, + ) + () + } + + override def shutdown(): Unit = { + logger.info("Shutdown called!") + updateState(Shutdown) + _delegate.get.foreach { ks => + logger.info("Shutdown call delegated!") + ks.shutdown() + } + } + + override def abort(ex: Throwable): Unit = { + logger.info(s"Abort called! (${ex.getMessage})") + updateState(Aborted(ex)) + _delegate.get.foreach { ks => + logger.info(s"Abort call delegated! (${ex.getMessage})") + ks.abort(ex) + } + } + + def state: State = _state.get() + def setDelegate(delegate: Option[KillSwitch]): Unit = _delegate.set(delegate) +} + +object KillSwitchCaptor { + sealed trait State + object State { + case object Unused extends State + sealed trait Used extends State + case object Shutdown extends Used + final case class Aborted(t: Throwable) extends Used + } +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala index 7a7e98de11e7..df87a7958819 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala @@ -11,18 +11,30 @@ import com.daml.logging.{ContextualizedLogger, LoggingContext} import scala.util.{Failure, Success, Try} +/** A simple host of checking. 
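+ * It wraps a caller-supplied check and runs it both periodically and on demand: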
+ * - This will ensure that check is accessed by only one caller at a time + * - Does periodic checking + * - Exposes check() for on-demand checking from the outside + * - If whatever check() fails, it uses killSwitch with an abort + * - It is also an AutoCloseable to release internal resources + * + * @param periodMillis period of the checking, between each scheduled checks there will be so much delay + * @param check the check function, Exception signals failed check + * @param killSwitch to abort if a check fails + */ class PollingChecker( periodMillis: Long, - checkBlock: => Unit, + check: => Unit, killSwitch: KillSwitch, -)(implicit loggingContext: LoggingContext) { +)(implicit loggingContext: LoggingContext) + extends AutoCloseable { private val logger = ContextualizedLogger.get(this.getClass) private val timer = new Timer(true) private val lostMainConnectionEmulation = new AtomicBoolean(false) - timer.scheduleAtFixedRate( + timer.schedule( new TimerTask { override def run(): Unit = { Try(check()) @@ -34,21 +46,21 @@ class PollingChecker( ) // TODO uncomment this for main-connection-lost simulation - // timer.schedule( - // new TimerTask { - // override def run(): Unit = lostMainConnectionEmulation.set(true) - // }, - // 20000, - // ) + timer.schedule( + new TimerTask { + override def run(): Unit = lostMainConnectionEmulation.set(true) + }, + 20000, + ) // This is a cruel approach for ensuring single threaded usage of the mainConnection. // In theory this could have been made much more efficient: not enqueueing for a check of it's own, // but collecting requests, and replying in batches. // Although experiments show approx 1s until a full connection pool is initialized at first - // (the peek scenario) which should be enough, and which can leave this code very simple. + // (the peak scenario) which should be enough, and which can leave this code very simple. def check(): Unit = synchronized { logger.debug(s"Checking...") - Try(checkBlock) match { + Try(check) match { case Success(_) if !lostMainConnectionEmulation.get => logger.debug(s"Check successful.") diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala index 37621d536695..855fc1f7a359 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala @@ -14,7 +14,7 @@ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.util.{Failure, Success} /** PreemptableSequence is a helper to - * - facilitate a Future sequence, which can be stopped or aborted + * - facilitate the execution of a sequence of Futures, which can be stopped or aborted * - provide a Handle for the client * - manage the state to implement the above */ @@ -35,34 +35,34 @@ trait PreemptableSequence { trait SequenceHelper { /** Register at any point in time a synchronous release function, - * which will be ensured to run before completion future of the handle completes. + * which will be ensured to run before the completion future of the handle completes. * - * @param block the release lambda + * @param release the release lambda */ - def registerRelease(block: => Unit): Unit + def registerRelease(release: => Unit): Unit /** Wrap a CBN (lazy) Future, so it is only started if the PreemptableSequence is not yet aborted/shut down. 
* - * @param f The lazy Future block + * @param f The lazy Future * @return the wrapped future */ def goF[T](f: => Future[T]): Future[T] /** Wrap a CBN (lazy) synchronous function in a Future, which is only started if the PreemptableSequence is not yet aborted/shut down. * - * @param t The lazy synchronous block + * @param body The lazy synchronous body * @return the wrapped future */ - def go[T](t: => T): Future[T] + def go[T](body: => T): Future[T] - /** Wrap a synchronous block into a Future sequence, which + /** Wrap a synchronous call into a Future sequence, which * - will be preemptable - * - will retry to execute a block if Exception-s thrown + * - will retry to execute the body if Exception-s thrown * * @return the preemptable, retrying Future sequence */ def retry[T](waitMillisBetweenRetries: Long, maxAmountOfRetries: Long = -1)( - block: => T + body: => T ): Future[T] /** Delegate the preemptable-future sequence to another Handle @@ -77,7 +77,7 @@ trait SequenceHelper { */ def merge(handle: Handle): Future[Unit] - /** The handle of the PreemprableSequence. This handle is available for sequence construction as well. + /** The handle of the PreemptableSequence. This handle is available for sequence construction as well. * @return the Handle */ def handle: Handle @@ -112,44 +112,23 @@ object PreemptableSequence { executionContext: ExecutionContext, loggingContext: LoggingContext, ): PreemptableSequence = { sequence => - val delegateKillSwitch = new AtomicReference[Option[KillSwitch]](None) val resultCompleted = Promise[Unit]() - val mutableKillSwitch = new AtomicReference[KillSwitch]() - mutableKillSwitch.set(new CaptureKillSwitch(mutableKillSwitch)) - val resultKillSwitch = new KillSwitch { - override def shutdown(): Unit = { - logger.info("Shutdown called for PreemptableSequence!") - mutableKillSwitch.get().shutdown() - delegateKillSwitch.get().foreach { ks => - logger.info("Shutdown call delegated!") - ks.shutdown() - } - } - - override def abort(ex: Throwable): Unit = { - logger.info(s"Abort called for PreemptableSequence! (${ex.getMessage})") - mutableKillSwitch.get().abort(ex) - delegateKillSwitch.get().foreach { ks => - logger.info(s"Abort call delegated! (${ex.getMessage})") - ks.abort(ex) - } - } - } - val resultHandle = Handle(resultCompleted.future, resultKillSwitch) + val killSwitchCaptor = new KillSwitchCaptor + val resultHandle = Handle(resultCompleted.future, killSwitchCaptor) var releaseStack: List[() => Future[Unit]] = Nil val helper: SequenceHelper = new SequenceHelper { private def waitFor(delayMillis: Long): Future[Unit] = goF(akka.pattern.after(FiniteDuration(delayMillis, "millis"), scheduler)(Future.unit)) - override def registerRelease(block: => Unit): Unit = synchronized { + override def registerRelease(release: => Unit): Unit = synchronized { logger.info(s"Registered release function") - releaseStack = (() => Future(block)) :: releaseStack + releaseStack = (() => Future(release)) :: releaseStack } override def goF[T](f: => Future[T]): Future[T] = - mutableKillSwitch.get() match { - case _: UsedKillSwitch => + killSwitchCaptor.state match { + case _: KillSwitchCaptor.State.Used => // Failing Future here means we interrupt the Future sequencing. // The failure itself is not important, since the returning Handle-s completion-future-s result is overridden in case KillSwitch was used. 
logger.info(s"KillSwitch already used, interrupting sequence!") @@ -159,12 +138,12 @@ object PreemptableSequence { f } - override def go[T](t: => T): Future[T] = goF[T](Future(t)) + override def go[T](body: => T): Future[T] = goF[T](Future(body)) override def retry[T](waitMillisBetweenRetries: Long, maxAmountOfRetries: Long)( - block: => T + body: => T ): Future[T] = - go(block).transformWith { + go(body).transformWith { // since we check countdown to 0, starting from negative means unlimited retries case Failure(ex) if maxAmountOfRetries == 0 => logger.info( @@ -177,19 +156,19 @@ object PreemptableSequence { else maxAmountOfRetries - 1}). Due to: ${ex.getMessage}") waitFor(waitMillisBetweenRetries).flatMap(_ => // Note: this recursion is out of stack - retry(waitMillisBetweenRetries, maxAmountOfRetries - 1)(block) + retry(waitMillisBetweenRetries, maxAmountOfRetries - 1)(body) ) } override def merge(handle: Handle): Future[Unit] = { logger.info(s"Delegating KillSwitch upon merge.") - delegateKillSwitch.set(Some(handle.killSwitch)) + killSwitchCaptor.setDelegate(Some(handle.killSwitch)) // for safety reasons. if between creation of that killSwitch and delegation there was a usage, we replay that after delegation (worst case multiple calls) - mutableKillSwitch.get() match { - case ShutDownKillSwitch => + killSwitchCaptor.state match { + case KillSwitchCaptor.State.Shutdown => logger.info(s"Replying ShutDown after merge.") handle.killSwitch.shutdown() - case AbortedKillSwitch(ex, _) => + case KillSwitchCaptor.State.Aborted(ex) => logger.info(s"Replaying abort (${ex.getMessage}) after merge.") handle.killSwitch.abort(ex) case _ => () @@ -197,7 +176,7 @@ object PreemptableSequence { val result = handle.completed // not strictly needed for this use case, but in theory multiple preemptable stages are possible after each other // this is needed to remove the delegation of the killSwitch after stage is complete - result.onComplete(_ => delegateKillSwitch.set(None)) + result.onComplete(_ => killSwitchCaptor.setDelegate(None)) result } @@ -215,15 +194,15 @@ object PreemptableSequence { sequence(helper).transformWith(fResult => release.transform(_ => fResult)).onComplete { case Success(_) => - mutableKillSwitch.get() match { - case ShutDownKillSwitch => resultCompleted.success(()) - case AbortedKillSwitch(ex, _) => resultCompleted.failure(ex) + killSwitchCaptor.state match { + case KillSwitchCaptor.State.Shutdown => resultCompleted.success(()) + case KillSwitchCaptor.State.Aborted(ex) => resultCompleted.failure(ex) case _ => resultCompleted.success(()) } case Failure(ex) => - mutableKillSwitch.get() match { - case ShutDownKillSwitch => resultCompleted.success(()) - case AbortedKillSwitch(ex, _) => resultCompleted.failure(ex) + killSwitchCaptor.state match { + case KillSwitchCaptor.State.Shutdown => resultCompleted.success(()) + case KillSwitchCaptor.State.Aborted(ex) => resultCompleted.failure(ex) case _ => resultCompleted.failure(ex) } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index f450ea5ea9a3..6e8c3e925467 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -128,7 +128,7 @@ object ParallelIndexerFactory { implicit val rc: ResourceContext 
= resourceContext implicit val ec: ExecutionContext = resourceContext.executionContext implicit val matImplicit: Materializer = mat - haCoordinator.protectedBlock { signConnection => + haCoordinator.protectedExecution { connectionInitializer => val killSwitchCaptor = Promise[KillSwitch]() val completionFuture = DbDispatcher @@ -136,7 +136,7 @@ object ParallelIndexerFactory { dataSource = storageBackend.createDataSource( jdbcUrl = jdbcUrl, dataSourceConfig = dataSourceConfig, - connectionInitHook = Some(signConnection.sign), + connectionInitHook = Some(connectionInitializer.initialize), ), serverRole = ServerRole.Indexer, jdbcUrl = jdbcUrl, @@ -161,7 +161,7 @@ object ParallelIndexerFactory { .run() // the tricky bit: // the future in the completion handler will be this one - // but the future for signaling for the HaCoordinator, that the protected block is initialized needs to complete precisely here + // but the future for signaling for the HaCoordinator, that the protected execution is initialized needs to complete precisely here killSwitchCaptor.success(killSwitch) completionFuture } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala index 26802869dc7a..443863e8fbdd 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala @@ -272,7 +272,7 @@ object DataSourceStorageBackend { } trait DBLockStorageBackend { - def aquireImmediately( + def tryAcquire( lockId: DBLockStorageBackend.LockId, lockMode: DBLockStorageBackend.LockMode, )(connection: Connection): Option[DBLockStorageBackend.Lock] diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala index 656ee36ad08c..572c012ae93c 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/h2/H2StorageBackend.scala @@ -201,7 +201,7 @@ private[backend] object H2StorageBackend InitHookDataSourceProxy(h2DataSource, connectionInitHook.toList) } - override def aquireImmediately( + override def tryAcquire( lockId: DBLockStorageBackend.LockId, lockMode: DBLockStorageBackend.LockMode, )(connection: Connection): Option[DBLockStorageBackend.Lock] = diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index 5f9cb7b26708..6a7e8a42bd25 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -248,7 +248,7 @@ private[backend] object OracleStorageBackend InitHookDataSourceProxy(oracleDataSource, connectionInitHook.toList) } - override def aquireImmediately( + override def tryAcquire( lockId: DBLockStorageBackend.LockId, lockMode: DBLockStorageBackend.LockMode, )(connection: Connection): Option[DBLockStorageBackend.Lock] = { @@ -265,10 +265,10 @@ private[backend] object OracleStorageBackend .as(get[Int](1).single)(connection) match { 
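+ // DBMS_LOCK.REQUEST return codes, per Oracle's documentation and mirrored
+ // by the cases below: 0 = success, 1 = timeout (lock busy, mapped to None),
+ // 2 = deadlock, 3 = parameter error, 4 = lock already owned (also treated
+ // as success), 5 = illegal lock handle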
case 0 => Some(DBLockStorageBackend.Lock(lockId, lockMode)) case 1 => None - case 2 => throw new Exception("Aquiring lock caused a deadlock!") - case 3 => throw new Exception("Parameter error") + case 2 => throw new Exception("Oracle DB Error 2: Acquiring lock caused a deadlock!") + case 3 => throw new Exception("Oracle DB Error 3: Parameter error as acquiring lock") case 4 => Some(DBLockStorageBackend.Lock(lockId, lockMode)) - case 5 => throw new Exception("Illegal lock handle") + case 5 => throw new Exception("Oracle DB Error 5: Illegal lock handle as acquiring lock") case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.REQUEST: $unknown") } } @@ -280,9 +280,9 @@ private[backend] object OracleStorageBackend ) FROM DUAL""" .as(get[Int](1).single)(connection) match { case 0 => true - case 3 => throw new Exception("Parameter error") - case 4 => throw new Exception("Trying to release not-owned lock") - case 5 => throw new Exception("Illegal lock handle") + case 3 => throw new Exception("Oracle DB Error 3: Parameter error as releasing lock") + case 4 => throw new Exception("Oracle DB Error 4: Trying to release not-owned lock") + case 5 => throw new Exception("Oracle DB Error 5: Illegal lock handle as releasing lock") case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.RELEASE: $unknown") } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 00ac91db62fc..14408fa7ae09 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -210,7 +210,7 @@ private[backend] object PostgresStorageBackend InitHookDataSourceProxy(pgSimpleDataSource, hookFunctions) } - override def aquireImmediately( + override def tryAcquire( lockId: DBLockStorageBackend.LockId, lockMode: DBLockStorageBackend.LockMode, )(connection: Connection): Option[DBLockStorageBackend.Lock] = { diff --git a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml index 8cdd44878a0d..1770839dfc0a 100644 --- a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml +++ b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml @@ -22,7 +22,7 @@ - + From f96cf32366019d9172adf204ca76653f196702a6 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 01:18:29 +0200 Subject: [PATCH 09/21] Some formatting changes after rebase changelog_begin changelog_end --- .../store/backend/oracle/OracleStorageBackend.scala | 8 +++++++- .../store/backend/postgresql/PostgresStorageBackend.scala | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index 6a7e8a42bd25..0eb622ea677d 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ 
b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -16,7 +16,13 @@ import com.daml.platform.store.backend.common.{ InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DBLockStorageBackend, DataSourceStorageBackend, DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{ + DBLockStorageBackend, + DataSourceStorageBackend, + DbDto, + StorageBackend, + common, +} import java.sql.Connection import java.time.Instant diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 14408fa7ae09..55e7e9adc4c6 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -22,7 +22,13 @@ import com.daml.platform.store.backend.common.{ InitHookDataSourceProxy, TemplatedStorageBackend, } -import com.daml.platform.store.backend.{DBLockStorageBackend, DataSourceStorageBackend, DbDto, StorageBackend, common} +import com.daml.platform.store.backend.{ + DBLockStorageBackend, + DataSourceStorageBackend, + DbDto, + StorageBackend, + common, +} import javax.sql.DataSource import org.postgresql.ds.PGSimpleDataSource From 0e2969e730e6a3e91e5bd2e65aa4f8882c4b3119 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 01:56:31 +0200 Subject: [PATCH 10/21] Preparation for merge * Adds feature flag * Removes TODOs * Removes extra logging * Remove main-connection-lost simulation changelog_begin changelog_end --- .../platform/indexer/IndexerConfig.scala | 1 - .../platform/indexer/ha/HaCoordinator.scala | 1 + .../platform/indexer/ha/PollingChecker.scala | 24 +-- .../indexer/ha/PreemptableSequence.scala | 2 +- .../parallel/ParallelIndexerFactory.scala | 2 +- .../common/InitHookDataSourceProxy.scala | 160 +----------------- .../state/kvutils/app/logback.base.xml | 4 - .../state/kvutils/app/Config.scala | 11 ++ .../state/kvutils/app/LedgerFactory.scala | 1 + .../app/ParticipantIndexerConfig.scala | 2 + 10 files changed, 24 insertions(+), 184 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala index d06d2c82eebc..75d2bc828d99 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/IndexerConfig.scala @@ -24,7 +24,6 @@ case class IndexerConfig( allowExistingSchema: Boolean = false, // TODO append-only: remove after removing support for the current (mutating) schema enableAppendOnlySchema: Boolean = false, - // TODO append-only: this is now configuring only the append-only pool asyncCommitMode: DbType.AsyncCommitMode = DefaultAsyncCommitMode, maxInputBufferSize: Int = DefaultMaxInputBufferSize, inputMappingParallelism: Int = DefaultInputMappingParallelism, diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala index c99d876e2f8b..1379924da98c 100644 --- 
a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala @@ -61,6 +61,7 @@ case class HaConfig( mainLockCheckerPeriodMillis: Long = 1000, indexerLockId: Int = 100, indexerWorkerLockId: Int = 101, + enable: Boolean = false, // TODO ha: remove as stable ) object HaCoordinator { diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala index df87a7958819..d743d9647b8f 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala @@ -4,7 +4,6 @@ package com.daml.platform.indexer.ha import java.util.{Timer, TimerTask} -import java.util.concurrent.atomic.AtomicBoolean import akka.stream.KillSwitch import com.daml.logging.{ContextualizedLogger, LoggingContext} @@ -32,8 +31,6 @@ class PollingChecker( private val timer = new Timer(true) - private val lostMainConnectionEmulation = new AtomicBoolean(false) - timer.schedule( new TimerTask { override def run(): Unit = { @@ -45,14 +42,6 @@ class PollingChecker( periodMillis, ) - // TODO uncomment this for main-connection-lost simulation - timer.schedule( - new TimerTask { - override def run(): Unit = lostMainConnectionEmulation.set(true) - }, - 20000, - ) - // This is a cruel approach for ensuring single threaded usage of the mainConnection. // In theory this could have been made much more efficient: not enqueueing for a check of its own, // but collecting requests, and replying in batches. @@ -61,19 +50,8 @@ class PollingChecker( def check(): Unit = synchronized { logger.debug(s"Checking...") Try(check) match { - case Success(_) if !lostMainConnectionEmulation.get => - logger.debug(s"Check successful.") - case Success(_) => - logger.info( - s"Check failed due to lost-main-connection simulation. KillSwitch/abort called." - ) - killSwitch.abort( - new Exception( - "Check failed due to lost-main-connection simulation. KillSwitch/abort called." - ) - ) - throw new Exception("Check failed due to lost-main-connection simulation.") + logger.debug(s"Check successful.") case Failure(ex) => logger.info(s"Check failed (${ex.getMessage}). KillSwitch/abort called.") diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala index 855fc1f7a359..62699ff34ff2 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala @@ -147,7 +147,7 @@ object PreemptableSequence { // since we check countdown to 0, starting from negative means unlimited retries case Failure(ex) if maxAmountOfRetries == 0 => logger.info( - s"Maximum amount of retries reached (${maxAmountOfRetries}) failing permanently. (${ex.getMessage})" + s"Maximum amount of retries reached ($maxAmountOfRetries) failing permanently. 
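// Aside: the polling pattern PollingChecker implements, as a self-contained sketch:
// run checkBody on a daemon Timer and hand the first failure to the owner, which in the
// real class aborts its KillSwitch. Class and parameter names here are assumptions.
import java.util.{Timer, TimerTask}
import scala.util.{Failure, Success, Try}

final class SimplePollingChecker(
    periodMillis: Long,
    checkBody: => Unit,
    onFailure: Throwable => Unit,
) extends AutoCloseable {
  private val timer = new Timer(true) // daemon: does not keep the JVM alive
  timer.schedule(
    new TimerTask { override def run(): Unit = check() },
    periodMillis,
    periodMillis,
  )
  // synchronized so that checkBody is only ever exercised by one caller at a time
  def check(): Unit = synchronized {
    Try(checkBody) match {
      case Success(_) => () // healthy, nothing to do
      case Failure(ex) => onFailure(ex) // e.g. killSwitch.abort(ex) in the real class
    }
  }
  override def close(): Unit = timer.cancel()
}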
(${ex.getMessage})" ) Future.failed(ex) case Success(t) => Future.successful(t) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index 6e8c3e925467..803eb72059b4 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -73,7 +73,7 @@ object ParallelIndexerFactory { compressionStrategy = compressionStrategy, ) haCoordinator <- - if (storageBackend.dbLockSupported) // TODO feature flag comes here + if (storageBackend.dbLockSupported && haConfig.enable) ResourceOwner .forExecutorService(() => ExecutionContext.fromExecutorService( diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala index 81ac987fec10..d55a5a1d96a9 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala @@ -4,24 +4,8 @@ package com.daml.platform.store.backend.common import java.io.PrintWriter -import java.sql.{ - Blob, - CallableStatement, - Clob, - Connection, - DatabaseMetaData, - NClob, - PreparedStatement, - SQLWarning, - SQLXML, - Savepoint, - Statement, - Struct, -} -import java.util.Properties -import java.util.concurrent.Executor +import java.sql.Connection import java.util.logging.Logger -import java.{sql, util} import com.daml.logging.{ContextualizedLogger, LoggingContext} import javax.sql.DataSource @@ -45,20 +29,18 @@ private[backend] case class InitHookDataSourceProxy( )(implicit loggingContext: LoggingContext) extends DataSource { override def getConnection: Connection = { - val connectionId = - List.fill(8)(scala.util.Random.nextPrintableChar()).mkString // TODO FIXME maybe not needed - logger.debug(s"$connectionId Creating new connection") + logger.debug(s"Creating new connection") val connection = delegate.getConnection try { - logger.debug(s"$connectionId Applying connection init hook") + logger.debug(s"Applying connection init hook") initHook(connection) } catch { case t: Throwable => - logger.info(s"$connectionId Init hook execution failed: ${t.getMessage}") + logger.info(s"Init hook execution failed: ${t.getMessage}") throw t } - logger.info(s"$connectionId Init hook execution finished successfully, connection ready") - new LoggingConnectionProxy(connection, connectionId) + logger.info(s"Init hook execution finished successfully, connection ready") + connection } override def getConnection(s: String, s1: String): Connection = { @@ -81,133 +63,3 @@ private[backend] case class InitHookDataSourceProxy( override def isWrapperFor(aClass: Class[_]): Boolean = delegate.isWrapperFor(aClass) } - -// TODO consider to remove this is only for logging the closures of connections -private[backend] class LoggingConnectionProxy( - delegate: Connection, - connectionId: String, -)(implicit loggingContext: LoggingContext) - extends Connection { - override def createStatement(): Statement = delegate.createStatement() - - override def prepareStatement(s: String): PreparedStatement = delegate.prepareStatement(s) - - override 
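// Aside: the proxy idea from the InitHookDataSourceProxy hunk above, in isolation - a
// DataSource wrapper that runs an init hook on every connection it hands out. Unlike the
// patched class, this sketch also closes the connection when the hook fails; that
// hardening, and the class name, are assumptions of this example.
import java.io.PrintWriter
import java.sql.Connection
import java.util.logging.Logger
import javax.sql.DataSource

final class InitHookProxy(delegate: DataSource, initHook: Connection => Unit) extends DataSource {
  private def initialized(connection: Connection): Connection = {
    // e.g. SET synchronous_commit, or the HA worker-lock check, runs here
    try initHook(connection)
    catch { case t: Throwable => connection.close(); throw t }
    connection
  }
  override def getConnection: Connection = initialized(delegate.getConnection)
  override def getConnection(user: String, password: String): Connection =
    initialized(delegate.getConnection(user, password))
  override def getLogWriter: PrintWriter = delegate.getLogWriter
  override def setLogWriter(out: PrintWriter): Unit = delegate.setLogWriter(out)
  override def setLoginTimeout(seconds: Int): Unit = delegate.setLoginTimeout(seconds)
  override def getLoginTimeout: Int = delegate.getLoginTimeout
  override def getParentLogger: Logger = delegate.getParentLogger
  override def unwrap[T](iface: Class[T]): T = delegate.unwrap(iface)
  override def isWrapperFor(iface: Class[_]): Boolean = delegate.isWrapperFor(iface)
}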
def prepareCall(s: String): CallableStatement = delegate.prepareCall(s) - - override def nativeSQL(s: String): String = delegate.nativeSQL(s) - - override def setAutoCommit(b: Boolean): Unit = delegate.setAutoCommit(b) - - override def getAutoCommit: Boolean = delegate.getAutoCommit - - override def commit(): Unit = delegate.commit() - - override def rollback(): Unit = delegate.rollback() - - override def close(): Unit = { - logger.info(s"$connectionId Connection is closing") - delegate.close() - logger.info(s"$connectionId Connection is closed") - } - - override def isClosed: Boolean = delegate.isClosed - - override def getMetaData: DatabaseMetaData = delegate.getMetaData - - override def setReadOnly(b: Boolean): Unit = delegate.setReadOnly(b) - - override def isReadOnly: Boolean = delegate.isReadOnly - - override def setCatalog(s: String): Unit = delegate.setCatalog(s) - - override def getCatalog: String = delegate.getCatalog - - override def setTransactionIsolation(i: Int): Unit = delegate.setTransactionIsolation(i) - - override def getTransactionIsolation: Int = delegate.getTransactionIsolation - - override def getWarnings: SQLWarning = delegate.getWarnings - - override def clearWarnings(): Unit = delegate.clearWarnings() - - override def createStatement(i: Int, i1: Int): Statement = delegate.createStatement(i, i1) - - override def prepareStatement(s: String, i: Int, i1: Int): PreparedStatement = - delegate.prepareStatement(s, i, i1) - - override def prepareCall(s: String, i: Int, i1: Int): CallableStatement = - delegate.prepareCall(s, i, i1) - - override def getTypeMap: util.Map[String, Class[_]] = delegate.getTypeMap - - override def setTypeMap(map: util.Map[String, Class[_]]): Unit = delegate.setTypeMap(map) - - override def setHoldability(i: Int): Unit = delegate.setHoldability(i) - - override def getHoldability: Int = delegate.getHoldability - - override def setSavepoint(): Savepoint = delegate.setSavepoint() - - override def setSavepoint(s: String): Savepoint = delegate.setSavepoint(s) - - override def rollback(savepoint: Savepoint): Unit = delegate.rollback(savepoint) - - override def releaseSavepoint(savepoint: Savepoint): Unit = delegate.releaseSavepoint(savepoint) - - override def createStatement(i: Int, i1: Int, i2: Int): Statement = - delegate.createStatement(i, i1, i2) - - override def prepareStatement(s: String, i: Int, i1: Int, i2: Int): PreparedStatement = - delegate.prepareStatement(s, i, i1, i2) - - override def prepareCall(s: String, i: Int, i1: Int, i2: Int): CallableStatement = - delegate.prepareCall(s, i, i1, i2) - - override def prepareStatement(s: String, i: Int): PreparedStatement = - delegate.prepareStatement(s, i) - - override def prepareStatement(s: String, ints: Array[Int]): PreparedStatement = - delegate.prepareStatement(s, ints) - - override def prepareStatement(s: String, strings: Array[String]): PreparedStatement = - delegate.prepareStatement(s, strings) - - override def createClob(): Clob = delegate.createClob() - - override def createBlob(): Blob = delegate.createBlob() - - override def createNClob(): NClob = delegate.createNClob() - - override def createSQLXML(): SQLXML = delegate.createSQLXML() - - override def isValid(i: Int): Boolean = delegate.isValid(i) - - override def setClientInfo(s: String, s1: String): Unit = delegate.setClientInfo(s, s1) - - override def setClientInfo(properties: Properties): Unit = delegate.setClientInfo(properties) - - override def getClientInfo(s: String): String = delegate.getClientInfo(s) - - override def 
getClientInfo: Properties = delegate.getClientInfo - - override def createArrayOf(s: String, objects: Array[AnyRef]): sql.Array = - delegate.createArrayOf(s, objects) - - override def createStruct(s: String, objects: Array[AnyRef]): Struct = - delegate.createStruct(s, objects) - - override def setSchema(s: String): Unit = delegate.setSchema(s) - - override def getSchema: String = delegate.getSchema - - override def abort(executor: Executor): Unit = delegate.abort(executor) - - override def setNetworkTimeout(executor: Executor, i: Int): Unit = - delegate.setNetworkTimeout(executor, i) - - override def getNetworkTimeout: Int = delegate.getNetworkTimeout - - override def unwrap[T](aClass: Class[T]): T = delegate.unwrap(aClass) - - override def isWrapperFor(aClass: Class[_]): Boolean = delegate.isWrapperFor(aClass) -} diff --git a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml index 1770839dfc0a..204db12d08ee 100644 --- a/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml +++ b/ledger/participant-state/kvutils/app/src/main/resources/com/daml/ledger/participant/state/kvutils/app/logback.base.xml @@ -20,10 +20,6 @@ - - - - diff --git a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala index dd947d1e1a3b..47b453339205 100644 --- a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala +++ b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala @@ -44,6 +44,7 @@ final case class Config[Extra]( enableAppendOnlySchema: Boolean, // TODO append-only: remove after removing support for the current (mutating) schema enableMutableContractStateCache: Boolean, enableInMemoryFanOutForLedgerApi: Boolean, + enableHa: Boolean, // TODO ha: remove after stable extra: Extra, ) { def withTlsConfig(modify: TlsConfiguration => TlsConfiguration): Config[Extra] = @@ -76,6 +77,7 @@ object Config { enableAppendOnlySchema = false, enableMutableContractStateCache = false, enableInMemoryFanOutForLedgerApi = false, + enableHa = false, extra = extra, ) @@ -535,6 +537,15 @@ object Config { ) else success ) + + // TODO ha: remove after stable + opt[Unit]("index-ha-unsafe") + .optional() + .hidden() + .text( + s"Use the experimental High Availability feature with the indexer. Should not be used in production." 
+ ) + .action((_, config) => config.copy(enableHa = true)) } extraOptions(parser) parser diff --git a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/LedgerFactory.scala b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/LedgerFactory.scala index 9f859b5e94bb..6a0823b76b97 100644 --- a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/LedgerFactory.scala +++ b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/LedgerFactory.scala @@ -51,6 +51,7 @@ trait ConfigProvider[ExtraConfig] { tailingRateLimitPerSecond = participantConfig.indexerConfig.tailingRateLimitPerSecond, batchWithinMillis = participantConfig.indexerConfig.batchWithinMillis, enableCompression = participantConfig.indexerConfig.enableCompression, + haConfig = participantConfig.indexerConfig.haConfig.copy(enable = config.enableHa), ) def apiServerConfig( diff --git a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/ParticipantIndexerConfig.scala b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/ParticipantIndexerConfig.scala index 1761790f118f..458d4f11a1b8 100644 --- a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/ParticipantIndexerConfig.scala +++ b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/ParticipantIndexerConfig.scala @@ -4,6 +4,7 @@ package com.daml.ledger.participant.state.kvutils.app import com.daml.platform.indexer.IndexerConfig +import com.daml.platform.indexer.ha.HaConfig import scala.concurrent.duration.FiniteDuration @@ -25,6 +26,7 @@ final case class ParticipantIndexerConfig( tailingRateLimitPerSecond: Int = ParticipantIndexerConfig.DefaultTailingRateLimitPerSecond, batchWithinMillis: Long = ParticipantIndexerConfig.DefaultBatchWithinMillis, enableCompression: Boolean = ParticipantIndexerConfig.DefaultEnableCompression, + haConfig: HaConfig = HaConfig(), ) object ParticipantIndexerConfig { From 53b1b1bdfc21017ae7ca45236f1d9133ddffc851 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 11:40:27 +0200 Subject: [PATCH 11/21] Removes some dead code changelog_begin changelog_end --- .../indexer/ha/PreemptableSequence.scala | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala index 62699ff34ff2..eb4cd7b9b578 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala @@ -3,10 +3,7 @@ package com.daml.platform.indexer.ha -import java.util.concurrent.atomic.AtomicReference - import akka.actor.Scheduler -import akka.stream.KillSwitch import com.daml.logging.{ContextualizedLogger, LoggingContext} import scala.concurrent.duration.FiniteDuration @@ -83,23 +80,6 @@ trait SequenceHelper { def handle: Handle } -// these family of KillSwitch-es enable the behavior of recording the usage of the KillSwitch -// - Shutdown always wins: in scenarios like multiple abort and then a shutdown will always capture a shutdown, -// even if additional aborts arrive after the 
shutdown. This is needed so that graceful-shutdown can stop possible -// recovery scenarios. -// - Always the last abort wins. -trait UsedKillSwitch extends KillSwitch { - override def shutdown(): Unit = () - override def abort(ex: Throwable): Unit = () -} -case object ShutDownKillSwitch extends UsedKillSwitch -case class AbortedKillSwitch(ex: Throwable, _myReference: AtomicReference[KillSwitch]) - extends CaptureKillSwitch(_myReference) -class CaptureKillSwitch(myReference: AtomicReference[KillSwitch]) extends KillSwitch { - override def shutdown(): Unit = myReference.set(ShutDownKillSwitch) - override def abort(ex: Throwable): Unit = myReference.set(AbortedKillSwitch(ex, myReference)) -} - object PreemptableSequence { private val logger = ContextualizedLogger.get(this.getClass) From 0916d476dcf61eabad29946bd48bc570f9a74bd7 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 22:12:33 +0200 Subject: [PATCH 12/21] Minor changes based on review changelog_begin changelog_end --- .../scala/platform/indexer/ha/HaCoordinator.scala | 2 +- .../platform/indexer/ha/KillSwitchCaptor.scala | 13 ++++++++----- .../scala/platform/indexer/ha/PollingChecker.scala | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala index 1379924da98c..bb4ef4f2696c 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala @@ -132,7 +132,7 @@ object HaCoordinator { mainLockChecker <- go[PollingChecker]( new PollingChecker( periodMillis = haConfig.mainLockCheckerPeriodMillis, - check = acquireMainLock(mainConnection), + checkBody = acquireMainLock(mainConnection), killSwitch = handle.killSwitch, // meaning: this PollingChecker will shut down the main preemptableSequence ) ) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala index a2133d53f71b..ec7decb83cda 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/KillSwitchCaptor.scala @@ -8,11 +8,14 @@ import java.util.concurrent.atomic.AtomicReference import akka.stream.KillSwitch import com.daml.logging.{ContextualizedLogger, LoggingContext} -/** A KillSwitch which captures it's usage - * - Shutdown always wins - * - From aborts, the last abort wins - * - With setDelegate() we can set a delegate KillSwitch, which to usage will be replayed - * - Captured state is available with state +/** This KillSwitch captures its usage in its internal state, which can be queried. + * Captured state is available with the 'state' method. 
+ * + * Rules of state transitions: + * - Shutdown is always the final state + * - From multiple aborts, the last abort wins + * + * With setDelegate() we can set a delegate KillSwitch, to which usage will be replayed */ class KillSwitchCaptor(implicit loggingContext: LoggingContext) extends KillSwitch { import KillSwitchCaptor._ diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala index d743d9647b8f..a793a59f9eb5 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala @@ -18,12 +18,12 @@ import scala.util.{Failure, Success, Try} * - It is also an AutoCloseable to release internal resources * * @param periodMillis period of the checking, between scheduled checks there will be this much delay - * @param check the check function, Exception signals failed check + * @param checkBody the check function, Exception signals failed check * @param killSwitch to abort if a check fails */ class PollingChecker( periodMillis: Long, - check: => Unit, + checkBody: => Unit, killSwitch: KillSwitch, )(implicit loggingContext: LoggingContext) extends AutoCloseable { From 14ac0414b09b1c36e258e80f7551d87ba88d40e6 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Tue, 13 Jul 2021 23:13:46 +0200 Subject: [PATCH 13/21] Rollback removal of the keepAlive safety guard for async commit We need this extra level of safety while HA is behind a feature flag. We can remove this part as soon as HA is mandatory, since HA coordination takes care of this aspect implicitly, and more precisely (aspect: detecting loss of connection to the db) changelog_begin changelog_end --- .../indexer/parallel/ParallelIndexerFactory.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index 803eb72059b4..22a57b8e4727 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -36,6 +36,8 @@ object ParallelIndexerFactory { + private val keepAliveMaxIdleDuration = FiniteDuration(200, "millis") + private val logger = ContextualizedLogger.get(this.getClass) def apply[DB_BATCH]( @@ -123,6 +125,19 @@ object ParallelIndexerFactory { .map(_ -> System.nanoTime()) ) .map(_ => ()) + .keepAlive( // TODO ha: remove as stable. This keepAlive approach was introduced for safety with async commit. This is still needed until HA is mandatory for Postgres to ensure safety with async commit. + keepAliveMaxIdleDuration, + () => + if (dbDispatcher.currentHealth() == HealthStatus.healthy) { + logger.debug("Indexer keep-alive: database connectivity OK") + } else { + logger + .warn("Indexer keep-alive: database connectivity lost. 
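// Aside: the state rules documented above ("shutdown is always the final state, among
// aborts the last one wins") as a compact sketch using an AtomicReference; the real
// KillSwitchCaptor additionally replays the captured signal into a delegate, omitted here.
import java.util.concurrent.atomic.AtomicReference
import akka.stream.KillSwitch

object CapturingKillSwitch {
  sealed trait State
  case object Unused extends State
  case object Shutdown extends State
  final case class Aborted(ex: Throwable) extends State
}

final class CapturingKillSwitch extends KillSwitch {
  import CapturingKillSwitch._
  private val ref = new AtomicReference[State](Unused)
  def state: State = ref.get()
  override def shutdown(): Unit = ref.set(Shutdown) // terminal: overwrites earlier aborts
  override def abort(ex: Throwable): Unit = {
    ref.updateAndGet { current =>
      current match {
        case Shutdown => Shutdown // shutdown already won; late aborts are ignored
        case _ => Aborted(ex) // otherwise the most recent abort wins
      }
    }
    ()
  }
}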
Stopping indexing.") + throw new Exception( + "Connectivity issue to the index-database detected. Stopping indexing." + ) + }, + ) def subscribe(resourceContext: ResourceContext)(readService: ReadService): Handle = { implicit val rc: ResourceContext = resourceContext From 4573b91e2322382e8f4aae70fe84662f66a83b5c Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Wed, 14 Jul 2021 23:00:32 +0200 Subject: [PATCH 14/21] More changes as per review * Increase log level for a potentially transient log message * Adds logging to the second getConnection method in InitHookDataSourceProxy * Rename changelog_begin changelog_end --- .../store/appendonlydao/DbDispatcher.scala | 4 ++-- .../common/InitHookDataSourceProxy.scala | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala index 53d5e7178782..9c6d6fe60a05 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala @@ -90,7 +90,7 @@ private[platform] object DbDispatcher { metrics: Metrics, )(implicit loggingContext: LoggingContext): ResourceOwner[DbDispatcher] = for { - hikariConnectionPool <- new HikariDataSourceOwner( + hikariDataSource <- new HikariDataSourceOwner( dataSource, serverRole, jdbcUrl, @@ -99,7 +99,7 @@ private[platform] object DbDispatcher { connectionTimeout, Some(metrics.registry), ) - connectionProvider <- DataSourceConnectionProvider.owner(hikariConnectionPool) + connectionProvider <- DataSourceConnectionProvider.owner(hikariDataSource) threadPoolName = s"daml.index.db.threadpool.connection.${serverRole.threadPoolSuffix}" executor <- ResourceOwner.forExecutorService(() => new InstrumentedExecutorService( diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala index d55a5a1d96a9..e25587ee886d 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/common/InitHookDataSourceProxy.scala @@ -28,26 +28,27 @@ private[backend] case class InitHookDataSourceProxy( initHook: Connection => Unit, )(implicit loggingContext: LoggingContext) extends DataSource { - override def getConnection: Connection = { + + private def getConnection(connectionBody: => Connection): Connection = { logger.debug(s"Creating new connection") - val connection = delegate.getConnection + val connection = connectionBody try { logger.debug(s"Applying connection init hook") initHook(connection) } catch { case t: Throwable => - logger.info(s"Init hook execution failed: ${t.getMessage}") + logger.warn(s"Init hook execution failed", t) throw t } logger.info(s"Init hook execution finished successfully, connection ready") connection } - override def getConnection(s: String, s1: String): Connection = { - val connection = delegate.getConnection(s, s1) - initHook(connection) - connection - } + override def getConnection: Connection = getConnection(delegate.getConnection) + + override def getConnection(s: String, s1: String): Connection = getConnection( + delegate.getConnection(s, s1) 
+ ) override def getLogWriter: PrintWriter = delegate.getLogWriter From b7319eeaa4be32fcae2b25f8c86ef723f9c063d6 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Thu, 15 Jul 2021 01:16:48 +0200 Subject: [PATCH 15/21] Using stronger default lock identifiers As suggested in the design document changelog_begin changelog_end --- .../src/main/scala/platform/indexer/ha/HaCoordinator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala index bb4ef4f2696c..8730b86c2a15 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala @@ -59,8 +59,8 @@ case class HaConfig( workerLockAquireRetryMillis: Long = 500, workerLockAquireMaxRetry: Long = 1000, mainLockCheckerPeriodMillis: Long = 1000, - indexerLockId: Int = 100, - indexerWorkerLockId: Int = 101, + indexerLockId: Int = 0x646d6c00, + indexerWorkerLockId: Int = 0x646d6c01, enable: Boolean = false, // TODO ha: remove as stable ) From 0d2e4a30b18e58087a8331e54918c03e49fe6ab3 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Thu, 15 Jul 2021 11:50:42 +0200 Subject: [PATCH 16/21] More changes as per review * Wording changes * Unwrapping a single abstract method for clarity * Comment changes * Logging changes * Changes the name of the feature flag changelog_begin changelog_end --- .../platform/indexer/ha/HaCoordinator.scala | 157 +++++++++--------- .../parallel/ParallelIndexerFactory.scala | 4 + .../backend/oracle/OracleStorageBackend.scala | 14 +- .../state/kvutils/app/Config.scala | 2 +- 4 files changed, 96 insertions(+), 81 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala index 8730b86c2a15..6fdc20822e6a 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/HaCoordinator.scala @@ -19,7 +19,7 @@ import scala.concurrent.{Await, ExecutionContext, Future} * @param completed will complete right after the program completed * - if no KillSwitch used, * - it will complete successfully as program successfully ends - * - it will complete with a failure as program failed + * - it will complete with the same failure that failed the program * - if KillSwitch aborted, this completes with the same Throwable * - if KillSwitch shut down, this completes successfully * After signalling completion, the program finished its execution and has released all resources it acquired. @@ -28,8 +28,8 @@ case class Handle(completed: Future[Unit], killSwitch: KillSwitch) /** This functionality initializes a worker Connection, and clears it for further usage. - * This only need to be done once at the beginning of the Connection life-cycle - * On any error an exception will be thrown + * This only needs to be done once at the beginning of the Connection life-cycle + * Initialization errors are signaled by throwing an exception. */ trait ConnectionInitializer { def initialize(connection: Connection): Unit } @@ -46,8 +46,8 @@ trait HaCoordinator { /** Execute in High Availability mode. 
* Wraps around the Handle of the execution. * - * @param initializeExecution HaCoordinator provides a ConnectionInitializer which need to be used for all database connections during execution - * Future[Handle] embodies asynchronous initialisation of the execution + * @param initializeExecution HaCoordinator provides a ConnectionInitializer that must be used for all database connections during execution + * Future[Handle] embodies asynchronous initialization of the execution * (e.g. not the actual work. That asynchronous execution completes with the completed Future of the Handle) * @return the new Handle, which is available immediately to observe and interact with the complete program here */ @@ -59,7 +59,7 @@ case class HaConfig( workerLockAquireRetryMillis: Long = 500, workerLockAquireMaxRetry: Long = 1000, mainLockCheckerPeriodMillis: Long = 1000, - indexerLockId: Int = 0x646d6c00, + indexerLockId: Int = 0x646d6c00, // note 0x646d6c equals ASCII encoded "dml" indexerWorkerLockId: Int = 0x646d6c01, enable: Boolean = false, // TODO ha: remove as stable ) @@ -75,7 +75,7 @@ object HaCoordinator { * - provides a ConnectionInitializer function which is mandatory to execute on all worker connections during execution * - will spawn a polling-daemon to observe continuous presence of the main lock * - * @param dataSource to spawn the main connection which keeps the Indexer Lock + * @param dataSource to spawn the main connection which keeps the Indexer Main Lock * @param storageBackend is the database-independent abstraction of session/connection level database locking * @param executionContext which is used to execute initialisation, will do blocking/IO work, so dedicated execution context is recommended */ def databaseLockBasedHaCoordinator( @@ -92,77 +92,86 @@ object HaCoordinator { val indexerWorkerLockId = storageBackend.lock(haConfig.indexerWorkerLockId) val preemptableSequence = PreemptableSequence(scheduler) - asyncHandle => - def acquireLock(connection: Connection, lockId: LockId, lockMode: LockMode): Lock = { - logger.debug(s"Acquiring lock $lockId $lockMode") - storageBackend - .tryAcquire(lockId, lockMode)(connection) - .getOrElse( - throw new Exception(s"Cannot acquire lock $lockId in lock-mode $lockMode: lock busy") - ) - } - - def acquireMainLock(connection: Connection): Unit = { - acquireLock(connection, indexerLockId, LockMode.Exclusive) - () - } - - preemptableSequence.executeSequence { sequenceHelper => - import sequenceHelper._ - logger.info("Starting databaseLockBasedHaCoordinator") - for { - mainConnection <- go[Connection](dataSource.getConnection) - _ = logger.info("Step 1: creating main-connection - DONE") - _ = registerRelease { - logger.info("Releasing main connection...") - mainConnection.close() - logger.info("Released main connection") - } - _ <- retry(haConfig.mainLockAquireRetryMillis)(acquireMainLock(mainConnection)) - _ = logger.info("Step 2: acquire exclusive Indexer Lock on main-connection - DONE") - exclusiveWorkerLock <- retry[Lock]( - haConfig.workerLockAquireRetryMillis, - haConfig.workerLockAquireMaxRetry, - )( - acquireLock(mainConnection, indexerWorkerLockId, LockMode.Exclusive) - ) - _ = logger.info("Step 3: acquire exclusive Indexer Worker Lock on main-connection - DONE") - _ <- go(storageBackend.release(exclusiveWorkerLock)(mainConnection)) - _ = logger.info("Step 4: release exclusive Indexer Worker Lock on main-connection - DONE") - mainLockChecker <- go[PollingChecker]( - new PollingChecker( - periodMillis = haConfig.mainLockCheckerPeriodMillis, - checkBody = 
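// Aside: a tiny self-contained check of the "dml" note above; the object name is
// illustrative. 0x646d6c is "dml" in US-ASCII, so the two defaults are the "dml"
// prefix with a one-byte discriminator appended.
object LockIdNote extends App {
  val dml = "dml".getBytes("US-ASCII").foldLeft(0)((acc, b) => (acc << 8) | b) // 'd'=0x64, 'm'=0x6d, 'l'=0x6c
  assert(dml == 0x646d6c)
  assert(((dml << 8) | 0x00) == 0x646d6c00) // indexerLockId default
  assert(((dml << 8) | 0x01) == 0x646d6c01) // indexerWorkerLockId default
}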
acquireMainLock(mainConnection), - killSwitch = - handle.killSwitch, // meaning: this PollingChecker will shut down the main preemptableSequence + new HaCoordinator { + override def protectedExecution( + initializeExecution: ConnectionInitializer => Future[Handle] + ): Handle = { + def acquireLock(connection: Connection, lockId: LockId, lockMode: LockMode): Lock = { + logger.debug(s"Acquiring lock $lockId $lockMode") + storageBackend + .tryAcquire(lockId, lockMode)(connection) + .getOrElse( + throw new Exception(s"Cannot acquire lock $lockId in lock-mode $lockMode: lock busy") + ) + } + + def acquireMainLock(connection: Connection): Unit = { + acquireLock(connection, indexerLockId, LockMode.Exclusive) + () + } + + preemptableSequence.executeSequence { sequenceHelper => + import sequenceHelper._ + logger.info("Starting databaseLockBasedHaCoordinator") + for { + mainConnection <- go[Connection](dataSource.getConnection) + _ = logger.info("Step 1: creating main-connection - DONE") + _ = registerRelease { + logger.info("Releasing main connection...") + mainConnection.close() + logger.info("Released main connection") + } + _ <- retry(haConfig.mainLockAquireRetryMillis)(acquireMainLock(mainConnection)) + _ = logger.info("Step 2: acquire exclusive Indexer Main Lock on main-connection - DONE") + exclusiveWorkerLock <- retry[Lock]( + haConfig.workerLockAquireRetryMillis, + haConfig.workerLockAquireMaxRetry, + )( + acquireLock(mainConnection, indexerWorkerLockId, LockMode.Exclusive) + ) + _ = logger.info( + "Step 3: acquire exclusive Indexer Worker Lock on main-connection - DONE" + ) + _ <- go(storageBackend.release(exclusiveWorkerLock)(mainConnection)) + _ = logger.info( + "Step 4: release exclusive Indexer Worker Lock on main-connection - DONE" ) - ) - _ = logger.info( - "Step 5: activate periodic checker of the exclusive Indexer Lock on the main connection - DONE" - ) - _ = registerRelease { - logger.info( - "Releasing periodic checker of the exclusive Indexer Lock on the main connection..." + mainLockChecker <- go[PollingChecker]( + new PollingChecker( + periodMillis = haConfig.mainLockCheckerPeriodMillis, + checkBody = acquireMainLock(mainConnection), + killSwitch = + handle.killSwitch, // meaning: this PollingChecker will shut down the main preemptableSequence + ) ) - mainLockChecker.close() - logger.info( - "Released periodic checker of the exclusive Indexer Lock on the main connection" + _ = logger.info( + "Step 5: activate periodic checker of the exclusive Indexer Main Lock on the main connection - DONE" ) - } - protectedHandle <- goF(asyncHandle(workerConnection => { - // this is the checking routine on connection creation - // step 1: acquire shared worker-lock - logger.info(s"Preparing worker connection. Step 1: acquire lock.") - acquireLock(workerConnection, indexerWorkerLockId, LockMode.Shared) - // step 2: check if main connection still holds the lock - logger.info(s"Preparing worker connection. Step 2: checking main lock.") - mainLockChecker.check() - logger.info(s"Preparing worker connection DONE.") - })) - _ = logger.info("Step 6: initialize protected execution - DONE") - _ <- merge(protectedHandle) - } yield () + _ = registerRelease { + logger.info( + "Releasing periodic checker of the exclusive Indexer Main Lock on the main connection..." 
+ ) + mainLockChecker.close() + logger.info( + "Released periodic checker of the exclusive Indexer Main Lock on the main connection" + ) + } + protectedHandle <- goF(initializeExecution(workerConnection => { + // this is the checking routine on connection creation + // step 1: acquire shared worker-lock + logger.info(s"Preparing worker connection. Step 1: acquire lock.") + acquireLock(workerConnection, indexerWorkerLockId, LockMode.Shared) + // step 2: check if main connection still holds the lock + logger.info(s"Preparing worker connection. Step 2: checking main lock.") + mainLockChecker.check() + logger.info(s"Preparing worker connection DONE.") + })) + _ = logger.info("Step 6: initialize protected execution - DONE") + _ <- merge(protectedHandle) + } yield () + } } + } } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index 22a57b8e4727..8eb1411e8c9d 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -87,6 +87,8 @@ object ParallelIndexerFactory { ) .map( HaCoordinator.databaseLockBasedHaCoordinator( + // this DataSource will be used to spawn the main connection where we keep the Indexer Main Lock + // The life-cycle of such connections matches the life-cycle of a protectedExecution dataSource = storageBackend.createDataSource(jdbcUrl), storageBackend = storageBackend, _, @@ -148,6 +150,8 @@ object ParallelIndexerFactory { val completionFuture = DbDispatcher .owner( + // this is the DataSource which will be wrapped by HikariCP, and which will drive the ingestion + // therefore this needs to be configured with the connection-init-hook, which we get from HaCoordinator dataSource = storageBackend.createDataSource( jdbcUrl = jdbcUrl, dataSourceConfig = dataSourceConfig, connectionInitHook = Some(connectionInitializer.initialize), ), serverRole = ServerRole.Indexer, diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala index 0eb622ea677d..563fb22c89f7 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/oracle/OracleStorageBackend.scala @@ -271,10 +271,11 @@ private[backend] object OracleStorageBackend .as(get[Int](1).single)(connection) match { case 0 => Some(DBLockStorageBackend.Lock(lockId, lockMode)) case 1 => None - case 2 => throw new Exception("Oracle DB Error 2: Acquiring lock caused a deadlock!") - case 3 => throw new Exception("Oracle DB Error 3: Parameter error as acquiring lock") + case 2 => throw new Exception("DBMS_LOCK.REQUEST Error 2: Acquiring lock caused a deadlock!") + case 3 => throw new Exception("DBMS_LOCK.REQUEST Error 3: Parameter error as acquiring lock") case 4 => Some(DBLockStorageBackend.Lock(lockId, lockMode)) - case 5 => throw new Exception("Oracle DB Error 5: Illegal lock handle as acquiring lock") + case 5 => + throw new Exception("DBMS_LOCK.REQUEST Error 5: Illegal lock handle as acquiring lock") case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.REQUEST: $unknown") } } @@ -286,9 +287,10 @@ private[backend] object OracleStorageBackend ) FROM DUAL""" 
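// Aside: an illustrative caller of the protectedExecution entry point shown above; the
// stubbed body stands in for the real asynchronous initialization, and KillSwitches.shared
// is only a placeholder for the kill switch of an actual running execution.
import akka.stream.KillSwitches
import scala.concurrent.Future

def runProtected(coordinator: HaCoordinator): Handle =
  coordinator.protectedExecution { connectionInitializer =>
    // every worker connection opened during the execution must first be cleared via
    // connectionInitializer.initialize(connection) - that is the shared worker-lock check
    Future.successful(Handle(completed = Future.unit, killSwitch = KillSwitches.shared("stub")))
  }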
.as(get[Int](1).single)(connection) match { case 0 => true - case 3 => throw new Exception("Oracle DB Error 3: Parameter error as releasing lock") - case 4 => throw new Exception("Oracle DB Error 4: Trying to release not-owned lock") - case 5 => throw new Exception("Oracle DB Error 5: Illegal lock handle as releasing lock") + case 3 => throw new Exception("DBMS_LOCK.RELEASE Error 3: Parameter error as releasing lock") + case 4 => throw new Exception("DBMS_LOCK.RELEASE Error 4: Trying to release not-owned lock") + case 5 => + throw new Exception("DBMS_LOCK.RELEASE Error 5: Illegal lock handle as releasing lock") case unknown => throw new Exception(s"Invalid result from DBMS_LOCK.RELEASE: $unknown") } } diff --git a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala index 47b453339205..a96c80e0783b 100644 --- a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala +++ b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala @@ -539,7 +539,7 @@ object Config { ) // TODO ha: remove after stable - opt[Unit]("index-ha-unsafe") + opt[Unit]("experimental-index-ha") .optional() .hidden() .text( From 8246151b7a090bfac0250d0cba1a152647d85ab8 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Thu, 15 Jul 2021 11:56:00 +0200 Subject: [PATCH 17/21] Removes the comment on warning on problematic compile dependency As discussed in the PR, the note is not needed by now. changelog_begin changelog_end --- ledger/participant-integration-api/BUILD.bazel | 1 - 1 file changed, 1 deletion(-) diff --git a/ledger/participant-integration-api/BUILD.bazel b/ledger/participant-integration-api/BUILD.bazel index cf6577c1166c..7afdbf5fb002 100644 --- a/ledger/participant-integration-api/BUILD.bazel +++ b/ledger/participant-integration-api/BUILD.bazel @@ -62,7 +62,6 @@ compile_deps = [ "@maven//:io_opentelemetry_opentelemetry_api", "@maven//:io_opentelemetry_opentelemetry_context", "@maven//:org_slf4j_slf4j_api", - # this oracle import is problematic for daml assistant build "@maven//:com_h2database_h2", "@maven//:org_postgresql_postgresql", "@maven//:com_oracle_database_jdbc_ojdbc8", From 7d0fe79606c2acf73e3fc7cb823f7c1fbe599e5c Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Sun, 18 Jul 2021 23:02:29 +0200 Subject: [PATCH 18/21] Further changes as per review * removing jdbcUrl from HikariDataSourceOwner * comment changes * renames * re-adding an override * removing some parsing logic from PgSynchronousCommitValue changelog_begin changelog_end --- .../main/scala/platform/indexer/ha/PollingChecker.scala | 9 +++++---- .../indexer/parallel/ParallelIndexerFactory.scala | 7 +++---- .../platform/store/appendonlydao/DbDispatcher.scala | 2 -- .../appendonlydao/HikariJdbcConnectionProvider.scala | 2 -- .../platform/store/appendonlydao/JdbcLedgerDao.scala | 1 - .../scala/platform/store/backend/StorageBackend.scala | 8 -------- .../backend/postgresql/PostgresStorageBackend.scala | 2 +- 7 files changed, 9 insertions(+), 22 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala index a793a59f9eb5..8294a7dcff98 100644 --- 
a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PollingChecker.scala @@ -11,7 +11,7 @@ import com.daml.logging.{ContextualizedLogger, LoggingContext} import scala.util.{Failure, Success, Try} /** A simple host of checking. - * - This will ensure that check is accessed by only one caller at a time + * - This will ensure that checkBody is accessed by only one caller at a time * - Does periodic checking * - Exposes check() for on-demand checking from the outside * - If whatever check() fails, it uses killSwitch with an abort @@ -42,11 +42,12 @@ class PollingChecker( periodMillis, ) - // This is a cruel approach for ensuring single threaded usage of the mainConnection. + // This is a cruel approach for ensuring single threaded usage of checkBody. // In theory this could have been made much more efficient: not enqueueing for a check of its own, // but collecting requests, and replying in batches. - // Although experiments show approx 1s until a full connection pool is initialized at first - // (the peak scenario) which should be enough, and which can leave this code very simple. + // Current usage of this class does not necessarily motivate further optimizations: its use from HaCoordinator + // to check the Indexer Main Lock seems to be sufficiently fast even in the peak scenario: the initialization of the + // complete pool. def check(): Unit = synchronized { logger.debug(s"Checking...") Try(checkBody) match { diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index 8eb1411e8c9d..577721dad286 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -146,7 +146,7 @@ object ParallelIndexerFactory { implicit val ec: ExecutionContext = resourceContext.executionContext implicit val matImplicit: Materializer = mat haCoordinator.protectedExecution { connectionInitializer => - val killSwitchCaptor = Promise[KillSwitch]() + val killSwitchPromise = Promise[KillSwitch]() val completionFuture = DbDispatcher .owner( @@ -158,7 +158,6 @@ object ParallelIndexerFactory { connectionInitHook = Some(connectionInitializer.initialize), ), serverRole = ServerRole.Indexer, - jdbcUrl = jdbcUrl, connectionPoolSize = ingestionParallelism + 1, // + 1 for the tailing ledger_end updates connectionTimeout = FiniteDuration( @@ -181,12 +180,12 @@ object ParallelIndexerFactory { // the tricky bit: // the future in the completion handler will be this one // but the future signaling to the HaCoordinator that the protected execution is initialized needs to complete precisely here - killSwitchCaptor.success(killSwitch) + killSwitchPromise.success(killSwitch) completionFuture } } - killSwitchCaptor.future + killSwitchPromise.future .map(Handle(completionFuture.map(_ => ()), _)) } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala index 9c6d6fe60a05..283b3022d4f0 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala +++ 
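// Aside: the killSwitchPromise hand-off above, reduced to a sketch: the Future[Handle]
// given to the HA coordinator must complete exactly when the kill switch of the now
// running execution exists. Future.never and KillSwitches.shared are stand-ins for the
// real completion future and materialized kill switch.
import akka.stream.{KillSwitch, KillSwitches}
import scala.concurrent.{ExecutionContext, Future, Promise}

def initializeStubExecution()(implicit ec: ExecutionContext): Future[Handle] = {
  val killSwitchPromise = Promise[KillSwitch]()
  val completionFuture: Future[Unit] = Future.never // this stub never finishes its work
  killSwitchPromise.success(KillSwitches.shared("ingestion")) // precisely here: initialized
  killSwitchPromise.future.map(killSwitch => Handle(completionFuture, killSwitch))
}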
b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/DbDispatcher.scala @@ -84,7 +84,6 @@ private[platform] object DbDispatcher { def owner( dataSource: DataSource, serverRole: ServerRole, - jdbcUrl: String, connectionPoolSize: Int, connectionTimeout: FiniteDuration, metrics: Metrics, @@ -93,7 +92,6 @@ private[platform] object DbDispatcher { hikariDataSource <- new HikariDataSourceOwner( dataSource, serverRole, - jdbcUrl, connectionPoolSize, connectionPoolSize, connectionTimeout, diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala index 7e1564e7ec77..10698a799059 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/HikariJdbcConnectionProvider.scala @@ -24,7 +24,6 @@ import scala.util.control.NonFatal private[platform] final class HikariDataSourceOwner( dataSource: DataSource, serverRole: ServerRole, - jdbcUrl: String, minimumIdle: Int, maxPoolSize: Int, connectionTimeout: FiniteDuration, @@ -39,7 +38,6 @@ private[platform] final class HikariDataSourceOwner( override def acquire()(implicit context: ResourceContext): Resource[HikariDataSource] = { val config = new HikariConfig config.setDataSource(dataSource) - config.setJdbcUrl(jdbcUrl) config.setAutoCommit(false) config.setMaximumPoolSize(maxPoolSize) config.setMinimumIdle(minimumIdle) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala index 8d30a4bf127b..d77b5b0d31b2 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/appendonlydao/JdbcLedgerDao.scala @@ -864,7 +864,6 @@ private[platform] object JdbcLedgerDao { dbDispatcher <- DbDispatcher.owner( storageBackend.createDataSource(jdbcUrl), serverRole, - jdbcUrl, connectionPoolSize, connectionTimeout, metrics, diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala index 443863e8fbdd..a8513a2ab1eb 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala @@ -260,14 +260,6 @@ object DataSourceStorageBackend { case object RemoteWrite extends PgSynchronousCommitValue("remote_write") case object RemoteApply extends PgSynchronousCommitValue("remote_apply") case object Local extends PgSynchronousCommitValue("local") - - def apply(s: String): PgSynchronousCommitValue = s.toLowerCase match { - case On.`pgSqlName` => On - case Off.`pgSqlName` => Off - case RemoteWrite.`pgSqlName` => RemoteWrite - case RemoteApply.`pgSqlName` => RemoteApply - case Local.`pgSqlName` => Local - } } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 55e7e9adc4c6..184456abd772 
100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -84,7 +84,7 @@ private[backend] object PostgresStorageBackend () } - val duplicateKeyError: String = "duplicate key" + override val duplicateKeyError: String = "duplicate key" override def commandCompletions( startExclusive: Offset, From 7d890a9f43c98a318692622f8a5814309a8cac6d Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Mon, 19 Jul 2021 21:24:22 +0200 Subject: [PATCH 19/21] Restructuring DataSourceConfig for more clarity on StorageBackend implementation related configurations changelog_begin changelog_end --- .../scala/platform/indexer/JdbcIndexer.scala | 18 ++++++++-------- .../store/backend/StorageBackend.scala | 16 +++++--------- .../postgresql/PostgresDataSourceConfig.scala | 21 +++++++++++++++++++ .../postgresql/PostgresStorageBackend.scala | 2 +- 4 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresDataSourceConfig.scala diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala index bad905e5100b..d33b6cca4adc 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/JdbcIndexer.scala @@ -27,11 +27,9 @@ import com.daml.platform.store.DbType.{ SynchronousCommit, } import com.daml.platform.store.appendonlydao.events.{CompressionStrategy, LfValueTranslation} -import com.daml.platform.store.backend.DataSourceStorageBackend.{ - DataSourceConfig, - PgSynchronousCommitValue, -} +import com.daml.platform.store.backend.DataSourceStorageBackend.DataSourceConfig import com.daml.platform.store.backend.StorageBackend +import com.daml.platform.store.backend.postgresql.PostgresDataSourceConfig import com.daml.platform.store.dao.LedgerDao import com.daml.platform.store.{DbType, FlywayMigrations, LfValueTranslationCache} @@ -181,11 +179,13 @@ object JdbcIndexer { batchWithinMillis = config.batchWithinMillis, metrics = metrics, dataSourceConfig = DataSourceConfig( - pgSynchronousCommit = Some(config.asyncCommitMode match { - case SynchronousCommit => PgSynchronousCommitValue.On - case AsynchronousCommit => PgSynchronousCommitValue.Off - case LocalSynchronousCommit => PgSynchronousCommitValue.Local - }) + postgresConfig = PostgresDataSourceConfig( + synchronousCommit = Some(config.asyncCommitMode match { + case SynchronousCommit => PostgresDataSourceConfig.SynchronousCommitValue.On + case AsynchronousCommit => PostgresDataSourceConfig.SynchronousCommitValue.Off + case LocalSynchronousCommit => PostgresDataSourceConfig.SynchronousCommitValue.Local + }) + ) ), haConfig = config.haConfig, ) diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala index a8513a2ab1eb..6b97269c0bc2 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/StorageBackend.scala @@ -22,7 +22,7 @@ import 
com.daml.platform.store.backend.EventStorageBackend.{FilterParams, RangeParams} import com.daml.platform.store.backend.StorageBackend.RawTransactionEvent import com.daml.platform.store.backend.h2.H2StorageBackend import com.daml.platform.store.backend.oracle.OracleStorageBackend -import com.daml.platform.store.backend.postgresql.PostgresStorageBackend +import com.daml.platform.store.backend.postgresql.{PostgresDataSourceConfig, PostgresStorageBackend} import com.daml.platform.store.entries.{ConfigurationEntry, PackageLedgerEntry, PartyLedgerEntry} import com.daml.platform.store.interfaces.LedgerDaoContractsReader.KeyState import com.daml.scalautil.NeverEqualsOverride @@ -249,18 +249,12 @@ trait DataSourceStorageBackend { } object DataSourceStorageBackend { + + /** @param postgresConfig configurations which apply only to the PostgreSQL backend + */ case class DataSourceConfig( - pgSynchronousCommit: Option[PgSynchronousCommitValue] = None + postgresConfig: PostgresDataSourceConfig = PostgresDataSourceConfig() ) - - sealed abstract class PgSynchronousCommitValue(val pgSqlName: String) - object PgSynchronousCommitValue { - case object On extends PgSynchronousCommitValue("on") - case object Off extends PgSynchronousCommitValue("off") - case object RemoteWrite extends PgSynchronousCommitValue("remote_write") - case object RemoteApply extends PgSynchronousCommitValue("remote_apply") - case object Local extends PgSynchronousCommitValue("local") - } } diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresDataSourceConfig.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresDataSourceConfig.scala new file mode 100644 index 000000000000..3cf5eac770dc --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresDataSourceConfig.scala @@ -0,0 +1,21 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.store.backend.postgresql + +import com.daml.platform.store.backend.postgresql.PostgresDataSourceConfig.SynchronousCommitValue + +case class PostgresDataSourceConfig( + synchronousCommit: Option[SynchronousCommitValue] = None +) + +object PostgresDataSourceConfig { + sealed abstract class SynchronousCommitValue(val pgSqlName: String) + object SynchronousCommitValue { + case object On extends SynchronousCommitValue("on") + case object Off extends SynchronousCommitValue("off") + case object RemoteWrite extends SynchronousCommitValue("remote_write") + case object RemoteApply extends SynchronousCommitValue("remote_apply") + case object Local extends SynchronousCommitValue("local") + } +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala index 184456abd772..99703b50481c 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/store/backend/postgresql/PostgresStorageBackend.scala @@ -209,7 +209,7 @@ private[backend] object PostgresStorageBackend val pgSimpleDataSource = new PGSimpleDataSource() pgSimpleDataSource.setUrl(jdbcUrl) val hookFunctions = List( - dataSourceConfig.pgSynchronousCommit.toList + dataSourceConfig.postgresConfig.synchronousCommit.toList .map(synchCommitValue => exe(s"SET synchronous_commit TO ${synchCommitValue.pgSqlName}")), connectionInitHook.toList, ).flatten From 26d55b214bb36665c2c6fec16fbf7acb3ea79daa Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Mon, 19 Jul 2021 21:33:26 +0200 Subject: [PATCH 20/21] Rename feature flag command line argument changelog_begin changelog_end --- .../com/daml/ledger/participant/state/kvutils/app/Config.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala index a96c80e0783b..47b453339205 100644 --- a/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala +++ b/ledger/participant-state/kvutils/app/src/main/scala/com/daml/ledger/participant/state/kvutils/app/Config.scala @@ -539,7 +539,7 @@ object Config { ) // TODO ha: remove after stable - opt[Unit]("experimental-index-ha") + opt[Unit]("index-ha-unsafe") .optional() .hidden() .text( From a8382b20e208ee9ef95cd58200e2febe036084e8 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Wed, 21 Jul 2021 22:52:13 +0200 Subject: [PATCH 21/21] Minor changes based on review changelog_begin changelog_end --- .../main/scala/platform/indexer/ha/PreemptableSequence.scala | 5 +++-- .../platform/indexer/parallel/ParallelIndexerFactory.scala | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala index eb4cd7b9b578..e353c4dbbe8b 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala +++ 
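// Aside: what the synchronousCommit setting above amounts to on a raw JDBC connection,
// per the SET statement used by the connection init hook in this patch; sketch only.
import java.sql.Connection

def applySynchronousCommit(
    connection: Connection,
    value: PostgresDataSourceConfig.SynchronousCommitValue,
): Unit = {
  val stmt = connection.createStatement()
  // session-scoped: applies to every transaction on this connection until it closes
  try { stmt.execute(s"SET synchronous_commit TO ${value.pgSqlName}"); () }
  finally stmt.close()
}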
b/ledger/participant-integration-api/src/main/scala/platform/indexer/ha/PreemptableSequence.scala @@ -126,8 +126,9 @@ object PreemptableSequence { go(body).transformWith { // since we check countdown to 0, starting from negative means unlimited retries case Failure(ex) if maxAmountOfRetries == 0 => - logger.info( - s"Maximum amount of retries reached ($maxAmountOfRetries) failing permanently. (${ex.getMessage})" + logger.warn( + s"Maximum amount of retries reached ($maxAmountOfRetries) failing permanently.", + ex, ) Future.failed(ex) case Success(t) => Future.successful(t) diff --git a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala index 577721dad286..6c8bf784e95d 100644 --- a/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala +++ b/ledger/participant-integration-api/src/main/scala/platform/indexer/parallel/ParallelIndexerFactory.scala @@ -127,7 +127,7 @@ object ParallelIndexerFactory { .map(_ -> System.nanoTime()) ) .map(_ => ()) - .keepAlive( // TODO ha: remove as stable. This keepAlive approach was introduced for safety with async commit. This is still needed until HA is mandatory for Postgres to ensure safety with async commit. + .keepAlive( // TODO ha: remove as stable. This keepAlive approach was introduced for safety with async commit. This is still needed until HA is mandatory for Postgres to ensure safety with async commit. This will not be needed anymore if HA is enabled by default, since the HA mutual exclusion implementation with advisory locks makes it impossible for a db-shutdown to go undetected. keepAliveMaxIdleDuration, () => if (dbDispatcher.currentHealth() == HealthStatus.healthy) {