From d006ad0e8b7619e7018d51a89b0b6002a520c207 Mon Sep 17 00:00:00 2001 From: Marton Nagy Date: Wed, 3 Nov 2021 21:27:10 +0100 Subject: [PATCH] Add string-interning view primitives [DPP-702] (#11475) changelog_begin changelog_end --- .../store/interning/RawStringInterning.scala | 40 ++++ .../store/interning/StringInterning.scala | 87 +++++++ .../store/interning/StringInterningView.scala | 114 ++++++++++ .../interning/StringInterningViewSpec.scala | 215 ++++++++++++++++++ 4 files changed, 456 insertions(+) create mode 100644 ledger/participant-integration-api/src/main/scala/platform/store/interning/RawStringInterning.scala create mode 100644 ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterning.scala create mode 100644 ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterningView.scala create mode 100644 ledger/participant-integration-api/src/test/suite/scala/platform/store/interning/StringInterningViewSpec.scala diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/interning/RawStringInterning.scala b/ledger/participant-integration-api/src/main/scala/platform/store/interning/RawStringInterning.scala new file mode 100644 index 000000000000..45175d6bb60a --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/store/interning/RawStringInterning.scala @@ -0,0 +1,40 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.store.interning + +private[interning] case class RawStringInterning( + map: Map[String, Int], + idMap: Map[Int, String], + lastId: Int, +) + +private[interning] object RawStringInterning { + /** Merges `entries` into `rawStringInterning`; returns it unchanged if `entries` is empty. */ + def from( + entries: Iterable[(Int, String)], + rawStringInterning: RawStringInterning = RawStringInterning(Map.empty, Map.empty, 0), + ): RawStringInterning = + if (entries.isEmpty) rawStringInterning + else + RawStringInterning( + map = rawStringInterning.map ++ entries.view.map(_.swap), + idMap = rawStringInterning.idMap ++ entries, + lastId = math.max(rawStringInterning.lastId, entries.view.map(_._1).max), // never regress + ) + /** Pairs each distinct, not yet interned string (first-seen order) with the next id after `lastId`. */ + def newEntries( + strings: Iterator[String], + rawStringInterning: RawStringInterning, + ): Vector[(Int, String)] = + strings + .filterNot(rawStringInterning.map.contains) + .toVector + .distinct // TODO Iterators do not have .distinct in Scala 2.12 + .view + .zipWithIndex + .map { case (string, index) => + (index + 1 + rawStringInterning.lastId, string) + } + .toVector +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterning.scala b/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterning.scala new file mode 100644 index 000000000000..b25f702c486a --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterning.scala @@ -0,0 +1,87 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.store.interning + +import com.daml.lf.data.Ref + +/** The facade for all supported string-interning domains: template identifiers and parties + */ +trait StringInterning { + def templateId: StringInterningDomain[Ref.Identifier] + def party: StringInterningDomain[Ref.Party] +} + +/** Composes a StringInterningAccessor for the domain-string type and an unsafe StringInterningAccessor for raw strings (plain String values, not converted to T) + * + * @tparam T is the type of the string-related domain object which is interned + */ +trait StringInterningDomain[T] extends StringInterningAccessor[T] { + def unsafe: StringInterningAccessor[String] +} +/** Builds domains on top of a raw accessor whose strings carry a prefix. NOTE(review): externalize strips prefix.length characters without checking that the stored string starts with the prefix - confirm ids are never looked up in the wrong domain. */ +object StringInterningDomain { + private[interning] def prefixing[T]( + prefix: String, + prefixedAccessor: StringInterningAccessor[String], + to: String => T, + from: T => String, + ): StringInterningDomain[T] = + new StringInterningDomain[T] { + override val unsafe: StringInterningAccessor[String] = new StringInterningAccessor[String] { + override def internalize(t: String): Int = prefixedAccessor.internalize(prefix + t) + + override def tryInternalize(t: String): Option[Int] = + prefixedAccessor.tryInternalize(prefix + t) + + override def externalize(id: Int): String = + prefixedAccessor.externalize(id).substring(prefix.length) + + override def tryExternalize(id: Int): Option[String] = + prefixedAccessor.tryExternalize(id).map(_.substring(prefix.length)) + } + + override def internalize(t: T): Int = unsafe.internalize(from(t)) + + override def tryInternalize(t: T): Option[Int] = unsafe.tryInternalize(from(t)) + + override def externalize(id: Int): T = to(unsafe.externalize(id)) + + override def tryExternalize(id: Int): Option[T] = unsafe.tryExternalize(id).map(to) + } +} + +/** The main interface for using string-interning. 
+ * Client code can use this to map between interned ids and string-domain objects back and forth + * + * @tparam T is the type of the string-related domain object which is interned + */ +trait StringInterningAccessor[T] { + + /** Get the interned id + * + * @param t the value + * @return the integer id, throws an exception if the value is not interned + */ + def internalize(t: T): Int + + /** Optionally get the interned id + * @param t the value + * @return some integer id, or none if not found + */ + def tryInternalize(t: T): Option[Int] + + /** Get the value for an id + * + * @param id integer id + * @return the value, throws an exception if no value is found + */ + def externalize(id: Int): T + + /** Optionally get the value for an id + * + * @param id integer id + * @return some value, or none if not found + */ + def tryExternalize(id: Int): Option[T] +} diff --git a/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterningView.scala b/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterningView.scala new file mode 100644 index 000000000000..6c3f17a26c4b --- /dev/null +++ b/ledger/participant-integration-api/src/main/scala/platform/store/interning/StringInterningView.scala @@ -0,0 +1,114 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.store.interning + +import com.daml.dec.DirectExecutionContext +import com.daml.lf.data.Ref +import com.daml.logging.LoggingContext + +import scala.concurrent.Future + +class DomainStringIterators( + val parties: Iterator[String], + val templateIds: Iterator[String], +) + +trait InternizingStringInterningView { + + /** Internize strings of different domains. The new entries are returned as prefixed entries for persistent storage. 
+ * + * @param domainStringIterators iterators of the new entries + * @return If some of the entries were not part of the view: they will be added, and these will be returned as pairs of interned id and raw, prefixed string. + */ + def internize(domainStringIterators: DomainStringIterators): Iterable[(Int, String)] +} + +trait UpdatingStringInterningView { + + /** Update the StringInterningView from persistence + * + * @param lastStringInterningId this is the "version" of the persistent view, from which the StringInterningView can see if it is behind + * @return a completion Future: if the view is behind it will load the missing entries from persistence, and update the view state + */ + def update(lastStringInterningId: Int)(implicit loggingContext: LoggingContext): Future[Unit] +} + +/** Encapsulate the dependency to load a range of string-interning-entries from persistence + */ +trait LoadStringInterningEntries { + def apply( + fromExclusive: Int, + toInclusive: Int, + ): LoggingContext => Future[Iterable[(Int, String)]] +} + +/** This uses the prefixed raw representation internally similar to the persistence layer. 
+ * Concurrent view usage is optimized for reading: + * - The single, volatile reference enables non-synchronized access from all threads, accessing a persistent (immutable) data structure + * - On the writing side it synchronizes (this usage is anyway expected) and maintains the immutable internal data structure + */ +class StringInterningView(loadPrefixedEntries: LoadStringInterningEntries) + extends StringInterning + with InternizingStringInterningView + with UpdatingStringInterningView { + @volatile private var raw: RawStringInterning = RawStringInterning.from(Nil) + + private def rawAccessor: StringInterningAccessor[String] = new StringInterningAccessor[String] { + override def internalize(t: String): Int = raw.map(t) + override def tryInternalize(t: String): Option[Int] = raw.map.get(t) + override def externalize(id: Int): String = raw.idMap(id) + override def tryExternalize(id: Int): Option[String] = raw.idMap.get(id) + } + + private val TemplatePrefix = "t|" + private val PartyPrefix = "p|" + + override val templateId: StringInterningDomain[Ref.Identifier] = + StringInterningDomain.prefixing( + prefix = TemplatePrefix, + prefixedAccessor = rawAccessor, + to = Ref.Identifier.assertFromString, + from = _.toString, + ) + + override val party: StringInterningDomain[Ref.Party] = + StringInterningDomain.prefixing( + prefix = PartyPrefix, + prefixedAccessor = rawAccessor, + to = Ref.Party.assertFromString, + from = _.toString, + ) + + override def internize(domainStringIterators: DomainStringIterators): Iterable[(Int, String)] = + synchronized { + val allPrefixedStrings = + domainStringIterators.parties.map(PartyPrefix + _) ++ + domainStringIterators.templateIds.map(TemplatePrefix + _) + val newEntries = RawStringInterning.newEntries( + strings = allPrefixedStrings, + rawStringInterning = raw, + ) + updateView(newEntries) + newEntries + } + // NOTE(review): update reads raw.lastId twice without holding the lock - confirm it never runs concurrently with other writers. + override def update( + lastStringInterningId: Int + )(implicit loggingContext: LoggingContext): Future[Unit] = + if 
(lastStringInterningId <= raw.lastId) { + Future.unit + } else { + loadPrefixedEntries(raw.lastId, lastStringInterningId)(loggingContext) + .map(updateView)(DirectExecutionContext) + } + + private def updateView(newEntries: Iterable[(Int, String)]): Unit = synchronized { + if (newEntries.nonEmpty) { + raw = RawStringInterning.from( + entries = newEntries, + rawStringInterning = raw, + ) + } + } +} diff --git a/ledger/participant-integration-api/src/test/suite/scala/platform/store/interning/StringInterningViewSpec.scala b/ledger/participant-integration-api/src/test/suite/scala/platform/store/interning/StringInterningViewSpec.scala new file mode 100644 index 000000000000..2ff037a33ff4 --- /dev/null +++ b/ledger/participant-integration-api/src/test/suite/scala/platform/store/interning/StringInterningViewSpec.scala @@ -0,0 +1,215 @@ +// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package com.daml.platform.store.interning + +import com.daml.lf.data.Ref +import com.daml.logging.LoggingContext +import org.scalatest.flatspec.AsyncFlatSpec +import org.scalatest.matchers.should.Matchers + +import scala.concurrent.Future +import scala.util.Try + +class StringInterningViewSpec extends AsyncFlatSpec with Matchers { + private implicit val lc: LoggingContext = LoggingContext.ForTesting + + behavior of "StringInterningView" + + it should "provide working cache by extending" in { + val testee = new StringInterningView((_, _) => _ => Future.successful(Nil)) + partyAbsent(testee, "p1") + partyAbsent(testee, "p2") + partyAbsent(testee, "22:same:name") + templateAbsent(testee, "22:t:a") + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + testee.internize( + new DomainStringIterators( + parties = List("p1", "p2", "22:same:name").iterator, + templateIds = List("22:t:a", "22:t:b", "22:same:name").iterator, + ) + ) shouldBe Vector( + 1 -> "p|p1", + 2 -> "p|p2", + 3 -> 
"p|22:same:name", + 4 -> "t|22:t:a", + 5 -> "t|22:t:b", + 6 -> "t|22:same:name", + ) + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyPresent(testee, "22:same:name", 3) + partyAbsent(testee, "unknown") + templatePresent(testee, "22:t:a", 4) + templatePresent(testee, "22:t:b", 5) + templatePresent(testee, "22:same:name", 6) + templateAbsent(testee, "22:unkno:wn") + } + + it should "extend working view correctly" in { + val testee = new StringInterningView((_, _) => _ => Future.successful(Nil)) + partyAbsent(testee, "p1") + partyAbsent(testee, "p2") + partyAbsent(testee, "22:same:name") + templateAbsent(testee, "22:t:a") + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + testee.internize( + new DomainStringIterators( + parties = List("p1", "p2", "22:same:name").iterator, + templateIds = List("22:t:a").iterator, + ) + ) shouldBe Vector( + 1 -> "p|p1", + 2 -> "p|p2", + 3 -> "p|22:same:name", + 4 -> "t|22:t:a", + ) + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyPresent(testee, "22:same:name", 3) + partyAbsent(testee, "unknown") + templatePresent(testee, "22:t:a", 4) + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + templateAbsent(testee, "22:unkno:wn") + testee.internize( + new DomainStringIterators( + parties = List("p1", "p2").iterator, + templateIds = List("22:t:a", "22:t:b", "22:same:name").iterator, + ) + ) shouldBe Vector( + 5 -> "t|22:t:b", + 6 -> "t|22:same:name", + ) + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyPresent(testee, "22:same:name", 3) + partyAbsent(testee, "unknown") + templatePresent(testee, "22:t:a", 4) + templatePresent(testee, "22:t:b", 5) + templatePresent(testee, "22:same:name", 6) + templateAbsent(testee, "22:unkno:wn") + } + + it should "not update view if last id is behind" in { + val testee = new StringInterningView((from, to) => + _ => { + from shouldBe 0 + to shouldBe 6 + Future.successful( + Vector( + 1 -> 
"p|p1", + 2 -> "p|p2", + 3 -> "p|22:same:name", + 4 -> "t|22:t:a", + 5 -> "t|22:t:b", + 6 -> "t|22:same:name", + ) + ) + } + ) + partyAbsent(testee, "p1") + partyAbsent(testee, "p2") + partyAbsent(testee, "22:same:name") + templateAbsent(testee, "22:t:a") + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + testee.update(6).map { _ => + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyPresent(testee, "22:same:name", 3) + partyAbsent(testee, "unknown") + templatePresent(testee, "22:t:a", 4) + templatePresent(testee, "22:t:b", 5) + templatePresent(testee, "22:same:name", 6) + templateAbsent(testee, "22:unk:nown") + } + } + + it should "be able to update working view correctly" in { + val testee = new StringInterningView((from, to) => + _ => { + from shouldBe 2 + to shouldBe 6 + Future.successful( + Vector( + 3 -> "p|22:same:name", + 4 -> "t|22:t:a", + 5 -> "t|22:t:b", + 6 -> "t|22:same:name", + ) + ) + } + ) + partyAbsent(testee, "p1") + partyAbsent(testee, "p2") + partyAbsent(testee, "22:same:name") + templateAbsent(testee, "22:t:a") + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + testee.internize( + new DomainStringIterators( + parties = List("p1", "p2").iterator, + templateIds = List().iterator, + ) + ) + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyAbsent(testee, "22:same:name") + templateAbsent(testee, "22:t:a") + templateAbsent(testee, "22:t:b") + templateAbsent(testee, "22:same:name") + testee.update(6).map { _ => + partyPresent(testee, "p1", 1) + partyPresent(testee, "p2", 2) + partyPresent(testee, "22:same:name", 3) + partyAbsent(testee, "unknown") + templatePresent(testee, "22:t:a", 4) + templatePresent(testee, "22:t:b", 5) + templatePresent(testee, "22:same:name", 6) + templateAbsent(testee, "22:unk:nown") + } + } + + private def partyPresent(view: StringInterning, party: String, id: Int) = { + val typedParty = Ref.Party.assertFromString(party) + 
view.party.internalize(typedParty) shouldBe id + view.party.tryInternalize(typedParty) shouldBe Some(id) + view.party.externalize(id) shouldBe typedParty + view.party.tryExternalize(id) shouldBe Some(typedParty) + view.party.unsafe.internalize(party) shouldBe id + view.party.unsafe.tryInternalize(party) shouldBe Some(id) + view.party.unsafe.externalize(id) shouldBe party + view.party.unsafe.tryExternalize(id) shouldBe Some(party) + } + + private def partyAbsent(view: StringInterning, party: String) = { + val typedParty = Ref.Party.assertFromString(party) + Try(view.party.internalize(typedParty)).isFailure shouldBe true + view.party.tryInternalize(typedParty) shouldBe None + Try(view.party.unsafe.internalize(party)).isFailure shouldBe true + view.party.unsafe.tryInternalize(party) shouldBe None + } + + private def templatePresent(view: StringInterning, template: String, id: Int) = { + val typedTemplate = Ref.Identifier.assertFromString(template) + view.templateId.internalize(typedTemplate) shouldBe id + view.templateId.tryInternalize(typedTemplate) shouldBe Some(id) + view.templateId.externalize(id) shouldBe typedTemplate + view.templateId.tryExternalize(id) shouldBe Some(typedTemplate) + view.templateId.unsafe.internalize(template) shouldBe id + view.templateId.unsafe.tryInternalize(template) shouldBe Some(id) + view.templateId.unsafe.externalize(id) shouldBe template + view.templateId.unsafe.tryExternalize(id) shouldBe Some(template) + } + + private def templateAbsent(view: StringInterning, template: String) = { + val typedTemplate = Ref.Identifier.assertFromString(template) + Try(view.templateId.internalize(typedTemplate)).isFailure shouldBe true + view.templateId.tryInternalize(typedTemplate) shouldBe None + Try(view.templateId.unsafe.internalize(template)).isFailure shouldBe true + view.templateId.unsafe.tryInternalize(template) shouldBe None + } +}