Skip to content

Commit

Permalink
Add string-interning view primitives [DPP-702] (#11475)
Browse files Browse the repository at this point in the history
changelog_begin
changelog_end
  • Loading branch information
nmarton-da authored Nov 3, 2021
1 parent a917520 commit d006ad0
Show file tree
Hide file tree
Showing 4 changed files with 456 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.store.interning

/** Immutable snapshot of the raw (prefixed) string-interning state.
  *
  * @param map    lookup from raw string to its interned id
  * @param idMap  lookup from interned id back to its raw string
  * @param lastId the id that `newEntries` continues numbering from (0 for the empty default)
  */
private[interning] case class RawStringInterning(
    map: Map[String, Int],
    idMap: Map[Int, String],
    lastId: Int,
)

private[interning] object RawStringInterning {

  /** Builds a new snapshot by adding `entries` on top of an existing snapshot.
    *
    * `entries` is traversed more than once, so it must be a re-traversable collection.
    *
    * @param entries            (id, raw string) pairs to add
    * @param rawStringInterning the snapshot to extend; defaults to an empty snapshot with `lastId` 0
    * @return the given snapshot itself if `entries` is empty, otherwise a snapshot containing both
    *         the old and the new entries
    */
  def from(
      entries: Iterable[(Int, String)],
      rawStringInterning: RawStringInterning = RawStringInterning(Map.empty, Map.empty, 0),
  ): RawStringInterning =
    if (entries.isEmpty) rawStringInterning
    else
      RawStringInterning(
        map = rawStringInterning.map ++ entries.view.map(_.swap),
        idMap = rawStringInterning.idMap ++ entries,
        // lastId must never move backwards: if a batch only carries ids that are already known
        // (for example a stale, overlapping load), taking just the batch maximum would lower
        // lastId and make newEntries hand out ids which are already in use.
        lastId = math.max(rawStringInterning.lastId, entries.view.map(_._1).max),
      )

  /** Assigns fresh ids to the strings which are not interned yet.
    *
    * Ids are consecutive and start at `rawStringInterning.lastId + 1`, following the order of
    * first occurrence in `strings`. The given snapshot itself is not changed.
    *
    * @param strings            candidate raw strings; may contain duplicates and already interned strings
    * @param rawStringInterning the snapshot to check against
    * @return (id, raw string) pairs for the strings that are missing from the snapshot
    */
  def newEntries(
      strings: Iterator[String],
      rawStringInterning: RawStringInterning,
  ): Vector[(Int, String)] =
    strings
      .filterNot(rawStringInterning.map.contains)
      .toVector
      .distinct // TODO Iterators do not have .distinct in Scala 2.12
      .view
      .zipWithIndex
      .map { case (string, index) =>
        (index + 1 + rawStringInterning.lastId, string)
      }
      .toVector
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.store.interning

import com.daml.lf.data.Ref

/** The facade for all supported string-interning domains.
  *
  * Each member exposes one domain as a [[StringInterningDomain]], typed by the domain object
  * which is mapped to and from interned integer ids.
  */
trait StringInterning {
/** The interning domain of template identifiers. */
def templateId: StringInterningDomain[Ref.Identifier]
/** The interning domain of parties. */
def party: StringInterningDomain[Ref.Party]
}

/** Composes a [[StringInterningAccessor]] for the domain-object type with an `unsafe`
  * [[StringInterningAccessor]] that works on plain strings of the same domain.
  *
  * @tparam T the type of the string-related domain object which is interned
  */
trait StringInterningDomain[T] extends StringInterningAccessor[T] {
/** Accessor for the same domain that takes and returns plain strings instead of `T`.
  * NOTE(review): presumably called "unsafe" because it skips the conversion to `T` - confirm.
  */
def unsafe: StringInterningAccessor[String]
}

object StringInterningDomain {

  /** Creates a domain on top of an accessor that stores the strings of several domains, each
    * tagged with a domain-specific prefix.
    *
    * On the way in the prefix is prepended before delegating to `prefixedAccessor`; on the way
    * out the first `prefix.length` characters of the stored string are dropped.
    *
    * @param prefix           the tag prepended to every string of this domain
    * @param prefixedAccessor the underlying accessor working with prefixed strings
    * @param to               converts an un-prefixed string into the domain object
    * @param from             converts the domain object into its un-prefixed string
    * @tparam T the type of the domain object
    * @return the domain, whose `unsafe` accessor works with un-prefixed strings
    */
  private[interning] def prefixing[T](
      prefix: String,
      prefixedAccessor: StringInterningAccessor[String],
      to: String => T,
      from: T => String,
  ): StringInterningDomain[T] = {
    def withPrefix(plain: String): String = prefix + plain
    def withoutPrefix(prefixed: String): String = prefixed.substring(prefix.length)

    // String-level accessor of this domain: adds the prefix on the way in, drops it on the way out.
    val plainAccessor: StringInterningAccessor[String] = new StringInterningAccessor[String] {
      override def internalize(t: String): Int =
        prefixedAccessor.internalize(withPrefix(t))

      override def tryInternalize(t: String): Option[Int] =
        prefixedAccessor.tryInternalize(withPrefix(t))

      override def externalize(id: Int): String =
        withoutPrefix(prefixedAccessor.externalize(id))

      override def tryExternalize(id: Int): Option[String] =
        prefixedAccessor.tryExternalize(id).map(withoutPrefix)
    }

    // Domain-level accessor: converts between T and String around the string-level accessor.
    new StringInterningDomain[T] {
      override val unsafe: StringInterningAccessor[String] = plainAccessor

      override def internalize(t: T): Int = {
        val plain = from(t)
        unsafe.internalize(plain)
      }

      override def tryInternalize(t: T): Option[Int] = {
        val plain = from(t)
        unsafe.tryInternalize(plain)
      }

      override def externalize(id: Int): T = {
        val plain = unsafe.externalize(id)
        to(plain)
      }

      override def tryExternalize(id: Int): Option[T] =
        unsafe.tryExternalize(id).map(plain => to(plain))
    }
  }
}

/** The main interface for using string-interning.
  * Client code uses this to map between interned integer ids and string-domain objects, in both
  * directions. Every lookup comes in a throwing and in an `Option`-returning variant.
  *
  * @tparam T the type of the string-related domain object which is interned
  */
trait StringInterningAccessor[T] {

/** Get the interned id of a value.
  *
  * @param t the value
  * @return the integer id; throws an exception if no id is found for the value
  */
def internalize(t: T): Int

/** Optionally get the interned id of a value.
  *
  * @param t the value
  * @return some integer id, or none if no id is found for the value
  */
def tryInternalize(t: T): Option[Int]

/** Get the value for an interned id.
  *
  * @param id the integer id
  * @return the value; throws an exception if no value is found for the id
  */
def externalize(id: Int): T

/** Optionally get the value for an interned id.
  *
  * @param id the integer id
  * @return some value, or none if no value is found for the id
  */
def tryExternalize(id: Int): Option[T]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.store.interning

import com.daml.dec.DirectExecutionContext
import com.daml.lf.data.Ref
import com.daml.logging.LoggingContext

import scala.concurrent.Future

/** Groups the strings to be interned by domain, as input for
  * [[InternizingStringInterningView.internize]].
  *
  * The strings carry no domain prefix; [[StringInterningView]] adds the prefix itself.
  * Both members are iterators, so each can be traversed only once.
  *
  * @param parties     the party strings
  * @param templateIds the template-id strings
  */
class DomainStringIterators(
val parties: Iterator[String],
val templateIds: Iterator[String],
)

/** The write side of a string-interning view: interns strings which are not part of the view yet. */
trait InternizingStringInterningView {

/** Internize strings of different domains. The new entries are returned as prefixed entries for persistent storage.
  *
  * @param domainStringIterators iterators of the strings to intern, one per domain
  * @return the entries which were not part of the view before this call: they are added to the
  *         view and returned as pairs of interned id and raw, prefixed string
  */
def internize(domainStringIterators: DomainStringIterators): Iterable[(Int, String)]
}

/** The update side of a string-interning view: catches the view up with the persisted entries. */
trait UpdatingStringInterningView {

/** Update the StringInterningView from persistence.
  *
  * @param lastStringInterningId the "version" of the persisted entries, from which the
  *                              StringInterningView can tell whether it is behind
  * @param loggingContext the logging context used while loading
  * @return a completion Future: if the view is behind, the missing entries are loaded from
  *         persistence and the view state is updated
  */
def update(lastStringInterningId: Int)(implicit loggingContext: LoggingContext): Future[Unit]
}

/** Encapsulates the dependency for loading a range of string-interning entries from persistence.
  */
trait LoadStringInterningEntries {
/** Prepares loading of the entries in the id range `(fromExclusive, toInclusive]`.
  *
  * @param fromExclusive lower bound of the id range, itself not part of the range
  * @param toInclusive   upper bound of the id range, itself part of the range
  * @return a function which, given a logging context, asynchronously yields the entries as
  *         pairs of interned id and string
  */
def apply(
fromExclusive: Int,
toInclusive: Int,
): LoggingContext => Future[Iterable[(Int, String)]]
}

/** String-interning view which keeps its entries in the prefixed raw representation, similar to
  * the persistence layer.
  *
  * Concurrent usage is optimized for reading:
  *  - readers go through a single `@volatile` reference to an immutable [[RawStringInterning]]
  *    snapshot, so lookups need no synchronization
  *  - writers (`internize` and the internal view update) are `synchronized` and publish a new
  *    immutable snapshot through that reference
  *
  * @param loadPrefixedEntries loads a range of prefixed entries from persistence, used by `update`
  */
class StringInterningView(loadPrefixedEntries: LoadStringInterningEntries)
    extends StringInterning
    with InternizingStringInterningView
    with UpdatingStringInterningView {

  // The currently published snapshot; replaced as a whole, never mutated in place.
  @volatile private var raw: RawStringInterning = RawStringInterning.from(Nil)

  // Accessor over the prefixed strings. Every call reads `raw` again, so it always works on the
  // snapshot which is published at the time of the lookup.
  private def rawAccessor: StringInterningAccessor[String] =
    new StringInterningAccessor[String] {
      override def internalize(t: String): Int = raw.map.apply(t)

      override def tryInternalize(t: String): Option[Int] = raw.map.get(t)

      override def externalize(id: Int): String = raw.idMap.apply(id)

      override def tryExternalize(id: Int): Option[String] = raw.idMap.get(id)
    }

  // Domain tags prepended to the strings before they are stored in the raw representation.
  private val TemplatePrefix = "t|"
  private val PartyPrefix = "p|"

  override val templateId: StringInterningDomain[Ref.Identifier] =
    StringInterningDomain.prefixing(
      prefix = TemplatePrefix,
      prefixedAccessor = rawAccessor,
      to = Ref.Identifier.assertFromString,
      from = identifier => identifier.toString,
    )

  override val party: StringInterningDomain[Ref.Party] =
    StringInterningDomain.prefixing(
      prefix = PartyPrefix,
      prefixedAccessor = rawAccessor,
      to = Ref.Party.assertFromString,
      from = partyId => partyId.toString,
    )

  /** Interns the given strings which are not part of the view yet.
    *
    * Runs under the lock of this view: the new entries are computed against the current
    * snapshot and added to the view before they are returned.
    *
    * @param domainStringIterators the un-prefixed strings to intern, one iterator per domain
    * @return the newly added entries as pairs of interned id and prefixed string
    */
  override def internize(domainStringIterators: DomainStringIterators): Iterable[(Int, String)] =
    synchronized {
      val prefixedParties = domainStringIterators.parties.map(partyString => PartyPrefix + partyString)
      val added = RawStringInterning.newEntries(
        strings = prefixedParties ++
          domainStringIterators.templateIds.map(templateString => TemplatePrefix + templateString),
        rawStringInterning = raw,
      )
      updateView(added)
      added
    }

  /** Catches the view up with persistence if it is behind the given id.
    *
    * @param lastStringInterningId the last interned id known to persistence
    * @return an already completed Future if the view is not behind; otherwise a Future which
    *         completes after the range above the view's last id, up to and including
    *         `lastStringInterningId`, has been loaded and added to the view
    */
  override def update(
      lastStringInterningId: Int
  )(implicit loggingContext: LoggingContext): Future[Unit] =
    if (raw.lastId < lastStringInterningId) {
      loadPrefixedEntries
        .apply(fromExclusive = raw.lastId, toInclusive = lastStringInterningId)(loggingContext)
        .map(loaded => updateView(loaded))(DirectExecutionContext)
    } else {
      Future.unit
    }

  // Publishes a new snapshot which includes the given entries; a no-op for an empty input.
  private def updateView(entriesToAdd: Iterable[(Int, String)]): Unit = synchronized {
    if (entriesToAdd.isEmpty) ()
    else raw = RawStringInterning.from(entries = entriesToAdd, rawStringInterning = raw)
  }
}
Loading

0 comments on commit d006ad0

Please sign in to comment.