Skip to content

Commit

Permalink
ledger-api-bench-tool: Fix flaky MetricsCollectorSpec (#11750)
Browse files Browse the repository at this point in the history
* Renamed MetricReporter to MetricFormatter

* Make MetricsCollector communicate via messages only, not by logging

* Logging reports from MetricManger

* Cancel periodic report request when stream is done

* Removed debug sleep

* Fix error when defining reporting period less than one second

* Fixed flaky MetricsCollectorSpec test

CHANGELOG_BEGIN
CHANGELOG_END
  • Loading branch information
kamil-da authored Nov 25, 2021
1 parent cb758e8 commit f69bd68
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 283 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ package com.daml.ledger.api.benchtool
import com.daml.ledger.api.benchtool.config.WorkflowConfig.StreamConfig
import com.daml.ledger.api.benchtool.metrics.{
MetricRegistryOwner,
MetricsCollector,
MetricsSet,
StreamMetrics,
StreamResult,
}
import com.daml.ledger.api.benchtool.services.LedgerApiServices
import com.daml.ledger.api.benchtool.util.TypedActorSystemResourceOwner
Expand Down Expand Up @@ -110,7 +110,7 @@ object Benchmark {
}
.transform {
case Success(results) =>
if (results.contains(MetricsCollector.Message.MetricsResult.ObjectivesViolated))
if (results.contains(StreamResult.ObjectivesViolated))
Failure(new RuntimeException("Metrics objectives not met."))
else Success(())
case Failure(ex) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ class MeteredStreamObserver[T](
val streamName: String,
logger: Logger,
manager: MetricsManager[T],
) extends ObserverWithResult[T, MetricsCollector.Message.MetricsResult](logger) {
) extends ObserverWithResult[T, StreamResult](logger) {

override def onNext(value: T): Unit = {
manager.sendNewValue(value)
super.onNext(value)
}

override def completeWith(): Future[MetricsCollector.Message.MetricsResult] = {
logger.debug(withStreamName(s"Asking for stream result..."))
override def completeWith(): Future[StreamResult] = {
manager.result()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class MetricRegistryOwner(
registry <- ResourceOwner.forValue(() => new MetricRegistry).acquire()
_ <- acquireSlfjReporter(registry)
metricsReporter <- acquireMetricsReporter(registry)
_ = metricsReporter.start(reportingInterval.toSeconds, TimeUnit.SECONDS)
_ = metricsReporter.start(reportingInterval.toMillis, TimeUnit.MILLISECONDS)
} yield registry

private def acquireSlfjReporter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,95 +3,82 @@

package com.daml.ledger.api.benchtool.metrics

import akka.actor.typed.scaladsl.{Behaviors, TimerScheduler}
import akka.actor.typed.scaladsl.Behaviors
import akka.actor.typed.{ActorRef, Behavior}
import com.daml.ledger.api.benchtool.util.{MetricReporter, TimeUtil}
import com.daml.ledger.api.benchtool.metrics.objectives.ServiceLevelObjective
import com.daml.ledger.api.benchtool.util.TimeUtil

import java.time.Instant
import scala.concurrent.duration._
import java.time.{Clock, Duration, Instant}

object MetricsCollector {

sealed trait Message
object Message {
sealed trait MetricsResult
object MetricsResult {
final case object Ok extends MetricsResult
final case object ObjectivesViolated extends MetricsResult
}
final case class NewValue[T](value: T) extends Message
final case class PeriodicUpdateCommand() extends Message
final case class StreamCompleted(replyTo: ActorRef[MetricsResult]) extends Message
final case class PeriodicReportRequest(replyTo: ActorRef[Response.PeriodicReport])
extends Message
final case class FinalReportRequest(replyTo: ActorRef[Response.FinalReport]) extends Message
}

sealed trait Response
object Response {
final case class PeriodicReport(values: List[MetricValue]) extends Response
final case class MetricFinalReportData(
name: String,
value: MetricValue,
violatedObjective: Option[(ServiceLevelObjective[_], MetricValue)],
)
final case class FinalReport(totalDuration: Duration, metricsData: List[MetricFinalReportData])
extends Response
}

def apply[T](
streamName: String,
metrics: List[Metric[T]],
logInterval: FiniteDuration,
reporter: MetricReporter,
exposedMetrics: Option[ExposedMetrics[T]] = None,
): Behavior[Message] =
Behaviors.withTimers { timers =>
val startTime: Instant = Instant.now()
new MetricsCollector[T](timers, streamName, logInterval, reporter, startTime, exposedMetrics)
.handlingMessages(metrics, startTime)
}

): Behavior[Message] = {
val clock = Clock.systemUTC()
val startTime: Instant = clock.instant()
new MetricsCollector[T](exposedMetrics, clock).handlingMessages(metrics, startTime, startTime)
}
}

class MetricsCollector[T](
timers: TimerScheduler[MetricsCollector.Message],
streamName: String,
logInterval: FiniteDuration,
reporter: MetricReporter,
startTime: Instant,
exposedMetrics: Option[ExposedMetrics[T]],
) {
class MetricsCollector[T](exposedMetrics: Option[ExposedMetrics[T]], clock: Clock) {
import MetricsCollector._
import MetricsCollector.Message._
import MetricsCollector.Response._

timers.startTimerWithFixedDelay(PeriodicUpdateCommand(), logInterval)

def handlingMessages(metrics: List[Metric[T]], lastPeriodicCheck: Instant): Behavior[Message] = {
Behaviors.receive { case (context, message) =>
def handlingMessages(
metrics: List[Metric[T]],
lastPeriodicCheck: Instant,
startTime: Instant,
): Behavior[Message] = {
Behaviors.receive { case (_, message) =>
message match {
case newValue: NewValue[T] =>
exposedMetrics.foreach(_.onNext(newValue.value))
handlingMessages(metrics.map(_.onNext(newValue.value)), lastPeriodicCheck)
handlingMessages(metrics.map(_.onNext(newValue.value)), lastPeriodicCheck, startTime)

case _: PeriodicUpdateCommand =>
val currentTime = Instant.now()
case request: PeriodicReportRequest =>
val currentTime = clock.instant()
val (newMetrics, values) = metrics
.map(_.periodicValue(TimeUtil.durationBetween(lastPeriodicCheck, currentTime)))
.unzip
context.log.info(namedMessage(reporter.formattedValues(values)))
handlingMessages(newMetrics, currentTime)

case message: StreamCompleted =>
context.log.info(
namedMessage(
reporter.finalReport(
streamName = streamName,
metrics = metrics,
duration = totalDuration,
request.replyTo ! Response.PeriodicReport(values)
handlingMessages(newMetrics, currentTime, startTime)

case request: FinalReportRequest =>
val duration = TimeUtil.durationBetween(startTime, clock.instant())
val data: List[MetricFinalReportData] =
metrics.map { metric =>
MetricFinalReportData(
name = metric.name,
value = metric.finalValue(duration),
violatedObjective = metric.violatedObjective,
)
)
)
message.replyTo ! result(metrics)
}
request.replyTo ! FinalReport(duration, data)
Behaviors.stopped
}
}
}

private def result(metrics: List[Metric[T]]): MetricsResult = {
val atLeastOneObjectiveViolated = metrics.exists(_.violatedObjective.nonEmpty)

if (atLeastOneObjectiveViolated) MetricsResult.ObjectivesViolated
else MetricsResult.Ok
}

private def namedMessage(message: String) = s"[$streamName] $message"

private def totalDuration: java.time.Duration =
TimeUtil.durationBetween(startTime, Instant.now())
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,71 @@

package com.daml.ledger.api.benchtool.metrics

import akka.actor.CoordinatedShutdown
import akka.actor.{Cancellable, CoordinatedShutdown}
import akka.actor.typed.scaladsl.AskPattern._
import akka.actor.typed.{ActorRef, ActorSystem, Props, SpawnProtocol}
import akka.util.Timeout
import com.daml.ledger.api.benchtool.util.MetricReporter
import com.daml.ledger.api.benchtool.util.ReportFormatter
import org.slf4j.LoggerFactory

import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}

case class MetricsManager[T](collector: ActorRef[MetricsCollector.Message])(implicit
case class MetricsManager[T](
collector: ActorRef[MetricsCollector.Message],
logInterval: FiniteDuration,
streamName: String,
)(implicit
system: ActorSystem[SpawnProtocol.Command]
) {
def sendNewValue(value: T): Unit =
collector ! MetricsCollector.Message.NewValue(value)

def result[Result](): Future[StreamResult] = {
logger.debug(s"Requesting result of stream: $streamName")
periodicRequest.cancel()
implicit val timeout: Timeout = Timeout(3.seconds)
collector
.ask(MetricsCollector.Message.FinalReportRequest)
.map { response: MetricsCollector.Response.FinalReport =>
logger.info(
ReportFormatter.formatFinalReport(
streamName = streamName,
finalReport = response,
)
)
if (response.metricsData.exists(_.violatedObjective.isDefined))
StreamResult.ObjectivesViolated
else
StreamResult.Ok
}(system.executionContext)
}

CoordinatedShutdown(system).addTask(
phase = CoordinatedShutdown.PhaseBeforeServiceUnbind,
taskName = "report-results",
) { () =>
println(s"Coordinated shutdown in progress...")
logger.debug(s"Shutting down infrastructure for stream: $streamName")
result().map(_ => akka.Done)(system.executionContext)
}

def sendNewValue(value: T): Unit =
collector ! MetricsCollector.Message.NewValue(value)
private val periodicRequest: Cancellable =
system.scheduler.scheduleWithFixedDelay(logInterval, logInterval)(() => {
implicit val timeout: Timeout = Timeout(logInterval)
collector
.ask(MetricsCollector.Message.PeriodicReportRequest)
.map { response =>
logger.info(
ReportFormatter.formatPeriodicReport(
streamName = streamName,
periodicReport = response,
)
)
}(system.executionContext)
()
})(system.executionContext)

def result[Result](): Future[MetricsCollector.Message.MetricsResult] = {
implicit val timeout: Timeout = Timeout(3.seconds)
collector.ask(MetricsCollector.Message.StreamCompleted)
}
private val logger = LoggerFactory.getLogger(getClass)
}

object MetricsManager {
Expand All @@ -47,10 +85,7 @@ object MetricsManager {
val collectorActor: Future[ActorRef[MetricsCollector.Message]] = system.ask(
SpawnProtocol.Spawn(
behavior = MetricsCollector(
streamName = streamName,
metrics = metrics,
logInterval = logInterval,
reporter = MetricReporter.Default,
exposedMetrics = exposedMetrics,
),
name = s"${streamName}-collector",
Expand All @@ -59,6 +94,12 @@ object MetricsManager {
)
)

collectorActor.map(MetricsManager[StreamElem](_))
collectorActor.map(collector =>
MetricsManager[StreamElem](
collector = collector,
logInterval = logInterval,
streamName = streamName,
)
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright (c) 2021 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.benchtool.metrics

sealed trait StreamResult extends Product with Serializable

object StreamResult {
final case object Ok extends StreamResult
final case object ObjectivesViolated extends StreamResult
}
Loading

0 comments on commit f69bd68

Please sign in to comment.