Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(tracing): Ensure clean MDC context at top coroutine scopes #1845

Merged
merged 4 commits into from
Mar 4, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.netflix.spinnaker.keel.actuation

import com.netflix.spinnaker.keel.activation.ApplicationDown
import com.netflix.spinnaker.keel.activation.ApplicationUp
import com.netflix.spinnaker.keel.logging.TracingSupport.Companion.clearMDC
import com.netflix.spinnaker.keel.persistence.AgentLockRepository
import com.netflix.spinnaker.keel.persistence.KeelRepository
import com.netflix.spinnaker.keel.telemetry.AgentInvocationComplete
Expand All @@ -19,6 +20,7 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.TimeoutCancellationException
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.slf4j.MDCContext
import kotlinx.coroutines.supervisorScope
import kotlinx.coroutines.withTimeout
import org.slf4j.LoggerFactory
Expand Down Expand Up @@ -75,7 +77,7 @@ class CheckScheduler(
fun checkResources() {
if (enabled.get()) {
val startTime = clock.instant()
val job = launch {
val job = launch(clearMDC) {
supervisorScope {
runCatching {
repository
Expand Down Expand Up @@ -114,7 +116,7 @@ class CheckScheduler(
if (enabled.get()) {
publisher.publishEvent(ScheduledEnvironmentCheckStarting)

val job = launch {
val job = launch(clearMDC) {
supervisorScope {
repository
.deliveryConfigsDueForCheck(checkMinAge, resourceCheckBatchSize)
Expand Down Expand Up @@ -149,7 +151,7 @@ class CheckScheduler(
if (enabled.get()) {
val startTime = clock.instant()
publisher.publishEvent(ScheduledArtifactCheckStarting)
val job = launch {
val job = launch(clearMDC) {
supervisorScope {
repository.artifactsDueForCheck(checkMinAge, resourceCheckBatchSize)
.forEach { artifact ->
Expand Down Expand Up @@ -179,7 +181,7 @@ class CheckScheduler(
val startTime = clock.instant()
publisher.publishEvent(ScheduledEnvironmentVerificationStarting)

val job = launch {
val job = launch(clearMDC) {
supervisorScope {
repository
.nextEnvironmentsForVerification(environmentVerificationMinAge, environmentVerificationBatchSize)
Expand Down Expand Up @@ -213,7 +215,7 @@ class CheckScheduler(
val agentName: String = it.javaClass.simpleName
val lockAcquired = agentLockRepository.tryAcquireLock(agentName, it.lockTimeoutSeconds)
if (lockAcquired) {
runBlocking {
runBlocking(clearMDC) {
it.invokeAgent()
}
publisher.publishEvent(AgentInvocationComplete(Duration.between(startTime, clock.instant()), agentName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class TracingSupport {
companion object {
const val X_SPINNAKER_RESOURCE_ID = "X-SPINNAKER-RESOURCE-ID"

val clearMDC: MDCContext = MDCContext(emptyMap())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be emptyMDC - when I read it as a parameter I thought it was a function that cleared the mdc context.

Actually, I think any wording for this - "empty", "clean", "clear" would have given me the same thought. Maybe blankMDC? Change if you think that is more helpful or clear, if you don't think it adds anything feel free to ignore.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed to blankMDC.


suspend fun <T : ResourceSpec, R> withTracingContext(
resource: Resource<T>,
block: suspend CoroutineScope.() -> R
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ class VerificationRunner(
}

if (statuses.anyStillRunning) {
log.debug("Verification already running for {}", environment.name)
log.debug("Verification already running for environment {} of application {}", environment.name, deliveryConfig.application)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ooo thank you thank you

return
}

statuses.firstOutstanding?.let { verification ->
start(verification, imageFinder.getImages(context.deliveryConfig, context.environmentName))
} ?: log.debug("Verification complete for {}", environment.name)
} ?: log.debug("Verification complete for environment {} of application {}", environment.name, deliveryConfig.application)
}
}

Expand All @@ -56,7 +56,8 @@ class VerificationRunner(
evaluators.evaluate(this, verification, state.metadata)
.also { newStatus ->
if (newStatus.complete) {
log.debug("Verification {} completed with status {} for {}", verification, newStatus, environment.name)
log.debug("Verification {} completed with status {} for environment {} of application {}",
verification, newStatus, environment.name, deliveryConfig.application)
markAs(verification, newStatus)
eventPublisher.publishEvent(VerificationCompleted(this, verification, newStatus, state.metadata))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import com.netflix.spinnaker.keel.api.plugins.supporting
import com.netflix.spinnaker.keel.clouddriver.CloudDriverCache
import com.netflix.spinnaker.keel.core.api.SubmittedResource
import com.netflix.spinnaker.keel.core.parseMoniker
import com.netflix.spinnaker.keel.logging.TracingSupport.Companion.clearMDC
import com.netflix.spinnaker.keel.logging.TracingSupport.Companion.withTracingContext
import com.netflix.spinnaker.keel.yaml.APPLICATION_YAML_VALUE
import kotlinx.coroutines.runBlocking
Expand Down Expand Up @@ -74,7 +75,7 @@ class ExportController(
val handler = handlers.supporting(kind)
val exportable = generateExportable(cloudProvider, type, account, user, name)

return runBlocking {
return runBlocking(clearMDC) {
withTracingContext(exportable) {
log.info("Exporting resource ${exportable.toResourceId()}")
SubmittedResource(
Expand All @@ -99,7 +100,7 @@ class ExportController(
val handler = handlers.supporting(kind)
val exportable = generateExportable(cloudProvider, "cluster", account, user, name)

return runBlocking {
return runBlocking(clearMDC) {
withTracingContext(exportable) {
log.info("Exporting artifact from cluster ${exportable.toResourceId()}")
handler.exportArtifact(exportable)
Expand Down