Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Commit

Permalink
[Timelock Partitioning] Part 32: Client aware event recorders (#4263)
Browse files Browse the repository at this point in the history
* PaxosLeadershipEventRecorder is namespaced via the metric registry and logging.

* remove low value test since we don't have access to tagged metrics anymore

* Add generated changelog entries

* safeArgs -> contextArgs
  • Loading branch information
felixdesouza authored Sep 26, 2019
1 parent a127f80 commit 5edd417
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ public static LocalPaxosServices createInstrumentedLocalServices(

AsyncLeadershipObserver leadershipObserver = AsyncLeadershipObserver.create();
PaxosLeadershipEventRecorder leadershipEventRecorder = PaxosLeadershipEventRecorder.create(
metricsManager.getRegistry(), leaderUuid.toString(), leadershipObserver);
metricsManager.getTaggedRegistry(),
leaderUuid.toString(),
leadershipObserver,
ImmutableList.of());

PaxosAcceptor ourAcceptor = AtlasDbMetrics.instrument(metricsManager.getRegistry(),
PaxosAcceptor.class,
Expand Down
6 changes: 6 additions & 0 deletions changelog/@unreleased/pr-4263.v2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: break
break:
description: '`PaxosLeadershipEventRecorder` now takes in a `TaggedMetricRegistry`
instead of a `MetricRegistry`. The names of the metrics remain the same.'
links:
- https://github.com/palantir/atlasdb/pull/4263
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,18 @@
*/
package com.palantir.leader;

import java.util.List;

import org.apache.commons.lang3.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.palantir.logsafe.SafeArg;
import com.palantir.paxos.PaxosRoundFailureException;
import com.palantir.paxos.PaxosValue;
import com.palantir.tritium.metrics.registry.MetricName;
import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;

import net.jcip.annotations.ThreadSafe;

Expand All @@ -40,61 +44,75 @@ class LeadershipEvents {
private final Meter leaderPingFailure;
private final Meter leaderPingTimeout;
private final Meter leaderPingReturnedFalse;
private final Object[] contextArgs;

/**
 * Creates one meter per leadership event ("leadership.gained", "leadership.lost", ...) on the
 * supplied registry and copies the context args into an array for use by the logging methods.
 *
 * NOTE(review): this span is a rendered diff; the {@code MetricRegistry} constructor header and
 * its meter registrations appear to be the superseded lines, followed by the
 * {@code TaggedMetricRegistry} replacement.
 *
 * @param metrics registry on which the leadership meters are created
 * @param contextArgs args copied via {@code toArray} and stored in the {@code contextArgs} field
 */
LeadershipEvents(MetricRegistry metrics) {
gainedLeadership = metrics.meter("leadership.gained");
lostLeadership = metrics.meter("leadership.lost");
noQuorum = metrics.meter("leadership.no-quorum");
proposedLeadership = metrics.meter("leadership.proposed");
proposalFailure = metrics.meter("leadership.proposed.failure");
leaderPingFailure = metrics.meter("leadership.ping-leader.failure");
leaderPingTimeout = metrics.meter("leadership.ping-leader.timeout");
leaderPingReturnedFalse = metrics.meter("leadership.ping-leader.returned-false");
LeadershipEvents(TaggedMetricRegistry metrics, List<SafeArg<Object>> contextArgs) {
gainedLeadership = metrics.meter(withName("leadership.gained"));
lostLeadership = metrics.meter(withName("leadership.lost"));
noQuorum = metrics.meter(withName("leadership.no-quorum"));
proposedLeadership = metrics.meter(withName("leadership.proposed"));
proposalFailure = metrics.meter(withName("leadership.proposed.failure"));
leaderPingFailure = metrics.meter(withName("leadership.ping-leader.failure"));
leaderPingTimeout = metrics.meter(withName("leadership.ping-leader.timeout"));
leaderPingReturnedFalse = metrics.meter(withName("leadership.ping-leader.returned-false"));
// Snapshot the list into an array so it can be combined with per-call log arguments.
this.contextArgs = contextArgs.toArray(new Object[0]);
}

/**
 * Logs at INFO that leadership is being proposed for the given round and marks the
 * {@code proposedLeadership} meter.
 *
 * @param round round number attached to the log line as the safe arg "round"
 */
void proposedLeadershipFor(long round) {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second its replacement.
leaderLog.info("Proposing leadership for {}", SafeArg.of("round", round));
leaderLog.info("Proposing leadership for {}", withContextArgs(SafeArg.of("round", round)));
proposedLeadership.mark();
}

/**
 * Logs at INFO that leadership was gained for the given paxos value and marks the
 * {@code gainedLeadership} meter.
 *
 * @param value paxos value attached to the log line as the safe arg "value"
 */
void gainedLeadershipFor(PaxosValue value) {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second its replacement.
leaderLog.info("Gained leadership for {}", SafeArg.of("value", value));
leaderLog.info("Gained leadership for {}", withContextArgs(SafeArg.of("value", value)));
gainedLeadership.mark();
}

/**
 * Logs at INFO that leadership was lost for the given paxos value and marks the
 * {@code lostLeadership} meter.
 *
 * @param value paxos value attached to the log line as the safe arg "value"
 */
void lostLeadershipFor(PaxosValue value) {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second its replacement.
leaderLog.info("Lost leadership for {}", SafeArg.of("value", value));
leaderLog.info("Lost leadership for {}", withContextArgs(SafeArg.of("value", value)));
lostLeadership.mark();
}

/**
 * Logs at WARN that this server believes it is the leader but there is currently no quorum,
 * and marks the {@code noQuorum} meter.
 *
 * NOTE(review): rendered diff - the bare {@code SafeArg.of(...)} argument line looks like the
 * superseded one, followed by its {@code withContextArgs(...)} replacement.
 *
 * @param value paxos value attached to the log line as the safe arg "value"
 */
void noQuorum(PaxosValue value) {
leaderLog.warn("The most recent known information says this server is the leader,"
+ " but there is no quorum right now. The paxos value is {}",
SafeArg.of("value", value));
withContextArgs(SafeArg.of("value", value)));
noQuorum.mark();
}

/**
 * Logs at WARN that pinging the current leader failed, handing the error to the logger,
 * and marks the {@code leaderPingFailure} meter.
 *
 * @param error the failure encountered while pinging the leader
 */
void leaderPingFailure(Throwable error) {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second its replacement.
leaderLog.warn("Failed to ping the current leader", error);
leaderLog.warn("Failed to ping the current leader", withContextArgs(error));
leaderPingFailure.mark();
}

/**
 * Logs at WARN that a ping to the current leader timed out and marks the
 * {@code leaderPingTimeout} meter.
 */
void leaderPingTimeout() {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second
// (which passes the stored context args) its replacement.
leaderLog.warn("Timed out while attempting to ping the current leader");
leaderLog.warn("Timed out while attempting to ping the current leader", contextArgs);
leaderPingTimeout.mark();
}

/**
 * Logs at INFO that the suspected leader reported it is no longer leading and marks the
 * {@code leaderPingReturnedFalse} meter.
 */
void leaderPingReturnedFalse() {
// NOTE(review): rendered diff - the first log call looks like the superseded line, the second
// (which passes the stored context args) its replacement.
leaderLog.info("We contacted the suspected leader, but it reported that it was no longer leading");
leaderLog.info("We contacted the suspected leader, but it reported that it was no longer leading", contextArgs);
leaderPingReturnedFalse.mark();
}

/**
 * Logs at WARN that leadership was not gained, together with remediation hints, and marks the
 * {@code proposalFailure} meter.
 *
 * NOTE(review): rendered diff - the message continuation lines and the trailing argument appear
 * twice; the first set looks like the superseded lines, the second set their replacement.
 *
 * @param paxosException the paxos round failure handed to the logger
 */
void proposalFailure(PaxosRoundFailureException paxosException) {
leaderLog.warn("Leadership was not gained.\n"
+ "We should recover automatically. If this recurs often, try to \n"
+ " (1) ensure that most other nodes are reachable over the network, and \n"
+ " (2) increase the randomWaitBeforeProposingLeadershipMs timeout in your configuration.",
paxosException);
+ "We should recover automatically. If this recurs often, try to \n"
+ " (1) ensure that most other nodes are reachable over the network, and \n"
+ " (2) increase the randomWaitBeforeProposingLeadershipMs timeout in your configuration.",
withContextArgs(paxosException));
proposalFailure.mark();
}

/**
 * Combines a per-call log argument with the context args captured at construction.
 *
 * <p>SLF4J binds {@code {}} placeholders to arguments by position, and only treats a
 * {@link Throwable} as the exception to log when it is the final argument. Ordinary arguments
 * are therefore placed ahead of the context args, so the single placeholder used by the
 * callers' messages resolves to the round/value rather than to the first context arg.
 * Throwables stay last so their stack trace is still logged.
 *
 * @param arg the argument specific to the log statement; may be a {@link Throwable}
 * @return a new array holding {@code arg} and every context arg
 */
private Object[] withContextArgs(Object arg) {
    if (contextArgs.length == 0) {
        return new Object[] { arg };
    }
    if (arg instanceof Throwable) {
        // Must remain the last element for SLF4J to log it as an exception.
        return ArrayUtils.add(contextArgs, arg);
    }
    // Placeholder-bound argument goes first; context args trail as extra structured args.
    return ArrayUtils.addAll(new Object[] { arg }, contextArgs);
}

/** Builds a {@link MetricName} with the given safe name and no other properties set. */
private static MetricName withName(String name) {
    final MetricName metricName = MetricName.builder()
            .safeName(name)
            .build();
    return metricName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
*/
package com.palantir.leader;

import java.util.List;
import java.util.Optional;

import javax.annotation.concurrent.GuardedBy;

import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.palantir.logsafe.SafeArg;
import com.palantir.paxos.PaxosRoundFailureException;
import com.palantir.paxos.PaxosValue;
import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;

public class PaxosLeadershipEventRecorder implements PaxosKnowledgeEventRecorder, PaxosLeaderElectionEventRecorder {

Expand All @@ -33,14 +36,17 @@ public class PaxosLeadershipEventRecorder implements PaxosKnowledgeEventRecorder
@GuardedBy("this") private PaxosValue currentRound = null;
@GuardedBy("this") private boolean isLeading = false;

/**
 * Creates a recorder with no leadership observer by delegating to the fuller {@code create}
 * overload with a {@code null} observer (and, in the replacement line, an empty arg list).
 *
 * NOTE(review): rendered diff - the {@code MetricRegistry} signature and its delegate call look
 * like the superseded lines, followed by the {@code TaggedMetricRegistry} replacement.
 *
 * @param metrics registry passed through to the delegate
 * @param leaderUuid leader identifier passed through to the delegate
 * @return the recorder produced by the delegate
 */
public static PaxosLeadershipEventRecorder create(MetricRegistry metrics, String leaderUuid) {
return create(metrics, leaderUuid, null);
public static PaxosLeadershipEventRecorder create(TaggedMetricRegistry metrics, String leaderUuid) {
return create(metrics, leaderUuid, null, ImmutableList.of());
}

/**
 * Creates a recorder backed by a new {@code LeadershipEvents} built from the given registry
 * (and, in the replacement lines, the given args).
 *
 * NOTE(review): rendered diff - the {@code MetricRegistry} signature and
 * {@code new LeadershipEvents(metrics)} look like the superseded lines, followed by their
 * replacements.
 *
 * @param metrics registry handed to {@code LeadershipEvents}
 * @param leaderUuid leader identifier handed to the recorder constructor
 * @param observer optional observer; wrapped with {@code Optional.ofNullable}, so may be null
 * @param safeArgs args handed to {@code LeadershipEvents}
 * @return the newly constructed recorder
 */
public static PaxosLeadershipEventRecorder create(MetricRegistry metrics,
String leaderUuid, LeadershipObserver observer) {
public static PaxosLeadershipEventRecorder create(
TaggedMetricRegistry metrics,
String leaderUuid,
LeadershipObserver observer,
List<SafeArg<Object>> safeArgs) {
return new PaxosLeadershipEventRecorder(
new LeadershipEvents(metrics),
new LeadershipEvents(metrics, safeArgs),
leaderUuid,
Optional.ofNullable(observer));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ private int lockAndUnlockAndCountExceptions(List<LockService> lockServices, int
for (int i = 0; i < numRequestsPerClient; i++) {
int currentTrial = i;
futures.add(executorService.submit(() ->
lockService.lock(CLIENT_2 + String.valueOf(currentTrial), REQUEST_LOCK_WITH_LONG_TIMEOUT))
lockService.lock(CLIENT_2 + currentTrial, REQUEST_LOCK_WITH_LONG_TIMEOUT))
);
}
}
Expand Down Expand Up @@ -412,20 +412,6 @@ public void throwsOnFastForwardWithIncorrectParameter() throws IOException {
assertThat(response.code()).isEqualTo(HttpStatus.BAD_REQUEST_400);
}

/**
 * Smoke test for the leadership meters: each expected meter is present in the metrics output,
 * and the "gained" and "proposed" meters each report a count of one.
 */
@Test
public void leadershipEventsSmokeTest() throws IOException {
    MetricsOutput output = getMetricsOutput();

    String[] expectedMeters = {
        "leadership.gained",
        "leadership.lost",
        "leadership.proposed",
        "leadership.no-quorum",
        "leadership.proposed.failure",
    };
    for (String meterName : expectedMeters) {
        output.assertContainsMeter(meterName);
    }

    // Checked in the same order as before so the first failing assertion is unchanged.
    String[] metersMarkedOnce = { "leadership.gained", "leadership.proposed" };
    for (String meterName : metersMarkedOnce) {
        assertThat(output.getMeter(meterName).get("count").intValue()).isEqualTo(1);
    }
}

@Test
// TODO(nziebart): test remote service instrumentation - we need a multi-node server config for this
public void instrumentationSmokeTest() throws IOException {
Expand Down

0 comments on commit 5edd417

Please sign in to comment.