Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Commit

Permalink
[TimeLock] Bump Maximum Clients & Saturation Metrics (#3413)
Browse files Browse the repository at this point in the history
* bump default + add metrics

* metric tests

* Fix tags, and speculatively put the number in

* Don't use stringjoiner for 2 components
  • Loading branch information
jeremyk-91 authored Jul 25, 2018
1 parent 1d159bc commit 4788b98
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 4 deletions.
12 changes: 12 additions & 0 deletions docs/source/release_notes/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ develop
- Changed the range scan behavior for the sweep priority table so that reads scan less data in Cassandra.
(`Pull Request <https://github.com/palantir/atlasdb/pull/3410>`__)

* - |improved|
- TimeLock by default now has a client limit of 500.
Previously, this used to be 100 - however we have run into issues internally where stacks legitimately reach this threshold.
Note that we still need to maintain the client limit to avoid a possible DOS attack with users creating arbitrarily many clients.
(`Pull Request <https://github.com/palantir/atlasdb/pull/3413>`__)

* - |new| |metrics|
- Added metrics for the number of active clients and maximum number of clients in TimeLock Server.
These are useful to identify stacks that may be in danger of breaching their maxima.
(`Pull Request <https://github.com/palantir/atlasdb/pull/3413>`__)


=======
v0.97.0
=======
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public PaxosRuntimeConfiguration paxos() {
@JsonProperty("max-number-of-clients")
@Value.Default
public Integer maxNumberOfClients() {
return 100;
return 500;
}

/**
Expand All @@ -46,6 +46,8 @@ public long slowLockLogTriggerMillis() {

@Value.Check
public void check() {
Preconditions.checkState(maxNumberOfClients() >= 0,
"Maximum number of clients must be nonnegative, but found %s", maxNumberOfClients());
Preconditions.checkState(slowLockLogTriggerMillis() >= 0,
"Slow lock log trigger threshold must be nonnegative, but found %s", slowLockLogTriggerMillis());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,9 @@ private void createAndRegisterResources() {

// Finally, register the health check, and endpoints associated with the clients.
healthCheckSupplier = leadershipCreator.getHealthCheck();
resource = new TimeLockResource(this::createInvalidatingTimeLockServices,
resource = TimeLockResource.create(
metricsManager,
this::createInvalidatingTimeLockServices,
JavaSuppliers.compose(TimeLockRuntimeConfiguration::maxNumberOfClients, runtime));
registrar.accept(resource);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.palantir.atlasdb.timelock.paxos.PaxosTimeLockConstants;
import com.palantir.atlasdb.util.MetricsManager;
import com.palantir.lock.LockService;
import com.palantir.logsafe.Safe;
import com.palantir.logsafe.SafeArg;
Expand All @@ -39,17 +40,30 @@
public class TimeLockResource {
private final Logger log = LoggerFactory.getLogger(TimeLockResource.class);

@VisibleForTesting
static final String ACTIVE_CLIENTS = "activeClients";
@VisibleForTesting
static final String MAX_CLIENTS = "maxClients";

private final Function<String, TimeLockServices> clientServicesFactory;
private final ConcurrentMap<String, TimeLockServices> servicesByNamespace = Maps.newConcurrentMap();
private final Supplier<Integer> maxNumberOfClients;

public TimeLockResource(
private TimeLockResource(
Function<String, TimeLockServices> clientServicesFactory,
Supplier<Integer> maxNumberOfClients) {
this.clientServicesFactory = clientServicesFactory;
this.maxNumberOfClients = maxNumberOfClients;
}

public static TimeLockResource create(MetricsManager metricsManager,
Function<String, TimeLockServices> clientServicesFactory,
Supplier<Integer> maxNumberOfClients) {
TimeLockResource resource = new TimeLockResource(clientServicesFactory, maxNumberOfClients);
registerClientCapacityMetrics(resource, metricsManager);
return resource;
}

@Path("/lock")
public LockService getLockService(@Safe @PathParam("namespace") String namespace) {
return getOrCreateServices(namespace).getLockService();
Expand Down Expand Up @@ -97,4 +111,9 @@ private TimeLockServices createNewClient(String namespace) {

return clientServicesFactory.apply(namespace);
}

private static void registerClientCapacityMetrics(TimeLockResource resource, MetricsManager metricsManager) {
metricsManager.registerMetric(TimeLockResource.class, ACTIVE_CLIENTS, resource::numberOfClients);
metricsManager.registerMetric(TimeLockResource.class, MAX_CLIENTS, resource.maxNumberOfClients::get);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,18 @@
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;

import java.util.Optional;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Supplier;

import org.junit.Before;
import org.junit.Test;

import com.codahale.metrics.MetricRegistry;
import com.palantir.atlasdb.util.MetricsManager;
import com.palantir.tritium.metrics.registry.DefaultTaggedMetricRegistry;

public class TimeLockResourceTest {
private static final String CLIENT_A = "a-client";
private static final String CLIENT_B = "b-client";
Expand All @@ -42,7 +47,12 @@ public class TimeLockResourceTest {

private final Function<String, TimeLockServices> serviceFactory = mock(Function.class);
private final Supplier<Integer> maxNumberOfClientsSupplier = mock(Supplier.class);
private final TimeLockResource resource = new TimeLockResource(
private final MetricsManager metricsManager = new MetricsManager(
new MetricRegistry(),
DefaultTaggedMetricRegistry.getDefault(),
unused -> false);
private final TimeLockResource resource = TimeLockResource.create(
metricsManager,
serviceFactory,
maxNumberOfClientsSupplier);

Expand Down Expand Up @@ -103,6 +113,38 @@ public void canDynamicallyIncreaseMaxAllowedClients() {
resource.getTimeService(uniqueClient());
}

@Test
public void numberOfActiveClientsUpdatesAsNewClientsCreated() {
assertNumberOfActiveClientsIs(0);
assertMaxClientsIs(DEFAULT_MAX_NUMBER_OF_CLIENTS);

resource.getOrCreateServices(uniqueClient());

assertNumberOfActiveClientsIs(1);
assertMaxClientsIs(DEFAULT_MAX_NUMBER_OF_CLIENTS);

resource.getOrCreateServices(uniqueClient());

assertNumberOfActiveClientsIs(2);
assertMaxClientsIs(DEFAULT_MAX_NUMBER_OF_CLIENTS);
}

@Test
public void maxNumberOfClientsRespondsToChanges() {
assertNumberOfActiveClientsIs(0);
assertMaxClientsIs(DEFAULT_MAX_NUMBER_OF_CLIENTS);

when(maxNumberOfClientsSupplier.get()).thenReturn(1);

assertNumberOfActiveClientsIs(0);
assertMaxClientsIs(1);

when(maxNumberOfClientsSupplier.get()).thenReturn(77);

assertNumberOfActiveClientsIs(0);
assertMaxClientsIs(77);
}

private void createMaximumNumberOfClients() {
for (int i = 0; i < DEFAULT_MAX_NUMBER_OF_CLIENTS; i++) {
resource.getTimeService(uniqueClient());
Expand All @@ -113,4 +155,22 @@ private String uniqueClient() {
return UUID.randomUUID().toString();
}

private void assertNumberOfActiveClientsIs(int expected) {
assertThat(getGaugeValueForTimeLockResource(TimeLockResource.ACTIVE_CLIENTS))
.isEqualTo(expected);
}

private void assertMaxClientsIs(int expected) {
assertThat(getGaugeValueForTimeLockResource(TimeLockResource.MAX_CLIENTS))
.isEqualTo(expected);
}

private int getGaugeValueForTimeLockResource(String gaugeName) {
Object value = Optional.ofNullable(metricsManager.getRegistry()
.getGauges()
.get(TimeLockResource.class.getCanonicalName() + "." + gaugeName)
.getValue())
.orElseThrow(() -> new IllegalStateException("Gauge with gauge name " + gaugeName + " did not exist."));
return (int) value;
}
}

0 comments on commit 4788b98

Please sign in to comment.