Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

[Timelock Corruption]: Final Wiring part 1 #5071

Merged
merged 7 commits into from
Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package com.palantir.atlasdb.timelock.paxos;

import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.palantir.atlasdb.util.MetricsManager;
import com.palantir.common.proxy.PredicateSwitchedProxy;
import com.palantir.conjure.java.api.config.service.UserAgent;
Expand All @@ -38,6 +37,9 @@
import com.palantir.timelock.corruption.detection.CorruptionHealthCheck;
import com.palantir.timelock.corruption.detection.LocalCorruptionDetector;
import com.palantir.timelock.corruption.detection.RemoteCorruptionDetector;
import com.palantir.timelock.history.LocalHistoryLoader;
import com.palantir.timelock.history.PaxosLogHistoryProvider;
import com.palantir.timelock.history.sqlite.SqlitePaxosStateLogHistory;
import com.palantir.timelock.paxos.PaxosRemotingUtils;
import com.palantir.timelock.paxos.TimeLockDialogueServiceProvider;
import com.palantir.timestamp.ManagedTimestampService;
Expand All @@ -50,6 +52,7 @@
import java.util.UUID;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import javax.sql.DataSource;
import org.immutables.value.Value;

public final class PaxosResourcesFactory {
Expand Down Expand Up @@ -109,7 +112,7 @@ private static PaxosResources configureLeaderForEachClient(
.leadershipContextFactory(factory)
.putLeadershipBatchComponents(PaxosUseCase.LEADER_FOR_EACH_CLIENT, factory.components())
.addAdhocResources(new BatchPingableLeaderResource(install.nodeUuid(), factory.components()))
.timeLockCorruptionComponents(timeLockCorruptionComponents(remoteClients))
.timeLockCorruptionComponents(timeLockCorruptionComponents(install.sqliteDataSource(), remoteClients))
.build();
}

Expand Down Expand Up @@ -155,7 +158,7 @@ private static PaxosResources configureLeaderForAllClients(
factory.components().acceptor(PaxosUseCase.PSEUDO_LEADERSHIP_CLIENT)),
new LeaderLearnerResource(factory.components().learner(PaxosUseCase.PSEUDO_LEADERSHIP_CLIENT)),
factory.components().pingableLeader(PaxosUseCase.PSEUDO_LEADERSHIP_CLIENT))
.timeLockCorruptionComponents(timeLockCorruptionComponents(remoteClients))
.timeLockCorruptionComponents(timeLockCorruptionComponents(install.sqliteDataSource(), remoteClients))
.build();
}

Expand Down Expand Up @@ -241,17 +244,26 @@ private static ImmutablePaxosResources.Builder setupTimestampResources(
.timestampServiceFactory(timestampFactory);
}

private static TimeLockCorruptionComponents timeLockCorruptionComponents(PaxosRemoteClients remoteClients) {
private static TimeLockCorruptionComponents timeLockCorruptionComponents(
DataSource dataSource, PaxosRemoteClients remoteClients) {
RemoteCorruptionDetector remoteCorruptionDetector = new RemoteCorruptionDetector();

CorruptionHealthCheck healthCheck = new CorruptionHealthCheck(ImmutableList.of(
LocalCorruptionDetector.create(remoteClients.getRemoteCorruptionNotifiers()),
remoteCorruptionDetector));
PaxosLogHistoryProvider historyProvider =
new PaxosLogHistoryProvider(dataSource, remoteClients.getRemoteHistoryProviders());

LocalCorruptionDetector localCorruptionDetector =
LocalCorruptionDetector.create(historyProvider, remoteClients.getRemoteCorruptionNotifiers());

CorruptionHealthCheck healthCheck =
new CorruptionHealthCheck(localCorruptionDetector, remoteCorruptionDetector);

LocalHistoryLoader localHistoryLoader =
LocalHistoryLoader.create(SqlitePaxosStateLogHistory.create(dataSource));

return TimeLockCorruptionComponents.builder()
.timeLockCorruptionHealthCheck(healthCheck)
.remoteCorruptionDetector(remoteCorruptionDetector)
.remoteHistoryProviders(remoteClients.getRemoteHistoryProviders())
.localHistoryLoader(localHistoryLoader)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@

import com.palantir.timelock.corruption.detection.CorruptionHealthCheck;
import com.palantir.timelock.corruption.detection.RemoteCorruptionDetector;
import com.palantir.timelock.history.TimeLockPaxosHistoryProvider;
import java.util.List;
import com.palantir.timelock.history.LocalHistoryLoader;
import org.immutables.value.Value;

@Value.Immutable
Expand All @@ -28,7 +27,7 @@ public interface TimeLockCorruptionComponents {

RemoteCorruptionDetector remoteCorruptionDetector();

List<TimeLockPaxosHistoryProvider> remoteHistoryProviders();
LocalHistoryLoader localHistoryLoader();

static ImmutableTimeLockCorruptionComponents.Builder builder() {
return ImmutableTimeLockCorruptionComponents.builder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@
import com.palantir.timelock.config.TimeLockInstallConfiguration;
import com.palantir.timelock.config.TimeLockRuntimeConfiguration;
import com.palantir.timelock.config.TsBoundPersisterConfiguration;
import com.palantir.timelock.corruption.detection.CorruptionHealthReport;
import com.palantir.timelock.corruption.handle.CorruptionNotifierResource;
import com.palantir.timelock.corruption.handle.JerseyCorruptionFilter;
import com.palantir.timelock.corruption.handle.UndertowCorruptionHandlerService;
import com.palantir.timelock.history.LocalHistoryLoader;
import com.palantir.timelock.history.remote.TimeLockPaxosHistoryProviderResource;
import com.palantir.timelock.history.sqlite.SqlitePaxosStateLogHistory;
import com.palantir.timelock.invariants.NoSimultaneousServiceCheck;
import com.palantir.timelock.invariants.TimeLockActivityCheckerFactory;
import com.palantir.timelock.management.ImmutableTimestampStorage;
Expand Down Expand Up @@ -261,8 +260,6 @@ private void createAndRegisterResources() {
namespace -> namespaces.get(namespace).getTimelockService();
Function<String, LockService> lockServiceGetter =
namespace -> namespaces.get(namespace).getLockService();
LocalHistoryLoader localHistoryLoader =
LocalHistoryLoader.create(SqlitePaxosStateLogHistory.create(sqliteDataSource));

if (undertowRegistrar.isPresent()) {
Consumer<UndertowService> presentUndertowRegistrar = undertowRegistrar.get();
Expand All @@ -276,12 +273,13 @@ private void createAndRegisterResources() {
presentUndertowRegistrar,
ConjureLockV1Resource.undertow(redirectRetryTargeter(), lockServiceGetter));
registerCorruptionHandlerWrappedService(
presentUndertowRegistrar, TimeLockPaxosHistoryProviderResource.undertow(localHistoryLoader));
presentUndertowRegistrar,
TimeLockPaxosHistoryProviderResource.undertow(corruptionComponents.localHistoryLoader()));
} else {
registrar.accept(ConjureTimelockResource.jersey(redirectRetryTargeter(), asyncTimelockServiceGetter));
registrar.accept(ConjureLockWatchingResource.jersey(redirectRetryTargeter(), asyncTimelockServiceGetter));
registrar.accept(ConjureLockV1Resource.jersey(redirectRetryTargeter(), lockServiceGetter));
registrar.accept(TimeLockPaxosHistoryProviderResource.jersey(localHistoryLoader));
registrar.accept(TimeLockPaxosHistoryProviderResource.jersey(corruptionComponents.localHistoryLoader()));
}
}

Expand Down Expand Up @@ -436,6 +434,10 @@ public LeaderElectionHealthReport timeLockLeadershipHealthCheck() {
.leaderElectionRateHealthReport();
}

/**
 * Exposes the corruption health report for this node.
 *
 * @return the report obtained by calling {@code localCorruptionReport()} on the corruption health check
 *         held by {@code corruptionComponents}
 */
public CorruptionHealthReport timeLockCorruptionHealthCheck() {
return corruptionComponents.timeLockCorruptionHealthCheck().localCorruptionReport();
}

public void shutdown() {
paxosResources.leadershipComponents().shutdown();
sqliteDataSource.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ public enum CorruptionCheckViolation {
ACCEPTED_VALUE_GREATER_THAN_LEARNED(true, false);

private final boolean shouldRaiseErrorAlert;
private final boolean shouldShootTimeLock;
private final boolean shouldRejectRequests;

CorruptionCheckViolation(boolean shouldRaiseErrorAlert, boolean shouldShootTimeLock) {
CorruptionCheckViolation(boolean shouldRaiseErrorAlert, boolean shouldRejectRequests) {
this.shouldRaiseErrorAlert = shouldRaiseErrorAlert;
this.shouldShootTimeLock = shouldShootTimeLock;
this.shouldRejectRequests = shouldRejectRequests;
}

public boolean shootTimeLock() {
return shouldShootTimeLock;
public boolean shouldRejectRequests() {
return shouldRejectRequests;
}

public boolean raiseErrorAlert() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@
package com.palantir.timelock.corruption.detection;

public interface CorruptionDetector {
boolean hasDetectedCorruption();
boolean shouldRejectRequests();
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,21 @@

package com.palantir.timelock.corruption.detection;

import java.util.List;

public class CorruptionHealthCheck {
private final List<CorruptionDetector> corruptionDetectors;
private final LocalCorruptionDetector localCorruptionDetector;
private final RemoteCorruptionDetector remoteCorruptionDetector;

public CorruptionHealthCheck(
LocalCorruptionDetector localCorruptionDetector, RemoteCorruptionDetector remoteCorruptionDetector) {
this.localCorruptionDetector = localCorruptionDetector;
this.remoteCorruptionDetector = remoteCorruptionDetector;
}

public CorruptionHealthCheck(List<CorruptionDetector> corruptionDetectors) {
this.corruptionDetectors = corruptionDetectors;
public boolean shouldRejectRequests() {
return localCorruptionDetector.shouldRejectRequests() || remoteCorruptionDetector.shouldRejectRequests();
}

public boolean isHealthy() {
return corruptionDetectors.stream().noneMatch(CorruptionDetector::hasDetectedCorruption);
public CorruptionHealthReport localCorruptionReport() {
return localCorruptionDetector.corruptionHealthReport();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* (c) Copyright 2020 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.timelock.corruption.detection;

import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.SetMultimap;
import com.palantir.paxos.NamespaceAndUseCase;
import org.immutables.value.Value;

/**
 * Immutable report associating each {@link CorruptionCheckViolation} with the
 * {@link NamespaceAndUseCase} pairs recorded against it.
 */
@Value.Immutable
public interface CorruptionHealthReport {
    /**
     * @return the violations contained in this report, each mapped to the set of namespace and use case
     *         pairs recorded for it
     */
    @Value.Parameter
    SetMultimap<CorruptionCheckViolation, NamespaceAndUseCase> violatingStatusesToNamespaceAndUseCase();

    /**
     * @return a new builder for the generated immutable implementation of this interface
     */
    static ImmutableCorruptionHealthReport.Builder builder() {
        return ImmutableCorruptionHealthReport.builder();
    }

    /**
     * @return a report whose violation multimap is empty
     */
    static CorruptionHealthReport defaultHealthyReport() {
        return builder()
                .violatingStatusesToNamespaceAndUseCase(ImmutableSetMultimap.of())
                .build();
    }

    /**
     * Checks whether any violation present in this report asks for requests to be rejected.
     *
     * @return {@code true} if at least one violation key returns {@code true} from
     *         {@link CorruptionCheckViolation#shouldRejectRequests()}; {@code false} otherwise,
     *         including when the report holds no violations
     */
    default boolean shouldRejectRequests() {
        for (CorruptionCheckViolation violation :
                violatingStatusesToNamespaceAndUseCase().keySet()) {
            if (violation.shouldRejectRequests()) {
                return true;
            }
        }
        return false;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public enum CorruptionStatus {
this.shouldShootTimeLock = shouldShootTimeLock;
}

public boolean shootTimeLock() {
public boolean shouldRejectRequests() {
return shouldShootTimeLock;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,34 +17,73 @@
package com.palantir.timelock.corruption.detection;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.SetMultimap;
import com.palantir.atlasdb.encoding.PtBytes;
import com.palantir.common.streams.KeyedStream;
import com.palantir.logsafe.Preconditions;
import com.palantir.paxos.ImmutableNamespaceAndUseCase;
import com.palantir.paxos.NamespaceAndUseCase;
import com.palantir.paxos.PaxosValue;
import com.palantir.timelock.history.PaxosAcceptorData;
import com.palantir.timelock.history.models.CompletePaxosHistoryForNamespaceAndUseCase;
import com.palantir.timelock.history.models.ConsolidatedLearnerAndAcceptorRecord;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public final class HistoryAnalyzer {
private HistoryAnalyzer() {
// do not create instance of this class
}

/**
 * Builds a {@link CorruptionHealthReport} from the supplied Paxos histories.
 *
 * <p>Each history is reduced to a single {@link CorruptionCheckViolation}, keyed by its namespace and
 * use case. The mapping is then inverted so that violations become the keys, and only violations for
 * which {@link CorruptionCheckViolation#raiseErrorAlert()} is {@code true} are kept.
 *
 * <p>NOTE(review): the two-argument {@code Collectors.toMap} throws {@link IllegalStateException} on
 * duplicate keys, so this presumably relies on {@code history} holding at most one entry per
 * namespace and use case - confirm against callers.
 *
 * @param history the complete histories to analyse, one element per namespace and use case
 * @return a report containing every alert-worthy violation and the namespace and use case pairs
 *         exhibiting it
 */
public static CorruptionHealthReport corruptionHealthReportForHistory(
        List<CompletePaxosHistoryForNamespaceAndUseCase> history) {
    // Step 1: one violation level per namespace and use case.
    Map<NamespaceAndUseCase, CorruptionCheckViolation> violationByNamespaceAndUseCase = history.stream()
            .collect(Collectors.toMap(
                    namespaceHistory -> extractNamespaceAndUseCase(namespaceHistory),
                    namespaceHistory -> corruptionCheckViolationLevelForNamespaceAndUseCase(namespaceHistory)));

    // Step 2: flip the entries so the violation is the key, then drop violations that do not alert.
    SetMultimap<CorruptionCheckViolation, NamespaceAndUseCase> alertingViolations =
            KeyedStream.stream(violationByNamespaceAndUseCase)
                    .mapEntries((namespaceAndUseCase, violation) ->
                            Maps.immutableEntry(violation, namespaceAndUseCase))
                    .filterKeys(violation -> violation.raiseErrorAlert())
                    .collectToSetMultimap();

    return CorruptionHealthReport.builder()
            .violatingStatusesToNamespaceAndUseCase(alertingViolations)
            .build();
}

/**
 * Creates the {@link NamespaceAndUseCase} key for a history by copying its namespace and use case.
 *
 * @param historyForNamespaceAndUseCase the history whose namespace and use case are read
 * @return a new {@link NamespaceAndUseCase} carrying those two values
 */
private static NamespaceAndUseCase extractNamespaceAndUseCase(
        CompletePaxosHistoryForNamespaceAndUseCase historyForNamespaceAndUseCase) {
    ImmutableNamespaceAndUseCase.Builder keyBuilder = ImmutableNamespaceAndUseCase.builder();
    keyBuilder.namespace(historyForNamespaceAndUseCase.namespace());
    keyBuilder.useCase(historyForNamespaceAndUseCase.useCase());
    return keyBuilder.build();
}

@VisibleForTesting
static List<CorruptionCheckViolation> violatedCorruptionChecksForNamespaceAndUseCase(
static CorruptionCheckViolation corruptionCheckViolationLevelForNamespaceAndUseCase(
CompletePaxosHistoryForNamespaceAndUseCase history) {
return Stream.of(
divergedLearners(history),
learnedValueWithoutQuorum(history),
greatestAcceptedValueNotLearned(history))
.filter(CorruptionCheckViolation::raiseErrorAlert)
.collect(Collectors.toList());
List<Function<CompletePaxosHistoryForNamespaceAndUseCase, CorruptionCheckViolation>> violationChecks =
ImmutableList.of(
HistoryAnalyzer::divergedLearners,
HistoryAnalyzer::learnedValueWithoutQuorum,
HistoryAnalyzer::greatestAcceptedValueNotLearned);
return violationChecks.stream()
.map(check -> check.apply(history))
.filter(check -> check.raiseErrorAlert())
.findFirst()
.orElse(CorruptionCheckViolation.NONE);
}

@VisibleForTesting
Expand Down
Loading