Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

ABR12: AllNodesDisabledNamespacesUpdater #5876

Merged
merged 47 commits into from
Feb 7, 2022
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
7d4fa82
Revert "Remove AllNodesDNU from this PR"
gsheasby Jan 21, 2022
a5b0457
use TimelockNamespaces
gsheasby Jan 21, 2022
45fb392
handle failure cases
gsheasby Jan 21, 2022
bff95a5
mock reEnable
gsheasby Jan 21, 2022
6c22ef7
another test
gsheasby Jan 21, 2022
1054319
and another test
gsheasby Jan 21, 2022
913e714
refactor
gsheasby Jan 21, 2022
1f22605
errorprone
gsheasby Jan 21, 2022
9b7ee14
rollback if re-enable fails
gsheasby Jan 21, 2022
b79a40f
re-enable case B
gsheasby Jan 21, 2022
2392ac1
re-enable case C
gsheasby Jan 21, 2022
dbea5f6
refactor
gsheasby Jan 24, 2022
ff52ba8
encapsulate visitor logic
gsheasby Jan 24, 2022
684ab27
consistently vs inconsistently disabled
gsheasby Jan 24, 2022
abb2e01
re-enabled response granularity
gsheasby Jan 24, 2022
4c7b7c0
extract methods from disable
gsheasby Jan 25, 2022
873e8c7
reuse code
gsheasby Jan 25, 2022
eb59331
robust failure handling
gsheasby Jan 25, 2022
a7716fa
I did this!
gsheasby Jan 25, 2022
5e62e7a
split disable/reenable resps (WIP)
gsheasby Feb 1, 2022
cf4a6bf
single SingleNodeUpdateResponse
gsheasby Feb 1, 2022
2705b80
only update locally if successful elsewhere
gsheasby Feb 1, 2022
33d94a6
fix tests
gsheasby Feb 1, 2022
957f0af
test when lock IDs are different on different nodes
gsheasby Feb 1, 2022
e04a30c
deduplicate
gsheasby Feb 1, 2022
a27943d
more reorg
gsheasby Feb 1, 2022
e2a9f09
shift+tab
gsheasby Feb 1, 2022
24d4f7f
pull out logs
gsheasby Feb 1, 2022
083fef6
Wrapped->Successful PaxosResponse
gsheasby Feb 2, 2022
3459a75
docs
gsheasby Feb 2, 2022
24a5b73
minor refactors
gsheasby Feb 2, 2022
71a931f
make SingleNodeUpdateResponse a PaxosResponse
gsheasby Feb 2, 2022
df20196
remove SuccessfulPaxosResponse
gsheasby Feb 2, 2022
abe2a18
more nits
gsheasby Feb 2, 2022
79e6015
unlock non-conflicting namespaces during re-enable
gsheasby Feb 3, 2022
1389ae7
simplify reenable workflow
gsheasby Feb 3, 2022
f12319e
javadoc
gsheasby Feb 3, 2022
e9ec17e
style
gsheasby Feb 3, 2022
5d4b03c
wait for all responses even if we get a failure
gsheasby Feb 3, 2022
a461714
don't roll back on nodes where we got a failure response
gsheasby Feb 3, 2022
2ee419b
handle unreachable nodes
gsheasby Feb 3, 2022
5c45c2a
JsonValue
gsheasby Feb 3, 2022
6cbf150
fix json
gsheasby Feb 3, 2022
daecef1
fixes
gsheasby Feb 3, 2022
e7a8f16
one last test
gsheasby Feb 3, 2022
db3f65c
final refactors
gsheasby Feb 7, 2022
a160cac
log outside txn
gsheasby Feb 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion leader-election-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ dependencies {
implementation 'com.palantir.safe-logging:safe-logging'
implementation project(':commons-annotations')


annotationProcessor group: 'org.immutables', name: 'value'
compileOnly 'org.immutables:value::annotations'
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ private PaxosQuorumChecker() {
// Private constructor. Disallow instantiation.
}

/**
 * Issues {@code request} against every remote and collects the responses, waiting for all of
 * them instead of stopping once a quorum has answered.
 *
 * <p>This is a thin wrapper around {@code collectResponses}: the number of responses to wait for
 * is set to {@code remotes.size()}, the early-abort predicate always returns {@code false}, and
 * the completion service is built from the supplied per-remote executors.
 *
 * @param remotes the services to call
 * @param request the call to make against each service
 * @param executors one executor per remote; the key set must equal the set of {@code remotes},
 *     otherwise the {@code checkState} precondition fails
 * @param remoteRequestTimeout passed through unchanged to {@code collectResponses}
 * @param cancelRemainingCalls passed through unchanged to {@code collectResponses}
 * @return whatever {@code collectResponses} returns for the arguments above
 */
public static <SERVICE, RESPONSE extends PaxosResponse>
PaxosResponsesWithRemote<SERVICE, RESPONSE> collectAllResponses(
ImmutableList<SERVICE> remotes,
Function<SERVICE, RESPONSE> request,
Map<? extends SERVICE, CheckedRejectionExecutorService> executors,
Duration remoteRequestTimeout,
boolean cancelRemainingCalls) {
Preconditions.checkState(
executors.keySet().equals(new HashSet<>(remotes)), "Each remote should have an executor.");
gsheasby marked this conversation as resolved.
Show resolved Hide resolved
return collectResponses(
remotes,
request,
remotes.size(), // wait until all responses have been received
remoteRequestTimeout,
_unused -> false, // never abort early
cancelRemainingCalls,
MultiplexingCompletionService.createFromCheckedExecutors(executors));
}

/**
* Collects a list of responses from a quorum of remote services.
* This method short-circuits if a quorum can no longer be obtained (if too many servers have sent nacks), and
Expand Down
11 changes: 11 additions & 0 deletions timelock-api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,21 @@ conjure {
}
}

dependencies {
implementation project(':leader-election-api')

compileOnly 'com.fasterxml.jackson.core:jackson-databind'
compileOnly 'org.immutables:value::annotations'
annotationProcessor group: 'org.immutables', name: 'value'
}

subprojects {
apply from: "../../gradle/shared.gradle"
dependencies {
compile project(':lock-api-objects')
compile project(':timelock-api')
implementation project(':leader-election-api')

compile 'com.palantir.conjure.java:conjure-lib'

implementation 'com.fasterxml.jackson.core:jackson-annotations'
Expand Down
6 changes: 4 additions & 2 deletions timelock-api/src/main/conjure/timelock-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ types:
base-type: any
external:
java: com.palantir.lock.v2.LeaderTime
Namespace:
base-type: string
external:
java: com.palantir.atlasdb.timelock.api.Namespace
NanoTime:
base-type: safelong
external:
Expand Down Expand Up @@ -114,8 +118,6 @@ types:
LockWatchRequest:
fields:
references: set<LockWatchReference>
Namespace:
alias: string
LeaderTimes:
fields:
leaderTimes: map<Namespace, LeaderTime>
Expand Down
35 changes: 20 additions & 15 deletions timelock-api/src/main/conjure/timelock-management-api.yml
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
types:
conjure-imports:
api: timelock-api.yml
imports:
SingleNodeUpdateResponse:
external:
java: com.palantir.atlasdb.timelock.api.SingleNodeUpdateResponse
definitions:
default-package: com.palantir.atlasdb.timelock.api
objects:
DisableNamespacesRequest:
fields:
namespaces: set<api.Namespace>
lockId: uuid
SingleNodeUpdateResponse:
fields:
wasSuccessful: boolean
# other namespaces will not have been disabled/re-enabled (the transaction will not complete)
lockedNamespaces: map<api.Namespace, uuid>
SuccessfulDisableNamespacesResponse:
fields:
lockId: uuid
UnsuccessfulDisableNamespacesResponse:
fields:
# we can assume another restore is in progress for this namespace
consistentlyDisabledNamespaces: set<api.Namespace>
# either another restore is in progress for this namespace (and we've hit a race condition), or
# the namespace is stuck and needs to be manually fixed.
partiallyDisabledNamespaces: set<api.Namespace>
consistentlyDisabledNamespaces:
type: set<api.Namespace>
docs: Namespaces where we can assume another restore is in progress
partiallyDisabledNamespaces:
type: set<api.Namespace>
docs: |
Either another restore is in progress for this namespace (and we've hit a race condition), or
the namespace is stuck and needs to be manually fixed.
DisableNamespacesResponse:
union:
successful: SuccessfulDisableNamespacesResponse
Expand All @@ -35,11 +37,14 @@ types:
alias: boolean
UnsuccessfulReenableNamespacesResponse:
fields:
# we can assume another restore is in progress for this namespace (we lost our lock)
consistentlyLockedNamespaces: set<api.Namespace>
# either another restore is in progress for this namespace (and we lost our lock), or
# the namespace is stuck and needs to be manually fixed.
partiallyLockedNamespaces: set<api.Namespace>
consistentlyLockedNamespaces:
type: set<api.Namespace>
docs: We can assume another restore is in progress for this namespace (we lost our lock)
partiallyLockedNamespaces:
type: set<api.Namespace>
docs: |
Either another restore is in progress for this namespace (and we lost our lock), or
the namespace is stuck and needs to be manually fixed.
ReenableNamespacesResponse:
union:
successful: SuccessfulReenableNamespacesResponse
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.atlasdb.timelock.api;

import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import org.immutables.value.Value;

/**
 * A namespace identifier backed by a single string.
 *
 * <p>The string returned by {@link #value()} is annotated with {@code @JsonValue}, and JSON
 * (de)serialization is routed through the generated {@code ImmutableNamespace} class.
 */
@Value.Immutable
@JsonSerialize(as = ImmutableNamespace.class)
@JsonDeserialize(as = ImmutableNamespace.class)
public interface Namespace {
    /** The underlying namespace string. */
    @JsonValue
    String value();

    /**
     * Builds a {@code Namespace} wrapping the given string.
     *
     * @param value the namespace string
     * @return a new immutable instance holding {@code value}
     */
    static Namespace of(String value) {
        return ImmutableNamespace.builder()
                .value(value)
                .build();
    }

    /**
     * Alias for {@link #of(String)}.
     *
     * @param value the namespace string
     * @return the result of {@code of(value)}
     */
    static Namespace valueOf(String value) {
        return Namespace.of(value);
    }

    /**
     * Alias for {@link #value()}, kept for back-compatibility with the conjure-generated Namespace.
     *
     * @return the underlying namespace string
     */
    default String get() {
        return value();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.atlasdb.timelock.api;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import com.palantir.paxos.PaxosResponse;
import java.util.Map;
import java.util.UUID;
import org.immutables.value.Value;

/**
 * The outcome of a namespace update on a single node, expressed as a {@link PaxosResponse}.
 *
 * <p>Instances are created through {@link #successful()} and {@link #failed(Map)}; JSON
 * (de)serialization is routed through the generated {@code ImmutableSingleNodeUpdateResponse}.
 */
@Value.Immutable
@JsonSerialize(as = ImmutableSingleNodeUpdateResponse.class)
@JsonDeserialize(as = ImmutableSingleNodeUpdateResponse.class)
public interface SingleNodeUpdateResponse extends PaxosResponse {
    /**
     * Namespaces reported as locked, each mapped to a UUID (presumably the lock ID found on the
     * node - TODO confirm against the producer of this response).
     *
     * <p>Other namespaces will not have been disabled/re-enabled (the transaction will not complete).
     */
    Map<Namespace, UUID> lockedNamespaces();

    /**
     * Creates an unsuccessful response.
     *
     * @param lockedNamespaces the locked namespaces to report, with their associated UUIDs
     * @return a response with {@code isSuccessful} set to {@code false}
     */
    static SingleNodeUpdateResponse failed(Map<Namespace, UUID> lockedNamespaces) {
        return ImmutableSingleNodeUpdateResponse.builder()
                .lockedNamespaces(lockedNamespaces)
                .isSuccessful(false)
                .build();
    }

    /**
     * Creates a successful response; no locked namespaces are supplied to the builder.
     *
     * @return a response with {@code isSuccessful} set to {@code true}
     */
    static SingleNodeUpdateResponse successful() {
        return ImmutableSingleNodeUpdateResponse.builder()
                .isSuccessful(true)
                .build();
    }
}
1 change: 1 addition & 0 deletions timelock-impl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies {
compile project(":lock-conjure-api:lock-conjure-api-undertow")
compile project(":lock-conjure-api:lock-conjure-api-jersey")
compile project(":lock-impl")
compile project(':timelock-api')
compile project(':timelock-api:timelock-api-undertow')
compile project(':timelock-api:timelock-api-jersey')
compile project(":leader-election-impl")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,13 @@ public void invalidateResourcesForClient(String namespace) {
}
}

public Map<Namespace, UUID> getIncorrectlyLockedNamespaces(Set<Namespace> namespaces, UUID expectedLockId) {
log.info(
"Reading namespace state locally",
SafeArg.of("namespaces", namespaces),
SafeArg.of("expectedLockId", expectedLockId));
return disabledNamespaces.getIncorrectlyLockedNamespaces(namespaces, expectedLockId);
public Map<Namespace, UUID> getNamespacesLockedWithDifferentLockId(Set<Namespace> namespaces, UUID expectedLockId) {
return disabledNamespaces.getNamespacesLockedWithDifferentLockId(namespaces, expectedLockId);
}

public SingleNodeUpdateResponse disable(DisableNamespacesRequest request) {
gsheasby marked this conversation as resolved.
Show resolved Hide resolved
SingleNodeUpdateResponse response = disabledNamespaces.disable(request);
if (response.getWasSuccessful()) {
if (response.isSuccessful()) {
request.getNamespaces().stream().map(Namespace::get).forEach(this::invalidateResourcesForClient);
} else {
log.info(
Expand Down
Loading