-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Track cancellable tasks by parent ID #76186
Changes from all commits
e76da24
359aafc
bb7e1c3
507aa98
8f65a06
9aedfa7
6921c0a
76ed889
b63d6c5
03cf0a9
184d410
059c004
9554f12
16fa401
0d98b3e
e0ea7a1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.tasks; | ||
|
||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.stream.Stream; | ||
|
||
/** | ||
* Tracks items that are associated with cancellable tasks, supporting efficient lookup by task ID and by parent task ID | ||
*/ | ||
public class CancellableTasksTracker<T> { | ||
|
||
private final T[] empty; | ||
|
||
public CancellableTasksTracker(T[] empty) {
    // put() starts from this array when a parent has no entry yet and grows it with Arrays.copyOf,
    // so it must not contain any elements
    assert 0 == empty.length;
    this.empty = empty;
}
|
||
private final Map<Long, T> byTaskId = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(); | ||
private final Map<TaskId, T[]> byParentTaskId = ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we extend We could consider doing it in an assertion too but doing it concurrently while running might be difficult? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I don't think we can express any true invariants very easily; I added an eventually-true assertion in 9554f12.
arteam marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* Add an item for the given task. Should only be called once for each task, and {@code item} must be unique per task too. | ||
*/ | ||
public void put(Task task, T item) { | ||
final long taskId = task.getId(); | ||
if (task.getParentTaskId().isSet()) { | ||
byParentTaskId.compute(task.getParentTaskId(), (ignored, oldValue) -> { | ||
if (oldValue == null) { | ||
oldValue = empty; | ||
} | ||
final T[] newValue = Arrays.copyOf(oldValue, oldValue.length + 1); | ||
newValue[oldValue.length] = item; | ||
return newValue; | ||
}); | ||
} | ||
final T oldItem = byTaskId.put(taskId, item); | ||
assert oldItem == null : "duplicate entry for task [" + taskId + "]"; | ||
} | ||
|
||
/** | ||
* Get the item that corresponds with the given task, or {@code null} if there is no such item. | ||
*/ | ||
public T get(long id) { | ||
return byTaskId.get(id); | ||
} | ||
|
||
/** | ||
* Remove (and return) the item that corresponds with the given task. Return {@code null} if not present. Safe to call multiple times | ||
* for each task. However, {@link #getByParent} may return this task even after a call to this method completes, if the removal is | ||
* actually being completed by a concurrent call that's still ongoing. | ||
*/ | ||
public T remove(Task task) { | ||
final long taskId = task.getId(); | ||
final T oldItem = byTaskId.remove(taskId); | ||
if (oldItem != null && task.getParentTaskId().isSet()) { | ||
byParentTaskId.compute(task.getParentTaskId(), (ignored, oldValue) -> { | ||
if (oldValue == null) { | ||
return null; | ||
} | ||
if (oldValue.length == 1) { | ||
if (oldValue[0] == oldItem) { | ||
return null; | ||
} else { | ||
return oldValue; | ||
} | ||
} | ||
if (oldValue[0] == oldItem) { | ||
return Arrays.copyOfRange(oldValue, 1, oldValue.length); | ||
} | ||
for (int i = 1; i < oldValue.length; i++) { | ||
if (oldValue[i] == oldItem) { | ||
final T[] newValue = Arrays.copyOf(oldValue, oldValue.length - 1); | ||
System.arraycopy(oldValue, i + 1, newValue, i, oldValue.length - i - 1); | ||
return newValue; | ||
} | ||
} | ||
return oldValue; | ||
}); | ||
} | ||
return oldItem; | ||
} | ||
|
||
/** | ||
* Return a collection of all the tracked items. May be large. In the presence of concurrent calls to {@link #put} and {@link #remove} | ||
* it behaves similarly to {@link ConcurrentHashMap#values()}. | ||
*/ | ||
public Collection<T> values() { | ||
return byTaskId.values(); | ||
} | ||
|
||
/** | ||
* Return a collection of all the tracked items with a given parent, which will include at least every item for which {@link #put} | ||
* completed, but {@link #remove} hasn't started. May include some additional items for which all the calls to {@link #remove} that | ||
* started before this method was called have not completed. | ||
*/ | ||
public Stream<T> getByParent(TaskId parentTaskId) { | ||
final T[] byParent = byParentTaskId.get(parentTaskId); | ||
if (byParent == null) { | ||
return Stream.empty(); | ||
} | ||
return Arrays.stream(byParent); | ||
} | ||
|
||
// assertion for tests, not an invariant but should eventually be true | ||
boolean assertConsistent() { | ||
// mustn't leak any items tracked by parent | ||
assert byTaskId.isEmpty() == false || byParentTaskId.isEmpty(); | ||
|
||
// every by-parent value must be tracked by task too; the converse isn't true since we don't track values without a parent | ||
final Set<T> byTaskValues = new HashSet<>(byTaskId.values()); | ||
for (T[] byParent : byParentTaskId.values()) { | ||
assert byParent.length > 0; | ||
for (T t : byParent) { | ||
assert byTaskValues.contains(t); | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.tasks; | ||
|
||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
import java.util.concurrent.BrokenBarrierException; | ||
import java.util.concurrent.CyclicBarrier; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.TimeoutException; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.hamcrest.Matchers.empty; | ||
import static org.hamcrest.Matchers.hasItem; | ||
import static org.hamcrest.Matchers.lessThanOrEqualTo; | ||
import static org.hamcrest.Matchers.not; | ||
|
||
public class CancellableTasksTrackerTests extends ESTestCase { | ||
|
||
private static class TestTask { | ||
private final Thread actionThread; | ||
private final Thread watchThread; | ||
private final Thread concurrentRemoveThread; | ||
|
||
// 0 == before put, 1 == during put, 2 == after put, before remove, 3 == during remove, 4 == after remove | ||
private final AtomicInteger state = new AtomicInteger(); | ||
private final boolean concurrentRemove = randomBoolean(); | ||
|
||
// Creates the three threads that exercise the tracker for a single task: an action thread that puts and then removes
// the item, a watch thread that compares what the tracker reports with the action thread's progress, and a thread that
// may race extra remove() calls against the action thread. Every thread runs awaitStart before doing anything else.
TestTask(Task task, String item, CancellableTasksTracker<String> tracker, Runnable awaitStart) { | ||
if (concurrentRemove) { | ||
// makes 10 passes; remove() is only called on a pass where the action thread has reached state 3 (during remove) or later
concurrentRemoveThread = new Thread(() -> { | ||
awaitStart.run(); | ||
|
||
for (int i = 0; i < 10; i++) { | ||
if (3 <= state.get()) { | ||
final String removed = tracker.remove(task); | ||
// null means some other remove() call already took the item; otherwise it must be this task's item
if (removed != null) { | ||
assertSame(item, removed); | ||
} | ||
} | ||
} | ||
}); | ||
} else { | ||
// no racing removes, but the thread still runs awaitStart: the barrier in testCancellableTasksTracker is sized for three threads per task
concurrentRemoveThread = new Thread(awaitStart); | ||
} | ||
|
||
actionThread = new Thread(() -> { | ||
awaitStart.run(); | ||
|
||
// each incrementAndGet() moves state to the next phase listed on the field: 1 == during put, 2 == after put
state.incrementAndGet(); | ||
tracker.put(task, item); | ||
state.incrementAndGet(); | ||
|
||
Thread.yield(); | ||
|
||
// 3 == during remove; set before remove() is called, which is what lets the concurrent remover start trying
state.incrementAndGet(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not entirely sure we would ever see state==2 in other threads, I would think it would be ok for the JVM to optimize it away. This is possibly even true for the entire There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, an earlier iteration had some assertions in between these calls. In theory you're right that it would be legitimate for the compiler to collapse these tests to something fairly trivial, but in practice they do cover many interleavings and the other threads do indeed observe |
||
final String removed = tracker.remove(task); | ||
state.incrementAndGet(); | ||
// without a concurrent remover this call must return the item; with one, null is tolerated
if (concurrentRemove == false || removed != null) { | ||
assertSame(item, removed); | ||
} | ||
|
||
// a further remove() of the same task must find nothing
assertNull(tracker.remove(task)); | ||
}, "action-thread-" + item); | ||
|
||
watchThread = new Thread(() -> { | ||
awaitStart.run(); | ||
|
||
for (int i = 0; i < 10; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this not always run at least until state.get() ==4 (and then another round)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found that this sometimes noticeably added to the test runtime, we spent ages doing this loop before actually doing the useful work. Again, in practice this covers what we care about. |
||
// sample state on both sides of the tracker reads so the checks below can tell which phases the reads overlapped
final int stateBefore = state.get(); | ||
final String getResult = tracker.get(task.getId()); | ||
final Set<String> getByParentResult = tracker.getByParent(task.getParentTaskId()).collect(Collectors.toSet()); | ||
final Set<String> values = new HashSet<>(tracker.values()); | ||
final int stateAfter = state.get(); | ||
|
||
// state only ever increases
assertThat(stateBefore, lessThanOrEqualTo(stateAfter)); | ||
|
||
// visible by task ID both in the earlier read and in a fresh re-read: the by-parent lookup made in between must have seen it too
if (getResult != null && task.getParentTaskId().isSet() && tracker.get(task.getId()) != null) { | ||
assertThat(getByParentResult, hasItem(item)); | ||
} | ||
|
||
// put() had not started by the time the reads finished: the item must not be visible anywhere
if (stateAfter == 0) { | ||
assertNull(getResult); | ||
assertThat(getByParentResult, not(hasItem(item))); | ||
assertThat(values, not(hasItem(item))); | ||
} | ||
|
||
// put() had completed and remove() had not started for the whole window: the item must be visible everywhere it is tracked
if (stateBefore == 2 && stateAfter == 2) { | ||
assertSame(item, getResult); | ||
if (task.getParentTaskId().isSet()) { | ||
assertThat(getByParentResult, hasItem(item)); | ||
} else { | ||
assertThat(getByParentResult, empty()); | ||
} | ||
assertThat(values, hasItem(item)); | ||
} | ||
|
||
// the action thread's remove() had returned before the reads began
if (stateBefore == 4) { | ||
assertNull(getResult); | ||
if (concurrentRemove == false) { | ||
assertThat(getByParentResult, not(hasItem(item))); | ||
} // else our remove might have completed but the concurrent one hasn't updated the parent ID map yet | ||
assertThat(values, not(hasItem(item))); | ||
} | ||
} | ||
}, "watch-thread-" + item); | ||
} | ||
|
||
void start() { | ||
watchThread.start(); | ||
concurrentRemoveThread.start(); | ||
actionThread.start(); | ||
} | ||
|
||
void join() throws InterruptedException { | ||
actionThread.join(); | ||
concurrentRemoveThread.join(); | ||
watchThread.join(); | ||
} | ||
} | ||
|
||
public void testCancellableTasksTracker() throws InterruptedException { | ||
|
||
final TaskId[] parentTaskIds | ||
= randomArray(10, 10, TaskId[]::new, () -> new TaskId(randomAlphaOfLength(5), randomNonNegativeLong())); | ||
|
||
final CancellableTasksTracker<String> tracker = new CancellableTasksTracker<>(new String[0]); | ||
final TestTask[] tasks = new TestTask[between(1, 100)]; | ||
|
||
final Runnable awaitStart = new Runnable() { | ||
private final CyclicBarrier startBarrier = new CyclicBarrier(tasks.length * 3); | ||
|
||
@Override | ||
public void run() { | ||
try { | ||
startBarrier.await(10, TimeUnit.SECONDS); | ||
} catch (InterruptedException | BrokenBarrierException | TimeoutException e) { | ||
throw new AssertionError("unexpected", e); | ||
} | ||
} | ||
}; | ||
|
||
for (int i = 0; i < tasks.length; i++) { | ||
tasks[i] = new TestTask( | ||
new Task( | ||
randomNonNegativeLong(), | ||
randomAlphaOfLength(5), | ||
randomAlphaOfLength(5), | ||
randomAlphaOfLength(5), | ||
rarely() ? TaskId.EMPTY_TASK_ID : randomFrom(parentTaskIds), | ||
Collections.emptyMap()), | ||
"item-" + i, | ||
tracker, | ||
awaitStart | ||
); | ||
} | ||
|
||
for (TestTask task : tasks) { | ||
task.start(); | ||
} | ||
|
||
for (TestTask task : tasks) { | ||
task.join(); | ||
} | ||
|
||
tracker.assertConsistent(); | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should also check the more trivial case where no parent id is set. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I misunderstand, I think we already do?
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can `empty` be a constant? It doesn't seem to be modified anywhere and it's used in `Arrays.copyOf`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Again I didn't find a way to do so, but you're welcome to show me how :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does this have to be generic? `T == CancellableTaskHolder` is the only use case we have, isn't it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, in production. I didn't want to make `CancellableTaskHolder` public just for the tests and I don't think it makes a performance difference. I could be persuaded.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nah, there isn't any difference, I think; let's leave it as is then IMO :)