Skip to content

Commit

Permalink
deflake and speedup ExecutorStepTest tests
Browse files Browse the repository at this point in the history
calling setLabels on an agent will not persist the node.

in older versions of Jenkins the tests would be less flaky as adding any
Node would cause all labels to be re-evaluated, so when creating a few
agents and adding labels in a loop the last one created would at least
deterministically ensure that all previous agents labels where correct.

However since 2.332 (jenkinsci/jenkins#5882)
only labels part of a node added or removed would be updated, and when
creating the agents they where created without labels, which where added
later.

This caused tests to be flaky depending on when the periodic
`trimLabels` was called (or at least on other timing related things)

THis was discovered by enabling a loggerrule for hudson.model.queue and
observing that the builds would timeout as not all the agents would have
the expected nodes.

e.g.

```
  12.023 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[ jenkinsci#1] rejected part of demo jenkinsci#1: ?Jenkins? doesn?t have label ?foo?
  12.023 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[slave0 #0] is a potential candidate for task part of demo jenkinsci#1
  12.024 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[slave2 #0] rejected part of demo jenkinsci#1: ?slave2? doesn?t have label ?foo?
  12.024 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[slave1 #0] rejected part of demo jenkinsci#1: ?slave1? doesn?t have label ?foo?
  12.024 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[ #0] rejected part of demo jenkinsci#1: ?Jenkins? doesn?t have label ?foo?
  12.024 [id=141]       FINEST  hudson.model.Queue#maintain: JobOffer[slave3 #0] rejected part of demo jenkinsci#1: ?slave3? doesn?t have label ?foo?
```

from `reuseNodesWithSameLabelsInParallelStages`

Additionally creating agents and waiting for them to come oneline is
slow.  A pipeline will start and then wait for the node to be available,
so we can do other things whilst the agent is connecting.

For the case where we need a number of agents connected before we start
to run the pipeline, we now create iall the agents before waiting for them to connect.
  • Loading branch information
jtnord committed May 11, 2022
1 parent 493c2cb commit 5f726ea
Showing 1 changed file with 23 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.gargoylesoftware.htmlunit.Page;
import com.google.common.base.Predicate;
import edu.umd.cs.findbugs.annotations.Nullable;
import hudson.EnvVars;
import hudson.FilePath;
import hudson.Functions;
import hudson.model.Computer;
Expand Down Expand Up @@ -139,10 +140,8 @@ public class ExecutorStepTest {
*/
@Test public void buildShellScriptOnSlave() throws Throwable {
sessions.then(r -> {
DumbSlave s = r.createOnlineSlave();
s.setLabelString("remote quick");
DumbSlave s = r.createSlave("remote quick", null);
s.getNodeProperties().add(new EnvironmentVariablesNodeProperty(new EnvironmentVariablesNodeProperty.Entry("ONSLAVE", "true")));

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition(
"node('" + s.getNodeName() + "') {\n" +
Expand Down Expand Up @@ -183,7 +182,7 @@ public class ExecutorStepTest {
*/
@Test public void buildShellScriptWithPersistentProcesses() throws Throwable {
sessions.then(r -> {
DumbSlave s = r.createOnlineSlave();
DumbSlave s = r.createSlave();
Path f1 = r.jenkins.getRootDir().toPath().resolve("test.txt");
String fullPathToTestFile = f1.toAbsolutePath().toString();
// Escape any \ in the source so that the script is valid
Expand Down Expand Up @@ -408,7 +407,7 @@ private static void assertWorkspaceLocked(Computer computer, String workspacePat

@Test public void buildShellScriptQuick() throws Throwable {
sessions.then(r -> {
DumbSlave s = r.createOnlineSlave();
DumbSlave s = r.createSlave();
s.getNodeProperties().add(new EnvironmentVariablesNodeProperty(new EnvironmentVariablesNodeProperty.Entry("ONSLAVE", "true")));

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
Expand Down Expand Up @@ -522,7 +521,7 @@ private static void assertLogMatches(WorkflowRun build, String regexp) throws IO

@Test public void detailsExported() throws Throwable {
sessions.then(r -> {
DumbSlave s = r.createOnlineSlave();
DumbSlave s = r.createSlave();

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition(
Expand Down Expand Up @@ -681,10 +680,7 @@ private List<WorkspaceAction> getWorkspaceActions(WorkflowRun workflowRun) {
@Issue("JENKINS-36547")
@Test public void reuseNodeFromPreviousRun() throws Throwable {
sessions.then(r -> {
for (int i = 0; i < 5; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo bar");
}
createNOnlineAgentWithLabels(r, 5, "foo bar");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition("node('foo') {\n" +
Expand Down Expand Up @@ -744,10 +740,7 @@ private Map<String, String> mapNodeNameToLogText(WorkflowRun workflowRun) throws
@Issue("JENKINS-36547")
@Test public void reuseNodesWithDifferentLabelsFromPreviousRuns() throws Throwable {
sessions.then(r -> {
for (int i = 0; i < 1; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo bar");
}
createNOnlineAgentWithLabels(r, 1, "foo bar");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition(
Expand Down Expand Up @@ -778,10 +771,7 @@ private Map<String, String> mapNodeNameToLogText(WorkflowRun workflowRun) throws
// Note: for Jenkins versions > 2.65, the number of agents must be increased to 5.
// This is due to changes in the Load Balancer (See JENKINS-60563).
int totalAgents = Jenkins.getVersion().isNewerThan(new VersionNumber("2.265")) ? 5 : 3;
for (int i = 0; i < totalAgents; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo bar");
}
createNOnlineAgentWithLabels(r, totalAgents, "foo bar");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition("" +
Expand Down Expand Up @@ -829,10 +819,7 @@ private Map<String, String> mapNodeNameToLogText(WorkflowRun workflowRun) throws
@Issue("JENKINS-36547")
@Test public void reuseNodesWithSameLabelsInParallelStages() throws Throwable {
sessions.then(r -> {
for (int i = 0; i < 4; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo bar");
}
createNOnlineAgentWithLabels(r, 4, "foo bar");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");

Expand Down Expand Up @@ -894,10 +881,7 @@ private Map<String, String> mapNodeNameToLogText(WorkflowRun workflowRun) throws
@Issue("JENKINS-36547")
@Test public void reuseNodesWithSameLabelsInStagesWrappedInsideParallelStages() throws Throwable {
sessions.then(r -> {
for (int i = 0; i < 4; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo bar");
}
createNOnlineAgentWithLabels(r, 4, "foo bar");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition("" +
Expand Down Expand Up @@ -960,10 +944,7 @@ private Map<String, String> mapNodeNameToLogText(WorkflowRun workflowRun) throws
@Issue("JENKINS-36547")
@Test public void reuseNodeInSameRun() throws Throwable {
sessions.then(r -> {
for (int i = 0; i < 5; ++i) {
DumbSlave slave = r.createOnlineSlave();
slave.setLabelString("foo");
}
createNOnlineAgentWithLabels(r, 5, "foo");

WorkflowJob p = r.createProject(WorkflowJob.class, "demo");
p.setDefinition(new CpsFlowDefinition("for (int i = 0; i < 20; ++i) {node('foo') {echo \"ran node block ${i}\"}}", true));
Expand Down Expand Up @@ -1099,7 +1080,7 @@ private static List<String> currentLabels(JenkinsRule r) {
@Issue("JENKINS-58900")
@Test public void nodeDisconnectMissingContextVariableException() throws Throwable {
sessions.then(r -> {
DumbSlave agent = r.createOnlineSlave();
DumbSlave agent = r.createSlave();
WorkflowJob p = r.createProject(WorkflowJob.class);
p.setDefinition(new CpsFlowDefinition(
"node ('" + agent.getNodeName() + "') {\n" +
Expand Down Expand Up @@ -1161,7 +1142,7 @@ public void getOwnerTaskPermissions() throws Throwable {

@Test public void getParentExecutable() throws Throwable {
sessions.then(r -> {
DumbSlave s = r.createOnlineSlave();
DumbSlave s = r.createSlave();
WorkflowJob p = r.createProject(WorkflowJob.class, "p");
p.setDefinition(new CpsFlowDefinition("node('" + s.getNodeName() + "') {semaphore('wait')}", true));
WorkflowRun b = p.scheduleBuild2(0).waitForStart();
Expand Down Expand Up @@ -1211,4 +1192,14 @@ public String getShortDescription() {
}
}

private void createNOnlineAgentWithLabels(JenkinsRule r, int number, String label) throws Exception {
// create all the slaves then wait for them to connect as it will be quicker as agents connect in parallel
ArrayList<DumbSlave> agents = new ArrayList<>();
for (int i = 0; i < number; ++i) {
agents.add(r.createSlave("foo bar", null));
}
for (DumbSlave agent : agents) {
r.waitOnline(agent);
}
}
}

0 comments on commit 5f726ea

Please sign in to comment.