Skip to content

Commit

Permalink
Merge pull request #244 from scoheb/restart-2
Browse files Browse the repository at this point in the history
allow re-use of existing pods for restarts
  • Loading branch information
carlossg authored Nov 21, 2017
2 parents 0924477 + 2fc30fa commit 3ee086b
Show file tree
Hide file tree
Showing 14 changed files with 395 additions and 80 deletions.
12 changes: 2 additions & 10 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>org.jenkins-ci.plugins</groupId>
<artifactId>plugin</artifactId>
<version>2.32</version>
<version>3.0</version>
</parent>

<groupId>org.csanchez.jenkins.plugins</groupId>
Expand Down Expand Up @@ -49,12 +49,11 @@
<jenkins.version>2.32.1</jenkins.version>

<kubernetes-client.version>2.6.1</kubernetes-client.version>
<slf4j.version>1.7.13</slf4j.version>

<!-- jenkins plugins versions -->
<jenkins-basic-steps.version>2.3</jenkins-basic-steps.version>
<jenkins-credentials.version>2.1.7</jenkins-credentials.version>
<jenkins-durable-task.version>1.13</jenkins-durable-task.version>
<jenkins-durable-task.version>1.16</jenkins-durable-task.version>
<jenkins-durable-task-step.version>2.11</jenkins-durable-task-step.version>
<jenkins-structs.version>1.6</jenkins-structs.version>
<jenkins-workflow-cps.version>2.29</jenkins-workflow-cps.version>
Expand Down Expand Up @@ -82,13 +81,6 @@
<version>${httpclient.version}</version>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
<scope>provided</scope>
</dependency>

<!-- required plugins -->
<dependency>
<groupId>org.jenkins-ci.plugins</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static java.util.logging.Level.INFO;
import static org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud.JNLP_NAME;
import static org.csanchez.jenkins.plugins.kubernetes.PodTemplateUtils.substituteEnv;

Expand Down Expand Up @@ -120,6 +121,12 @@ public void launch(SlaveComputer computer, TaskListener listener) {
if (slave == null) {
throw new IllegalStateException("Node has been removed, cannot launch " + computer.getName());
}
// The agent was already launched earlier (the flag is set, and saved, at the end of a
// successful launch), so do not create another pod: just let the computer accept tasks again.
if (launched) {
    // java.util.logging formats parameters with MessageFormat-style placeholders ({0}, {1}, ...).
    // An SLF4J-style "{}" is emitted literally and the node name would never appear in the log.
    LOGGER.log(INFO, "Agent has already been launched, activating: {0}", slave.getNodeName());
    computer.setAcceptingTasks(true);
    return;
}

KubernetesCloud cloud = slave.getKubernetesCloud();
final PodTemplate unwrappedTemplate = slave.getTemplate();
try {
Expand All @@ -131,7 +138,7 @@ public void launch(SlaveComputer computer, TaskListener listener) {

LOGGER.log(Level.FINE, "Creating Pod: {0} in namespace {1}", new Object[]{podId, namespace});
pod = client.pods().inNamespace(namespace).create(pod);
LOGGER.log(Level.INFO, "Created Pod: {0} in namespace {1}", new Object[]{podId, namespace});
LOGGER.log(INFO, "Created Pod: {0} in namespace {1}", new Object[]{podId, namespace});
listener.getLogger().printf("Created Pod: %s in namespace %s", podId, namespace);

// We need the pod to be running and connected before returning
Expand All @@ -145,7 +152,7 @@ public void launch(SlaveComputer computer, TaskListener listener) {

// wait for Pod to be running
for (; i < j; i++) {
LOGGER.log(Level.INFO, "Waiting for Pod to be scheduled ({1}/{2}): {0}", new Object[]{podId, i, j});
LOGGER.log(INFO, "Waiting for Pod to be scheduled ({1}/{2}): {0}", new Object[]{podId, i, j});
listener.getLogger().printf("Waiting for Pod to be scheduled (%2$s/%3$s): %1$s", podId, i, j);

Thread.sleep(6000);
Expand All @@ -161,7 +168,7 @@ public void launch(SlaveComputer computer, TaskListener listener) {
if (info != null) {
if (info.getState().getWaiting() != null) {
// Pod is waiting for some reason
LOGGER.log(Level.INFO, "Container is waiting {0} [{2}]: {1}",
LOGGER.log(INFO, "Container is waiting {0} [{2}]: {1}",
new Object[]{podId, info.getState().getWaiting(), info.getName()});
listener.getLogger().printf("Container is waiting %1$s [%3$s]: %2$s",
podId, info.getState().getWaiting(), info.getName());
Expand Down Expand Up @@ -207,7 +214,7 @@ public void launch(SlaveComputer computer, TaskListener listener) {
if (slave.getComputer().isOnline()) {
break;
}
LOGGER.log(Level.INFO, "Waiting for slave to connect ({1}/{2}): {0}", new Object[]{podId, i, j});
LOGGER.log(INFO, "Waiting for slave to connect ({1}/{2}): {0}", new Object[]{podId, i, j});
listener.getLogger().printf("Waiting for slave to connect (%2$s/%3$s): %1$s", podId, i, j);
Thread.sleep(1000);
}
Expand All @@ -229,6 +236,12 @@ public void launch(SlaveComputer computer, TaskListener listener) {
throw Throwables.propagate(ex);
}
launched = true;
try {
// We need to persist the "launched" setting...
slave.save();
} catch (IOException e) {
LOGGER.log(Level.WARNING, "Could not save() agent: " + e.getMessage(), e);
}
}

private Pod getPodTemplate(KubernetesSlave slave, PodTemplate template) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ public class ContainerExecDecorator extends LauncherDecorator implements Seriali
private static final String JENKINS_HOME = "JENKINS_HOME=";
private static final Logger LOGGER = Logger.getLogger(ContainerExecDecorator.class.getName());

private final transient KubernetesClient client;
private transient KubernetesClient client;

@SuppressFBWarnings(value = "SE_TRANSIENT_FIELD_NOT_RESTORED", justification = "not needed on deserialization")
private final transient List<Closeable> closables = new ArrayList<>();
private transient List<Closeable> closables;
private final String podName;
private final String namespace;
private final String containerName;
Expand Down Expand Up @@ -239,6 +239,9 @@ public void onClose(int i, String s) {
this.setupEnvironmentVariable(envVars, watch);
doExec(watch, printStream, commands);
ContainerExecProc proc = new ContainerExecProc(watch, alive, finished, exitCodeOutputStream::getExitCode);
if (closables == null) {
closables = new ArrayList<>();
}
closables.add(proc);
return proc;
} catch (InterruptedException ie) {
Expand Down Expand Up @@ -313,6 +316,8 @@ private void waitUntilContainerIsReady() throws IOException {

@Override
public void close() throws IOException {
if (closables == null) return;

for (Closeable closable : closables) {
try {
closable.close();
Expand Down Expand Up @@ -378,6 +383,10 @@ private static void closeWatch(ExecWatch watch) {
}
}

/**
 * Replaces the Kubernetes client held by this decorator.
 * The {@code client} field is transient, so it is not carried through serialization;
 * this setter lets a caller supply a client afterwards.
 *
 * @param client the client this decorator should use from now on
 */
public void setKubernetesClient(KubernetesClient client) {
this.client = client;
}

/**
* Keeps the last bytes of the output stream to parse the exit code
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,45 @@
import java.io.Closeable;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jenkinsci.plugins.workflow.steps.AbstractStepExecutionImpl;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.jenkinsci.plugins.workflow.steps.BodyExecutionCallback;
import org.jenkinsci.plugins.workflow.steps.BodyInvoker;
import org.jenkinsci.plugins.workflow.steps.EnvironmentExpander;
import org.jenkinsci.plugins.workflow.steps.StepContext;

import hudson.EnvVars;
import hudson.LauncherDecorator;
import io.fabric8.kubernetes.client.KubernetesClient;
import org.jenkinsci.plugins.workflow.steps.StepExecution;

import javax.annotation.Nonnull;

import static org.csanchez.jenkins.plugins.kubernetes.pipeline.Resources.closeQuietly;

public class ContainerStepExecution extends AbstractStepExecutionImpl {
public class ContainerStepExecution extends StepExecution {

private static final long serialVersionUID = 7634132798345235774L;

private static final transient Logger LOGGER = Logger.getLogger(ContainerStepExecution.class.getName());

private final ContainerStep step;
@SuppressFBWarnings(value = "SE_TRANSIENT_FIELD_NOT_RESTORED", justification = "not needed on deserialization")
private final transient ContainerStep step;

private transient KubernetesClient client;
private transient ContainerExecDecorator decorator;
private ContainerExecDecorator decorator;

/**
 * Re-establishes the Kubernetes connection when this execution is resumed.
 * A new client is obtained through a {@link KubernetesNodeContext} built from the step
 * context, stored in the transient {@code client} field and handed to the decorator.
 * Any exception raised while doing so is reported to the step context as a failure.
 */
@Override
// TODO Revisit for JENKINS-40161
public void onResume() {
    super.onResume();
    LOGGER.log(Level.FINE, "onResume");
    try {
        client = new KubernetesNodeContext(getContext()).connectToCloud();
        decorator.setKubernetesClient(client);
    } catch (Exception e) {
        // Surface the problem on the step itself instead of letting it escape the resume hook.
        getContext().onFailure(e);
    }
}

ContainerStepExecution(ContainerStep step, StepContext context) {
super(context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.csanchez.jenkins.plugins.kubernetes.KubernetesCloud;
import org.csanchez.jenkins.plugins.kubernetes.PodImagePullSecret;
import org.csanchez.jenkins.plugins.kubernetes.PodTemplate;
Expand All @@ -30,11 +30,16 @@ public class PodTemplateStepExecution extends AbstractStepExecutionImpl {

private static final transient String NAME_FORMAT = "%s-%s";

private final PodTemplateStep step;
@SuppressFBWarnings(value = "SE_TRANSIENT_FIELD_NOT_RESTORED", justification = "not needed on deserialization")
private final transient PodTemplateStep step;
private final String cloudName;

private PodTemplate newTemplate = null;

/**
 * Creates the execution for a pod template step.
 * The cloud name is read from the step at construction time and kept in its own field,
 * so it is still available where the transient {@code step} reference is not.
 *
 * @param step the step being executed; its {@code getCloud()} value is stored as the cloud name
 * @param context the step context, passed on to the superclass
 */
PodTemplateStepExecution(PodTemplateStep step, StepContext context) {
super(context);
this.step = step;
this.cloudName = step.getCloud();
}

@Override
Expand All @@ -59,7 +64,7 @@ public boolean start() throws Exception {
String name = String.format(NAME_FORMAT, step.getName(), randString);
String namespace = checkNamespace(kubernetesCloud, namespaceAction);

PodTemplate newTemplate = new PodTemplate();
newTemplate = new PodTemplate();
newTemplate.setName(name);
newTemplate.setNamespace(namespace);
newTemplate.setInheritFrom(!Strings.isNullOrEmpty( podTemplateAction.getParentTemplates()) ? podTemplateAction.getParentTemplates() : step.getInheritFrom());
Expand Down Expand Up @@ -124,10 +129,10 @@ private PodTemplateCallback(PodTemplate podTemplate) {
* Remove the template after step is done
*/
protected void finished(StepContext context) throws Exception {
Cloud cloud = Jenkins.getInstance().getCloud(step.getCloud());
Cloud cloud = Jenkins.getInstance().getCloud(cloudName);
if (cloud == null) {
LOGGER.log(Level.WARNING, "Cloud {0} no longer exists, cannot delete pod template {1}",
new Object[] { step.getCloud(), podTemplate.getName() });
new Object[] { cloudName, podTemplate.getName() });
return;
}
if (cloud instanceof KubernetesCloud) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public static boolean deletePods(KubernetesClient client, Map<String, String> la

if (client != null) {

// wait for 30 seconds for all pods to be terminated
// wait for 90 seconds for all pods to be terminated
if (wait) {
LOGGER.log(INFO, "Waiting for pods to terminate");
ForkJoinPool forkJoinPool = new ForkJoinPool(1);
Expand All @@ -119,7 +119,7 @@ public static boolean deletePods(KubernetesClient client, Map<String, String> la
LOGGER.log(INFO, "Waiting for pods to terminate - interrupted");
return true;
}
})).get(60, TimeUnit.SECONDS);
})).get(90, TimeUnit.SECONDS);
} catch (TimeoutException e) {
LOGGER.log(INFO, "Waiting for pods to terminate - timed out");
// job not done in interval
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,11 @@
import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition;
import org.jenkinsci.plugins.workflow.job.WorkflowJob;
import org.jenkinsci.plugins.workflow.job.WorkflowRun;
import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.model.Statement;
import org.jvnet.hudson.test.JenkinsRuleNonLocalhost;
import org.jvnet.hudson.test.RestartableJenkinsRule;

import hudson.model.Node;
import hudson.slaves.DumbSlave;
import hudson.slaves.NodeProperty;
import hudson.slaves.RetentionStrategy;
import io.fabric8.kubernetes.api.model.NamespaceBuilder;
import io.fabric8.kubernetes.client.KubernetesClient;

Expand All @@ -59,8 +52,6 @@ public class KubernetesPipelineTest extends AbstractKubernetesPipelineTest {

private static final Logger LOGGER = Logger.getLogger(KubernetesPipelineTest.class.getName());

@Rule
public RestartableJenkinsRule story = new RestartableJenkinsRule();
@Rule
public TemporaryFolder tmp = new TemporaryFolder();

Expand All @@ -82,7 +73,7 @@ public void runInPod() throws Exception {
PodTemplate template = templates.get(0);
assertEquals(Integer.MAX_VALUE, template.getInstanceCap());
r.assertBuildStatusSuccess(r.waitForCompletion(b));
r.assertLogContains("PID file contents: ", b);
r.assertLogContains("script file contents: ", b);
assertFalse("There are pods leftover after test execution, see previous logs",
deletePods(cloud.connect(), KubernetesCloud.DEFAULT_POD_LABELS, true));
}
Expand Down Expand Up @@ -259,39 +250,6 @@ public void runInPodWithLivenessProbe() throws Exception {
r.assertLogContains("Still alive", b);
}

// The @Test annotation below is commented out, so this method is not picked up as a test.
// @Test
/**
 * Two-step scenario registered on the {@code story} rule: the first step starts a pipeline
 * build and waits until it reaches a semaphore, the second step releases the semaphore and
 * checks the finished build.
 * NOTE(review): presumably Jenkins is restarted between the two steps by the rule - confirm.
 */
public void runInPodWithRestart() throws Exception {
// Step 1: set up the cloud, a node and the job, then start the build.
story.addStep(new Statement() {
@Override
public void evaluate() throws Throwable {
// Register a Kubernetes cloud named "test".
story.j.jenkins.clouds.add(new KubernetesCloud("test"));

// Add a node named "slave" whose remote FS lives in a temporary folder.
story.j.jenkins.addNode(new DumbSlave("slave", "dummy", tmp.newFolder("remoteFS").getPath(), "1",
Node.Mode.NORMAL, "", story.j.createComputerLauncher(null), RetentionStrategy.NOOP,
Collections.<NodeProperty<?>>emptyList())); // TODO JENKINS-26398 clumsy
// Create job "p" from the runInPodWithRestart.groovy pipeline script.
WorkflowJob p = story.j.jenkins.createProject(WorkflowJob.class, "p");
p.setDefinition(new CpsFlowDefinition(loadPipelineScript("runInPodWithRestart.groovy")
, true));
WorkflowRun b = p.scheduleBuild2(0).waitForStart();
// Block until the build has reached the "withDisplayAfterRestart/1" semaphore.
SemaphoreStep.waitForStart("withDisplayAfterRestart/1", b);
}
});
// Step 2: release the semaphore and verify the build that was started in step 1.
story.addStep(new Statement() {
@SuppressWarnings("SleepWhileInLoop")
@Override
public void evaluate() throws Throwable {
SemaphoreStep.success("withDisplayAfterRestart/1", null);
// Look the job and its first build up again by name and number.
WorkflowJob p = story.j.jenkins.getItemByFullName("p", WorkflowJob.class);
assertNotNull(p);
WorkflowRun b = p.getBuildByNumber(1);
assertNotNull(b);
story.j.assertBuildStatusSuccess(story.j.waitForCompletion(b));
story.j.assertLogContains("DISPLAY=:", b);
// NOTE(review): this assertion goes through `r` while the rest of the method uses `story.j` - confirm intended.
r.assertLogContains("xxx", b);
}
});
}

@Test
public void runWithActiveDeadlineSeconds() throws Exception {
WorkflowJob p = r.jenkins.createProject(WorkflowJob.class, "Deadline");
Expand Down
Loading

0 comments on commit 3ee086b

Please sign in to comment.