From ef2a2ac0fa51dfcedde9e383ff6c5758157c111c Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 13 Jun 2019 13:18:17 +0200 Subject: [PATCH 01/13] Restart preempted jobs v2 --- .../computeengine/ComputeEngineComputer.java | 57 +++++++- .../ComputeEngineComputerListener.java | 6 +- .../computeengine/ComputeEngineInstance.java | 2 +- .../ComputeEngineRetentionStrategy.java | 137 ++++++++++++++++++ .../ComputeEngineWindowsLauncher.java | 1 - .../computeengine/InstanceConfiguration.java | 7 +- .../computeengine/PreemptedCheckCallable.java | 73 ++++++++++ .../computeengine/client/ComputeClient.java | 4 +- .../plugins/computeengine/Messages.properties | 3 +- 9 files changed, 276 insertions(+), 14 deletions(-) create mode 100644 src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java create mode 100644 src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 5c81877f..7c5a74db 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -17,10 +17,17 @@ package com.google.jenkins.plugins.computeengine; import com.google.api.services.compute.model.Instance; +import hudson.model.Executor; +import hudson.model.Result; +import hudson.model.TaskListener; +import hudson.remoting.Channel; import hudson.slaves.AbstractCloudComputer; import java.io.IOException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; +import jenkins.model.CauseOfInterruption; import org.kohsuke.stapler.DataBoundSetter; import org.kohsuke.stapler.HttpRedirect; import org.kohsuke.stapler.HttpResponse; @@ -28,6 +35,7 @@ public class ComputeEngineComputer extends AbstractCloudComputer { private volatile Instance instance; + private Future preemptedFuture; private static final Logger LOGGER = Logger.getLogger(ComputeEngineCloud.class.getName()); @@ -40,10 +48,57 @@ public ComputeEngineInstance getNode() { return (ComputeEngineInstance) super.getNode(); } - public void onConnected() { + void onConnected(TaskListener listener) throws IOException { ComputeEngineInstance node = getNode(); if (node != null) { node.onConnected(); + if (getPreemptible()) { + String nodeName = node.getNodeName(); + LOGGER.log( + Level.INFO, "Instance " + nodeName + " is preemptive, setting up preemption listener"); + preemptedFuture = getChannel().callAsync(new PreemptedCheckCallable(listener)); + getChannel() + .addListener( + new Channel.Listener() { + @Override + public void onClosed(Channel channel, IOException cause) { + LOGGER.log(Level.FINE, "Got channel close event"); + if (getPreempted()) { + LOGGER.log( + Level.FINE, "Preempted node channel closed, terminating all executors"); + getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); + } + } + }); + } + } + } + + private void interruptExecutor(Executor executor, String nodeName) { + LOGGER.log(Level.INFO, "Terminating executor " + executor + " node " + nodeName); + executor.interrupt( + Result.FAILURE, + new CauseOfInterruption() { + @Override + public String getShortDescription() { + return "Instance " + nodeName + " was preempted"; + } + }); + } + + boolean getPreemptible() { + try { + return getInstance().getScheduling().getPreemptible(); + } catch (IOException | NullPointerException e) { + return false; + } + } + + boolean getPreempted() { + try { + return preemptedFuture != null && preemptedFuture.isDone() && preemptedFuture.get(); + } catch (InterruptedException | ExecutionException e) { + return false; } } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java index a1c06b3b..6d6b7ac7 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java @@ -20,13 +20,15 @@ import hudson.model.Computer; import hudson.model.TaskListener; import hudson.slaves.ComputerListener; +import java.io.IOException; @Extension public class ComputeEngineComputerListener extends ComputerListener { @Override - public void onOnline(Computer c, TaskListener listener) { + public void onOnline(Computer c, TaskListener listener) throws IOException { if (c instanceof ComputeEngineComputer) { - ((ComputeEngineComputer) c).onConnected(); + ComputeEngineComputer computer = (ComputeEngineComputer) c; + computer.onConnected(listener); } } } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java index 6c08a9e4..595d9832 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java @@ -116,7 +116,7 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted // If the instance is running, attempt to terminate it. This is an asynch call and we // return immediately, hoping for the best. - cloud.getClient().terminateInstanceWithStatus(cloud.getProjectId(), zone, name, "RUNNING"); + cloud.getClient().terminateInstance(cloud.getProjectId(), zone, name); } catch (CloudNotFoundException cnfe) { listener.error(cnfe.getMessage()); } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java new file mode 100644 index 00000000..15dc26c5 --- /dev/null +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -0,0 +1,137 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.jenkins.plugins.computeengine; + +import com.google.common.collect.ImmutableList; +import hudson.model.Action; +import hudson.model.Cause; +import hudson.model.CauseAction; +import hudson.model.Executor; +import hudson.model.ExecutorListener; +import hudson.model.Job; +import hudson.model.Queue; +import hudson.security.ACL; +import hudson.security.ACLContext; +import hudson.slaves.RetentionStrategy; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import jenkins.model.Jenkins; +import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; + +/** + * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of + * preemption of GCP instance to restart preempted tasks + */ +public class ComputeEngineRetentionStrategy extends RetentionStrategy + implements ExecutorListener { + private static final Logger LOGGER = + Logger.getLogger(ComputeEngineRetentionStrategy.class.getName()); + + private final OnceRetentionStrategy delegate; + private final boolean oneShot; + + /** + * Creates the retention strategy. + * + * @param retentionTimeMinutes number of minutes of idleness after which to kill the slave; serves + * a backup in case the strategy fails to detect the end of a task + * @param oneShot create one shot instance strategy + */ + ComputeEngineRetentionStrategy(int retentionTimeMinutes, boolean oneShot) { + this.oneShot = oneShot; + delegate = new OnceRetentionStrategy(retentionTimeMinutes); + } + + @Override + public long check(ComputeEngineComputer c) { + return delegate.check(c); + } + + @Override + public void start(ComputeEngineComputer c) { + delegate.start(c); + } + + @Override + public void taskAccepted(Executor executor, Queue.Task task) { + if (oneShot) { + delegate.taskAccepted(executor, task); + } + } + + @Override + public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { + checkPreempted(executor, task); + if (oneShot) { + delegate.taskCompleted(executor, task, durationMS); + } + } + + @Override + public void taskCompletedWithProblems( + Executor executor, Queue.Task task, long durationMS, Throwable problems) { + checkPreempted(executor, task); + if (oneShot) { + delegate.taskCompletedWithProblems(executor, task, durationMS, problems); + } + } + + private Queue.Task getBaseTask(Queue.Task task) { + Queue.Task parent = task.getOwnerTask(); + while (task != parent) { + task = parent; + parent = task.getOwnerTask(); + } + return parent; + } + + private void checkPreempted(Executor executor, Queue.Task task) { + ComputeEngineComputer computer = (ComputeEngineComputer) executor.getOwner(); + final boolean preemptible = computer.getPreemptible(); + final boolean preempted = computer.getPreempted(); + Queue.Task baseTask = getBaseTask(task); + if (preemptible && preempted) { + LOGGER.log(Level.INFO, baseTask + " is preemptive and was preempted"); + List actions = generateActionsForTask(task); + try (ACLContext notUsed = ACL.as(task.getDefaultAuthentication())) { + Jenkins.get().getQueue().schedule2(baseTask, 0, actions); + } + } else if (preemptible) { + LOGGER.log(Level.INFO, baseTask + " is preemptive and was NOT preempted"); + } + } + + private List generateActionsForTask(Queue.Task task) { + Queue.Task baseTask = getBaseTask(task); + try { + final Job job = (Job) baseTask; + final List causes = job.getLastBuild().getCauses(); + LOGGER.log(Level.FINE, "Causes: " + causes); + } catch (Exception e) { + LOGGER.log(Level.WARNING, "Exception for " + baseTask, e); + } + return ImmutableList.of( + new CauseAction(new Cause.UserIdCause()), new CauseAction(new RebuildCause())); + } + + public static class RebuildCause extends Cause { + @Override + public String getShortDescription() { + return Messages.RebuildCause_ShortDescription(); + } + } +} diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java index 01687ed5..765e452c 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java @@ -28,7 +28,6 @@ import java.net.InetSocketAddress; import java.net.Proxy; import java.util.Optional; -import java.util.logging.Level; import java.util.logging.Logger; import jenkins.model.Jenkins; import lombok.Getter; diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java b/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java index 12668b3a..13c4ddbc 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java @@ -50,7 +50,6 @@ import hudson.model.Node; import hudson.model.TaskListener; import hudson.model.labels.LabelAtom; -import hudson.slaves.CloudRetentionStrategy; import hudson.util.FormValidation; import hudson.util.ListBoxModel; import java.io.IOException; @@ -73,7 +72,6 @@ import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang.StringUtils; import org.apache.commons.text.RandomStringGenerator; -import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; import org.kohsuke.stapler.AncestorInPath; import org.kohsuke.stapler.DataBoundConstructor; import org.kohsuke.stapler.DataBoundSetter; @@ -315,10 +313,7 @@ public ComputeEngineInstance provision(TaskListener listener) throws IOException .mode(mode) .labelString(labels) .launcher(launcher) - .retentionStrategy( - oneShot - ? new OnceRetentionStrategy(retentionTimeMinutes) - : new CloudRetentionStrategy(retentionTimeMinutes)) + .retentionStrategy(new ComputeEngineRetentionStrategy(retentionTimeMinutes, oneShot)) .launchTimeout(getLaunchTimeoutMillis()) .javaExecPath(javaExecPath) .sshKeyPair(sshKeyPair) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java b/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java new file mode 100644 index 00000000..ff40d36d --- /dev/null +++ b/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java @@ -0,0 +1,73 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.jenkins.plugins.computeengine; + +import com.google.api.client.http.GenericUrl; +import com.google.api.client.http.HttpHeaders; +import com.google.api.client.http.HttpRequest; +import com.google.api.client.http.HttpResponse; +import com.google.api.client.http.HttpTransport; +import com.google.api.client.http.javanet.NetHttpTransport; +import hudson.model.TaskListener; +import java.io.IOException; +import jenkins.security.MasterToSlaveCallable; +import org.apache.commons.io.Charsets; +import org.apache.commons.io.IOUtils; + +/** + * Slave callback class checking if instance was preempted. All of code here is serialized and + * executed on node. + */ +final class PreemptedCheckCallable extends MasterToSlaveCallable { + private static final String METADATA_SERVER_URL = + "http://metadata.google.internal/computeMetadata/v1/instance/preempted?wait_for_change=true"; + + private final TaskListener listener; + + /** + * Callback constructor. + * + * @param listener Node listener on which we can add extra information from slave side. + */ + PreemptedCheckCallable(TaskListener listener) { + this.listener = listener; + } + + /** + * Actual callback code, executed on node side. Checks in Google metadata server if instance was + * preempted. + * + *

See + * https://cloud.google.com/compute/docs/instances/create-start-preemptible-instance#detecting_if_an_instance_was_preempted + * + * @return True if node was preempted. + * @throws IOException Exception when calling Google metadata API + */ + @Override + public Boolean call() throws IOException { + HttpTransport transport = new NetHttpTransport(); + GenericUrl metadata = new GenericUrl(METADATA_SERVER_URL); + HttpRequest request = transport.createRequestFactory().buildGetRequest(metadata); + request.setHeaders(new HttpHeaders().set("Metadata-Flavor", "Google")); + request.setReadTimeout(Integer.MAX_VALUE); + listener.getLogger().println("Preemptive instance, listening to metadata for preemption event"); + HttpResponse response = request.execute(); + final String result = IOUtils.toString(response.getContent(), Charsets.UTF_8); + listener.getLogger().println("Got preemption event " + result); + response.disconnect(); + return "TRUE".equals(result); + } +} diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java b/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java index 26aa0aef..d1e78657 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java @@ -359,10 +359,10 @@ public Operation insertInstance(String projectId, String template, Instance inst return insert.execute(); } - public Operation terminateInstance(String projectId, String zone, String InstanceId) + public Operation terminateInstance(String projectId, String zone, String instanceId) throws IOException { zone = nameFromSelfLink(zone); - return compute.instances().delete(projectId, zone, InstanceId).execute(); + return compute.instances().delete(projectId, zone, instanceId).execute(); } public Operation terminateInstanceWithStatus( diff --git a/src/main/resources/com/google/jenkins/plugins/computeengine/Messages.properties b/src/main/resources/com/google/jenkins/plugins/computeengine/Messages.properties index 233a63c1..84f4fd59 100644 --- a/src/main/resources/com/google/jenkins/plugins/computeengine/Messages.properties +++ b/src/main/resources/com/google/jenkins/plugins/computeengine/Messages.properties @@ -11,4 +11,5 @@ # License. ComputeEngineCloud.DisplayName=Google Compute Engine ComputeEngineAgent.DisplayName=Google Compute Engine -InstanceConfiguration.SnapshotConfigError=One-shot must be enabled to create snapshots \ No newline at end of file +InstanceConfiguration.SnapshotConfigError=One-shot must be enabled to create snapshots +RebuildCause.ShortDescription=Rebuilding preempted job From 052d59ae553c28b686184b30eed4a4baa84a8583 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 13 Jun 2019 13:31:13 +0200 Subject: [PATCH 02/13] Moving to lombok logger --- .../computeengine/ComputeEngineComputer.java | 20 +++++++------------ .../ComputeEngineRetentionStrategy.java | 13 ++++++------ 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 7c5a74db..3d4255e9 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -26,35 +26,29 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.logging.Level; -import java.util.logging.Logger; import jenkins.model.CauseOfInterruption; +import lombok.extern.java.Log; import org.kohsuke.stapler.DataBoundSetter; import org.kohsuke.stapler.HttpRedirect; import org.kohsuke.stapler.HttpResponse; +@Log public class ComputeEngineComputer extends AbstractCloudComputer { private volatile Instance instance; private Future preemptedFuture; - private static final Logger LOGGER = Logger.getLogger(ComputeEngineCloud.class.getName()); - public ComputeEngineComputer(ComputeEngineInstance slave) { super(slave); } - @Override - public ComputeEngineInstance getNode() { - return (ComputeEngineInstance) super.getNode(); - } - void onConnected(TaskListener listener) throws IOException { ComputeEngineInstance node = getNode(); if (node != null) { node.onConnected(); if (getPreemptible()) { String nodeName = node.getNodeName(); - LOGGER.log( + log.log( Level.INFO, "Instance " + nodeName + " is preemptive, setting up preemption listener"); preemptedFuture = getChannel().callAsync(new PreemptedCheckCallable(listener)); getChannel() @@ -62,9 +56,9 @@ void onConnected(TaskListener listener) throws IOException { new Channel.Listener() { @Override public void onClosed(Channel channel, IOException cause) { - LOGGER.log(Level.FINE, "Got channel close event"); + log.log(Level.FINE, "Got channel close event"); if (getPreempted()) { - LOGGER.log( + log.log( Level.FINE, "Preempted node channel closed, terminating all executors"); getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); } @@ -75,7 +69,7 @@ public void onClosed(Channel channel, IOException cause) { } private void interruptExecutor(Executor executor, String nodeName) { - LOGGER.log(Level.INFO, "Terminating executor " + executor + " node " + nodeName); + log.log(Level.INFO, "Terminating executor " + executor + " node " + nodeName); executor.interrupt( Result.FAILURE, new CauseOfInterruption() { @@ -179,7 +173,7 @@ public HttpResponse doDoDelete() throws IOException { node.terminate(); } catch (InterruptedException ie) { // Termination Exception - LOGGER.log(Level.WARNING, "Node Termination Error", ie); + log.log(Level.WARNING, "Node Termination Error", ie); } } return new HttpRedirect(".."); diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java index 15dc26c5..241ab33d 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -28,18 +28,17 @@ import hudson.slaves.RetentionStrategy; import java.util.List; import java.util.logging.Level; -import java.util.logging.Logger; import jenkins.model.Jenkins; +import lombok.extern.java.Log; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; /** * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of * preemption of GCP instance to restart preempted tasks */ +@Log public class ComputeEngineRetentionStrategy extends RetentionStrategy implements ExecutorListener { - private static final Logger LOGGER = - Logger.getLogger(ComputeEngineRetentionStrategy.class.getName()); private final OnceRetentionStrategy delegate; private final boolean oneShot; @@ -105,13 +104,13 @@ private void checkPreempted(Executor executor, Queue.Task task) { final boolean preempted = computer.getPreempted(); Queue.Task baseTask = getBaseTask(task); if (preemptible && preempted) { - LOGGER.log(Level.INFO, baseTask + " is preemptive and was preempted"); + log.log(Level.INFO, baseTask + " is preemptive and was preempted"); List actions = generateActionsForTask(task); try (ACLContext notUsed = ACL.as(task.getDefaultAuthentication())) { Jenkins.get().getQueue().schedule2(baseTask, 0, actions); } } else if (preemptible) { - LOGGER.log(Level.INFO, baseTask + " is preemptive and was NOT preempted"); + log.log(Level.INFO, baseTask + " is preemptive and was NOT preempted"); } } @@ -120,9 +119,9 @@ private List generateActionsForTask(Queue.Task task) { try { final Job job = (Job) baseTask; final List causes = job.getLastBuild().getCauses(); - LOGGER.log(Level.FINE, "Causes: " + causes); + log.log(Level.FINE, "Causes: " + causes); } catch (Exception e) { - LOGGER.log(Level.WARNING, "Exception for " + baseTask, e); + log.log(Level.WARNING, "Exception for " + baseTask, e); } return ImmutableList.of( new CauseAction(new Cause.UserIdCause()), new CauseAction(new RebuildCause())); From f6ba90e928cbf60834b6558fdbe8a8162390f961 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 13 Jun 2019 14:56:35 +0200 Subject: [PATCH 03/13] Add integration tests --- .../computeengine/ComputeEngineCloud.java | 6 +- .../computeengine/ComputeEngineComputer.java | 4 +- .../computeengine/InstanceConfiguration.java | 13 +- .../computeengine/client/ComputeClient.java | 4 + .../ComputeEngineCloudRestartPreemptedIT.java | 121 ++++++++++++++++++ 5 files changed, 137 insertions(+), 11 deletions(-) create mode 100644 src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java index af769e54..bc46bb58 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java @@ -43,7 +43,7 @@ import hudson.util.FormValidation; import hudson.util.HttpResponses; import hudson.util.ListBoxModel; -import hudson.util.StreamTaskListener; + import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; @@ -251,7 +251,7 @@ public Collection provision(Label label, int excessWorkload) { // Get next config in round robin fashion InstanceConfiguration config = configs.get(i % configs.size()); - final ComputeEngineInstance node = config.provision(StreamTaskListener.fromStdout()); + final ComputeEngineInstance node = config.provision(); Jenkins.get().addNode(node); result.add(createPlannedNode(config, node)); excessWorkload -= node.getNumExecutors(); @@ -408,7 +408,7 @@ public HttpResponse doProvision(@QueryParameter String configuration) throw HttpResponses.error(SC_BAD_REQUEST, "No such Instance Configuration: " + configuration); } - ComputeEngineInstance node = c.provision(StreamTaskListener.fromStdout()); + ComputeEngineInstance node = c.provision(); if (node == null) throw HttpResponses.error(SC_BAD_REQUEST, "Could not provision new node."); Jenkins.get().addNode(node); diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 3d4255e9..360be132 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -80,7 +80,7 @@ public String getShortDescription() { }); } - boolean getPreemptible() { + public boolean getPreemptible() { try { return getInstance().getScheduling().getPreemptible(); } catch (IOException | NullPointerException e) { @@ -88,7 +88,7 @@ boolean getPreemptible() { } } - boolean getPreempted() { + public boolean getPreempted() { try { return preemptedFuture != null && preemptedFuture.isDone() && preemptedFuture.get(); } catch (InterruptedException | ExecutionException e) { diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java b/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java index 13c4ddbc..882788d5 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/InstanceConfiguration.java @@ -48,12 +48,10 @@ import hudson.model.Descriptor; import hudson.model.Label; import hudson.model.Node; -import hudson.model.TaskListener; import hudson.model.labels.LabelAtom; import hudson.util.FormValidation; import hudson.util.ListBoxModel; import java.io.IOException; -import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -63,12 +61,15 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; + import jenkins.model.Jenkins; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Getter; import lombok.Setter; +import lombok.extern.java.Log; import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang.StringUtils; import org.apache.commons.text.RandomStringGenerator; @@ -83,6 +84,7 @@ * until lombok 1.18.8 is released. */ @Builder(builderClassName = "Builder", buildMethodName = "notbuild") @AllArgsConstructor +@Log public class InstanceConfiguration implements Describable { public static final String SSH_METADATA_KEY = "ssh-keys"; public static final Long DEFAULT_BOOT_DISK_SIZE_GB = 10L; @@ -274,14 +276,13 @@ public void appendLabel(String key, String value) { googleLabels.put(key, value); } - public ComputeEngineInstance provision(TaskListener listener) throws IOException { - PrintStream logger = listener.getLogger(); + public ComputeEngineInstance provision() throws IOException { try { Instance instance = instance(); // TODO: JENKINS-55285 Operation operation = cloud.getClient().insertInstance(cloud.getProjectId(), template, instance); - logger.println("Sent insert request for instance configuration [" + description + "]"); + log.info("Sent insert request for instance configuration [" + description + "]"); String targetRemoteFs = this.remoteFs; ComputeEngineComputerLauncher launcher; if (this.windowsConfiguration != null) { @@ -319,7 +320,7 @@ public ComputeEngineInstance provision(TaskListener listener) throws IOException .sshKeyPair(sshKeyPair) .build(); } catch (Descriptor.FormException fe) { - logger.printf("Error provisioning instance: %s", fe.getMessage()); + log.log(Level.WARNING, "Error provisioning instance: " + fe.getMessage(), fe); return null; } } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java b/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java index d1e78657..cbc54cdb 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/client/ComputeClient.java @@ -594,4 +594,8 @@ public Operation.Error waitForOperationCompletion( } return operation.getError(); } + + public void simulateMaintenanceEvent(String projectId, String zone, String instanceId) throws IOException { + compute.instances().simulateMaintenanceEvent(projectId, zone, instanceId).execute(); + } } diff --git a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java new file mode 100644 index 00000000..161dd91b --- /dev/null +++ b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java @@ -0,0 +1,121 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.jenkins.plugins.computeengine.integration; + +import com.google.common.collect.Lists; +import com.google.jenkins.plugins.computeengine.ComputeEngineCloud; +import com.google.jenkins.plugins.computeengine.ComputeEngineComputer; +import com.google.jenkins.plugins.computeengine.InstanceConfiguration; +import com.google.jenkins.plugins.computeengine.client.ComputeClient; +import hudson.model.Computer; +import hudson.model.Node; +import hudson.model.labels.LabelAtom; +import hudson.slaves.NodeProvisioner.PlannedNode; +import lombok.extern.java.Log; +import org.awaitility.Awaitility; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.jvnet.hudson.test.JenkinsRule; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.DEB_JAVA_STARTUP_SCRIPT; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.LABEL; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.NULL_TEMPLATE; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.NUM_EXECUTORS; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.PROJECT_ID; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.TEST_TIMEOUT_MULTIPLIER; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.ZONE; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.getLabel; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initClient; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initCloud; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.initCredentials; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.instanceConfigurationBuilder; +import static com.google.jenkins.plugins.computeengine.integration.ITUtil.teardownResources; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Integration test suite for {@link ComputeEngineCloud}. + * Verifies that instances with preempted flag will be restarted when preempted. + */ +@Log +public class ComputeEngineCloudRestartPreemptedIT { + + @ClassRule + public static Timeout timeout = new Timeout(15 * TEST_TIMEOUT_MULTIPLIER, TimeUnit.MINUTES); + + @ClassRule public static JenkinsRule jenkinsRule = new JenkinsRule(); + + private static ComputeClient client; + private static Map label = + getLabel(ComputeEngineCloudRestartPreemptedIT.class); + private static Collection planned; + + @BeforeClass + public static void init() throws Exception { + log.info("init"); + initCredentials(jenkinsRule); + ComputeEngineCloud cloud = initCloud(jenkinsRule); + client = initClient(jenkinsRule, label, log); + + InstanceConfiguration configuration = + instanceConfigurationBuilder() + .startupScript(DEB_JAVA_STARTUP_SCRIPT) + .numExecutorsStr(NUM_EXECUTORS) + .labels(LABEL) + .template(NULL_TEMPLATE) + .preemptible(true) + .googleLabels(label) + .build(); + + cloud.setConfigurations(Lists.newArrayList(configuration)); + + planned = cloud.provision(new LabelAtom(LABEL), 1); + } + + @AfterClass + public static void teardown() throws IOException { + teardownResources(client, label, log); + } + + @Test + public void testIfNodeWasPreempted() throws Exception { + Iterator iterator = planned.iterator(); + PlannedNode plannedNode = iterator.next(); + String name = plannedNode.displayName; + plannedNode.future.get(); + Node node = jenkinsRule.jenkins.getNode(name); + + ComputeEngineComputer computer = (ComputeEngineComputer) node.toComputer(); + assertTrue("Configuration was set as preemptible but saw as not", computer.getPreemptible()); + + System.out.println("Sending mainatanance event to " + name); + client.simulateMaintenanceEvent(PROJECT_ID, ZONE, name); + Awaitility.await() + .timeout(1, TimeUnit.MINUTES) + .until(computer::getPreempted); + } +} From 49a5b6cc0cde9b15b71809ce2dce81173eb2b7ae Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Fri, 14 Jun 2019 11:24:42 +0200 Subject: [PATCH 04/13] Post review fixes --- .../computeengine/ComputeEngineComputer.java | 12 ++++- .../ComputeEngineRetentionStrategy.java | 49 +++++++++---------- .../computeengine/PreemptedCheckCallable.java | 15 ++++-- .../ComputeEngineCloudRestartPreemptedIT.java | 12 +++-- 4 files changed, 52 insertions(+), 36 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 360be132..653fe414 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -80,6 +80,11 @@ public String getShortDescription() { }); } + /** + * Check if instance is preemptible. + * + * @return true if instance was set as preemptible. + */ public boolean getPreemptible() { try { return getInstance().getScheduling().getPreemptible(); @@ -88,6 +93,11 @@ public boolean getPreemptible() { } } + /** + * Check if instance was actually preempted. + * + * @return true if instance was preempted (we can use it to reschedule job in this case). + */ public boolean getPreempted() { try { return preemptedFuture != null && preemptedFuture.isDone() && preemptedFuture.get(); @@ -168,8 +178,6 @@ public HttpResponse doDoDelete() throws IOException { ComputeEngineInstance node = getNode(); if (node != null) { try { - ComputeEngineCloud cloud = getCloud(); - node.terminate(); } catch (InterruptedException ie) { // Termination Exception diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java index 241ab33d..7879887b 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -16,22 +16,17 @@ package com.google.jenkins.plugins.computeengine; import com.google.common.collect.ImmutableList; -import hudson.model.Action; -import hudson.model.Cause; -import hudson.model.CauseAction; -import hudson.model.Executor; -import hudson.model.ExecutorListener; -import hudson.model.Job; -import hudson.model.Queue; +import hudson.model.*; import hudson.security.ACL; import hudson.security.ACLContext; import hudson.slaves.RetentionStrategy; -import java.util.List; -import java.util.logging.Level; import jenkins.model.Jenkins; import lombok.extern.java.Log; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; +import java.util.List; +import java.util.logging.Level; + /** * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of * preemption of GCP instance to restart preempted tasks @@ -39,16 +34,15 @@ @Log public class ComputeEngineRetentionStrategy extends RetentionStrategy implements ExecutorListener { - private final OnceRetentionStrategy delegate; private final boolean oneShot; /** * Creates the retention strategy. * - * @param retentionTimeMinutes number of minutes of idleness after which to kill the slave; serves - * a backup in case the strategy fails to detect the end of a task - * @param oneShot create one shot instance strategy + * @param retentionTimeMinutes Number of minutes of idleness after which to kill the slave; serves + * a backup in case the strategy fails to detect the end of a task. + * @param oneShot Create one shot instance strategy. */ ComputeEngineRetentionStrategy(int retentionTimeMinutes, boolean oneShot) { this.oneShot = oneShot; @@ -74,7 +68,9 @@ public void taskAccepted(Executor executor, Queue.Task task) { @Override public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { - checkPreempted(executor, task); + if (wasPreempted(executor)) { + rescheduleTask(task); + } if (oneShot) { delegate.taskCompleted(executor, task, durationMS); } @@ -83,7 +79,9 @@ public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { @Override public void taskCompletedWithProblems( Executor executor, Queue.Task task, long durationMS, Throwable problems) { - checkPreempted(executor, task); + if (wasPreempted(executor)) { + rescheduleTask(task); + } if (oneShot) { delegate.taskCompletedWithProblems(executor, task, durationMS, problems); } @@ -98,19 +96,18 @@ private Queue.Task getBaseTask(Queue.Task task) { return parent; } - private void checkPreempted(Executor executor, Queue.Task task) { + private boolean wasPreempted(Executor executor) { ComputeEngineComputer computer = (ComputeEngineComputer) executor.getOwner(); - final boolean preemptible = computer.getPreemptible(); final boolean preempted = computer.getPreempted(); + return preempted; + } + + private void rescheduleTask(Queue.Task task) { Queue.Task baseTask = getBaseTask(task); - if (preemptible && preempted) { - log.log(Level.INFO, baseTask + " is preemptive and was preempted"); - List actions = generateActionsForTask(task); - try (ACLContext notUsed = ACL.as(task.getDefaultAuthentication())) { - Jenkins.get().getQueue().schedule2(baseTask, 0, actions); - } - } else if (preemptible) { - log.log(Level.INFO, baseTask + " is preemptive and was NOT preempted"); + log.log(Level.INFO, baseTask + " was preempted, rescheduling"); + List actions = generateActionsForTask(task); + try (ACLContext notUsed = ACL.as(task.getDefaultAuthentication())) { + Jenkins.get().getQueue().schedule2(baseTask, 0, actions); } } @@ -119,7 +116,7 @@ private List generateActionsForTask(Queue.Task task) { try { final Job job = (Job) baseTask; final List causes = job.getLastBuild().getCauses(); - log.log(Level.FINE, "Causes: " + causes); + log.log(Level.FINE, "Original causes: " + causes); } catch (Exception e) { log.log(Level.WARNING, "Exception for " + baseTask, e); } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java b/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java index ff40d36d..8cd37945 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/PreemptedCheckCallable.java @@ -58,11 +58,7 @@ final class PreemptedCheckCallable extends MasterToSlaveCallable Date: Fri, 14 Jun 2019 12:45:13 +0200 Subject: [PATCH 05/13] Made tests repetitive --- .../computeengine/ComputeEngineComputer.java | 5 +++-- .../ComputeEngineCloudRestartPreemptedIT.java | 13 +++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 653fe414..fe0dbf69 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -48,8 +48,9 @@ void onConnected(TaskListener listener) throws IOException { node.onConnected(); if (getPreemptible()) { String nodeName = node.getNodeName(); - log.log( - Level.INFO, "Instance " + nodeName + " is preemptive, setting up preemption listener"); + final String msg = "Instance " + nodeName + " is preemptive, setting up preemption listener"; + log.log(Level.INFO, msg); + listener.getLogger().println(msg); preemptedFuture = getChannel().callAsync(new PreemptedCheckCallable(listener)); getChannel() .addListener( diff --git a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java index 5fb7e77a..f534d199 100644 --- a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java +++ b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java @@ -71,13 +71,13 @@ public class ComputeEngineCloudRestartPreemptedIT { private static ComputeClient client; private static Map label = getLabel(ComputeEngineCloudRestartPreemptedIT.class); - private static Collection planned; + private static ComputeEngineCloud cloud; @BeforeClass public static void init() throws Exception { log.info("init"); initCredentials(jenkinsRule); - ComputeEngineCloud cloud = initCloud(jenkinsRule); + cloud = initCloud(jenkinsRule); client = initClient(jenkinsRule, label, log); InstanceConfiguration configuration = @@ -93,8 +93,6 @@ public static void init() throws Exception { .build(); cloud.setConfigurations(Lists.newArrayList(configuration)); - - planned = cloud.provision(new LabelAtom(LABEL), 1); } @AfterClass @@ -104,6 +102,7 @@ public static void teardown() throws IOException { @Test public void testIfNodeWasPreempted() throws Exception { + Collection planned = cloud.provision(new LabelAtom(LABEL), 1); Iterator iterator = planned.iterator(); PlannedNode plannedNode = iterator.next(); String name = plannedNode.displayName; @@ -113,12 +112,10 @@ public void testIfNodeWasPreempted() throws Exception { ComputeEngineComputer computer = (ComputeEngineComputer) node.toComputer(); assertTrue("Configuration was set as preemptible but saw as not", computer.getPreemptible()); - log.info("Start checking"); - Thread.sleep(TimeUnit.MINUTES.toMillis(5)); Awaitility.await() .timeout(5, TimeUnit.MINUTES) - .until(node::isAcceptingTasks); - log.info("Sending mainatanance event to " + name); + .until(() -> computer.getLog().contains("listening to metadata for preemption even")); + client.simulateMaintenanceEvent(PROJECT_ID, ZONE, name); Awaitility.await() .timeout(5, TimeUnit.MINUTES) From a3d15eec8b5e902a016771aa0a44f988ad04f86a Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Fri, 14 Jun 2019 15:49:14 +0200 Subject: [PATCH 06/13] Full blown test resheduling job --- .../computeengine/ComputeEngineCloud.java | 1 - .../computeengine/ComputeEngineComputer.java | 7 +- .../ComputeEngineRetentionStrategy.java | 9 ++- .../computeengine/InstanceConfiguration.java | 1 - .../computeengine/client/ComputeClient.java | 3 +- .../ComputeEngineCloudRestartPreemptedIT.java | 71 +++++++++---------- 6 files changed, 45 insertions(+), 47 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java index bc46bb58..05581e29 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineCloud.java @@ -43,7 +43,6 @@ import hudson.util.FormValidation; import hudson.util.HttpResponses; import hudson.util.ListBoxModel; - import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index fe0dbf69..37e1129d 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -48,7 +48,8 @@ void onConnected(TaskListener listener) throws IOException { node.onConnected(); if (getPreemptible()) { String nodeName = node.getNodeName(); - final String msg = "Instance " + nodeName + " is preemptive, setting up preemption listener"; + final String msg = + "Instance " + nodeName + " is preemptive, setting up preemption listener"; log.log(Level.INFO, msg); listener.getLogger().println(msg); preemptedFuture = getChannel().callAsync(new PreemptedCheckCallable(listener)); @@ -83,7 +84,7 @@ public String getShortDescription() { /** * Check if instance is preemptible. - * + * * @return true if instance was set as preemptible. */ public boolean getPreemptible() { @@ -96,7 +97,7 @@ public boolean getPreemptible() { /** * Check if instance was actually preempted. - * + * * @return true if instance was preempted (we can use it to reschedule job in this case). */ public boolean getPreempted() { diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java index 7879887b..a4b00eed 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -20,13 +20,12 @@ import hudson.security.ACL; import hudson.security.ACLContext; import hudson.slaves.RetentionStrategy; +import java.util.List; +import java.util.logging.Level; import jenkins.model.Jenkins; import lombok.extern.java.Log; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; -import java.util.List; -import java.util.logging.Level; - /** * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of * preemption of GCP instance to restart preempted tasks @@ -41,8 +40,8 @@ public class ComputeEngineRetentionStrategy extends RetentionStrategy label = - getLabel(ComputeEngineCloudRestartPreemptedIT.class); + private static Map label = getLabel(ComputeEngineCloudRestartPreemptedIT.class); private static ComputeEngineCloud cloud; @BeforeClass @@ -88,8 +78,6 @@ public static void init() throws Exception { .template(NULL_TEMPLATE) .preemptible(true) .googleLabels(label) - .launchTimeoutSecondsStr("10000") - .retentionTimeMinutesStr("30") .build(); cloud.setConfigurations(Lists.newArrayList(configuration)); @@ -108,17 +96,28 @@ public void testIfNodeWasPreempted() throws Exception { String name = plannedNode.displayName; plannedNode.future.get(); Node node = jenkinsRule.jenkins.getNode(name); - + ComputeEngineComputer computer = (ComputeEngineComputer) node.toComputer(); assertTrue("Configuration was set as preemptible but saw as not", computer.getPreemptible()); + FreeStyleProject project = jenkinsRule.createFreeStyleProject(); + Builder step = new Shell("sleep 20"); + project.getBuildersList().add(step); + project.setAssignedLabel(new LabelAtom(LABEL)); + QueueTaskFuture taskFuture = project.scheduleBuild2(0); + Awaitility.await() - .timeout(5, TimeUnit.MINUTES) - .until(() -> computer.getLog().contains("listening to metadata for preemption even")); - + .timeout(5, TimeUnit.MINUTES) + .until(() -> computer.getLog().contains("listening to metadata for preemption event")); + client.simulateMaintenanceEvent(PROJECT_ID, ZONE, name); + Awaitility.await().timeout(5, TimeUnit.MINUTES).until(computer::getPreempted); + + FreeStyleBuild freeStyleBuild = taskFuture.get(); + assertEquals(FAILURE, freeStyleBuild.getResult()); + Awaitility.await() - .timeout(5, TimeUnit.MINUTES) - .until(computer::getPreempted); + .timeout(5, TimeUnit.MINUTES) + .until(() -> freeStyleBuild.getNextBuild() != null && freeStyleBuild.getNextBuild().getResult() == SUCCESS); } } From 709d015dc9204767d6d413f604588ee67336b942 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Fri, 14 Jun 2019 19:31:24 +0200 Subject: [PATCH 07/13] Remove NPE errors checking --- .../plugins/computeengine/ComputeEngineComputer.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 37e1129d..6951887f 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -17,6 +17,7 @@ package com.google.jenkins.plugins.computeengine; import com.google.api.services.compute.model.Instance; +import com.google.api.services.compute.model.Scheduling; import hudson.model.Executor; import hudson.model.Result; import hudson.model.TaskListener; @@ -89,8 +90,9 @@ public String getShortDescription() { */ public boolean getPreemptible() { try { - return getInstance().getScheduling().getPreemptible(); - } catch (IOException | NullPointerException e) { + Scheduling scheduling = getInstance().getScheduling(); + return scheduling != null && scheduling.getPreemptible(); + } catch (IOException e) { return false; } } From 57c694b54c61129472b5e29e4cf4f099ca0b5747 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Fri, 14 Jun 2019 19:52:34 +0200 Subject: [PATCH 08/13] Logging + full imports --- .../computeengine/ComputeEngineComputer.java | 2 + .../ComputeEngineRetentionStrategy.java | 17 ++++++--- .../ComputeEngineCloudRestartPreemptedIT.java | 37 +++++++++++++------ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 6951887f..9db20cc6 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -93,6 +93,7 @@ public boolean getPreemptible() { Scheduling scheduling = getInstance().getScheduling(); return scheduling != null && scheduling.getPreemptible(); } catch (IOException e) { + log.log(Level.WARNING, "Error when getting preemptible status", e); return false; } } @@ -106,6 +107,7 @@ public boolean getPreempted() { try { return preemptedFuture != null && preemptedFuture.isDone() && preemptedFuture.get(); } catch (InterruptedException | ExecutionException e) { + log.log(Level.WARNING, "Error when getting preempted status", e); return false; } } diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java index a4b00eed..69191c55 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -16,16 +16,23 @@ package com.google.jenkins.plugins.computeengine; import com.google.common.collect.ImmutableList; -import hudson.model.*; +import hudson.model.Action; +import hudson.model.Cause; +import hudson.model.CauseAction; +import hudson.model.Executor; +import hudson.model.ExecutorListener; +import hudson.model.Job; +import hudson.model.Queue; import hudson.security.ACL; import hudson.security.ACLContext; import hudson.slaves.RetentionStrategy; -import java.util.List; -import java.util.logging.Level; import jenkins.model.Jenkins; import lombok.extern.java.Log; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; +import java.util.List; +import java.util.logging.Level; + /** * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of * preemption of GCP instance to restart preempted tasks @@ -40,8 +47,8 @@ public class ComputeEngineRetentionStrategy extends RetentionStrategy label = getLabel(ComputeEngineCloudRestartPreemptedIT.class); From cc813fab820c55dbaad1d8f07fd103c48beeec08 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Fri, 14 Jun 2019 23:14:28 +0200 Subject: [PATCH 09/13] Order of imports.. why its non standard.. wth --- .../ComputeEngineRetentionStrategy.java | 9 ++- .../ComputeEngineCloudRestartPreemptedIT.java | 55 ++++++++++--------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java index 69191c55..7582013b 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineRetentionStrategy.java @@ -26,13 +26,12 @@ import hudson.security.ACL; import hudson.security.ACLContext; import hudson.slaves.RetentionStrategy; +import java.util.List; +import java.util.logging.Level; import jenkins.model.Jenkins; import lombok.extern.java.Log; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; -import java.util.List; -import java.util.logging.Level; - /** * A strategy that allows: - setting one shot instances {@link OnceRetentionStrategy} - in case of * preemption of GCP instance to restart preempted tasks @@ -47,8 +46,8 @@ public class ComputeEngineRetentionStrategy extends RetentionStrategy label = getLabel(ComputeEngineCloudRestartPreemptedIT.class); @@ -133,6 +131,9 @@ public void testIfNodeWasPreempted() throws Exception { Awaitility.await() .timeout(5, TimeUnit.MINUTES) - .until(() -> freeStyleBuild.getNextBuild() != null && freeStyleBuild.getNextBuild().getResult() == SUCCESS); + .until( + () -> + freeStyleBuild.getNextBuild() != null + && freeStyleBuild.getNextBuild().getResult() == SUCCESS); } } From e59a67a45722f5decf9a41e3b809e0a986bc5398 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Sat, 22 Jun 2019 11:09:02 +0200 Subject: [PATCH 10/13] Increasing timeouts --- .../integration/ComputeEngineCloudRestartPreemptedIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java index 99c96165..5dff070f 100644 --- a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java +++ b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java @@ -68,7 +68,7 @@ @Log public class ComputeEngineCloudRestartPreemptedIT { @ClassRule - public static Timeout timeout = new Timeout(15 * TEST_TIMEOUT_MULTIPLIER, TimeUnit.MINUTES); + public static Timeout timeout = new Timeout(20 * TEST_TIMEOUT_MULTIPLIER, TimeUnit.MINUTES); @ClassRule public static JenkinsRule jenkinsRule = new JenkinsRule(); @@ -120,17 +120,17 @@ public void testIfNodeWasPreempted() throws Exception { QueueTaskFuture taskFuture = project.scheduleBuild2(0); Awaitility.await() - .timeout(5, TimeUnit.MINUTES) + .timeout(7, TimeUnit.MINUTES) .until(() -> computer.getLog().contains("listening to metadata for preemption event")); client.simulateMaintenanceEvent(PROJECT_ID, ZONE, name); - Awaitility.await().timeout(5, TimeUnit.MINUTES).until(computer::getPreempted); + Awaitility.await().timeout(8, TimeUnit.MINUTES).until(computer::getPreempted); FreeStyleBuild freeStyleBuild = taskFuture.get(); assertEquals(FAILURE, freeStyleBuild.getResult()); Awaitility.await() - .timeout(5, TimeUnit.MINUTES) + .timeout(9, TimeUnit.MINUTES) .until( () -> freeStyleBuild.getNextBuild() != null From 1427c759feb74f9cc8cfaf6d496981ad4e915452 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 25 Jul 2019 19:26:43 +0200 Subject: [PATCH 11/13] Closing channel on preemption --- .../computeengine/ComputeEngineComputer.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 9db20cc6..88e24b89 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -24,8 +24,8 @@ import hudson.remoting.Channel; import hudson.slaves.AbstractCloudComputer; import java.io.IOException; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; import java.util.logging.Level; import jenkins.model.CauseOfInterruption; import lombok.extern.java.Log; @@ -37,7 +37,7 @@ public class ComputeEngineComputer extends AbstractCloudComputer { private volatile Instance instance; - private Future preemptedFuture; + private CompletableFuture preemptedFuture; public ComputeEngineComputer(ComputeEngineInstance slave) { super(slave); @@ -53,7 +53,19 @@ void onConnected(TaskListener listener) throws IOException { "Instance " + nodeName + " is preemptive, setting up preemption listener"; log.log(Level.INFO, msg); listener.getLogger().println(msg); - preemptedFuture = getChannel().callAsync(new PreemptedCheckCallable(listener)); + preemptedFuture = + CompletableFuture.supplyAsync(() -> { + try { + final Boolean value = getChannel().callAsync(new PreemptedCheckCallable(listener)).get(); + log.log(Level.INFO, "Got information that node was preempted with value [" + value + "]"); + if (value) { + getChannel().close(); + } + return value; + } catch (InterruptedException|ExecutionException|IOException e) { + throw new RuntimeException(e); + } + }, threadPoolForRemoting); getChannel() .addListener( new Channel.Listener() { From dd83497e337ffa522fb0669f0ce6043510bfc292 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 25 Jul 2019 20:12:00 +0200 Subject: [PATCH 12/13] Simplify code a bit --- .../computeengine/ComputeEngineComputer.java | 47 ++++++++----------- .../ComputeEngineComputerListener.java | 3 +- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 88e24b89..89c02a65 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -21,7 +21,6 @@ import hudson.model.Executor; import hudson.model.Result; import hudson.model.TaskListener; -import hudson.remoting.Channel; import hudson.slaves.AbstractCloudComputer; import java.io.IOException; import java.util.concurrent.CompletableFuture; @@ -43,7 +42,7 @@ public ComputeEngineComputer(ComputeEngineInstance slave) { super(slave); } - void onConnected(TaskListener listener) throws IOException { + void onConnected(TaskListener listener) { ComputeEngineInstance node = getNode(); if (node != null) { node.onConnected(); @@ -53,36 +52,28 @@ void onConnected(TaskListener listener) throws IOException { "Instance " + nodeName + " is preemptive, setting up preemption listener"; log.log(Level.INFO, msg); listener.getLogger().println(msg); - preemptedFuture = - CompletableFuture.supplyAsync(() -> { - try { - final Boolean value = getChannel().callAsync(new PreemptedCheckCallable(listener)).get(); - log.log(Level.INFO, "Got information that node was preempted with value [" + value + "]"); - if (value) { - getChannel().close(); - } - return value; - } catch (InterruptedException|ExecutionException|IOException e) { - throw new RuntimeException(e); - } - }, threadPoolForRemoting); - getChannel() - .addListener( - new Channel.Listener() { - @Override - public void onClosed(Channel channel, IOException cause) { - log.log(Level.FINE, "Got channel close event"); - if (getPreempted()) { - log.log( - Level.FINE, "Preempted node channel closed, terminating all executors"); - getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); - } - } - }); + preemptedFuture = + CompletableFuture.supplyAsync( + () -> getPreemptedStatus(listener, nodeName), threadPoolForRemoting); } } } + private Boolean getPreemptedStatus(TaskListener listener, String nodeName) { + try { + final Boolean value = getChannel().callAsync(new PreemptedCheckCallable(listener)).get(); + log.log(Level.FINE, "Got information that node was preempted with value [" + value + "]"); + if (value) { + log.log(Level.FINE, "Preempted node channel closed, terminating all executors"); + getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); + getChannel().close(); + } + return value; + } catch (InterruptedException | ExecutionException | IOException e) { + throw new RuntimeException(e); + } + } + private void interruptExecutor(Executor executor, String nodeName) { log.log(Level.INFO, "Terminating executor " + executor + " node " + nodeName); executor.interrupt( diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java index 6d6b7ac7..06ff14cb 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerListener.java @@ -20,12 +20,11 @@ import hudson.model.Computer; import hudson.model.TaskListener; import hudson.slaves.ComputerListener; -import java.io.IOException; @Extension public class ComputeEngineComputerListener extends ComputerListener { @Override - public void onOnline(Computer c, TaskListener listener) throws IOException { + public void onOnline(Computer c, TaskListener listener) { if (c instanceof ComputeEngineComputer) { ComputeEngineComputer computer = (ComputeEngineComputer) c; computer.onConnected(listener); From 47ba7c285f4e99aa71e36cba791b71513c568887 Mon Sep 17 00:00:00 2001 From: Karol Lassak Date: Thu, 25 Jul 2019 21:24:49 +0200 Subject: [PATCH 13/13] Bit more simplification --- .../plugins/computeengine/ComputeEngineComputer.java | 8 ++++---- .../integration/ComputeEngineCloudRestartPreemptedIT.java | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java index 89c02a65..181b29d1 100644 --- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java +++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputer.java @@ -61,15 +61,15 @@ void onConnected(TaskListener listener) { private Boolean getPreemptedStatus(TaskListener listener, String nodeName) { try { - final Boolean value = getChannel().callAsync(new PreemptedCheckCallable(listener)).get(); + boolean value = getChannel().call(new PreemptedCheckCallable(listener)); log.log(Level.FINE, "Got information that node was preempted with value [" + value + "]"); if (value) { - log.log(Level.FINE, "Preempted node channel closed, terminating all executors"); - getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); + log.log(Level.FINE, "Preempted node was preempted, terminating all executors"); getChannel().close(); + getExecutors().forEach(executor -> interruptExecutor(executor, nodeName)); } return value; - } catch (InterruptedException | ExecutionException | IOException e) { + } catch (InterruptedException | IOException e) { throw new RuntimeException(e); } } diff --git a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java index 5dff070f..bf638fab 100644 --- a/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java +++ b/src/test/java/com/google/jenkins/plugins/computeengine/integration/ComputeEngineCloudRestartPreemptedIT.java @@ -91,6 +91,7 @@ public static void init() throws Exception { .template(NULL_TEMPLATE) .preemptible(true) .googleLabels(label) + .oneShot(false) .build(); cloud.setConfigurations(Lists.newArrayList(configuration)); @@ -114,7 +115,7 @@ public void testIfNodeWasPreempted() throws Exception { assertTrue("Configuration was set as preemptible but saw as not", computer.getPreemptible()); FreeStyleProject project = jenkinsRule.createFreeStyleProject(); - Builder step = new Shell("sleep 20"); + Builder step = new Shell("sleep 60"); project.getBuildersList().add(step); project.setAssignedLabel(new LabelAtom(LABEL)); QueueTaskFuture taskFuture = project.scheduleBuild2(0);