diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java
index b0b0be1c..62245006 100644
--- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java
+++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java
@@ -16,6 +16,7 @@
 
 package com.google.jenkins.plugins.computeengine;
 
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
 import com.google.api.services.compute.model.AccessConfig;
 import com.google.api.services.compute.model.Instance;
 import com.google.api.services.compute.model.NetworkInterface;
@@ -40,6 +41,7 @@
 import java.io.PrintStream;
 import java.net.InetSocketAddress;
 import java.net.Proxy;
+import java.net.SocketTimeoutException;
 import java.util.Base64;
 import java.util.Optional;
 import java.util.logging.Level;
@@ -154,14 +156,17 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
       }
       if (opError != null) {
         LOGGER.info(String.format(
-            "Launch failed while waiting for operation %s to complete. Operation error was %s",
+            "Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance.",
             insertOperationId, opError.getErrors().get(0).getMessage()));
+        terminateNode(computer, listener);
         return;
       }
     } catch (InterruptedException e) {
+      // opError may be null on this path (the try block null-checks it); log the exception instead.
       LOGGER.info(String.format(
-          "Launch failed while waiting for operation %s to complete. Operation error was %s",
-          insertOperationId, opError.getErrors().get(0).getMessage()));
+          "Launch interrupted while waiting for operation %s to complete: %s. Terminating instance.",
+          insertOperationId, e.getMessage()));
+      terminateNode(computer, listener);
       return;
     }
 
@@ -214,19 +219,36 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
       launch(computer, listener);
     } catch (IOException ioe) {
       ioe.printStackTrace(listener.error(ioe.getMessage()));
-      node = (ComputeEngineInstance) slaveComputer.getNode();
-      if (node != null) {
-        try {
-          node.terminate();
-        } catch (Exception e) {
-          listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
-        }
-      }
+      terminateNode(slaveComputer, listener);
     } catch (InterruptedException ie) {
 
     }
   }
 
+  /**
+   * Terminates the node attached to {@code slaveComputer}, if it still has one.
+   *
+   * Best effort: a failure to terminate is reported to {@code listener} and not rethrown.
+   *
+   * @param slaveComputer computer whose node, cast to {@link ComputeEngineInstance}, is terminated
+   * @param listener receives the error report and stack trace when termination fails
+   */
+  private static void terminateNode(SlaveComputer slaveComputer, TaskListener listener) {
+    ComputeEngineInstance node = (ComputeEngineInstance) slaveComputer.getNode();
+    if (node != null) {
+      try {
+        node.terminate();
+      } catch (Exception e) {
+        // Keep the stack trace: the node name alone does not say why termination failed.
+        e.printStackTrace(
+            listener.error(String.format("Failed to terminate node %s", node.getDisplayName())));
+      }
+    } else {
+      LOGGER.fine(
+          String.format("Tried to terminate unknown node from computer %s", slaveComputer.getDisplayName()));
+    }
+  }
+
   private boolean testCommand(
       ComputeEngineComputer computer,
       Connection conn,
@@ -343,6 +365,10 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
                   + ")");
         }
         Instance instance = computer.refreshInstance();
+        // the instance will be null when the node is terminated
+        if (instance == null) {
+          return null;
+        }
 
         String host = "";
 
@@ -410,10 +436,30 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
             SSH_TIMEOUT_MILLIS);
         logInfo(computer, listener, "Connected via SSH.");
         return conn;
-      } catch (IOException e) {
+      } catch (GoogleJsonResponseException e) {
+        if (e.getStatusCode() == 404) {
+          log(
+              LOGGER,
+              Level.SEVERE,
+              listener,
+              String.format("Instance %s not found. Terminating instance.", computer.getName()));
+          terminateNode(computer, listener);
+          // Nothing left to connect to: report "no connection" like the null-instance check does.
+          return null;
+        }
+        // Any other API error: report it and back off rather than retrying immediately.
+        logWarning(computer, listener, String.format("An error occurred: %s", e.getMessage()));
+        Thread.sleep(SSH_SLEEP_MILLIS);
+      } catch (SocketTimeoutException e) {
         // keep retrying until SSH comes up
-        logInfo(computer, listener, "Failed to connect via ssh: " + e.getMessage());
-        logInfo(computer, listener, "Waiting for SSH to come up. Sleeping 5.");
+        logInfo(computer, listener, String.format("Failed to connect via ssh: %s", e.getMessage()));
+        logInfo(
+            computer,
+            listener,
+            String.format("Waiting for SSH to come up. Sleeping %d.", SSH_SLEEP_MILLIS / 1000));
+        Thread.sleep(SSH_SLEEP_MILLIS);
+      } catch (IOException e) {
+        logWarning(computer, listener, String.format("An error occurred: %s", e.getMessage()));
         Thread.sleep(SSH_SLEEP_MILLIS);
       }
     }
diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java
index ec0f92a2..6bf1bd8e 100644
--- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java
+++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java
@@ -16,8 +16,11 @@
 
 package com.google.jenkins.plugins.computeengine;
 
+import static com.google.jenkins.plugins.computeengine.ComputeEngineCloud.CLOUD_ID_LABEL_KEY;
+
 import com.google.cloud.graphite.platforms.plugin.client.ComputeClient.OperationException;
 import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableMap;
 import com.google.jenkins.plugins.computeengine.ssh.GoogleKeyCredential;
 import edu.umd.cs.findbugs.annotations.Nullable;
 import hudson.Extension;
@@ -30,6 +33,7 @@
 import hudson.slaves.RetentionStrategy;
 import java.io.IOException;
 import java.util.Collections;
+import java.util.Map;
 import java.util.Optional;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -130,9 +134,16 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
             .createSnapshotSync(cloud.getProjectId(), this.zone, this.getNodeName(), createSnapshotTimeout);
       }
 
-      // If the instance is running, attempt to terminate it. This is an async call and we
+      Map<String, String> filterLabel = ImmutableMap.of(CLOUD_ID_LABEL_KEY, cloud.getInstanceId());
+      var instanceExistsInCloud =
+          cloud.getClient().listInstancesWithLabel(cloud.getProjectId(), filterLabel).stream()
+              .anyMatch(instance -> instance.getName().equals(name));
+
+      // If the instance exists in the cloud, attempt to terminate it. This is an async call and we
       // return immediately, hoping for the best.
-      cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
+      if (instanceExistsInCloud) {
+        cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
+      }
     } catch (CloudNotFoundException cnfe) {
       listener.error(cnfe.getMessage());
     } catch (OperationException oe) {
diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java
index 928f3c38..50fc679c 100644
--- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java
+++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java
@@ -85,6 +85,9 @@ private Optional<Connection> bootstrap(
       logInfo(computer, listener, "Authenticating as " + node.getSshUser());
       try {
         bootstrapConn = connectToSsh(computer, listener);
+        if (bootstrapConn == null) {
+          break;
+        }
         isAuthenticated = bootstrapConn.authenticateWithPublicKey(
             node.getSshUser(),
             Secret.toString(keyCred.getPrivateKey()).toCharArray(),
diff --git a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java
index 30fae98e..317eeeac 100644
--- a/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java
+++ b/src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java
@@ -92,6 +92,9 @@ private Optional<Connection> bootstrap(ComputeEngineComputer computer, TaskListe
       logInfo(computer, listener, "Authenticating as " + node.getSshUser());
       try {
         bootstrapConn = connectToSsh(computer, listener);
+        if (bootstrapConn == null) {
+          break;
+        }
         isAuthenticated = authenticateSSH(node.getSshUser(), windowsConfig, bootstrapConn, listener);
       } catch (IOException e) {
         logException(computer, listener, "Exception trying to authenticate", e);