Skip to content

Commit

Permalink
Merge pull request #489 from Artmorse/issue-381
Browse files Browse the repository at this point in the history
Terminate the instance when a 404 occurs.
  • Loading branch information
batmat authored Dec 12, 2024
2 parents 005441d + 181e799 commit 8aeebbd
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.google.jenkins.plugins.computeengine;

import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import com.google.api.services.compute.model.AccessConfig;
import com.google.api.services.compute.model.Instance;
import com.google.api.services.compute.model.NetworkInterface;
Expand All @@ -40,6 +41,7 @@
import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketTimeoutException;
import java.util.Base64;
import java.util.Optional;
import java.util.logging.Level;
Expand Down Expand Up @@ -154,14 +156,16 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
}
if (opError != null) {
LOGGER.info(String.format(
"Launch failed while waiting for operation %s to complete. Operation error was %s",
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance.",
insertOperationId, opError.getErrors().get(0).getMessage()));
terminateNode(computer, listener);
return;
}
} catch (InterruptedException e) {
LOGGER.info(String.format(
"Launch failed while waiting for operation %s to complete. Operation error was %s",
"Launch failed while waiting for operation %s to complete. Operation error was %s. Terminating instance",
insertOperationId, opError.getErrors().get(0).getMessage()));
terminateNode(computer, listener);
return;
}

Expand Down Expand Up @@ -214,19 +218,26 @@ public void launch(SlaveComputer slaveComputer, TaskListener listener) {
launch(computer, listener);
} catch (IOException ioe) {
ioe.printStackTrace(listener.error(ioe.getMessage()));
node = (ComputeEngineInstance) slaveComputer.getNode();
if (node != null) {
try {
node.terminate();
} catch (Exception e) {
listener.error(String.format("Failed to terminate node %s", node.getDisplayName()));
}
}
terminateNode(slaveComputer, listener);
} catch (InterruptedException ie) {

}
}

/**
 * Best-effort termination of the node attached to the given computer.
 *
 * <p>Never throws: a failure of {@code node.terminate()} is reported on the listener together
 * with its stack trace, so the root cause is not lost. If no node is attached to the computer,
 * only a fine-level log entry is written.
 *
 * @param slaveComputer the computer whose node should be terminated
 * @param listener receives the error report when termination fails
 */
private static void terminateNode(SlaveComputer slaveComputer, TaskListener listener) {
    ComputeEngineInstance node = (ComputeEngineInstance) slaveComputer.getNode();
    if (node == null) {
        LOGGER.fine(
                String.format("Tried to terminate unknown node from computer %s", slaveComputer.getDisplayName()));
        return;
    }
    try {
        node.terminate();
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Broad catch would otherwise hide the interruption from callers; restore the flag.
            Thread.currentThread().interrupt();
        }
        // Attach the stack trace to the listener output (same pattern as the IOException
        // handling in launch) instead of dropping the cause.
        e.printStackTrace(listener.error(String.format("Failed to terminate node %s", node.getDisplayName())));
    }
}

private boolean testCommand(
ComputeEngineComputer computer,
Connection conn,
Expand Down Expand Up @@ -343,6 +354,10 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
+ ")");
}
Instance instance = computer.refreshInstance();
// the instance will be null when the node is terminated
if (instance == null) {
return null;
}

String host = "";

Expand Down Expand Up @@ -410,10 +425,25 @@ protected Connection connectToSsh(ComputeEngineComputer computer, TaskListener l
SSH_TIMEOUT_MILLIS);
logInfo(computer, listener, "Connected via SSH.");
return conn;
} catch (IOException e) {
} catch (GoogleJsonResponseException e) {
if (e.getStatusCode() == 404) {
log(
LOGGER,
Level.SEVERE,
listener,
String.format("Instance %s not found. Terminating instance.", computer.getName()));
terminateNode(computer, listener);
}
} catch (SocketTimeoutException e) {
// keep retrying until SSH comes up
logInfo(computer, listener, "Failed to connect via ssh: " + e.getMessage());
logInfo(computer, listener, "Waiting for SSH to come up. Sleeping 5.");
logInfo(computer, listener, String.format("Failed to connect via ssh: %s", e.getMessage()));
logInfo(
computer,
listener,
String.format("Waiting for SSH to come up. Sleeping %d.", SSH_SLEEP_MILLIS / 1000));
Thread.sleep(SSH_SLEEP_MILLIS);
} catch (IOException e) {
logWarning(computer, listener, String.format("An error occured: %s", e.getMessage()));

Check warning on line 446 in src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineComputerLauncher.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 161-446 are not covered by tests
Thread.sleep(SSH_SLEEP_MILLIS);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@

package com.google.jenkins.plugins.computeengine;

import static com.google.jenkins.plugins.computeengine.ComputeEngineCloud.CLOUD_ID_LABEL_KEY;

import com.google.cloud.graphite.platforms.plugin.client.ComputeClient.OperationException;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.jenkins.plugins.computeengine.ssh.GoogleKeyCredential;
import edu.umd.cs.findbugs.annotations.Nullable;
import hudson.Extension;
Expand All @@ -30,6 +33,7 @@
import hudson.slaves.RetentionStrategy;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -130,9 +134,16 @@ protected void _terminate(TaskListener listener) throws IOException, Interrupted
.createSnapshotSync(cloud.getProjectId(), this.zone, this.getNodeName(), createSnapshotTimeout);
}

// If the instance is running, attempt to terminate it. This is an async call and we
Map<String, String> filterLabel = ImmutableMap.of(CLOUD_ID_LABEL_KEY, cloud.getInstanceId());
var instanceExistsInCloud =
cloud.getClient().listInstancesWithLabel(cloud.getProjectId(), filterLabel).stream()
.anyMatch(instance -> instance.getName().equals(name));

// If the instance exists in the cloud, attempt to terminate it. This is an async call and we
// return immediately, hoping for the best.
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
if (instanceExistsInCloud) {

Check warning on line 144 in src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineInstance.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 137-144 are not covered by tests
cloud.getClient().terminateInstanceAsync(cloud.getProjectId(), zone, name);
}
} catch (CloudNotFoundException cnfe) {
listener.error(cnfe.getMessage());
} catch (OperationException oe) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ private Optional<Connection> bootstrap(
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
try {
bootstrapConn = connectToSsh(computer, listener);
if (bootstrapConn == null) {
break;

Check warning on line 89 in src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineLinuxLauncher.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 88-89 are not covered by tests
}
isAuthenticated = bootstrapConn.authenticateWithPublicKey(
node.getSshUser(),
Secret.toString(keyCred.getPrivateKey()).toCharArray(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ private Optional<Connection> bootstrap(ComputeEngineComputer computer, TaskListe
logInfo(computer, listener, "Authenticating as " + node.getSshUser());
try {
bootstrapConn = connectToSsh(computer, listener);
if (bootstrapConn == null) {
break;

Check warning on line 96 in src/main/java/com/google/jenkins/plugins/computeengine/ComputeEngineWindowsLauncher.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 95-96 are not covered by tests
}
isAuthenticated = authenticateSSH(node.getSshUser(), windowsConfig, bootstrapConn, listener);
} catch (IOException e) {
logException(computer, listener, "Exception trying to authenticate", e);
Expand Down

0 comments on commit 8aeebbd

Please sign in to comment.