From 1408c044b196d1a919f78c45829076f270cdeca7 Mon Sep 17 00:00:00 2001 From: Rushabh Shah Date: Wed, 1 Dec 2021 00:36:07 -0500 Subject: [PATCH] HBASE-26468 Region Server doesn't exit cleanly incase it crashes. (#3865) (#3862) Signed-off-by: Duo Zhang Signed-off-by: Geoffrey Jacoby Signed-off-by: Viraj Jasani --- .../org/apache/hadoop/hbase/util/Threads.java | 31 +++++++++++++++++++ .../hadoop/hbase/util/ServerCommandLine.java | 21 +++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Threads.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Threads.java index 8b1859599820..2e267ac2220a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Threads.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Threads.java @@ -25,10 +25,12 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; +import java.util.Set; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.yetus.audience.InterfaceAudience; @@ -266,4 +268,33 @@ public static void printThreadInfo(PrintStream stream, String title) { Preconditions.checkNotNull(PrintThreadInfoLazyHolder.HELPER, "Cannot find method. Check hadoop jars linked").printThreadInfo(stream, title); } + + /** + * Checks whether any non-daemon thread is running. 
+ * @return true if any non-daemon thread other than the current one is running, otherwise false + */ + public static boolean isNonDaemonThreadRunning() { + AtomicInteger nonDaemonThreadCount = new AtomicInteger(); + Set<Thread> threads = Thread.getAllStackTraces().keySet(); + threads.forEach(t -> { + // Exclude current thread + if (t.getId() != Thread.currentThread().getId() && !t.isDaemon()) { + nonDaemonThreadCount.getAndIncrement(); + LOG.info("Non daemon thread {} is still alive", t.getName()); + LOG.info(printStackTrace(t)); + } + }); + return nonDaemonThreadCount.get() > 0; + } + + /** + * Returns the stack trace of the passed thread as a string, one frame per line. + */ + public static String printStackTrace(Thread t) { + StringBuilder sb = new StringBuilder(); + for (StackTraceElement frame: t.getStackTrace()) { + sb.append("\n").append(" ").append(frame.toString()); + } + return sb.toString(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ServerCommandLine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ServerCommandLine.java index f99a0908cd6a..7f89d59fbca7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ServerCommandLine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ServerCommandLine.java @@ -36,6 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.hbase.util.Threads.isNonDaemonThreadRunning; /** * Base class for command lines that start up various HBase daemons. @@ -141,8 +142,10 @@ public static void logProcessInfo(Configuration conf) { } /** - * Parse and run the given command line. This may exit the JVM if - * a nonzero exit code is returned from run(). + * Parse and run the given command line. This will exit the JVM with + * the exit code returned from run(). 
+ * If the return code is 0, wait at most 30 seconds for all non-daemon threads to quit, + * then terminate the JVM if any of them are still running. */ public void doMain(String args[]) { try { @@ -150,6 +153,20 @@ public void doMain(String args[]) { if (ret != 0) { System.exit(ret); } + // Return code is 0 here. + boolean forceStop = false; + long startTime = EnvironmentEdgeManager.currentTime(); + while (isNonDaemonThreadRunning()) { + if (EnvironmentEdgeManager.currentTime() - startTime > 30 * 1000) { + forceStop = true; + break; + } + Thread.sleep(1000); + } + if (forceStop) { + LOG.error("Failed to stop all non-daemon threads, so terminating JVM"); + System.exit(-1); + } } catch (Exception e) { LOG.error("Failed to run", e); System.exit(-1);