From f67b7183cc57638589b2303185237d6612fb9ad3 Mon Sep 17 00:00:00 2001 From: Gus Heck <46900717+gus-asf@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:51:59 -0400 Subject: [PATCH] SOLR-17203 fix calculation for CpuAllowedLimit (#2347) --- .../solr/handler/RequestHandlerBase.java | 10 ++-- .../apache/solr/search/CpuAllowedLimit.java | 32 ++++++----- .../org/apache/solr/util/ThreadCpuTimer.java | 56 ++++++++++--------- .../solr/search/TestCpuAllowedLimit.java | 7 +++ 4 files changed, 59 insertions(+), 46 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java index b9da9f73156..ff020ddebd3 100644 --- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java @@ -242,15 +242,15 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) { rsp.setHttpCaching(false); } } catch (Exception e) { - e = normalizeReceivedException(req, e); - processErrorMetricsOnException(e, metrics); - rsp.setException(e); + Exception normalized = normalizeReceivedException(req, e); + processErrorMetricsOnException(normalized, metrics); + rsp.setException(normalized); } finally { long elapsed = timer.stop(); metrics.totalTime.inc(elapsed); - if (publishCpuTime) { - Optional cpuTime = threadCpuTimer.getCpuTimeMs(); + if (publishCpuTime && threadCpuTimer != null) { + Optional cpuTime = threadCpuTimer.getElapsedCpuMs(); if (cpuTime.isPresent()) { // add CPU_TIME if not already added by SearchHandler NamedList header = rsp.getResponseHeader(); diff --git a/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java index d63888b2726..aa3a1fae7b2 100644 --- a/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java +++ b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java @@ -17,25 +17,29 @@ package org.apache.solr.search; import com.google.common.annotations.VisibleForTesting; -import java.lang.invoke.MethodHandles; import java.util.concurrent.TimeUnit; +import net.jcip.annotations.NotThreadSafe; import org.apache.lucene.index.QueryTimeout; import org.apache.solr.common.params.CommonParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.util.ThreadCpuTimer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Enforces a CPU-time based timeout on a given SolrQueryRequest, as specified by the {@code * cpuAllowed} query parameter. + * + *

Since this class uses {@link ThreadCpuTimer} it is irrevocably lock-hostile and can never be + * exposed to multiple threads, even if guarded by synchronization. Normally this is attached to + * objects ultimately held by a ThreadLocal in {@link SolrRequestInfo} to provide safe usage on the + * assumption that such objects are not shared to other threads. + * + * @see ThreadCpuTimer */ +@NotThreadSafe public class CpuAllowedLimit implements QueryTimeout { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private final long limitAtNs; private final ThreadCpuTimer threadCpuTimer; + private final long requestedTimeoutNs; /** * Create an object to represent a CPU time limit for the current request. NOTE: this @@ -49,6 +53,8 @@ public CpuAllowedLimit(SolrQueryRequest req) { throw new IllegalArgumentException("Thread CPU time monitoring is not available."); } SolrRequestInfo solrRequestInfo = SolrRequestInfo.getRequestInfo(); + // get existing timer if available to ensure sub-queries can't reset/exceed the intended time + // constraint. threadCpuTimer = solrRequestInfo != null ? solrRequestInfo.getThreadCpuTimer() : new ThreadCpuTimer(); long reqCpuLimit = req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L); @@ -57,18 +63,14 @@ public CpuAllowedLimit(SolrQueryRequest req) { throw new IllegalArgumentException( "Check for limit with hasCpuLimit(req) before creating a CpuAllowedLimit"); } - // calculate when the time limit is reached, account for the time already spent - limitAtNs = - threadCpuTimer.getStartCpuTimeNs() - + TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS); + // calculate the time when the limit is reached, e.g. account for the time already spent + requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS); } @VisibleForTesting CpuAllowedLimit(long limitMs) { this.threadCpuTimer = new ThreadCpuTimer(); - limitAtNs = - threadCpuTimer.getCurrentCpuTimeNs() - + TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS); + requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS); } /** Return true if the current request has a parameter with a valid value of the limit. */ @@ -76,9 +78,9 @@ static boolean hasCpuLimit(SolrQueryRequest req) { return req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L) > 0L; } - /** Return true if a max limit value is set and the current usage has exceeded the limit. */ + /** Return true if usage has exceeded the limit. */ @Override public boolean shouldExit() { - return limitAtNs - threadCpuTimer.getCurrentCpuTimeNs() < 0L; + return threadCpuTimer.getElapsedCpuNs() > requestedTimeoutNs; } } diff --git a/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java index 468df99382b..054a5310d14 100644 --- a/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java +++ b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java @@ -21,6 +21,7 @@ import java.lang.management.ThreadMXBean; import java.util.Optional; import java.util.concurrent.TimeUnit; +import net.jcip.annotations.NotThreadSafe; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,8 +30,13 @@ * {@link java.lang.management.ThreadMXBean}. * *

Calling code should create an instance of this class when starting the operation, and then can - * get the {@link #getCpuTimeMs()} at any time thereafter. + * get the {@link #getElapsedCpuMs()} at any time thereafter. + * + *

This class is irrevocably not thread safe. Never allow instances of this class to be exposed + * to more than one thread. Acquiring an external lock will not be sufficient. This class can be + * considered "lock-hostile" due to its caching of timing information for a specific thread. */ +@NotThreadSafe public class ThreadCpuTimer { private static final long UNSUPPORTED = -1; public static final String CPU_TIME = "cpuTime"; @@ -56,15 +62,11 @@ public class ThreadCpuTimer { private final long startCpuTimeNanos; /** - * Create an instance to track the current thread's usage of CPU. The usage information can later - * be retrieved by any thread by calling {@link #getCpuTimeMs()}. + * Create an instance to track the current thread's usage of CPU. Usage information can later be + * retrieved by calling {@link #getElapsedCpuMs()}. Timing starts immediately upon construction. */ public ThreadCpuTimer() { - if (THREAD_MX_BEAN != null) { - this.startCpuTimeNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime(); - } else { - this.startCpuTimeNanos = UNSUPPORTED; - } + this.startCpuTimeNanos = getThreadTotalCpuNs(); } public static boolean isSupported() { @@ -72,45 +74,47 @@ public static boolean isSupported() { } /** - * Return the initial value of CPU time for this thread when this instance was first created. - * NOTE: absolute value returned by this method has no meaning by itself, it should only be used - * when comparing elapsed time between this value and {@link #getCurrentCpuTimeNs()}. + * Return CPU time consumed by this thread since the construction of this timer object. * * @return current value, or {@link #UNSUPPORTED} if not supported. */ - public long getStartCpuTimeNs() { - return startCpuTimeNanos; + public long getElapsedCpuNs() { + return this.startCpuTimeNanos != UNSUPPORTED + ? getThreadTotalCpuNs() - this.startCpuTimeNanos + : UNSUPPORTED; } /** - * Return current value of CPU time for this thread. + * Get the cpu time for the current thread since {@link Thread#start()} without throwing an + * exception. * - * @return current value, or {@link #UNSUPPORTED} if not supported. + * @see ThreadMXBean#getCurrentThreadCpuTime() for important details + * @return the number of nanoseconds of cpu consumed by this thread since {@code Thread.start()}. */ - public long getCurrentCpuTimeNs() { + private long getThreadTotalCpuNs() { if (THREAD_MX_BEAN != null) { - return this.startCpuTimeNanos != UNSUPPORTED - ? THREAD_MX_BEAN.getCurrentThreadCpuTime() - this.startCpuTimeNanos - : UNSUPPORTED; + return THREAD_MX_BEAN.getCurrentThreadCpuTime(); } else { return UNSUPPORTED; } } /** - * Get the CPU usage information for the thread that created this {@link ThreadCpuTimer}. The - * information will track the thread's cpu since the creation of this {@link ThreadCpuTimer} - * instance, if the VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}. + * Get the CPU usage information for the current thread since it created this {@link + * ThreadCpuTimer}. The result is undefined if called by any other thread. + * + * @return the thread's cpu since the creation of this {@link ThreadCpuTimer} instance. If the + * VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}. */ - public Optional getCpuTimeMs() { - long cpuTimeNs = getCurrentCpuTimeNs(); + public Optional getElapsedCpuMs() { + long cpuTimeNs = getElapsedCpuNs(); return cpuTimeNs != UNSUPPORTED ? Optional.of(TimeUnit.MILLISECONDS.convert(cpuTimeNs, TimeUnit.NANOSECONDS)) - : Optional.of(UNSUPPORTED); + : Optional.empty(); } @Override public String toString() { - return getCpuTimeMs().toString(); + return getElapsedCpuMs().map(String::valueOf).orElse("UNSUPPORTED"); } } diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java index 88c279a5d64..dac4ec5124c 100644 --- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java +++ b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java @@ -208,4 +208,11 @@ public void testDistribLimit() throws Exception { // System.err.println("rsp=" + rsp.jsonStr()); assertNotNull("should have partial results", rsp.getHeader().get("partialResults")); } + + @Test + public void testDistribLimit2() throws Exception { + // This looks silly, but it actually guards against: + // https://issues.apache.org/jira/browse/SOLR-17203 + testDistribLimit(); + } }