Skip to content

Commit

Permalink
SOLR-17203 fix calculation for CpuAllowedLimit (#2347)
Browse files Browse the repository at this point in the history
  • Loading branch information
gus-asf authored Mar 18, 2024
1 parent a4fbad5 commit f67b718
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,15 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
rsp.setHttpCaching(false);
}
} catch (Exception e) {
e = normalizeReceivedException(req, e);
processErrorMetricsOnException(e, metrics);
rsp.setException(e);
Exception normalized = normalizeReceivedException(req, e);
processErrorMetricsOnException(normalized, metrics);
rsp.setException(normalized);
} finally {
long elapsed = timer.stop();
metrics.totalTime.inc(elapsed);

if (publishCpuTime) {
Optional<Long> cpuTime = threadCpuTimer.getCpuTimeMs();
if (publishCpuTime && threadCpuTimer != null) {
Optional<Long> cpuTime = threadCpuTimer.getElapsedCpuMs();
if (cpuTime.isPresent()) {
// add CPU_TIME if not already added by SearchHandler
NamedList<Object> header = rsp.getResponseHeader();
Expand Down
32 changes: 17 additions & 15 deletions solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,29 @@
package org.apache.solr.search;

import com.google.common.annotations.VisibleForTesting;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.TimeUnit;
import net.jcip.annotations.NotThreadSafe;
import org.apache.lucene.index.QueryTimeout;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.util.ThreadCpuTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Enforces a CPU-time based timeout on a given SolrQueryRequest, as specified by the {@code
* cpuAllowed} query parameter.
*
* <p>Since this class uses {@link ThreadCpuTimer} it is irrevocably lock-hostile and can never be
* exposed to multiple threads, even if guarded by synchronization. Normally this is attached to
* objects ultimately held by a ThreadLocal in {@link SolrRequestInfo} to provide safe usage on the
* assumption that such objects are not shared to other threads.
*
* @see ThreadCpuTimer
*/
@NotThreadSafe
public class CpuAllowedLimit implements QueryTimeout {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private final long limitAtNs;
private final ThreadCpuTimer threadCpuTimer;
private final long requestedTimeoutNs;

/**
* Create an object to represent a CPU time limit for the current request. NOTE: this
Expand All @@ -49,6 +53,8 @@ public CpuAllowedLimit(SolrQueryRequest req) {
throw new IllegalArgumentException("Thread CPU time monitoring is not available.");
}
SolrRequestInfo solrRequestInfo = SolrRequestInfo.getRequestInfo();
// get existing timer if available to ensure sub-queries can't reset/exceed the intended time
// constraint.
threadCpuTimer =
solrRequestInfo != null ? solrRequestInfo.getThreadCpuTimer() : new ThreadCpuTimer();
long reqCpuLimit = req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L);
Expand All @@ -57,28 +63,24 @@ public CpuAllowedLimit(SolrQueryRequest req) {
throw new IllegalArgumentException(
"Check for limit with hasCpuLimit(req) before creating a CpuAllowedLimit");
}
// calculate when the time limit is reached, account for the time already spent
limitAtNs =
threadCpuTimer.getStartCpuTimeNs()
+ TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
// calculate the time when the limit is reached, e.g. account for the time already spent
requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
}

@VisibleForTesting
CpuAllowedLimit(long limitMs) {
this.threadCpuTimer = new ThreadCpuTimer();
limitAtNs =
threadCpuTimer.getCurrentCpuTimeNs()
+ TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
}

/** Return true if the current request has a parameter with a valid value of the limit. */
static boolean hasCpuLimit(SolrQueryRequest req) {
return req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L) > 0L;
}

/** Return true if a max limit value is set and the current usage has exceeded the limit. */
/** Return true if usage has exceeded the limit. */
@Override
public boolean shouldExit() {
return limitAtNs - threadCpuTimer.getCurrentCpuTimeNs() < 0L;
return threadCpuTimer.getElapsedCpuNs() > requestedTimeoutNs;
}
}
56 changes: 30 additions & 26 deletions solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.lang.management.ThreadMXBean;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import net.jcip.annotations.NotThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -29,8 +30,13 @@
* {@link java.lang.management.ThreadMXBean}.
*
* <p>Calling code should create an instance of this class when starting the operation, and then can
* get the {@link #getCpuTimeMs()} at any time thereafter.
* get the {@link #getElapsedCpuMs()} at any time thereafter.
*
* <p>This class is irrevocably not thread safe. Never allow instances of this class to be exposed
* to more than one thread. Acquiring an external lock will not be sufficient. This class can be
* considered "lock-hostile" due to its caching of timing information for a specific thread.
*/
@NotThreadSafe
public class ThreadCpuTimer {
private static final long UNSUPPORTED = -1;
public static final String CPU_TIME = "cpuTime";
Expand All @@ -56,61 +62,59 @@ public class ThreadCpuTimer {
private final long startCpuTimeNanos;

/**
* Create an instance to track the current thread's usage of CPU. The usage information can later
* be retrieved by any thread by calling {@link #getCpuTimeMs()}.
* Create an instance to track the current thread's usage of CPU. Usage information can later be
* retrieved by calling {@link #getElapsedCpuMs()}. Timing starts immediately upon construction.
*/
public ThreadCpuTimer() {
if (THREAD_MX_BEAN != null) {
this.startCpuTimeNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
} else {
this.startCpuTimeNanos = UNSUPPORTED;
}
this.startCpuTimeNanos = getThreadTotalCpuNs();
}

public static boolean isSupported() {
return THREAD_MX_BEAN != null;
}

/**
* Return the initial value of CPU time for this thread when this instance was first created.
* NOTE: absolute value returned by this method has no meaning by itself, it should only be used
* when comparing elapsed time between this value and {@link #getCurrentCpuTimeNs()}.
* Return CPU time consumed by this thread since the construction of this timer object.
*
* @return current value, or {@link #UNSUPPORTED} if not supported.
*/
public long getStartCpuTimeNs() {
return startCpuTimeNanos;
public long getElapsedCpuNs() {
return this.startCpuTimeNanos != UNSUPPORTED
? getThreadTotalCpuNs() - this.startCpuTimeNanos
: UNSUPPORTED;
}

/**
* Return current value of CPU time for this thread.
* Get the cpu time for the current thread since {@link Thread#start()} without throwing an
* exception.
*
* @return current value, or {@link #UNSUPPORTED} if not supported.
* @see ThreadMXBean#getCurrentThreadCpuTime() for important details
* @return the number of nanoseconds of cpu consumed by this thread since {@code Thread.start()}.
*/
public long getCurrentCpuTimeNs() {
private long getThreadTotalCpuNs() {
if (THREAD_MX_BEAN != null) {
return this.startCpuTimeNanos != UNSUPPORTED
? THREAD_MX_BEAN.getCurrentThreadCpuTime() - this.startCpuTimeNanos
: UNSUPPORTED;
return THREAD_MX_BEAN.getCurrentThreadCpuTime();
} else {
return UNSUPPORTED;
}
}

/**
* Get the CPU usage information for the thread that created this {@link ThreadCpuTimer}. The
* information will track the thread's cpu since the creation of this {@link ThreadCpuTimer}
* instance, if the VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
* Get the CPU usage information for the current thread since it created this {@link
* ThreadCpuTimer}. The result is undefined if called by any other thread.
*
* @return the thread's cpu since the creation of this {@link ThreadCpuTimer} instance. If the
* VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
*/
public Optional<Long> getCpuTimeMs() {
long cpuTimeNs = getCurrentCpuTimeNs();
public Optional<Long> getElapsedCpuMs() {
long cpuTimeNs = getElapsedCpuNs();
return cpuTimeNs != UNSUPPORTED
? Optional.of(TimeUnit.MILLISECONDS.convert(cpuTimeNs, TimeUnit.NANOSECONDS))
: Optional.of(UNSUPPORTED);
: Optional.empty();
}

@Override
public String toString() {
return getCpuTimeMs().toString();
return getElapsedCpuMs().map(String::valueOf).orElse("UNSUPPORTED");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,11 @@ public void testDistribLimit() throws Exception {
// System.err.println("rsp=" + rsp.jsonStr());
assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
}

@Test
public void testDistribLimit2() throws Exception {
// This looks silly, but it actually guards against:
// https://issues.apache.org/jira/browse/SOLR-17203
testDistribLimit();
}
}

0 comments on commit f67b718

Please sign in to comment.