From abbc695ad4f1a9956a634e06d7794b34df3e1683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Wed, 6 Sep 2023 15:35:53 +0200 Subject: [PATCH] SOLR-16954 Make Circuit Breakers available for Update Requests (#1871) Co-authored-by: Christine Poerschke --- solr/CHANGES.txt | 2 + .../handler/ContentStreamHandlerBase.java | 35 ++++++++++++ .../solr/handler/component/SearchHandler.java | 13 ++--- .../util/circuitbreaker/CircuitBreaker.java | 54 +++++++++++++++++++ .../CircuitBreakerRegistry.java | 44 ++++++++------- .../solrconfig-pluggable-circuitbreaker.xml | 12 +++++ .../solr/util/BaseTestCircuitBreaker.java | 47 +++++++++++++--- .../pages/circuit-breakers.adoc | 31 +++++++++-- 8 files changed, 198 insertions(+), 40 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 13cf7644f01..5013377536d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -72,6 +72,8 @@ New Features --------------------- * SOLR-16654: Add support for node-level caches (Michael Gibney) +* SOLR-16954: Make Circuit Breakers available for Update Requests (janhoy, Christine Poerschke, Pierre Salagnac) + Improvements --------------------- * SOLR-16490: `/admin/cores?action=backupcore` now has a v2 equivalent, available at diff --git a/solr/core/src/java/org/apache/solr/handler/ContentStreamHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/ContentStreamHandlerBase.java index f00c68b39e4..fb40af3c081 100644 --- a/solr/core/src/java/org/apache/solr/handler/ContentStreamHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/ContentStreamHandlerBase.java @@ -16,6 +16,11 @@ */ package org.apache.solr.handler; +import static org.apache.solr.common.params.CommonParams.FAILURE; +import static org.apache.solr.common.params.CommonParams.STATUS; + +import java.util.List; +import org.apache.solr.client.solrj.SolrRequest.SolrRequestType; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ContentStream; @@ -26,6 +31,8 @@ import org.apache.solr.update.SolrCoreState; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorChain; +import org.apache.solr.util.circuitbreaker.CircuitBreaker; +import org.apache.solr.util.circuitbreaker.CircuitBreakerRegistry; /** * Shares common code between various handlers that manipulate {@link @@ -49,6 +56,10 @@ public void init(NamedList args) { @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { + if (checkCircuitBreakers(req, rsp)) { + return; // Circuit breaker tripped, return immediately + } + /* We track update requests so that we can preserve consistency by waiting for them to complete on a node shutdown and then immediately trigger a leader election without waiting for the core to close. @@ -101,6 +112,30 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw } } + /** + * Check if {@link SolrRequestType#UPDATE} circuit breakers are tripped. Override this method in + * sub classes that do not want to check circuit breakers. + * + * @return true if circuit breakers are tripped, false otherwise. + */ + protected boolean checkCircuitBreakers(SolrQueryRequest req, SolrQueryResponse rsp) { + CircuitBreakerRegistry circuitBreakerRegistry = req.getCore().getCircuitBreakerRegistry(); + if (circuitBreakerRegistry.isEnabled(SolrRequestType.UPDATE)) { + List trippedCircuitBreakers = + circuitBreakerRegistry.checkTripped(SolrRequestType.UPDATE); + if (trippedCircuitBreakers != null) { + String errorMessage = CircuitBreakerRegistry.toErrorMessage(trippedCircuitBreakers); + rsp.add(STATUS, FAILURE); + rsp.setException( + new SolrException( + CircuitBreaker.getErrorCode(trippedCircuitBreakers), + "Circuit Breakers tripped " + errorMessage)); + return true; + } + } + return false; + } + protected abstract ContentStreamLoader newLoader( SolrQueryRequest req, UpdateRequestProcessor processor); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java index 0d5b6c27c00..10f99ac49f8 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java @@ -35,6 +35,7 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.ExitableDirectoryReader; import org.apache.lucene.search.TotalHits; +import org.apache.solr.client.solrj.SolrRequest.SolrRequestType; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.cloud.ZkController; import org.apache.solr.common.SolrDocumentList; @@ -330,8 +331,8 @@ protected ResponseBuilder newResponseBuilder( } /** - * Check if circuit breakers are tripped. Override this method in sub classes that do not want to - * check circuit breakers. + * Check if {@link SolrRequestType#QUERY} circuit breakers are tripped. Override this method in + * sub classes that do not want to check circuit breakers. * * @return true if circuit breakers are tripped, false otherwise. */ @@ -340,18 +341,18 @@ protected boolean checkCircuitBreakers( final RTimerTree timer = rb.isDebug() ? req.getRequestTimer() : null; final CircuitBreakerRegistry circuitBreakerRegistry = req.getCore().getCircuitBreakerRegistry(); - if (circuitBreakerRegistry.isEnabled()) { + if (circuitBreakerRegistry.isEnabled(SolrRequestType.QUERY)) { List trippedCircuitBreakers; if (timer != null) { RTimerTree subt = timer.sub("circuitbreaker"); rb.setTimer(subt); - trippedCircuitBreakers = circuitBreakerRegistry.checkTripped(); + trippedCircuitBreakers = circuitBreakerRegistry.checkTripped(SolrRequestType.QUERY); rb.getTimer().stop(); } else { - trippedCircuitBreakers = circuitBreakerRegistry.checkTripped(); + trippedCircuitBreakers = circuitBreakerRegistry.checkTripped(SolrRequestType.QUERY); } if (trippedCircuitBreakers != null) { @@ -359,7 +360,7 @@ protected boolean checkCircuitBreakers( rsp.add(STATUS, FAILURE); rsp.setException( new SolrException( - SolrException.ErrorCode.SERVICE_UNAVAILABLE, + CircuitBreaker.getErrorCode(trippedCircuitBreakers), "Circuit Breakers tripped " + errorMessage)); return true; } diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java index 8959c35e7a8..3082bf82e4a 100644 --- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java @@ -17,6 +17,12 @@ package org.apache.solr.util.circuitbreaker; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.solr.client.solrj.SolrRequest.SolrRequestType; +import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.NamedListInitializedPlugin; @@ -36,6 +42,10 @@ * @lucene.experimental */ public abstract class CircuitBreaker implements NamedListInitializedPlugin { + // Only query requests are checked by default + private Set requestTypes = Set.of(SolrRequestType.QUERY); + private final List SUPPORTED_TYPES = + List.of(SolrRequestType.QUERY, SolrRequestType.UPDATE); @Override public void init(NamedList args) { @@ -52,4 +62,48 @@ public CircuitBreaker() {} /** Get error message when the circuit breaker triggers */ public abstract String getErrorMessage(); + + /** + * Set the request types for which this circuit breaker should be checked. If not called, the + * circuit breaker will be checked for the {@link SolrRequestType#QUERY} request type only. + * + * @param requestTypes list of strings representing request types + * @throws IllegalArgumentException if the request type is not valid + */ + public void setRequestTypes(List requestTypes) { + this.requestTypes = + requestTypes.stream() + .map(t -> SolrRequestType.valueOf(t.toUpperCase(Locale.ROOT))) + .peek( + t -> { + if (!SUPPORTED_TYPES.contains(t)) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Request type %s is not supported for circuit breakers", + t.name())); + } + }) + .collect(Collectors.toSet()); + } + + public Set getRequestTypes() { + return requestTypes; + } + + /** + * Return the proper error code to use in exception. For legacy use of {@link CircuitBreaker} we + * return 503 for backward compatibility, else return 429. + * + * @deprecated Remove in 10.0 + */ + @Deprecated(since = "9.4") + public static SolrException.ErrorCode getErrorCode(List trippedCircuitBreakers) { + if (trippedCircuitBreakers != null + && trippedCircuitBreakers.stream().anyMatch(cb -> cb instanceof CircuitBreakerManager)) { + return SolrException.ErrorCode.SERVICE_UNAVAILABLE; + } else { + return SolrException.ErrorCode.TOO_MANY_REQUESTS; + } + } } diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java index 0cb3fb0a378..84c2f61fb9b 100644 --- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java +++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java @@ -19,7 +19,11 @@ import com.google.common.annotations.VisibleForTesting; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.SolrRequest.SolrRequestType; /** * Keeps track of all registered circuit breaker instances for various request types. Responsible @@ -30,27 +34,37 @@ */ public class CircuitBreakerRegistry { - private final List circuitBreakerList = new ArrayList<>(); + private final Map> circuitBreakerMap = new HashMap<>(); public CircuitBreakerRegistry() {} public void register(CircuitBreaker circuitBreaker) { - circuitBreakerList.add(circuitBreaker); + circuitBreaker + .getRequestTypes() + .forEach( + r -> { + List list = + circuitBreakerMap.computeIfAbsent(r, k -> new ArrayList<>()); + list.add(circuitBreaker); + }); } @VisibleForTesting public void deregisterAll() { - circuitBreakerList.clear(); + circuitBreakerMap.clear(); } + /** * Check and return circuit breakers that have triggered * + * @param requestType {@link SolrRequestType} to check for. * @return CircuitBreakers which have triggered, null otherwise. */ - public List checkTripped() { + public List checkTripped(SolrRequestType requestType) { List triggeredCircuitBreakers = null; - for (CircuitBreaker circuitBreaker : circuitBreakerList) { + for (CircuitBreaker circuitBreaker : + circuitBreakerMap.getOrDefault(requestType, Collections.emptyList())) { if (circuitBreaker.isTripped()) { if (triggeredCircuitBreakers == null) { triggeredCircuitBreakers = new ArrayList<>(); @@ -63,22 +77,6 @@ public List checkTripped() { return triggeredCircuitBreakers; } - /** - * Returns true if *any* circuit breaker has triggered, false if none have triggered. - * - *

NOTE: This method short circuits the checking of circuit breakers -- the method will return - * as soon as it finds a circuit breaker that has triggered. - */ - public boolean checkAnyTripped() { - for (CircuitBreaker circuitBreaker : circuitBreakerList) { - if (circuitBreaker.isTripped()) { - return true; - } - } - - return false; - } - /** * Construct the final error message to be printed when circuit breakers trip. * @@ -96,7 +94,7 @@ public static String toErrorMessage(List circuitBreakerList) { return sb.toString(); } - public boolean isEnabled() { - return !circuitBreakerList.isEmpty(); + public boolean isEnabled(SolrRequestType requestType) { + return circuitBreakerMap.containsKey(requestType); } } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-pluggable-circuitbreaker.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-pluggable-circuitbreaker.xml index 660e7952e70..ad0efe5a444 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-pluggable-circuitbreaker.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-pluggable-circuitbreaker.xml @@ -80,6 +80,18 @@ 75 + + update + + + + + 80 + diff --git a/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java b/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java index 372ddf80810..025db34ed76 100644 --- a/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java +++ b/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java @@ -32,6 +32,7 @@ import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.util.circuitbreaker.CPUCircuitBreaker; import org.apache.solr.util.circuitbreaker.CircuitBreaker; +import org.apache.solr.util.circuitbreaker.CircuitBreakerManager; import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker; import org.hamcrest.MatcherAssert; import org.junit.After; @@ -79,21 +80,42 @@ public void testCBAlwaysTrips() { }); } - public void testCBFakeMemoryPressure() { + public void testCBFakeMemoryPressure() throws Exception { removeAllExistingCircuitBreakers(); - CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(); - MemoryCircuitBreaker memoryCircuitBreaker = (MemoryCircuitBreaker) circuitBreaker; + // Update and query will not trip + h.update( + "1john smith"); + h.query(req("name:\"john smith\"")); - memoryCircuitBreaker.setThreshold(75); + MemoryCircuitBreaker searchBreaker = new FakeMemoryPressureCircuitBreaker(); + searchBreaker.setThreshold(80); + // Default request type is "query" + // searchBreaker.setRequestTypes(List.of("query")); + h.getCore().getCircuitBreakerRegistry().register(searchBreaker); - h.getCore().getCircuitBreakerRegistry().register(circuitBreaker); + // Query will trip, but not update due to defaults + expectThrows(SolrException.class, () -> h.query(req("name:\"john smith\""))); + h.update( + "2john smith"); + + MemoryCircuitBreaker updateBreaker = new FakeMemoryPressureCircuitBreaker(); + updateBreaker.setThreshold(75); + updateBreaker.setRequestTypes(List.of("update")); + h.getCore().getCircuitBreakerRegistry().register(updateBreaker); + // Now also update will trip expectThrows( SolrException.class, - () -> { - h.query(req("name:\"john smith\"")); - }); + () -> + h.update( + "1john smith")); + } + + public void testBadRequestType() { + expectThrows( + IllegalArgumentException.class, + () -> new MemoryCircuitBreaker().setRequestTypes(List.of("badRequestType"))); } public void testBuildingMemoryPressure() { @@ -238,6 +260,15 @@ public void testResponseWithCBTiming() { "//lst[@name='process']/double[@name='time']"); } + public void testErrorCode() { + assertEquals( + SolrException.ErrorCode.SERVICE_UNAVAILABLE, + CircuitBreaker.getErrorCode(List.of(new CircuitBreakerManager()))); + assertEquals( + SolrException.ErrorCode.TOO_MANY_REQUESTS, + CircuitBreaker.getErrorCode(List.of(new MemoryCircuitBreaker()))); + } + private void removeAllExistingCircuitBreakers() { h.getCore().getCircuitBreakerRegistry().deregisterAll(); } diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc index a96658e61bf..b517a2dd195 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/circuit-breakers.adoc @@ -22,18 +22,19 @@ resource configuration. == When To Use Circuit Breakers Circuit breakers should be used when the user wishes to trade request throughput for a higher Solr stability. -If circuit breakers are enabled, requests may be rejected under the condition of high node duress with an appropriate HTTP error code (typically 503). +If circuit breakers are enabled, requests may be rejected under the condition of high node duress with HTTP error code 429 'Too Many Requests'. It is up to the client to handle this error and potentially build a retrial logic as this should ideally be a transient situation. == Circuit Breaker Configurations All circuit breaker configurations are listed as independent `` entries in `solrconfig.xml` as shown below. +A circuit breaker can register itself to trip for query requests and/or update requests. By default only search requests are affected. A user may register multiple circuit breakers of the same type with different thresholds for each request type. == Currently Supported Circuit Breakers === JVM Heap Usage -This circuit breaker tracks JVM heap memory usage and rejects incoming search requests with a 503 error code if the heap usage exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). +This circuit breaker tracks JVM heap memory usage and rejects incoming requests with a 429 error code if the heap usage exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is controlling the threshold percentage at which the breaker will trip. To enable and configure the JVM heap usage based circuit breaker, add the following: @@ -72,8 +73,32 @@ To enable and configure the CPU utilization based circuit breaker: The `threshold` is defined in units of CPU utilization. +== Advanced example + +In this example we will prevent update requests above 80% CPU load, and prevent query requests above 95% CPU load. Supported request types are `query` and `update`. +This would prevent expensive bulk updates from impacting search. Note also the support for short-form class name. + +[source,xml] +---- + + + 80 + + update + + + + + 95 + + query + + + +---- + == Performance Considerations -It is worth noting that while JVM or CPU circuit breakers do not add any noticeable overhead per query, having too many circuit breakers checked for a single request can cause a performance overhead. +It is worth noting that while JVM or CPU circuit breakers do not add any noticeable overhead per request, having too many circuit breakers checked for a single request can cause a performance overhead. In addition, it is a good practice to exponentially back off while retrying requests on a busy node.