Skip to content

Commit

Permalink
SOLR-15056: add circuit breaker for CPU, fix load circuit breaker (#96)
Browse files Browse the repository at this point in the history
Co-authored-by: Jan Høydahl <[email protected]>
  • Loading branch information
wrunderwood and janhoy authored Sep 20, 2023
1 parent c289744 commit 51c1a78
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 85 deletions.
4 changes: 4 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ New Features

* SOLR-16954: Make Circuit Breakers available for Update Requests (janhoy, Christine Poerschke, Pierre Salagnac)

* SOLR-15056: A new Circuit breaker for percentage of CPU utilization is added. The former "CPU" circuit breaker
is now more correctly named LoadAverageCircuitBreaker as it trips on system load average which is not a percentage.
Users of legacy CircuitBreakerManager are not affected by this change. (Walter Underwood, janhoy, Christine Poerschke, Atri Sharma)

* SOLR-15771: bin/auth creates reasonable roles and permissions for security: 'search', 'index', 'admin', and 'superadmin' and assigns user superadmin role. (Eric Pugh, janhoy)

* SOLR-15367: Convert "rid" functionality into a default Tracer (Alex Deparvu, David Smiley)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,56 +17,63 @@

package org.apache.solr.util.circuitbreaker;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.Metric;
import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.metrics.SolrMetricManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Tracks current CPU usage and triggers if the specified threshold is breached.
*
* <p>This circuit breaker gets the average CPU load over the last minute and uses that data to take
* a decision. We depend on OperatingSystemMXBean which does not allow a configurable interval of
* collection of data. //TODO: Use Codahale Meter to calculate the value locally.
*
* <p>The configuration to define which mode to use and the trigger threshold are defined in
* solrconfig.xml
* <p>This circuit breaker gets the recent average CPU usage and uses that data to take a decision.
* We depend on OperatingSystemMXBean which does not allow a configurable interval of collection of
* data.
*/
public class CPUCircuitBreaker extends CircuitBreaker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final OperatingSystemMXBean operatingSystemMXBean =
ManagementFactory.getOperatingSystemMXBean();

private boolean enabled = true;
private double cpuUsageThreshold;
private final SolrCore core;

private static final ThreadLocal<Double> seenCPUUsage = ThreadLocal.withInitial(() -> 0.0);

private static final ThreadLocal<Double> allowedCPUUsage = ThreadLocal.withInitial(() -> 0.0);

public CPUCircuitBreaker() {
public CPUCircuitBreaker(SolrCore core) {
super();
}

public void setThreshold(double threshold) {
this.cpuUsageThreshold = threshold;
this.core = core;
}

@Override
public boolean isTripped() {

double localAllowedCPUUsage = getCpuUsageThreshold();
public void init(NamedList<?> args) {
super.init(args);
double localSeenCPUUsage = calculateLiveCPUUsage();

if (localSeenCPUUsage < 0) {
if (log.isWarnEnabled()) {
String msg = "Unable to get CPU usage";

log.warn(msg);
String msg =
"Initialization failure for CPU circuit breaker. Unable to get 'systemCpuLoad', not supported by the JVM?";
if (log.isErrorEnabled()) {
log.error(msg);
}
enabled = false;
}
}

@Override
public boolean isTripped() {
if (!enabled) {
if (log.isDebugEnabled()) {
log.debug("CPU circuit breaker is disabled due to initialization failure.");
}
return false;
}
double localAllowedCPUUsage = getCpuUsageThreshold();
double localSeenCPUUsage = calculateLiveCPUUsage();

allowedCPUUsage.set(localAllowedCPUUsage);

Expand All @@ -84,11 +91,50 @@ public String getErrorMessage() {
+ allowedCPUUsage.get();
}

/**
 * Sets the CPU usage threshold at which this circuit breaker trips.
 *
 * @param thresholdValueInPercentage the threshold as a percentage; must be greater than 0 and
 *     not greater than 100
 * @throws IllegalArgumentException if the value is greater than 100
 * @throws IllegalStateException if the value is less than or equal to zero
 */
public void setThreshold(double thresholdValueInPercentage) {
  if (thresholdValueInPercentage > 100) {
    // Message previously read "Invalid Invalid threshold value."; de-duplicated and made
    // actionable by stating the limit and the offending value.
    throw new IllegalArgumentException(
        "Invalid threshold value. Threshold cannot be greater than 100, got "
            + thresholdValueInPercentage);
  }

  if (thresholdValueInPercentage <= 0) {
    throw new IllegalStateException("Threshold cannot be less than or equal to zero");
  }
  cpuUsageThreshold = thresholdValueInPercentage;
}

/**
 * Returns the CPU usage threshold currently configured for this circuit breaker.
 *
 * @return the value stored in {@code cpuUsageThreshold}
 */
public double getCpuUsageThreshold() {
  return this.cpuUsageThreshold;
}

/**
* Calculate the CPU usage for the system in percentage.
*
* @return Percent CPU usage, or -1 if the value could not be obtained.
*/
protected double calculateLiveCPUUsage() {
return operatingSystemMXBean.getSystemLoadAverage();
// TODO: Use Codahale Meter to calculate the value
Metric metric =
this.core
.getCoreContainer()
.getMetricManager()
.registry("solr.jvm")
.getMetrics()
.get("os.systemCpuLoad");

if (metric == null) {
return -1.0;
}

if (metric instanceof Gauge) {
@SuppressWarnings({"rawtypes"})
Gauge gauge = (Gauge) metric;
// unwrap if needed
if (gauge instanceof SolrMetricManager.GaugeWrapper) {
gauge = ((SolrMetricManager.GaugeWrapper) gauge).getGauge();
}
return (Double) gauge.getValue() * 100;
}

return -1.0; // Unable to unpack metric
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.slf4j.LoggerFactory;

/**
* Single CircuitBreaker that registers both a Memory and a CPU CircuitBreaker. This is only for
* backward compatibility with the 9.x versions prior to 9.4.
* Single CircuitBreaker that registers both a Memory and a LoadAverage CircuitBreaker. This is only
* for backward compatibility with the 9.x versions prior to 9.4.
*
* @deprecated Use individual Circuit Breakers instead
*/
Expand All @@ -36,7 +36,7 @@ public class CircuitBreakerManager extends CircuitBreaker {
private int memThreshold = 100;
private int cpuThreshold = 100;
private MemoryCircuitBreaker memCB;
private CPUCircuitBreaker cpuCB;
private LoadAverageCircuitBreaker cpuCB;

public CircuitBreakerManager() {
super();
Expand Down Expand Up @@ -71,7 +71,8 @@ public void init(NamedList<?> args) {
memCB.setThreshold(memThreshold);
}
if (cpuEnabled) {
cpuCB = new CPUCircuitBreaker();
// In SOLR-15056 CPUCircuitBreaker was renamed to LoadAverageCircuitBreaker, need back-compat
cpuCB = new LoadAverageCircuitBreaker();
cpuCB.setThreshold(cpuThreshold);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.util.circuitbreaker;

import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tracks current system load average and triggers if the specified threshold is breached.
 *
 * <p>This circuit breaker gets the load average (length of the run queue) over the last minute and
 * uses that data to take a decision. We depend on {@link OperatingSystemMXBean} which does not
 * allow a configurable interval of collection of data.
 *
 * <p>The threshold is an unbounded positive number (not a percentage) and must be set through
 * {@link #setThreshold(double)} before the breaker is used.
 */
public class LoadAverageCircuitBreaker extends CircuitBreaker {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  private static final OperatingSystemMXBean operatingSystemMXBean =
      ManagementFactory.getOperatingSystemMXBean();

  private double loadAverageThreshold;

  // Assumption -- these per-thread values are populated by isTripped() before the same thread
  // invokes getErrorMessage(); otherwise the message reports the initial value 0.0.
  private static final ThreadLocal<Double> seenLoadAverage = ThreadLocal.withInitial(() -> 0.0);

  private static final ThreadLocal<Double> allowedLoadAverage = ThreadLocal.withInitial(() -> 0.0);

  public LoadAverageCircuitBreaker() {
    super();
  }

  /**
   * Compares the live load average against the configured threshold.
   *
   * <p>As a side effect, records the seen and allowed values in thread-local storage so that
   * {@link #getErrorMessage()} can report them. Nothing is recorded when the load average is
   * unavailable.
   *
   * @return {@code true} if the seen load average is greater than or equal to the threshold;
   *     {@code false} otherwise, including when the load average could not be obtained
   */
  @Override
  public boolean isTripped() {
    double localAllowedLoadAverage = getLoadAverageThreshold();
    double localSeenLoadAverage = calculateLiveLoadAverage();

    // A negative reading is treated as "not available": never trip on it.
    if (localSeenLoadAverage < 0) {
      // Constant message, so no isWarnEnabled() guard is needed.
      log.warn("Unable to get load average");
      return false;
    }

    allowedLoadAverage.set(localAllowedLoadAverage);
    seenLoadAverage.set(localSeenLoadAverage);

    return localSeenLoadAverage >= localAllowedLoadAverage;
  }

  /**
   * Builds the message describing why the breaker tripped, using the values recorded by the most
   * recent {@link #isTripped()} call on the current thread.
   *
   * @return the error message including the seen load average and the threshold
   */
  @Override
  public String getErrorMessage() {
    // Trailing space after the first sentence: it was previously missing, which glued
    // "threshold." and "Seen" together in the emitted message.
    return "Load Average Circuit Breaker triggered as seen load average is above allowed threshold. "
        + "Seen load average "
        + seenLoadAverage.get()
        + " and allocated threshold "
        + allowedLoadAverage.get();
  }

  /**
   * Sets the load average at or above which this circuit breaker trips.
   *
   * @param thresholdValueUnbounded the threshold; must be greater than zero, no upper bound is
   *     enforced
   * @throws IllegalStateException if the value is less than or equal to zero
   */
  public void setThreshold(double thresholdValueUnbounded) {
    if (thresholdValueUnbounded <= 0) {
      throw new IllegalStateException("Threshold cannot be less than or equal to zero");
    }
    loadAverageThreshold = thresholdValueUnbounded;
  }

  /**
   * Returns the configured load average threshold.
   *
   * @return the threshold last set via {@link #setThreshold(double)}
   */
  public double getLoadAverageThreshold() {
    return loadAverageThreshold;
  }

  /**
   * Reads the current system load average from the {@link OperatingSystemMXBean}.
   *
   * <p>Protected so that subclasses can supply their own value.
   *
   * @return the value of {@link OperatingSystemMXBean#getSystemLoadAverage()}; callers treat a
   *     negative value as unavailable
   */
  protected double calculateLiveLoadAverage() {
    return operatingSystemMXBean.getSystemLoadAverage();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@
<double name="threshold">75</double>
</circuitBreaker>

<circuitBreaker class="solr.LoadAverageCircuitBreaker">
<double name="threshold">3</double>
</circuitBreaker>

<initParams path="/select">
<lst name="defaults">
<str name="df">text</str>
Expand Down
Loading

0 comments on commit 51c1a78

Please sign in to comment.