Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-15056: add circuit breaker for CPU, fix load circuit breaker #96

Merged
merged 4 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ New Features

* SOLR-16954: Make Circuit Breakers available for Update Requests (janhoy, Christine Poerschke, Pierre Salagnac)

* SOLR-15056: A new Circuit breaker for percentage of CPU utilization is added. The former "CPU" circuit breaker
is now more correctly named LoadAverageCircuitBreaker as it trips on system load average which is not a percentage.
Users of legacy CircuitBreakerManager are not affected by this change. (Walter Underwood, janhoy, Christine Poerschke, Atri Sharma)

* SOLR-15771: bin/auth creates reasonable roles and permissions for security: 'search', 'index', 'admin', and 'superadmin' and assigns user superadmin role. (Eric Pugh, janhoy)

* SOLR-15367: Convert "rid" functionality into a default Tracer (Alex Deparvu, David Smiley)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,56 +17,63 @@

package org.apache.solr.util.circuitbreaker;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.Metric;
import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.metrics.SolrMetricManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Tracks current CPU usage and triggers if the specified threshold is breached.
*
* <p>This circuit breaker gets the average CPU load over the last minute and uses that data to take
* a decision. We depend on OperatingSystemMXBean which does not allow a configurable interval of
* collection of data. //TODO: Use Codahale Meter to calculate the value locally.
*
* <p>The configuration to define which mode to use and the trigger threshold are defined in
* solrconfig.xml
* <p>This circuit breaker gets the recent average CPU usage and uses that data to take a decision.
* We depend on OperatingSystemMXBean which does not allow a configurable interval of collection of
wrunderwood marked this conversation as resolved.
Show resolved Hide resolved
* data.
*/
public class CPUCircuitBreaker extends CircuitBreaker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final OperatingSystemMXBean operatingSystemMXBean =
ManagementFactory.getOperatingSystemMXBean();

private boolean enabled = true;
private double cpuUsageThreshold;
private final SolrCore core;

private static final ThreadLocal<Double> seenCPUUsage = ThreadLocal.withInitial(() -> 0.0);

private static final ThreadLocal<Double> allowedCPUUsage = ThreadLocal.withInitial(() -> 0.0);

public CPUCircuitBreaker() {
public CPUCircuitBreaker(SolrCore core) {
super();
}

public void setThreshold(double threshold) {
this.cpuUsageThreshold = threshold;
this.core = core;
}

@Override
public boolean isTripped() {

double localAllowedCPUUsage = getCpuUsageThreshold();
public void init(NamedList<?> args) {
super.init(args);
double localSeenCPUUsage = calculateLiveCPUUsage();

if (localSeenCPUUsage < 0) {
if (log.isWarnEnabled()) {
String msg = "Unable to get CPU usage";

log.warn(msg);
String msg =
"Initialization failure for CPU circuit breaker. Unable to get 'systemCpuLoad', not supported by the JVM?";
if (log.isErrorEnabled()) {
log.error(msg);
}
enabled = false;
}
}

@Override
public boolean isTripped() {
if (!enabled) {
if (log.isDebugEnabled()) {
log.debug("CPU circuit breaker is disabled due to initialization failure.");
}
return false;
}
double localAllowedCPUUsage = getCpuUsageThreshold();
double localSeenCPUUsage = calculateLiveCPUUsage();

allowedCPUUsage.set(localAllowedCPUUsage);

Expand All @@ -84,11 +91,50 @@ public String getErrorMessage() {
+ allowedCPUUsage.get();
}

public void setThreshold(double thresholdValueInPercentage) {
if (thresholdValueInPercentage > 100) {
throw new IllegalArgumentException("Invalid Invalid threshold value.");
}

if (thresholdValueInPercentage <= 0) {
throw new IllegalStateException("Threshold cannot be less than or equal to zero");
}
cpuUsageThreshold = thresholdValueInPercentage;
}

public double getCpuUsageThreshold() {
return cpuUsageThreshold;
}

/**
* Calculate the CPU usage for the system in percentage.
*
* @return Percent CPU usage, or -1 if the value could not be obtained.
*/
protected double calculateLiveCPUUsage() {
wrunderwood marked this conversation as resolved.
Show resolved Hide resolved
return operatingSystemMXBean.getSystemLoadAverage();
// TODO: Use Codahale Meter to calculate the value
Metric metric =
this.core
.getCoreContainer()
.getMetricManager()
.registry("solr.jvm")
.getMetrics()
.get("os.systemCpuLoad");

if (metric == null) {
return -1.0;
}

if (metric instanceof Gauge) {
@SuppressWarnings({"rawtypes"})
Gauge gauge = (Gauge) metric;
// unwrap if needed
if (gauge instanceof SolrMetricManager.GaugeWrapper) {
gauge = ((SolrMetricManager.GaugeWrapper) gauge).getGauge();
}
return (Double) gauge.getValue() * 100;
}

return -1.0; // Unable to unpack metric
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.slf4j.LoggerFactory;

/**
* Single CircuitBreaker that registers both a Memory and a CPU CircuitBreaker. This is only for
* backward compatibility with the 9.x versions prior to 9.4.
* Single CircuitBreaker that registers both a Memory and a LoadAverage CircuitBreaker. This is only
* for backward compatibility with the 9.x versions prior to 9.4.
*
* @deprecated Use individual Circuit Breakers instead
*/
Expand All @@ -36,7 +36,7 @@ public class CircuitBreakerManager extends CircuitBreaker {
private int memThreshold = 100;
private int cpuThreshold = 100;
private MemoryCircuitBreaker memCB;
private CPUCircuitBreaker cpuCB;
private LoadAverageCircuitBreaker cpuCB;

public CircuitBreakerManager() {
super();
Expand Down Expand Up @@ -71,7 +71,8 @@ public void init(NamedList<?> args) {
memCB.setThreshold(memThreshold);
}
if (cpuEnabled) {
cpuCB = new CPUCircuitBreaker();
// In SOLR-15056 CPUCircuitBreaker was renamed to LoadAverageCircuitBreaker, need back-compat
cpuCB = new LoadAverageCircuitBreaker();
janhoy marked this conversation as resolved.
Show resolved Hide resolved
cpuCB.setThreshold(cpuThreshold);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.util.circuitbreaker;

import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Tracks current system load average and triggers if the specified threshold is breached.
*
* <p>This circuit breaker gets the load average (length of the run queue) over the last minute and
* uses that data to take a decision. We depend on OperatingSystemMXBean which does not allow a
* configurable interval of collection of data.
*/
public class LoadAverageCircuitBreaker extends CircuitBreaker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final OperatingSystemMXBean operatingSystemMXBean =
ManagementFactory.getOperatingSystemMXBean();

private double loadAverageThreshold;

// Assumption -- the value of these parameters will be set correctly before invoking
// getDebugInfo()
private static final ThreadLocal<Double> seenLoadAverage = ThreadLocal.withInitial(() -> 0.0);

private static final ThreadLocal<Double> allowedLoadAverage = ThreadLocal.withInitial(() -> 0.0);

public LoadAverageCircuitBreaker() {
super();
}

@Override
public boolean isTripped() {
double localAllowedLoadAverage = getLoadAverageThreshold();
double localSeenLoadAverage = calculateLiveLoadAverage();

if (localSeenLoadAverage < 0) {
if (log.isWarnEnabled()) {
janhoy marked this conversation as resolved.
Show resolved Hide resolved
String msg = "Unable to get load average";

log.warn(msg);
}

return false;
}

allowedLoadAverage.set(localAllowedLoadAverage);

seenLoadAverage.set(localSeenLoadAverage);

return (localSeenLoadAverage >= localAllowedLoadAverage);
}

@Override
public String getErrorMessage() {
return "Load Average Circuit Breaker triggered as seen load average is above allowed threshold."
+ "Seen load average "
+ seenLoadAverage.get()
+ " and allocated threshold "
+ allowedLoadAverage.get();
}

public void setThreshold(double thresholdValueUnbounded) {
if (thresholdValueUnbounded <= 0) {
throw new IllegalStateException("Threshold cannot be less than or equal to zero");
}
loadAverageThreshold = thresholdValueUnbounded;
}

public double getLoadAverageThreshold() {
return loadAverageThreshold;
}

protected double calculateLiveLoadAverage() {
return operatingSystemMXBean.getSystemLoadAverage();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@
<double name="threshold">75</double>
</circuitBreaker>

<circuitBreaker class="solr.LoadAverageCircuitBreaker">
<double name="threshold">3</double>
</circuitBreaker>

<initParams path="/select">
<lst name="defaults">
<str name="df">text</str>
Expand Down
Loading
Loading