diff --git a/api/envoy/api/v2/cluster/BUILD b/api/envoy/api/v2/cluster/BUILD
index 69168ad0cf24..8ac86d1fd7ad 100644
--- a/api/envoy/api/v2/cluster/BUILD
+++ b/api/envoy/api/v2/cluster/BUILD
@@ -7,6 +7,7 @@ licenses(["notice"])  # Apache 2
 api_proto_package(
     deps = [
         "//envoy/api/v2/core:pkg",
+        "//envoy/type:pkg",
         "@com_github_cncf_udpa//udpa/annotations:pkg",
     ],
 )
diff --git a/api/envoy/api/v2/cluster/circuit_breaker.proto b/api/envoy/api/v2/cluster/circuit_breaker.proto
index ab9f5b068381..a40eca8929d0 100644
--- a/api/envoy/api/v2/cluster/circuit_breaker.proto
+++ b/api/envoy/api/v2/cluster/circuit_breaker.proto
@@ -3,6 +3,7 @@ syntax = "proto3";
 package envoy.api.v2.cluster;
 
 import "envoy/api/v2/core/base.proto";
+import "envoy/type/percent.proto";
 
 import "google/protobuf/wrappers.proto";
 
@@ -23,8 +24,23 @@ option (udpa.annotations.file_migrate).move_to_package = "envoy.config.cluster.v
 message CircuitBreakers {
   // A Thresholds defines CircuitBreaker settings for a
   // :ref:`RoutingPriority<envoy_api_enum_core.RoutingPriority>`.
-  // [#next-free-field: 8]
+  // [#next-free-field: 9]
   message Thresholds {
+    message RetryBudget {
+      // Specifies the limit on concurrent retries as a percentage of the sum of active requests and
+      // active pending requests. For example, if there are 100 active requests and the
+      // budget_percent is set to 25, there may be 25 active retries.
+      //
+      // This parameter is optional. Defaults to 20%.
+      type.Percent budget_percent = 1;
+
+      // Specifies the minimum retry concurrency allowed for the retry budget. The limit on the
+      // number of active retries may never go below this number.
+      //
+      // This parameter is optional. Defaults to 3.
+      google.protobuf.UInt32Value min_retry_concurrency = 2;
+    }
+
     // The :ref:`RoutingPriority<envoy_api_enum_core.RoutingPriority>`
     // the specified CircuitBreaker settings apply to.
     core.RoutingPriority priority = 1 [(validate.rules).enum = {defined_only: true}];
@@ -45,9 +61,23 @@ message CircuitBreakers {
     // upstream cluster. If not specified, the default is 3.
     google.protobuf.UInt32Value max_retries = 5;
 
+    // Specifies a limit on concurrent retries in relation to the number of active requests. This
+    // parameter is optional.
+    //
+    // .. note::
+    //
+    //    If this field is set, the retry budget will override any configured retry circuit
+    //    breaker.
+    RetryBudget retry_budget = 8;
+
     // If track_remaining is true, then stats will be published that expose
     // the number of resources remaining until the circuit breakers open. If
     // not specified, the default is false.
+    //
+    // .. note::
+    //
+    //    If a retry budget is used in lieu of the max_retries circuit breaker,
+    //    the remaining retry resources will not be tracked.
     bool track_remaining = 6;
 
     // The maximum number of connection pools per cluster that Envoy will concurrently support at
diff --git a/api/envoy/config/cluster/v3alpha/circuit_breaker.proto b/api/envoy/config/cluster/v3alpha/circuit_breaker.proto
index b9c1cb729451..a84cec0d2872 100644
--- a/api/envoy/config/cluster/v3alpha/circuit_breaker.proto
+++ b/api/envoy/config/cluster/v3alpha/circuit_breaker.proto
@@ -3,6 +3,7 @@ syntax = "proto3";
 package envoy.config.cluster.v3alpha;
 
 import "envoy/config/core/v3alpha/base.proto";
+import "envoy/type/v3alpha/percent.proto";
 
 import "google/protobuf/wrappers.proto";
 
@@ -24,11 +25,29 @@ message CircuitBreakers {
 
   // A Thresholds defines CircuitBreaker settings for a
   // :ref:`RoutingPriority<envoy_api_enum_config.core.v3alpha.RoutingPriority>`.
-  // [#next-free-field: 8]
+  // [#next-free-field: 9]
   message Thresholds {
     option (udpa.annotations.versioning).previous_message_type =
         "envoy.api.v2.cluster.CircuitBreakers.Thresholds";
 
+    message RetryBudget {
+      option (udpa.annotations.versioning).previous_message_type =
+          "envoy.api.v2.cluster.CircuitBreakers.Thresholds.RetryBudget";
+
+      // Specifies the limit on concurrent retries as a percentage of the sum of active requests and
+      // active pending requests. For example, if there are 100 active requests and the
+      // budget_percent is set to 25, there may be 25 active retries.
+      //
+      // This parameter is optional. Defaults to 20%.
+      type.v3alpha.Percent budget_percent = 1;
+
+      // Specifies the minimum retry concurrency allowed for the retry budget. The limit on the
+      // number of active retries may never go below this number.
+      //
+      // This parameter is optional. Defaults to 3.
+      google.protobuf.UInt32Value min_retry_concurrency = 2;
+    }
+
     // The :ref:`RoutingPriority<envoy_api_enum_config.core.v3alpha.RoutingPriority>`
     // the specified CircuitBreaker settings apply to.
     core.v3alpha.RoutingPriority priority = 1 [(validate.rules).enum = {defined_only: true}];
@@ -49,9 +68,23 @@ message CircuitBreakers {
     // upstream cluster. If not specified, the default is 3.
     google.protobuf.UInt32Value max_retries = 5;
 
+    // Specifies a limit on concurrent retries in relation to the number of active requests. This
+    // parameter is optional.
+    //
+    // .. note::
+    //
+    //    If this field is set, the retry budget will override any configured retry circuit
+    //    breaker.
+    RetryBudget retry_budget = 8;
+
     // If track_remaining is true, then stats will be published that expose
     // the number of resources remaining until the circuit breakers open. If
     // not specified, the default is false.
+    //
+    // .. note::
+    //
+    //    If a retry budget is used in lieu of the max_retries circuit breaker,
+    //    the remaining retry resources will not be tracked.
     bool track_remaining = 6;
 
     // The maximum number of connection pools per cluster that Envoy will concurrently support at
diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_runtime.rst b/docs/root/configuration/upstream/cluster_manager/cluster_runtime.rst
index 195d025c24bc..34e0cb9058eb 100644
--- a/docs/root/configuration/upstream/cluster_manager/cluster_runtime.rst
+++ b/docs/root/configuration/upstream/cluster_manager/cluster_runtime.rst
@@ -165,3 +165,9 @@ circuit_breakers.<cluster_name>.<priority>.max_requests
 
 circuit_breakers.<cluster_name>.<priority>.max_retries
   :ref:`Max retries circuit breaker setting <envoy_api_field_cluster.CircuitBreakers.Thresholds.max_retries>`
+
+circuit_breakers.<cluster_name>.<priority>.retry_budget.budget_percent
+  :ref:`Retry budget percentage setting <envoy_api_field_cluster.CircuitBreakers.Thresholds.RetryBudget.budget_percent>`
+
+circuit_breakers.<cluster_name>.<priority>.retry_budget.min_retry_concurrency
+  :ref:`Retry budget minimum retry concurrency setting <envoy_api_field_cluster.CircuitBreakers.Thresholds.RetryBudget.min_retry_concurrency>`
diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst
index 57bd438109b6..b464dcdf78bb 100644
--- a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst
+++ b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst
@@ -74,7 +74,7 @@ Every cluster has a statistics tree rooted at *cluster.<name>.* with the followi
   upstream_rq_tx_reset, Counter, Total requests that were reset locally
   upstream_rq_retry, Counter, Total request retries
   upstream_rq_retry_success, Counter, Total request retry successes
-  upstream_rq_retry_overflow, Counter, Total requests not retried due to circuit breaking
+  upstream_rq_retry_overflow, Counter, Total requests not retried due to circuit breaking or exceeding the retry budget
   upstream_flow_control_paused_reading_total, Counter, Total number of times flow control paused reading from upstream
   upstream_flow_control_resumed_reading_total, Counter, Total number of times flow control resumed reading from upstream
   upstream_flow_control_backed_up_total, Counter, Total number of times the upstream connection backed up and paused reads from downstream
diff --git a/docs/root/intro/arch_overview/http/http_routing.rst b/docs/root/intro/arch_overview/http/http_routing.rst
index fddf3f964cbd..bfbd268a56f1 100644
--- a/docs/root/intro/arch_overview/http/http_routing.rst
+++ b/docs/root/intro/arch_overview/http/http_routing.rst
@@ -106,6 +106,8 @@ headers <config_http_filters_router_headers_consumed>`. The following configurat
 * **Retry conditions**: Envoy can retry on different types of conditions depending on application
   requirements. For example, network failure, all 5xx response codes, idempotent 4xx response codes,
   etc.
+* **Retry budgets**: Envoy can limit the proportion of active requests that can be retries via :ref:`retry budgets <envoy_api_field_cluster.CircuitBreakers.Thresholds.retry_budget>`, to
+  prevent retries from contributing to large increases in traffic volume.
 * **Host selection retry plugins**: Envoy can be configured to apply additional logic to the host
   selection logic when selecting hosts for retries. Specifying a
   :ref:`retry host predicate <envoy_api_field_route.RetryPolicy.retry_host_predicate>`
diff --git a/docs/root/intro/arch_overview/upstream/circuit_breaking.rst b/docs/root/intro/arch_overview/upstream/circuit_breaking.rst
index 57dc097dba90..f96066416a78 100644
--- a/docs/root/intro/arch_overview/upstream/circuit_breaking.rst
+++ b/docs/root/intro/arch_overview/upstream/circuit_breaking.rst
@@ -30,8 +30,8 @@ configure and code each application independently. Envoy supports various types
   overflows the :ref:`upstream_rq_pending_overflow <config_cluster_manager_cluster_stats>` counter
   for the cluster will increment.
 * **Cluster maximum active retries**: The maximum number of retries that can be outstanding to all
-  hosts in a cluster at any given time. In general we recommend aggressively circuit breaking
-  retries so that retries for sporadic failures are allowed but the overall retry volume cannot
+  hosts in a cluster at any given time. In general we recommend using :ref:`retry budgets <envoy_api_field_cluster.CircuitBreakers.Thresholds.retry_budget>`; however, if static circuit breaking is preferred, we recommend aggressively circuit breaking
+  retries. This is so that retries for sporadic failures are allowed, but the overall retry volume cannot
   explode and cause large scale cascading failure. If this circuit breaker overflows the
   :ref:`upstream_rq_retry_overflow <config_cluster_manager_cluster_stats>` counter for the cluster
   will increment.
diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
index d246a0ffcf28..c5df944fa29f 100644
--- a/docs/root/intro/version_history.rst
+++ b/docs/root/intro/version_history.rst
@@ -29,13 +29,13 @@ Version history
 * redis: performance improvement for larger split commands by avoiding string copies.
 * redis: correctly follow MOVE/ASK redirection for mirrored clusters.
 * redis: add :ref:`host_degraded_refresh_threshold <envoy_api_field_config.cluster.redis.RedisClusterConfig.host_degraded_refresh_threshold>` and :ref:`failure_refresh_threshold <envoy_api_field_config.cluster.redis.RedisClusterConfig.failure_refresh_threshold>` to refresh topology when nodes are degraded or when requests fails.
-* router: added support for REQ(header-name) :ref:`header formatter <config_http_conn_man_headers_custom_request_headers>`.
+* router check tool: added support for testing and marking coverage for routes of runtime fraction 0.
 * router: added :ref:`request_mirror_policies<envoy_api_field_route.RouteAction.request_mirror_policies>` to support sending multiple mirrored requests in one route.
-* router: allow using a :ref:`query parameter
-  <envoy_api_field_route.RouteAction.HashPolicy.query_parameter>` for HTTP consistent hashing.
-* router: skip the Location header when the response code is not a 201 or a 3xx.
+* router: added support for REQ(header-name) :ref:`header formatter <config_http_conn_man_headers_custom_request_headers>`.
+* router: added support for percentage-based :ref:`retry budgets <envoy_api_field_cluster.CircuitBreakers.Thresholds.retry_budget>`.
+* router: allow using a :ref:`query parameter <envoy_api_field_route.RouteAction.HashPolicy.query_parameter>` for HTTP consistent hashing.
 * router: exposed DOWNSTREAM_REMOTE_ADDRESS as custom HTTP request/response headers.
-* router check tool: added support for testing and marking coverage for routes of runtime fraction 0.
+* router: skip the Location header when the response code is not a 201 or a 3xx.
 * server: added the :option:`--disable-extensions` CLI option, to disable extensions at startup.
 * server: fixed a bug in config validation for configs with runtime layers.
 * tcp_proxy: added :ref:`ClusterWeight.metadata_match<envoy_api_field_config.filter.network.tcp_proxy.v2.TcpProxy.WeightedCluster.ClusterWeight.metadata_match>`.
diff --git a/generated_api_shadow/envoy/api/v2/cluster/BUILD b/generated_api_shadow/envoy/api/v2/cluster/BUILD
index 69168ad0cf24..8ac86d1fd7ad 100644
--- a/generated_api_shadow/envoy/api/v2/cluster/BUILD
+++ b/generated_api_shadow/envoy/api/v2/cluster/BUILD
@@ -7,6 +7,7 @@ licenses(["notice"])  # Apache 2
 api_proto_package(
     deps = [
         "//envoy/api/v2/core:pkg",
+        "//envoy/type:pkg",
         "@com_github_cncf_udpa//udpa/annotations:pkg",
     ],
 )
diff --git a/generated_api_shadow/envoy/api/v2/cluster/circuit_breaker.proto b/generated_api_shadow/envoy/api/v2/cluster/circuit_breaker.proto
index ab9f5b068381..a40eca8929d0 100644
--- a/generated_api_shadow/envoy/api/v2/cluster/circuit_breaker.proto
+++ b/generated_api_shadow/envoy/api/v2/cluster/circuit_breaker.proto
@@ -3,6 +3,7 @@ syntax = "proto3";
 package envoy.api.v2.cluster;
 
 import "envoy/api/v2/core/base.proto";
+import "envoy/type/percent.proto";
 
 import "google/protobuf/wrappers.proto";
 
@@ -23,8 +24,23 @@ option (udpa.annotations.file_migrate).move_to_package = "envoy.config.cluster.v
 message CircuitBreakers {
   // A Thresholds defines CircuitBreaker settings for a
   // :ref:`RoutingPriority<envoy_api_enum_core.RoutingPriority>`.
-  // [#next-free-field: 8]
+  // [#next-free-field: 9]
   message Thresholds {
+    message RetryBudget {
+      // Specifies the limit on concurrent retries as a percentage of the sum of active requests and
+      // active pending requests. For example, if there are 100 active requests and the
+      // budget_percent is set to 25, there may be 25 active retries.
+      //
+      // This parameter is optional. Defaults to 20%.
+      type.Percent budget_percent = 1;
+
+      // Specifies the minimum retry concurrency allowed for the retry budget. The limit on the
+      // number of active retries may never go below this number.
+      //
+      // This parameter is optional. Defaults to 3.
+      google.protobuf.UInt32Value min_retry_concurrency = 2;
+    }
+
     // The :ref:`RoutingPriority<envoy_api_enum_core.RoutingPriority>`
     // the specified CircuitBreaker settings apply to.
     core.RoutingPriority priority = 1 [(validate.rules).enum = {defined_only: true}];
@@ -45,9 +61,23 @@ message CircuitBreakers {
     // upstream cluster. If not specified, the default is 3.
     google.protobuf.UInt32Value max_retries = 5;
 
+    // Specifies a limit on concurrent retries in relation to the number of active requests. This
+    // parameter is optional.
+    //
+    // .. note::
+    //
+    //    If this field is set, the retry budget will override any configured retry circuit
+    //    breaker.
+    RetryBudget retry_budget = 8;
+
     // If track_remaining is true, then stats will be published that expose
     // the number of resources remaining until the circuit breakers open. If
     // not specified, the default is false.
+    //
+    // .. note::
+    //
+    //    If a retry budget is used in lieu of the max_retries circuit breaker,
+    //    the remaining retry resources will not be tracked.
     bool track_remaining = 6;
 
     // The maximum number of connection pools per cluster that Envoy will concurrently support at
diff --git a/generated_api_shadow/envoy/config/cluster/v3alpha/circuit_breaker.proto b/generated_api_shadow/envoy/config/cluster/v3alpha/circuit_breaker.proto
index b9c1cb729451..a84cec0d2872 100644
--- a/generated_api_shadow/envoy/config/cluster/v3alpha/circuit_breaker.proto
+++ b/generated_api_shadow/envoy/config/cluster/v3alpha/circuit_breaker.proto
@@ -3,6 +3,7 @@ syntax = "proto3";
 package envoy.config.cluster.v3alpha;
 
 import "envoy/config/core/v3alpha/base.proto";
+import "envoy/type/v3alpha/percent.proto";
 
 import "google/protobuf/wrappers.proto";
 
@@ -24,11 +25,29 @@ message CircuitBreakers {
 
   // A Thresholds defines CircuitBreaker settings for a
   // :ref:`RoutingPriority<envoy_api_enum_config.core.v3alpha.RoutingPriority>`.
-  // [#next-free-field: 8]
+  // [#next-free-field: 9]
   message Thresholds {
     option (udpa.annotations.versioning).previous_message_type =
         "envoy.api.v2.cluster.CircuitBreakers.Thresholds";
 
+    message RetryBudget {
+      option (udpa.annotations.versioning).previous_message_type =
+          "envoy.api.v2.cluster.CircuitBreakers.Thresholds.RetryBudget";
+
+      // Specifies the limit on concurrent retries as a percentage of the sum of active requests and
+      // active pending requests. For example, if there are 100 active requests and the
+      // budget_percent is set to 25, there may be 25 active retries.
+      //
+      // This parameter is optional. Defaults to 20%.
+      type.v3alpha.Percent budget_percent = 1;
+
+      // Specifies the minimum retry concurrency allowed for the retry budget. The limit on the
+      // number of active retries may never go below this number.
+      //
+      // This parameter is optional. Defaults to 3.
+      google.protobuf.UInt32Value min_retry_concurrency = 2;
+    }
+
     // The :ref:`RoutingPriority<envoy_api_enum_config.core.v3alpha.RoutingPriority>`
     // the specified CircuitBreaker settings apply to.
     core.v3alpha.RoutingPriority priority = 1 [(validate.rules).enum = {defined_only: true}];
@@ -49,9 +68,23 @@ message CircuitBreakers {
     // upstream cluster. If not specified, the default is 3.
     google.protobuf.UInt32Value max_retries = 5;
 
+    // Specifies a limit on concurrent retries in relation to the number of active requests. This
+    // parameter is optional.
+    //
+    // .. note::
+    //
+    //    If this field is set, the retry budget will override any configured retry circuit
+    //    breaker.
+    RetryBudget retry_budget = 8;
+
     // If track_remaining is true, then stats will be published that expose
     // the number of resources remaining until the circuit breakers open. If
     // not specified, the default is false.
+    //
+    // .. note::
+    //
+    //    If a retry budget is used in lieu of the max_retries circuit breaker,
+    //    the remaining retry resources will not be tracked.
     bool track_remaining = 6;
 
     // The maximum number of connection pools per cluster that Envoy will concurrently support at
diff --git a/include/envoy/runtime/runtime.h b/include/envoy/runtime/runtime.h
index ffaf40b7e0ce..40b9c308ee3f 100644
--- a/include/envoy/runtime/runtime.h
+++ b/include/envoy/runtime/runtime.h
@@ -208,6 +208,13 @@ class Snapshot {
    */
   virtual const std::string& get(const std::string& key) const PURE;
 
+  /**
+   * Returns whether the key has any value set.
+   * @param key supplies the key to check.
+   * @return bool true if the key exists, false otherwise.
+   */
+  virtual bool exists(const std::string& key) const PURE;
+
   /**
    * Fetch an integer runtime key. Runtime keys larger than ~2^53 may not be accurately converted
    * into integers and will return default_value.
diff --git a/include/envoy/upstream/resource_manager.h b/include/envoy/upstream/resource_manager.h
index 3571b64e4111..c10ff89c033f 100644
--- a/include/envoy/upstream/resource_manager.h
+++ b/include/envoy/upstream/resource_manager.h
@@ -47,6 +47,11 @@ class Resource {
    * @return the current maximum allowed number of this resource.
    */
   virtual uint64_t max() PURE;
+
+  /**
+   * @return the current resource count.
+   */
+  virtual uint64_t count() const PURE;
 };
 
 /**
diff --git a/source/common/http/http1/conn_pool.cc b/source/common/http/http1/conn_pool.cc
index 97eb6fbe8cc9..7a7f57592bae 100644
--- a/source/common/http/http1/conn_pool.cc
+++ b/source/common/http/http1/conn_pool.cc
@@ -262,11 +262,20 @@ ConnPoolImpl::StreamWrapper::StreamWrapper(StreamDecoder& response_decoder, Acti
   StreamEncoderWrapper::inner_.getStream().addCallbacks(*this);
   parent_.parent_.host_->cluster().stats().upstream_rq_active_.inc();
   parent_.parent_.host_->stats().rq_active_.inc();
+
+  // TODO(tonya11en): At the time of writing, there is no way to mix different versions of HTTP
+  // traffic in the same cluster, so incrementing the request count in the per-cluster resource
+  // manager will not affect circuit breaking in any unexpected ways. Ideally, outstanding request
+  // counts would be tracked the same way in all HTTP versions.
+  //
+  // See: https://github.com/envoyproxy/envoy/issues/9215
+  parent_.parent_.host_->cluster().resourceManager(parent_.parent_.priority_).requests().inc();
 }
 
 ConnPoolImpl::StreamWrapper::~StreamWrapper() {
   parent_.parent_.host_->cluster().stats().upstream_rq_active_.dec();
   parent_.parent_.host_->stats().rq_active_.dec();
+  parent_.parent_.host_->cluster().resourceManager(parent_.parent_.priority_).requests().dec();
 }
 
 void ConnPoolImpl::StreamWrapper::onEncodeComplete() { encode_complete_ = true; }
diff --git a/source/common/runtime/runtime_impl.h b/source/common/runtime/runtime_impl.h
index c8a3a71b6170..e6cc0a80f745 100644
--- a/source/common/runtime/runtime_impl.h
+++ b/source/common/runtime/runtime_impl.h
@@ -98,6 +98,7 @@ class SnapshotImpl : public Snapshot,
   double getDouble(const std::string& key, double default_value) const override;
   bool getBoolean(absl::string_view key, bool value) const override;
   const std::vector<OverrideLayerConstPtr>& getLayers() const override;
+  bool exists(const std::string& key) const override { return values_.contains(key); }
 
   static Entry createEntry(const std::string& value);
   static Entry createEntry(const ProtobufWkt::Value& value);
diff --git a/source/common/upstream/resource_manager_impl.h b/source/common/upstream/resource_manager_impl.h
index 8ba81886f56f..f111efb38e5c 100644
--- a/source/common/upstream/resource_manager_impl.h
+++ b/source/common/upstream/resource_manager_impl.h
@@ -28,17 +28,20 @@ class ResourceManagerImpl : public ResourceManager {
   ResourceManagerImpl(Runtime::Loader& runtime, const std::string& runtime_key,
                       uint64_t max_connections, uint64_t max_pending_requests,
                       uint64_t max_requests, uint64_t max_retries, uint64_t max_connection_pools,
-                      ClusterCircuitBreakersStats cb_stats)
+                      ClusterCircuitBreakersStats cb_stats, absl::optional<double> budget_percent,
+                      absl::optional<uint32_t> min_retry_concurrency)
       : connections_(max_connections, runtime, runtime_key + "max_connections", cb_stats.cx_open_,
                      cb_stats.remaining_cx_),
         pending_requests_(max_pending_requests, runtime, runtime_key + "max_pending_requests",
                           cb_stats.rq_pending_open_, cb_stats.remaining_pending_),
         requests_(max_requests, runtime, runtime_key + "max_requests", cb_stats.rq_open_,
                   cb_stats.remaining_rq_),
-        retries_(max_retries, runtime, runtime_key + "max_retries", cb_stats.rq_retry_open_,
-                 cb_stats.remaining_retries_),
         connection_pools_(max_connection_pools, runtime, runtime_key + "max_connection_pools",
-                          cb_stats.cx_pool_open_, cb_stats.remaining_cx_pools_) {}
+                          cb_stats.cx_pool_open_, cb_stats.remaining_cx_pools_),
+        retries_(budget_percent, min_retry_concurrency, max_retries, runtime,
+                 runtime_key + "retry_budget.", runtime_key + "max_retries",
+                 cb_stats.rq_retry_open_, cb_stats.remaining_retries_, requests_,
+                 pending_requests_) {}
 
   // Upstream::ResourceManager
   Resource& connections() override { return connections_; }
@@ -72,6 +75,7 @@ class ResourceManagerImpl : public ResourceManager {
       open_gauge_.set(canCreate() ? 0 : 1);
     }
     uint64_t max() override { return runtime_.snapshot().getInteger(runtime_key_, max_); }
+    uint64_t count() const override { return current_.load(); }
 
     /**
      * We set the gauge instead of incrementing and decrementing because,
@@ -94,8 +98,7 @@ class ResourceManagerImpl : public ResourceManager {
 
     /**
      * A gauge to notify the live circuit breaker state. The gauge is set to 0
-     * to notify that the circuit breaker is closed, or to 1 to notify that it
-     * is open.
+     * to notify that the circuit breaker is closed, or to 1 to notify that it is open.
      */
     Stats::Gauge& open_gauge_;
 
@@ -105,11 +108,95 @@ class ResourceManagerImpl : public ResourceManager {
     Stats::Gauge& remaining_;
   };
 
+  class RetryBudgetImpl : public Resource {
+  public:
+    RetryBudgetImpl(absl::optional<double> budget_percent,
+                    absl::optional<uint32_t> min_retry_concurrency, uint64_t max_retries,
+                    Runtime::Loader& runtime, const std::string& retry_budget_runtime_key,
+                    const std::string& max_retries_runtime_key, Stats::Gauge& open_gauge,
+                    Stats::Gauge& remaining, const Resource& requests,
+                    const Resource& pending_requests)
+        : runtime_(runtime),
+          max_retry_resource_(max_retries, runtime, max_retries_runtime_key, open_gauge, remaining),
+          budget_percent_(budget_percent), min_retry_concurrency_(min_retry_concurrency),
+          budget_percent_key_(retry_budget_runtime_key + "budget_percent"),
+          min_retry_concurrency_key_(retry_budget_runtime_key + "min_retry_concurrency"),
+          requests_(requests), pending_requests_(pending_requests), remaining_(remaining) {}
+
+    // Upstream::Resource
+    bool canCreate() override {
+      if (!useRetryBudget()) {
+        return max_retry_resource_.canCreate();
+      }
+      clearRemainingGauge();
+      return count() < max();
+    }
+    void inc() override {
+      max_retry_resource_.inc();
+      clearRemainingGauge();
+    }
+    void dec() override {
+      max_retry_resource_.dec();
+      clearRemainingGauge();
+    }
+    void decBy(uint64_t amount) override {
+      max_retry_resource_.decBy(amount);
+      clearRemainingGauge();
+    }
+    uint64_t max() override {
+      if (!useRetryBudget()) {
+        return max_retry_resource_.max();
+      }
+
+      const uint64_t current_active = requests_.count() + pending_requests_.count();
+      const double budget_percent = runtime_.snapshot().getDouble(
+          budget_percent_key_, budget_percent_ ? *budget_percent_ : 20.0);
+      const uint32_t min_retry_concurrency = runtime_.snapshot().getInteger(
+          min_retry_concurrency_key_, min_retry_concurrency_ ? *min_retry_concurrency_ : 3);
+
+      clearRemainingGauge();
+
+      // We enforce that the retry concurrency is never allowed to go below the
+      // min_retry_concurrency, even if the configured percent of the current active requests
+      // yields a value that is smaller.
+      return std::max<uint64_t>(budget_percent / 100.0 * current_active, min_retry_concurrency);
+    }
+    uint64_t count() const override { return max_retry_resource_.count(); }
+
+  private:
+    bool useRetryBudget() const {
+      return runtime_.snapshot().exists(budget_percent_key_) ||
+             runtime_.snapshot().exists(min_retry_concurrency_key_) || budget_percent_ ||
+             min_retry_concurrency_;
+    }
+
+    // If the retry budget is in use, the stats tracking remaining retries do not make sense since
+    // they would depend on other resources that can change without a call to this object.
+    // Therefore, the gauge should just be reset to 0.
+    void clearRemainingGauge() {
+      if (useRetryBudget()) {
+        remaining_.set(0);
+      }
+    }
+
+    Runtime::Loader& runtime_;
+    // The max_retry resource is nested within the budget to maintain state if the retry budget is
+    // toggled.
+    ResourceImpl max_retry_resource_;
+    const absl::optional<double> budget_percent_;
+    const absl::optional<uint32_t> min_retry_concurrency_;
+    const std::string budget_percent_key_;
+    const std::string min_retry_concurrency_key_;
+    const Resource& requests_;
+    const Resource& pending_requests_;
+    Stats::Gauge& remaining_;
+  };
+
   ResourceImpl connections_;
   ResourceImpl pending_requests_;
   ResourceImpl requests_;
-  ResourceImpl retries_;
   ResourceImpl connection_pools_;
+  RetryBudgetImpl retries_;
 };
 
 using ResourceManagerImplPtr = std::unique_ptr<ResourceManagerImpl>;
diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc
index 51f2008121c9..e4ac13cfe86b 100644
--- a/source/common/upstream/upstream_impl.cc
+++ b/source/common/upstream/upstream_impl.cc
@@ -35,6 +35,7 @@
 #include "common/network/socket_option_factory.h"
 #include "common/protobuf/protobuf.h"
 #include "common/protobuf/utility.h"
+#include "common/router/config_utility.h"
 #include "common/runtime/runtime_impl.h"
 #include "common/upstream/eds.h"
 #include "common/upstream/health_checker_impl.h"
@@ -1095,6 +1096,9 @@ ResourceManagerImplPtr ClusterInfoImpl::ResourceManagers::load(
       [priority](const envoy::config::cluster::v3alpha::CircuitBreakers::Thresholds& threshold) {
         return threshold.priority() == priority;
       });
+
+  absl::optional<double> budget_percent;
+  absl::optional<uint32_t> min_retry_concurrency;
   if (it != thresholds.cend()) {
     max_connections = PROTOBUF_GET_WRAPPED_OR_DEFAULT(*it, max_connections, max_connections);
     max_pending_requests =
@@ -1104,11 +1108,25 @@ ResourceManagerImplPtr ClusterInfoImpl::ResourceManagers::load(
     track_remaining = it->track_remaining();
     max_connection_pools =
         PROTOBUF_GET_WRAPPED_OR_DEFAULT(*it, max_connection_pools, max_connection_pools);
+    if (it->has_retry_budget()) {
+      // The budget_percent and min_retry_concurrency values do not set defaults like the other
+      // members of the 'threshold' message, because the behavior of the retry circuit breaker
+      // changes depending on whether it has been configured. Therefore, it's necessary to manually
+      // check if the threshold message has a retry budget configured and only set the values if so.
+      budget_percent = it->retry_budget().has_budget_percent()
+                           ? PROTOBUF_GET_WRAPPED_REQUIRED(it->retry_budget(), budget_percent)
+                           : budget_percent;
+      min_retry_concurrency =
+          it->retry_budget().has_min_retry_concurrency()
+              ? PROTOBUF_GET_WRAPPED_REQUIRED(it->retry_budget(), min_retry_concurrency)
+              : min_retry_concurrency;
+    }
   }
   return std::make_unique<ResourceManagerImpl>(
       runtime, runtime_prefix, max_connections, max_pending_requests, max_requests, max_retries,
       max_connection_pools,
-      ClusterInfoImpl::generateCircuitBreakersStats(stats_scope, priority_name, track_remaining));
+      ClusterInfoImpl::generateCircuitBreakersStats(stats_scope, priority_name, track_remaining),
+      budget_percent, min_retry_concurrency);
 }
 
 PriorityStateManager::PriorityStateManager(ClusterImplBase& cluster,
diff --git a/test/common/http/http1/conn_pool_test.cc b/test/common/http/http1/conn_pool_test.cc
index 023f8c614f8a..b8fe1eec6c22 100644
--- a/test/common/http/http1/conn_pool_test.cc
+++ b/test/common/http/http1/conn_pool_test.cc
@@ -150,6 +150,8 @@ struct ActiveTestRequest {
 
   ActiveTestRequest(Http1ConnPoolImplTest& parent, size_t client_index, Type type)
       : parent_(parent), client_index_(client_index) {
+    uint64_t active_rq_observed =
+        parent_.cluster_->resourceManager(Upstream::ResourcePriority::Default).requests().count();
     uint64_t current_rq_total = parent_.cluster_->stats_.upstream_rq_total_.value();
     if (type == Type::CreateConnection) {
       parent.conn_pool_.expectClientCreate();
@@ -175,6 +177,10 @@ struct ActiveTestRequest {
     }
     if (type != Type::Pending) {
       EXPECT_EQ(current_rq_total + 1, parent_.cluster_->stats_.upstream_rq_total_.value());
+      EXPECT_EQ(active_rq_observed + 1,
+                parent_.cluster_->resourceManager(Upstream::ResourcePriority::Default)
+                    .requests()
+                    .count());
     }
   }
 
diff --git a/test/common/router/retry_state_impl_test.cc b/test/common/router/retry_state_impl_test.cc
index 03717b147359..b9fb178588d2 100644
--- a/test/common/router/retry_state_impl_test.cc
+++ b/test/common/router/retry_state_impl_test.cc
@@ -27,6 +27,8 @@ namespace {
 
 class RouterRetryStateImplTest : public testing::Test {
 public:
+  // Identifies which default-priority resource counter incrOutstandingResource() should bump.
+  enum class TestResourceType { Connection, Request, PendingRequest, Retry };
   RouterRetryStateImplTest() : callback_([this]() -> void { callback_ready_.ready(); }) {
     ON_CALL(runtime_.snapshot_, featureEnabled("upstream.use_retry", 100))
         .WillByDefault(Return(true));
@@ -47,6 +49,46 @@ class RouterRetryStateImplTest : public testing::Test {
     EXPECT_CALL(*retry_timer_, enableTimer(_, _));
   }
 
+  // Raises one default-priority resource counter `num` times, as selected by `resource`.
+  // Each increment also queues a matching decrement on resource_manager_cleanup_tasks_, which
+  // cleanupOutstandingResources() runs to bring the counter back down. A value that names none
+  // of the four resources leaves everything untouched.
+  void incrOutstandingResource(TestResourceType resource, uint32_t num) {
+    const auto priority = Upstream::ResourcePriority::Default;
+    for (uint32_t remaining = num; remaining > 0; --remaining) {
+      if (resource == TestResourceType::Retry) {
+        // Outstanding retries.
+        cluster_.resourceManager(priority).retries().inc();
+        resource_manager_cleanup_tasks_.emplace_back(
+            [this, priority]() { cluster_.resourceManager(priority).retries().dec(); });
+      } else if (resource == TestResourceType::Connection) {
+        // Outstanding connections.
+        cluster_.resourceManager(priority).connections().inc();
+        resource_manager_cleanup_tasks_.emplace_back(
+            [this, priority]() { cluster_.resourceManager(priority).connections().dec(); });
+      } else if (resource == TestResourceType::Request) {
+        // Outstanding active requests.
+        cluster_.resourceManager(priority).requests().inc();
+        resource_manager_cleanup_tasks_.emplace_back(
+            [this, priority]() { cluster_.resourceManager(priority).requests().dec(); });
+      } else if (resource == TestResourceType::PendingRequest) {
+        // Outstanding pending requests.
+        cluster_.resourceManager(priority).pendingRequests().inc();
+        resource_manager_cleanup_tasks_.emplace_back(
+            [this, priority]() { cluster_.resourceManager(priority).pendingRequests().dec(); });
+      }
+    }
+  }
+
+  void cleanupOutstandingResources() { // Run queued releases oldest-first, then drop them.
+    for (const auto& release : resource_manager_cleanup_tasks_) {
+      release();
+    }
+    resource_manager_cleanup_tasks_.clear();
+  }
+
+  void TearDown() override { cleanupOutstandingResources(); } // Unwind leftover counts.
+
   NiceMock<TestRetryPolicy> policy_;
   NiceMock<Upstream::MockClusterInfo> cluster_;
   NiceMock<Runtime::MockLoader> runtime_;
@@ -56,6 +98,7 @@ class RouterRetryStateImplTest : public testing::Test {
   RetryStatePtr state_;
   ReadyWatcher callback_ready_;
   RetryState::DoRetryCallback callback_;
+  std::vector<std::function<void()>> resource_manager_cleanup_tasks_;
 
   const Http::StreamResetReason remote_reset_{Http::StreamResetReason::RemoteReset};
   const Http::StreamResetReason remote_refused_stream_reset_{
@@ -893,6 +936,111 @@ TEST_F(RouterRetryStateImplTest, NoPreferredOverLimitExceeded) {
   EXPECT_EQ(RetryStatus::No, state_->shouldRetryHeaders(good_response_headers, callback_));
 }
 
+TEST_F(RouterRetryStateImplTest, BudgetAvailableRetries) {
+  // The max_retries circuit breaker is configured to 0, so on its own it would allow nothing.
+  // The retry budget is configured as well and is expected to admit this retry instead.
+  cluster_.resetResourceManagerWithRetryBudget(
+      0 /* cx */, 0 /* rq_pending */, 0 /* rq */, 0 /* rq_retry */, 0 /* conn_pool */,
+      20.0 /* budget_percent */, 3 /* min_retry_concurrency */);
+
+  Http::TestHeaderMapImpl retry_on_5xx{{"x-envoy-retry-on", "5xx"}};
+  Http::TestHeaderMapImpl server_error{{":status", "500"}};
+
+  setup(retry_on_5xx);
+  EXPECT_TRUE(state_->enabled());
+
+  expectTimerCreateAndEnable();
+  EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(server_error, callback_));
+}
+
+TEST_F(RouterRetryStateImplTest, BudgetNoAvailableRetries) {
+  // The max_retries circuit breaker alone would allow 5 retries, but a retry budget is also
+  // configured. With a 0% budget and a minimum retry concurrency of 0, the retry is expected to
+  // be refused as an overflow.
+  cluster_.resetResourceManagerWithRetryBudget(
+      0 /* cx */, 0 /* rq_pending */, 20 /* rq */, 5 /* rq_retry */, 0 /* conn_pool */,
+      0 /* budget_percent */, 0 /* min_retry_concurrency */);
+
+  Http::TestHeaderMapImpl retry_on_5xx{{"x-envoy-retry-on", "5xx"}};
+  Http::TestHeaderMapImpl server_error{{":status", "500"}};
+
+  setup(retry_on_5xx);
+  EXPECT_TRUE(state_->enabled());
+
+  EXPECT_EQ(RetryStatus::NoOverflow, state_->shouldRetryHeaders(server_error, callback_));
+}
+
+TEST_F(RouterRetryStateImplTest, BudgetVerifyMinimumConcurrency) {
+  // max_retries is 0; only the retry budget (20%, minimum concurrency 3) can admit retries.
+  cluster_.resetResourceManagerWithRetryBudget(
+      0 /* cx */, 0 /* rq_pending */, 0 /* rq */, 0 /* rq_retry */, 0 /* conn_pool */,
+      20.0 /* budget_percent */, 3 /* min_retry_concurrency */);
+
+  Http::TestHeaderMapImpl request_headers{{"x-envoy-retry-on", "5xx"},
+                                          {"x-envoy-max-retries", "42"}};
+  Http::TestHeaderMapImpl response_headers{{":status", "500"}};
+
+  setup(request_headers);
+  EXPECT_TRUE(state_->enabled());
+
+  // Start with 2 outstanding retries and no outstanding requests. The 3rd retry must still be
+  // allowed: min_retry_concurrency is the floor on allowed retries before the budget starts to
+  // scale with request concurrency.
+  incrOutstandingResource(TestResourceType::Retry, 2);
+
+  expectTimerCreateAndEnable();
+  EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers, callback_));
+
+  // 3 outstanding retries: the minimum concurrency of 3 is used up, so the next one overflows.
+  incrOutstandingResource(TestResourceType::Retry, 1);
+
+  EXPECT_EQ(RetryStatus::NoOverflow, state_->shouldRetryHeaders(response_headers, callback_));
+
+  // With 20 active requests the 20% budget should work out to 4 retries, so a 4th fits again.
+  incrOutstandingResource(TestResourceType::Request, 20);
+  EXPECT_CALL(*retry_timer_, enableTimer(_, _));
+  EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers, callback_));
+
+  // 4 outstanding retries: the request-scaled budget is used up as well.
+  incrOutstandingResource(TestResourceType::Retry, 1);
+
+  EXPECT_EQ(RetryStatus::NoOverflow, state_->shouldRetryHeaders(response_headers, callback_));
+
+  // Raise budget_percent to 100 through runtime and expect the retry to be admitted again.
+  EXPECT_CALL(cluster_.runtime_.snapshot_, exists("fake_clusterretry_budget.budget_percent"))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(cluster_.runtime_.snapshot_, getDouble("fake_clusterretry_budget.budget_percent", _))
+      .WillRepeatedly(Return(100.0));
+
+  EXPECT_CALL(*retry_timer_, enableTimer(_, _));
+  EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(response_headers, callback_));
+}
+
+TEST_F(RouterRetryStateImplTest, BudgetRuntimeSetOnly) {
+  // No retry budget is configured and max_retries is 0, so the max_retries CB alone would refuse
+  // every retry. The budget values are supplied purely through runtime keys.
+  cluster_.resetResourceManager(0 /* cx */, 0 /* rq_pending */, 0 /* rq */, 0 /* rq_retry */,
+                                0 /* conn_pool */);
+
+  EXPECT_CALL(cluster_.runtime_.snapshot_, exists("fake_clusterretry_budget.budget_percent"))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(cluster_.runtime_.snapshot_, getDouble("fake_clusterretry_budget.budget_percent", _))
+      .WillRepeatedly(Return(20.0));
+  EXPECT_CALL(cluster_.runtime_.snapshot_, exists("fake_clusterretry_budget.min_retry_concurrency"))
+      .WillRepeatedly(Return(true));
+
+  Http::TestHeaderMapImpl retry_on_5xx{{"x-envoy-retry-on", "5xx"}};
+  Http::TestHeaderMapImpl server_error{{":status", "500"}};
+
+  setup(retry_on_5xx);
+  EXPECT_TRUE(state_->enabled());
+
+  incrOutstandingResource(TestResourceType::Retry, 2);
+
+  expectTimerCreateAndEnable();
+  EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryHeaders(server_error, callback_));
+}
+
 } // namespace
 } // namespace Router
 } // namespace Envoy
diff --git a/test/common/runtime/runtime_impl_test.cc b/test/common/runtime/runtime_impl_test.cc
index 7049e96cac84..02482e6c7e61 100644
--- a/test/common/runtime/runtime_impl_test.cc
+++ b/test/common/runtime/runtime_impl_test.cc
@@ -186,6 +186,11 @@ TEST_F(DiskLoaderImplTest, All) {
   EXPECT_EQ("hello\nworld", loader_->snapshot().get("subdir.file3"));
   EXPECT_EQ("", loader_->snapshot().get("invalid"));
 
+  // Existence checking.
+  EXPECT_EQ(true, loader_->snapshot().exists("file2"));
+  EXPECT_EQ(true, loader_->snapshot().exists("subdir.file3"));
+  EXPECT_EQ(false, loader_->snapshot().exists("invalid"));
+
   // Integer getting.
   EXPECT_EQ(1UL, loader_->snapshot().getInteger("file1", 1));
   EXPECT_EQ(2UL, loader_->snapshot().getInteger("file3", 1));
diff --git a/test/common/upstream/resource_manager_impl_test.cc b/test/common/upstream/resource_manager_impl_test.cc
index 52ec0350a76e..2cb3e3ae4f7c 100644
--- a/test/common/upstream/resource_manager_impl_test.cc
+++ b/test/common/upstream/resource_manager_impl_test.cc
@@ -27,7 +27,8 @@ TEST(ResourceManagerImplTest, RuntimeResourceManager) {
   ResourceManagerImpl resource_manager(
       runtime, "circuit_breakers.runtime_resource_manager_test.default.", 0, 0, 0, 1, 0,
       ClusterCircuitBreakersStats{
-          ALL_CLUSTER_CIRCUIT_BREAKERS_STATS(POOL_GAUGE(store), POOL_GAUGE(store))});
+          ALL_CLUSTER_CIRCUIT_BREAKERS_STATS(POOL_GAUGE(store), POOL_GAUGE(store))},
+      absl::nullopt, absl::nullopt);
 
   EXPECT_CALL(
       runtime.snapshot_,
@@ -58,6 +59,7 @@ TEST(ResourceManagerImplTest, RuntimeResourceManager) {
       .WillRepeatedly(Return(0U));
   EXPECT_EQ(0U, resource_manager.retries().max());
   EXPECT_FALSE(resource_manager.retries().canCreate());
+
   EXPECT_CALL(
       runtime.snapshot_,
       getInteger("circuit_breakers.runtime_resource_manager_test.default.max_connection_pools", 0U))
@@ -65,6 +67,15 @@ TEST(ResourceManagerImplTest, RuntimeResourceManager) {
       .WillRepeatedly(Return(5U));
   EXPECT_EQ(5U, resource_manager.connectionPools().max());
   EXPECT_TRUE(resource_manager.connectionPools().canCreate());
+
+  // Verify retry budgets override max_retries.
+  EXPECT_CALL(runtime.snapshot_, exists(_)).WillRepeatedly(Return(true));
+  EXPECT_CALL(runtime.snapshot_, getInteger("circuit_breakers.runtime_resource_manager_test."
+                                            "default.retry_budget.min_retry_concurrency",
+                                            _))
+      .WillRepeatedly(Return(5U));
+  EXPECT_EQ(5U, resource_manager.retries().max());
+  EXPECT_TRUE(resource_manager.retries().canCreate());
 }
 
 TEST(ResourceManagerImplTest, RemainingResourceGauges) {
@@ -73,13 +84,16 @@ TEST(ResourceManagerImplTest, RemainingResourceGauges) {
 
   auto stats = ClusterCircuitBreakersStats{
       ALL_CLUSTER_CIRCUIT_BREAKERS_STATS(POOL_GAUGE(store), POOL_GAUGE(store))};
-  ResourceManagerImpl resource_manager(
-      runtime, "circuit_breakers.runtime_resource_manager_test.default.", 1, 2, 1, 0, 3, stats);
+  ResourceManagerImpl resource_manager(runtime,
+                                       "circuit_breakers.runtime_resource_manager_test.default.", 1,
+                                       2, 1, 0, 3, stats, absl::nullopt, absl::nullopt);
 
   // Test remaining_cx_ gauge
   EXPECT_EQ(1U, resource_manager.connections().max());
   EXPECT_EQ(1U, stats.remaining_cx_.value());
+  EXPECT_EQ(0U, resource_manager.connections().count());
   resource_manager.connections().inc();
+  EXPECT_EQ(1U, resource_manager.connections().count());
   EXPECT_EQ(0U, stats.remaining_cx_.value());
   resource_manager.connections().dec();
   EXPECT_EQ(1U, stats.remaining_cx_.value());
@@ -87,7 +101,9 @@ TEST(ResourceManagerImplTest, RemainingResourceGauges) {
   // Test remaining_pending_ gauge
   EXPECT_EQ(2U, resource_manager.pendingRequests().max());
   EXPECT_EQ(2U, stats.remaining_pending_.value());
+  EXPECT_EQ(0U, resource_manager.pendingRequests().count());
   resource_manager.pendingRequests().inc();
+  EXPECT_EQ(1U, resource_manager.pendingRequests().count());
   EXPECT_EQ(1U, stats.remaining_pending_.value());
   resource_manager.pendingRequests().inc();
   EXPECT_EQ(0U, stats.remaining_pending_.value());
@@ -95,11 +111,14 @@ TEST(ResourceManagerImplTest, RemainingResourceGauges) {
   EXPECT_EQ(1U, stats.remaining_pending_.value());
   resource_manager.pendingRequests().dec();
   EXPECT_EQ(2U, stats.remaining_pending_.value());
+  EXPECT_EQ(0U, resource_manager.pendingRequests().count()); // Both increments released.
 
   // Test remaining_rq_ gauge
   EXPECT_EQ(1U, resource_manager.requests().max());
   EXPECT_EQ(1U, stats.remaining_rq_.value());
+  EXPECT_EQ(0U, resource_manager.requests().count());
   resource_manager.requests().inc();
+  EXPECT_EQ(1U, resource_manager.requests().count());
   EXPECT_EQ(0U, stats.remaining_rq_.value());
   resource_manager.requests().dec();
   EXPECT_EQ(1U, stats.remaining_rq_.value());
@@ -108,18 +127,42 @@ TEST(ResourceManagerImplTest, RemainingResourceGauges) {
   // despite having more retries than the configured max
   EXPECT_EQ(0U, resource_manager.retries().max());
   EXPECT_EQ(0U, stats.remaining_retries_.value());
+  EXPECT_EQ(0U, resource_manager.retries().count());
   resource_manager.retries().inc();
+  EXPECT_EQ(1U, resource_manager.retries().count());
   EXPECT_EQ(0U, stats.remaining_retries_.value());
   resource_manager.retries().dec();
 
   // Test remaining_cx_pools gauge.
   EXPECT_EQ(3U, resource_manager.connectionPools().max());
   EXPECT_EQ(3U, stats.remaining_cx_pools_.value());
+  EXPECT_EQ(0U, resource_manager.connectionPools().count());
   resource_manager.connectionPools().inc();
+  EXPECT_EQ(1U, resource_manager.connectionPools().count());
   EXPECT_EQ(2U, stats.remaining_cx_pools_.value());
   resource_manager.connectionPools().dec();
   EXPECT_EQ(3U, stats.remaining_cx_pools_.value());
 }
+
+TEST(ResourceManagerImplTest, RetryBudgetOverrideGauge) {
+  NiceMock<Runtime::MockLoader> runtime;
+  Stats::IsolatedStoreImpl store;
+
+  ClusterCircuitBreakersStats stats{
+      ALL_CLUSTER_CIRCUIT_BREAKERS_STATS(POOL_GAUGE(store), POOL_GAUGE(store))};
+
+  // With a retry budget configured, the remaining_retries gauge is expected to stay at 0.
+  ResourceManagerImpl budgeted(runtime, "circuit_breakers.runtime_resource_manager_test.default.",
+                               1, 2, 1, 0, 3, stats, 20.0, 5);
+
+  EXPECT_EQ(5U, budgeted.retries().max());
+  EXPECT_EQ(0U, stats.remaining_retries_.value());
+  EXPECT_EQ(0U, budgeted.retries().count());
+  budgeted.retries().inc();
+  EXPECT_EQ(1U, budgeted.retries().count());
+  EXPECT_EQ(0U, stats.remaining_retries_.value());
+  budgeted.retries().dec();
+}
 } // namespace
 } // namespace Upstream
 } // namespace Envoy
diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc
index d849d212f304..2835145c0fde 100644
--- a/test/integration/stats_integration_test.cc
+++ b/test/integration/stats_integration_test.cc
@@ -267,6 +267,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) {
   // 2019/11/15  9040     43371       44000   build: update protobuf to 3.10.1
   // 2019/11/15  9040     43403       44000   upstream: track whether cluster is local
   // 2019/12/10  8779     42919       43500   use var-length coding for name length
+  // 2020/01/07  9069     43413       44000   upstream: Implement retry concurrency budgets
 
   // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI
   // 'release' builds, where we control the platform and tool-chain. So you
@@ -280,8 +281,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) {
   // If you encounter a failure here, please see
   // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests
   // for details on how to fix.
-  EXPECT_MEMORY_EQ(m_per_cluster, 42919); // 104 bytes higher than a debug build.
-  EXPECT_MEMORY_LE(m_per_cluster, 43500);
+  EXPECT_MEMORY_EQ(m_per_cluster, 43413); // 104 bytes higher than a debug build.
+  EXPECT_MEMORY_LE(m_per_cluster, 44000);
 }
 
 TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) {
@@ -314,7 +315,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) {
   // 2019/11/01  8859     35221       36000   build: switch to libc++ by default
   // 2019/11/15  9040     35029       35500   build: update protobuf to 3.10.1
   // 2019/11/15  9040     35061       35500   upstream: track whether cluster is local
   // 2019/12/10  8779     35053       35000   use var-length coding for name lengths
+  // 2020/01/07  9069     35548       35700   upstream: Implement retry concurrency budgets
 
   // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI
   // 'release' builds, where we control the platform and tool-chain. So you
@@ -328,8 +330,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) {
   // If you encounter a failure here, please see
   // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests
   // for details on how to fix.
-  EXPECT_MEMORY_EQ(m_per_cluster, 35053); // 104 bytes higher than a debug build.
-  EXPECT_MEMORY_LE(m_per_cluster, 35500);
+  EXPECT_MEMORY_EQ(m_per_cluster, 35548); // 104 bytes higher than a debug build.
+  EXPECT_MEMORY_LE(m_per_cluster, 35700);
 }
 
 TEST_P(ClusterMemoryTestRunner, MemoryLargeHostSizeWithStats) {
diff --git a/test/mocks/runtime/mocks.cc b/test/mocks/runtime/mocks.cc
index 2556987922c1..330c4a53d22b 100644
--- a/test/mocks/runtime/mocks.cc
+++ b/test/mocks/runtime/mocks.cc
@@ -18,6 +18,7 @@ MockSnapshot::MockSnapshot() {
   ON_CALL(*this, getInteger(_, _)).WillByDefault(ReturnArg<1>());
   ON_CALL(*this, getDouble(_, _)).WillByDefault(ReturnArg<1>());
   ON_CALL(*this, getBoolean(_, _)).WillByDefault(ReturnArg<1>());
+  ON_CALL(*this, exists(_)).WillByDefault(Return(false));
 }
 
 MockSnapshot::~MockSnapshot() = default;
diff --git a/test/mocks/runtime/mocks.h b/test/mocks/runtime/mocks.h
index 8722d06824ff..3ed731f903db 100644
--- a/test/mocks/runtime/mocks.h
+++ b/test/mocks/runtime/mocks.h
@@ -56,6 +56,7 @@ class MockSnapshot : public Snapshot {
                           const envoy::type::v3alpha::FractionalPercent& default_value,
                           uint64_t random_value));
   MOCK_CONST_METHOD1(get, const std::string&(const std::string& key));
+  MOCK_CONST_METHOD1(exists, bool(const std::string& key));
   MOCK_CONST_METHOD2(getInteger, uint64_t(const std::string& key, uint64_t default_value));
   MOCK_CONST_METHOD2(getDouble, double(const std::string& key, double default_value));
   MOCK_CONST_METHOD2(getBoolean, bool(absl::string_view key, bool default_value));
diff --git a/test/mocks/upstream/cluster_info.cc b/test/mocks/upstream/cluster_info.cc
index d58a91a9b5fb..6bc22cccec37 100644
--- a/test/mocks/upstream/cluster_info.cc
+++ b/test/mocks/upstream/cluster_info.cc
@@ -42,9 +42,9 @@ MockClusterInfo::MockClusterInfo()
       load_report_stats_(ClusterInfoImpl::generateLoadReportStats(load_report_stats_store_)),
       circuit_breakers_stats_(
           ClusterInfoImpl::generateCircuitBreakersStats(stats_store_, "default", true)),
-      resource_manager_(new Upstream::ResourceManagerImpl(runtime_, "fake_key", 1, 1024, 1024, 1,
-                                                          std::numeric_limits<uint64_t>::max(),
-                                                          circuit_breakers_stats_)) {
+      resource_manager_(new Upstream::ResourceManagerImpl(
+          runtime_, "fake_key", 1, 1024, 1024, 1, std::numeric_limits<uint64_t>::max(),
+          circuit_breakers_stats_, absl::nullopt, absl::nullopt)) {
   ON_CALL(*this, connectTimeout()).WillByDefault(Return(std::chrono::milliseconds(1)));
   ON_CALL(*this, idleTimeout()).WillByDefault(Return(absl::optional<std::chrono::milliseconds>()));
   ON_CALL(*this, name()).WillByDefault(ReturnRef(name_));
diff --git a/test/mocks/upstream/cluster_info.h b/test/mocks/upstream/cluster_info.h
index 05038a00d2f3..6a1c38ba1793 100644
--- a/test/mocks/upstream/cluster_info.h
+++ b/test/mocks/upstream/cluster_info.h
@@ -70,7 +70,16 @@ class MockClusterInfo : public ClusterInfo {
   void resetResourceManager(uint64_t cx, uint64_t rq_pending, uint64_t rq, uint64_t rq_retry,
                             uint64_t conn_pool) {
     resource_manager_ = std::make_unique<ResourceManagerImpl>(
-        runtime_, name_, cx, rq_pending, rq, rq_retry, conn_pool, circuit_breakers_stats_);
+        runtime_, name_, cx, rq_pending, rq, rq_retry, conn_pool, circuit_breakers_stats_,
+        absl::nullopt, absl::nullopt);
+  }
+  // Same as resetResourceManager(), except the new manager is built with a retry budget.
+  void resetResourceManagerWithRetryBudget(uint64_t cx, uint64_t rq_pending, uint64_t rq,
+                                           uint64_t rq_retry, uint64_t conn_pool,
+                                           double budget_percent, uint32_t min_retry_concurrency) {
+    resource_manager_ = std::make_unique<ResourceManagerImpl>(
+        runtime_, name_, cx, rq_pending, rq, rq_retry, conn_pool, circuit_breakers_stats_,
+        budget_percent, min_retry_concurrency);
   }
 
   // Upstream::ClusterInfo