diff --git a/api/aperture/policy/language/v1/flowcontrol.proto b/api/aperture/policy/language/v1/flowcontrol.proto index 9b022e5239..ae73673bef 100644 --- a/api/aperture/policy/language/v1/flowcontrol.proto +++ b/api/aperture/policy/language/v1/flowcontrol.proto @@ -281,7 +281,7 @@ message RateLimiter { // // :::info // -// See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). +// See also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). // // ::: // @@ -416,7 +416,7 @@ message Scheduler { // :::info // // See also [workload definition in the concepts - // section](/concepts/scheduler/scheduler.md#workload). + // section](/concepts/scheduler.md#workload). // // ::: repeated Workload workloads = 1; // @gotags: validate:"dive" diff --git a/api/buf.lock b/api/buf.lock index a563a9bf42..f815d7c225 100644 --- a/api/buf.lock +++ b/api/buf.lock @@ -9,8 +9,8 @@ deps: - remote: buf.build owner: envoyproxy repository: envoy - commit: 53333dc2a8944f15a2e47288aebb54c0 - digest: shake256:c0423eef8c867a4a1f4c1f20d6f3b5a80aba61d610e66568ec90f3bd06a096541189e9de2a1da7bf6489eaed40015cbe760823d09882d7b12aecf896b7250c8d + commit: c941fabf06ef4a1c83625afcaa4b34e1 + digest: shake256:41c78a1709e5ee9fa85bd40d954f333b1a04880f10aaf5591e5a3af3c1eccbb43840e4cd96684619fada66d9a32e40a57efc34888c9ac7afe798df206f41102a - remote: buf.build owner: envoyproxy repository: protoc-gen-validate diff --git a/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go b/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go index 5e4fe4d4f7..851f2666fe 100644 --- a/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go +++ b/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go @@ -670,7 +670,7 @@ func (x *RateLimiter) GetOutPorts() *RateLimiter_Outs { // // :::info // -// See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). +// See also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). // // ::: // @@ -794,7 +794,7 @@ type Scheduler struct { // :::info // // See also [workload definition in the concepts - // section](/concepts/scheduler/scheduler.md#workload). + // section](/concepts/scheduler.md#workload). // // ::: Workloads []*Scheduler_Workload `protobuf:"bytes,1,rep,name=workloads,proto3" json:"workloads,omitempty" validate:"dive"` // @gotags: validate:"dive" diff --git a/blueprints/gen/jsonschema/_definitions.json b/blueprints/gen/jsonschema/_definitions.json index 3b6dbbc227..2344737a80 100644 --- a/blueprints/gen/jsonschema/_definitions.json +++ b/blueprints/gen/jsonschema/_definitions.json @@ -2280,7 +2280,7 @@ "additionalProperties": false }, "LoadScheduler": { - "description": ":::info\n\nSee also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md).\n\n:::\n\nTo make scheduling decisions the Flows are mapped into Workloads by providing match rules.\nA workload determines the priority and cost of admitting each Flow that belongs to it.\nScheduling of Flows is based on Weighted Fair Queuing principles.\n\nThe signal at port `load_multiplier` determines the fraction of incoming tokens that get admitted. 
The signals gets acted on once every 10 seconds.", + "description": ":::info\n\nSee also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md).\n\n:::\n\nTo make scheduling decisions the Flows are mapped into Workloads by providing match rules.\nA workload determines the priority and cost of admitting each Flow that belongs to it.\nScheduling of Flows is based on Weighted Fair Queuing principles.\n\nThe signal at port `load_multiplier` determines the fraction of incoming tokens that get admitted. The signals gets acted on once every 10 seconds.", "properties": { "dry_run": { "description": "Decides whether to run the load scheduler in dry-run mode. In dry run mode the scheduler acts as pass through to all flow and does not queue flows.\nIt is useful for observing the behavior of load scheduler without disrupting any real traffic.", @@ -3608,7 +3608,7 @@ "type": "string" }, "workloads": { - "description": "List of workloads to be used in scheduler.\n\nCategorizing flows into workloads\nallows for load throttling to be \"intelligent\" instead of queueing flows in an arbitrary order.\nThere are two aspects of this \"intelligence\":\n* Scheduler can more precisely calculate concurrency if it understands\n that flows belonging to different classes have different weights (for example, insert queries compared to select queries).\n* Setting different priorities to different workloads lets the scheduler\n avoid dropping important traffic during overload.\n\nEach workload in this list specifies also a matcher that is used to\ndetermine which flow will be categorized into which workload.\nIn case of multiple matching workloads, the first matching one will be used.\nIf none of workloads match, `default_workload` will be used.\n\n:::info\n\nSee also [workload definition in the concepts\nsection](/concepts/scheduler/scheduler.md#workload).\n\n:::\n\n", + "description": "List of workloads to be used in scheduler.\n\nCategorizing flows into workloads\nallows for load throttling to be \"intelligent\" instead of queueing flows in an arbitrary order.\nThere are two aspects of this \"intelligence\":\n* Scheduler can more precisely calculate concurrency if it understands\n that flows belonging to different classes have different weights (for example, insert queries compared to select queries).\n* Setting different priorities to different workloads lets the scheduler\n avoid dropping important traffic during overload.\n\nEach workload in this list specifies also a matcher that is used to\ndetermine which flow will be categorized into which workload.\nIn case of multiple matching workloads, the first matching one will be used.\nIf none of workloads match, `default_workload` will be used.\n\n:::info\n\nSee also [workload definition in the concepts\nsection](/concepts/scheduler.md#workload).\n\n:::\n\n", "items": { "$ref": "#/definitions/SchedulerWorkload", "type": "object" diff --git a/docs/content/assets/openapiv2/aperture-controller.swagger.yaml b/docs/content/assets/openapiv2/aperture-controller.swagger.yaml index 8cce00e638..85fe01831b 100644 --- a/docs/content/assets/openapiv2/aperture-controller.swagger.yaml +++ b/docs/content/assets/openapiv2/aperture-controller.swagger.yaml @@ -2484,7 +2484,7 @@ definitions: description: |- :::info - See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). + See also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). 
::: @@ -3927,7 +3927,7 @@ definitions: :::info See also [workload definition in the concepts - section](/concepts/scheduler/scheduler.md#workload). + section](/concepts/scheduler.md#workload). ::: diff --git a/docs/content/assets/openapiv2/aperture.swagger.yaml b/docs/content/assets/openapiv2/aperture.swagger.yaml index e778e089b6..ad9885d0a6 100644 --- a/docs/content/assets/openapiv2/aperture.swagger.yaml +++ b/docs/content/assets/openapiv2/aperture.swagger.yaml @@ -3174,7 +3174,7 @@ definitions: description: |- :::info - See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). + See also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). ::: @@ -4765,7 +4765,7 @@ definitions: :::info See also [workload definition in the concepts - section](/concepts/scheduler/scheduler.md#workload). + section](/concepts/scheduler.md#workload). ::: diff --git a/docs/content/concepts/advanced/advanced.md b/docs/content/concepts/advanced/advanced.md index 8d66da9a01..93ea5defea 100644 --- a/docs/content/concepts/advanced/advanced.md +++ b/docs/content/concepts/advanced/advanced.md @@ -1,6 +1,6 @@ --- title: Advanced -sidebar_position: 8 +sidebar_position: 10 --- ```mdx-code-block diff --git a/docs/content/concepts/advanced/agent-group.md b/docs/content/concepts/advanced/agent-group.md index b6c8df8fc0..24c377229a 100644 --- a/docs/content/concepts/advanced/agent-group.md +++ b/docs/content/concepts/advanced/agent-group.md @@ -45,7 +45,8 @@ details. synchronization. Agents within the same group form a peer-to-peer network to synchronize fine-grained per label counters. These counters are crucial for [rate-limiting](/concepts/rate-limiter.md) and for implementing global token - buckets used in [quota scheduling](/concepts/scheduler/quota-scheduler.md). + buckets used in + [quota scheduling](/concepts/request-prioritization/quota-scheduler.md). Additionally, all Agents within an agent group instantiate the same set of flow control components as defined in the [policies](/concepts/advanced/policy.md) running at the Controller. This diff --git a/docs/content/concepts/scheduler/assets/img/scheduler-dark.svg b/docs/content/concepts/assets/img/scheduler-dark.svg similarity index 100% rename from docs/content/concepts/scheduler/assets/img/scheduler-dark.svg rename to docs/content/concepts/assets/img/scheduler-dark.svg diff --git a/docs/content/concepts/scheduler/assets/img/scheduler-light.svg b/docs/content/concepts/assets/img/scheduler-light.svg similarity index 100% rename from docs/content/concepts/scheduler/assets/img/scheduler-light.svg rename to docs/content/concepts/assets/img/scheduler-light.svg diff --git a/docs/content/concepts/cache.md b/docs/content/concepts/cache.md index dd0ef776a0..307d570675 100644 --- a/docs/content/concepts/cache.md +++ b/docs/content/concepts/cache.md @@ -1,6 +1,6 @@ --- title: Cache -sidebar_position: 7 +sidebar_position: 9 --- Aperture's _Cache_ can be used to reduce the load on a service by caching the diff --git a/docs/content/concepts/concurrency-limiter.md b/docs/content/concepts/concurrency-limiter.md new file mode 100644 index 0000000000..67432af8ba --- /dev/null +++ b/docs/content/concepts/concurrency-limiter.md @@ -0,0 +1,53 @@ +--- +title: Concurrency Limiter +sidebar_position: 6 +--- + +:::info See also + +[_Concurrency Limiter_ reference][reference] + +::: + +The _Concurrency Limiter_ component is used to enforce in-flight request quotas +to prevent overloads. 
It can also be used to enforce limits per entity, such as a
+user, to ensure fair access across users, essentially providing an added layer
+of protection in addition to per-user rate limits.
+
+_Concurrency Limiter_ can limit the number of concurrent requests to a control
+point, or to requests matching certain labels within the control point. It
+achieves this by maintaining a ledger of in-flight requests. If the number of
+in-flight requests exceeds the configured limit, the _Concurrency Limiter_
+rejects new requests until the number of in-flight requests drops below the
+limit. The in-flight requests are maintained by the Agents based on the flow
+start and end calls made from the SDKs. Alternatively, for proxy integrations,
+the flow end is inferred when the access log stream is received from the
+underlying middleware or proxy.
+
+## Distributed Request Ledgers {#distributed-request-ledgers}
+
+For each configured [_Concurrency Limiter Component_][reference], every matching
+Aperture Agent instantiates a copy of the _Concurrency Limiter_. Although each
+agent has its own copy of the component, they all share the in-flight request
+ledger through a distributed cache. This means that they work together as a
+single _Concurrency Limiter_, providing seamless coordination and control across
+Agents. The Agents within an [agent group][agent-group] constantly share state
+and detect failures using a gossip protocol.
+
+## Lifecycle of a Request {#lifecycle-of-a-request}
+
+The _Concurrency Limiter_ maintains a ledger of in-flight requests. The ledger
+is updated by the Agents based on the flow start and end calls made from the
+SDKs. Alternatively, for proxy integrations, the flow end is inferred when the
+access log stream is received from the underlying middleware or proxy.
+
+### Max In-flight Duration {#max-in-flight-duration}
+
+In case of failures at the SDK or middleware/proxy, the flow end call might not
+be made. To prevent stale entries in the ledger, the _Concurrency Limiter_
+allows the definition of a maximum in-flight duration. This can be set according
+to the maximum time a request is expected to take. If a request exceeds the
+configured duration, it is automatically removed from the ledger by the
+_Concurrency Limiter_.
+
+[reference]: /reference/configuration/spec.md#concurrency-limiter
+[agent-group]: /concepts/selector.md#agent-group
diff --git a/docs/content/concepts/control-point.md b/docs/content/concepts/control-point.md
index 331ca492b9..686de22f8e 100644
--- a/docs/content/concepts/control-point.md
+++ b/docs/content/concepts/control-point.md
@@ -24,8 +24,23 @@ or configured when integrating with API Gateways or Service Meshes.
 
 To empower Aperture to act at any of the control points, integrations need to
 be installed to be able to interact with the Aperture Agent. Here are the two
-primary types of control points: HTTP/gRPC control points and Feature Control
-Points.
+primary types of control points: Feature control points and HTTP/gRPC control
+points.
+
+### Feature Control Points
+
+Feature control points are facilitated by the [Aperture SDKs](/sdk/sdk.md),
+which are available for various popular programming languages. These SDKs allow
+any function call or code snippet within the service code to be wrapped as a
+feature control point. In Aperture's context, every execution of the feature is
+seen as a flow.
+
+The SDK offers an API to initiate a flow, which corresponds to a
+[`flowcontrol.v1.Check`][flowcontrol-proto] call into the Agent. 
The response +from this call comprises a decision on whether to accept or reject the flow. The +execution of a feature might be gated based on this decision. There is also an +API to end a flow, which creates an OpenTelemetry span representing the flow and +dispatches it to the Agent. ### HTTP/gRPC Control Points @@ -45,21 +60,6 @@ identified by [PatchContext](https://istio.io/latest/docs/reference/config/networking/envoy-filter/#EnvoyFilter-PatchContext) of Istio's EnvoyFilter CRD. -### Feature Control Points - -Feature control points are facilitated by the [Aperture SDKs](/sdk/sdk.md), -which are available for a variety of popular programming languages. These SDKs -allow any function call or code snippet within the service code to be wrapped as -a feature control point. In Aperture's context, every execution of the feature -is seen as a flow. - -The SDK offers an API to initiate a flow, which corresponds to a -[`flowcontrol.v1.Check`][flowcontrol-proto] call into the Agent. The response -from this call comprises a decision on whether to accept or reject the flow. The -execution of a feature might be gated based on this decision. There is also an -API to end a flow, which creates an OpenTelemetry span representing the flow and -dispatches it to the Agent. - ## Understanding Control Points diff --git a/docs/content/concepts/flow-label.md b/docs/content/concepts/flow-label.md index 15833b562d..b4545a861c 100644 --- a/docs/content/concepts/flow-label.md +++ b/docs/content/concepts/flow-label.md @@ -219,9 +219,9 @@ For _Classifier_ created labels, you can disable this behavior by setting [selectors]: ./selector.md [classifier]: ./advanced/classifier.md -[workload]: ./scheduler/scheduler.md#workload +[workload]: ./scheduler.md#workload [ratelimiter]: ./rate-limiter.md -[quota-scheduler]: ./scheduler/quota-scheduler.md +[quota-scheduler]: ./request-prioritization/quota-scheduler.md [flux-meter]: ./advanced/flux-meter.md [baggage]: https://www.w3.org/TR/baggage/#baggage-http-header-format [traces]: diff --git a/docs/content/concepts/flow-lifecycle.md b/docs/content/concepts/flow-lifecycle.md index 5865438e97..3eb2565ef8 100644 --- a/docs/content/concepts/flow-lifecycle.md +++ b/docs/content/concepts/flow-lifecycle.md @@ -13,8 +13,6 @@ keywords: import Zoom from 'react-medium-image-zoom'; ``` -## Flow Lifecycle - The lifecycle of a flow begins when a service initiates it, requesting a decision from the Aperture Agent. As the flow enters the Aperture Agent, it embarks on a journey through multiple stages before a final decision is made. @@ -62,19 +60,19 @@ components for that stage. regulating excessive requests in accordance with per-label limits. - **Caches** reduce the cost of operations and alleviate the load on constrained services by preventing duplicate requests to pay-per-use services. -- [**Schedulers**](./scheduler/scheduler.md) offer on-demand queuing based on a - token bucket algorithm, and prioritize requests using weighted fair queuing. +- [**Schedulers**](./scheduler.md) offer on-demand queuing based on a token + bucket algorithm, and prioritize requests using weighted fair queuing. Multiple matching schedulers can evaluate concurrently, with each having the power to drop a flow. There are two variants: - - The [**Load Scheduler**](./scheduler/load-scheduler.md) oversees the current - token rate in relation to the past token rate, adjusting as required based - on health signals from a service. This scheduler type facilitates active - service protection. 
-  - The [**Quota Scheduler**](./scheduler/quota-scheduler.md) uses a global
-    token bucket as a ledger, managing the token distribution across all Agents.
-    It proves especially effective in environments with strict global rate
-    limits, as it allows for strategic prioritization of requests when reaching
-    quota limits.
+  - The [**Load Scheduler**](./request-prioritization/load-scheduler.md)
+    oversees the current token rate in relation to the past token rate,
+    adjusting as required based on health signals from a service. This scheduler
+    type facilitates active service protection.
+  - The [**Quota Scheduler**](./request-prioritization/quota-scheduler.md) uses
+    a global token bucket as a ledger, managing the token distribution across
+    all Agents. It proves especially effective in environments with strict
+    global rate limits, as it allows for strategic prioritization of requests
+    when reaching quota limits.
 
 After traversing these stages, the flow's decision is sent back to the
 initiating service.
diff --git a/docs/content/concepts/rate-limiter.md b/docs/content/concepts/rate-limiter.md
index 1623e78f03..7de25d94be 100644
--- a/docs/content/concepts/rate-limiter.md
+++ b/docs/content/concepts/rate-limiter.md
@@ -9,9 +9,10 @@ sidebar_position: 5
 
 :::
 
-The _Rate Limiter_ component can be used to prevent recurring overloads by
-proactively regulating heavy-hitters. It achieves this by accepting or rejecting
-incoming flows based on per-label limits, which are configured using the
+The _Rate Limiter_ component can be used to ensure fair access and manage costs
+by regulating the number of requests made by an entity over time. It achieves
+this by accepting or rejecting incoming requests based on per-label limits,
+which are configured using the
 [token bucket algorithm](https://en.wikipedia.org/wiki/Token_bucket).
 
 The _Rate Limiter_ is a component of Aperture's [policy][policies] system, and
@@ -84,13 +85,13 @@ inaccuracy within a (small) time window (sync interval).
 
 The _Rate Limiter_ component accepts or rejects incoming flows based on
 per-label limits, configured as the maximum number of requests per a given
 period of time. The rate-limiting label is chosen from the
-[flow-label][flow-label] with a specific key, enabling distinct limits per user
+[flow-label][flow-label] with a specific key, enabling distinct per-user limits
 as identified by unique values of the label.
 
-:::tip
+:::info
 
-The limit value is provided as a signal within the circuit. It can be set
-dynamically based on the circuit's logic.
+Refer to the [Per-user Rate Limiting guide][guide] for more information on using
+the _Rate Limiter_ with the [aperture-js][aperture-js] SDK.
 
 :::
 
@@ -98,3 +99,5 @@ dynamically based on the circuit's logic. 
[agent-group]: /concepts/selector.md#agent-group
 [policies]: /concepts/advanced/policy.md
 [flow-label]: /concepts/flow-label.md
+[guide]: /guides/per-user-rate-limiting.md
+[aperture-js]: https://github.com/fluxninja/aperture-js
diff --git a/docs/content/concepts/request-prioritization/concurrency-scheduler.md b/docs/content/concepts/request-prioritization/concurrency-scheduler.md
new file mode 100644
index 0000000000..38eb26ab14
--- /dev/null
+++ b/docs/content/concepts/request-prioritization/concurrency-scheduler.md
@@ -0,0 +1,44 @@
+---
+title: Concurrency Scheduler
+keywords:
+  - scheduler
+  - concurrency
+  - queuing
+sidebar_position: 2
+---
+
+:::info See Also
+
+Concurrency Scheduler
+[Reference](/reference/configuration/spec.md#concurrency-scheduler)
+
+:::
+
+The _Concurrency Scheduler_ is used to schedule requests based on importance
+while ensuring that the application adheres to concurrency limits.
+
+The _Concurrency Scheduler_ can be thought of as a combination of a
+[_Scheduler_](../scheduler.md) and a
+[_Concurrency Limiter_](../concurrency-limiter.md). It essentially provides
+scheduling capabilities atop a _Concurrency Limiter_. Similar to the
+_Concurrency Limiter_, this component takes `max_concurrency` as an input port,
+which determines the maximum number of in-flight requests in the global request
+ledger.
+
+The global request ledger is shared among Agents in an
+[agent group](../advanced/agent-group.md). This ledger records the total number
+of in-flight requests across the Agents. If the ledger exceeds the configured
+`max_concurrency`, new requests are queued until the number of in-flight
+requests drops below the limit or
+[until timeout](../scheduler.md#queue-timeout).
+
+In scenarios where the maximum concurrency is known upfront, the _Concurrency
+Scheduler_ is particularly beneficial for enforcing concurrency limits on a
+per-service basis.
+
+The _Concurrency Scheduler_ also allows the definition of
+[workloads](../scheduler.md#workload), a property of the scheduler, which
+enables strategic prioritization of requests when faced with concurrency
+constraints. As a result, the _Concurrency Scheduler_ ensures adherence to the
+concurrency limits and simultaneously offers a mechanism to prioritize requests
+based on their importance.
diff --git a/docs/content/concepts/scheduler/load-scheduler.md b/docs/content/concepts/request-prioritization/load-scheduler.md
similarity index 89%
rename from docs/content/concepts/scheduler/load-scheduler.md
rename to docs/content/concepts/request-prioritization/load-scheduler.md
index a0b451f70c..5e5ae137ec 100644
--- a/docs/content/concepts/scheduler/load-scheduler.md
+++ b/docs/content/concepts/request-prioritization/load-scheduler.md
@@ -4,7 +4,7 @@ keywords:
   - scheduler
   - service protection
   - queuing
-sidebar_position: 1
+sidebar_position: 3
 ---
 
 :::info See Also
 
 The _Load Scheduler_ is used to throttle request rates dynamically during high
 load, therefore protecting services from overloads and cascading failures. It
 uses a local token bucket for estimating the allowed token rate. The fill rate
 of the token bucket gets adjusted by the controller based on the specified
-policy. Since this component builds upon the [_Scheduler_](./scheduler.md), it
+policy. Since this component builds upon the [_Scheduler_](../scheduler.md), it
 allows defining workloads along with their priority and tokens. The scheduler
 employs weighted fair queuing of requests to achieve graceful degradation of
 applications. 
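+
+To make the fill-rate translation concrete, here is a minimal token bucket
+sketch whose refill rate is scaled by a load multiplier. It is illustrative
+only: the names, numbers, and structure are assumptions made for this example,
+not the Agent's actual implementation.
+
+```go
+package main
+
+import (
+	"fmt"
+	"time"
+)
+
+// bucket is a toy token bucket whose fill rate is derived from a load
+// multiplier applied to the recently observed token rate.
+type bucket struct {
+	tokens, capacity, fillRate float64
+	last                       time.Time
+}
+
+// admit refills the bucket for the elapsed time and admits the request if
+// enough tokens are available; a real scheduler would queue the request
+// instead of rejecting it outright.
+func (b *bucket) admit(now time.Time, cost float64) bool {
+	b.tokens += b.fillRate * now.Sub(b.last).Seconds()
+	if b.tokens > b.capacity {
+		b.tokens = b.capacity
+	}
+	b.last = now
+	if b.tokens < cost {
+		return false
+	}
+	b.tokens -= cost
+	return true
+}
+
+func main() {
+	pastRate := 100.0     // tokens/sec observed in the recent past (assumed)
+	loadMultiplier := 0.8 // signal from the policy circuit (assumed)
+	b := &bucket{capacity: 20, fillRate: loadMultiplier * pastRate, last: time.Now()}
+	time.Sleep(100 * time.Millisecond) // let some tokens accumulate
+	fmt.Println(b.admit(time.Now(), 1))
+}
+```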
-This diagram illustrates the working of a load scheduler. - -![Scheduler](./assets/img/load-scheduler-light.svg#gh-light-mode-only) -![Scheduler](./assets/img/load-scheduler-dark.svg#gh-dark-mode-only) - The _Load Scheduler_'s throttling behavior is controlled by the signal at its `load_multiplier` input port. As the policy circuit adjusts the signal at the load multiplier port, it gets translated to the token refill rate at the Agents. @@ -45,9 +40,8 @@ algorithm. Any request that fails to be scheduled within its designated timeout is rejected. Additionally, by defining workloads with varying priorities and weights -(tokens), the load scheduler (and others mentioned below) can prioritize certain -requests over others, facilitating graceful service degradation during -high-traffic periods. +(tokens), the load scheduler can prioritize certain requests over others, +facilitating graceful service degradation during high-traffic periods. ## Load Schedulers {#load-schedulers} diff --git a/docs/content/concepts/scheduler/quota-scheduler.md b/docs/content/concepts/request-prioritization/quota-scheduler.md similarity index 60% rename from docs/content/concepts/scheduler/quota-scheduler.md rename to docs/content/concepts/request-prioritization/quota-scheduler.md index 2e0daca9a5..f8a0736aa6 100644 --- a/docs/content/concepts/scheduler/quota-scheduler.md +++ b/docs/content/concepts/request-prioritization/quota-scheduler.md @@ -5,7 +5,7 @@ keywords: - service protection - queuing - quota -sidebar_position: 2 +sidebar_position: 1 --- :::info See Also @@ -18,32 +18,38 @@ The _Quota Scheduler_ is used to schedule requests based on importance while ensuring that the application adheres to third-party API rate limits or inter-service API quotas. -This diagram illustrates the working of a quota scheduler. - -![Scheduler](./assets/img/quota-scheduler-light.svg#gh-light-mode-only) -![Scheduler](./assets/img/quota-scheduler-dark.svg#gh-dark-mode-only) - The _Quota Scheduler_ can be thought of as a combination of a -[_Scheduler_](./scheduler.md) and a [_Rate Limiter_](../rate-limiter.md). It +[_Scheduler_](../scheduler.md) and a [_Rate Limiter_](../rate-limiter.md). It essentially provides scheduling capabilities atop a _Rate Limiter_. In the policy circuit, this component takes the same input ports as a _Rate Limiter_, namely `fill_rate` and `bucket_capacity`. These ports facilitate adjustment of the global token bucket, which can be used to model an API quota or rate limit. -The token bucket is used as a shared ledger for Agents in an + +The token bucket represents a fixed quota that is divided among the Agents. It +is used as a shared ledger for Agents in an [agent group](../advanced/agent-group.md). This ledger records the total -available tokens that can be distributed across the Agents. +available tokens that can be distributed across the Agents. Tokens are consumed +from it when admitting requests. If the ledger runs out of tokens, new requests +are queued until more tokens become available or +[until timeout](../scheduler.md#queue-timeout). In a scenario where the token fill rate and bucket capacity (API quota) is known upfront, the _Quota Scheduler_ becomes particularly beneficial to enforce -client-side rate limits. The tokens represent a fixed quota that is divided -among the Agents. Each agent has access to this global ledger and consumes -tokens from it when admitting requests. 
If the ledger runs out of tokens, new
-requests are queued until more tokens become available or
-[until timeout](./scheduler.md#queue-timeout).
 
 The _Quota Scheduler_ also allows the definition of
-[workloads](./scheduler.md#workload), a property of the scheduler, which allows
+[workloads](../scheduler.md#workload), a property of the scheduler, which allows
 for strategic prioritization of requests when faced with quota constraints. As
 a result, the _Quota Scheduler_ ensures adherence to the API's rate limits and
 simultaneously offers a mechanism to prioritize requests based on their
 importance.
+
+:::info
+
+Refer to the [API Quota Management guide][guide] for more information on using
+the _Quota Scheduler_ with the [aperture-js][aperture-js] SDK.
+
+:::
+
+[guide]: /guides/api-quota-management.md
+[aperture-js]: https://github.com/fluxninja/aperture-js
diff --git a/docs/content/concepts/request-prioritization/request-prioritization.md b/docs/content/concepts/request-prioritization/request-prioritization.md
new file mode 100644
index 0000000000..4068947f5f
--- /dev/null
+++ b/docs/content/concepts/request-prioritization/request-prioritization.md
@@ -0,0 +1,10 @@
+---
+title: Request Prioritization
+sidebar_position: 8
+---
+
+```mdx-code-block
+import DocCardList from '@theme/DocCardList';
+```
+
+<DocCardList />
diff --git a/docs/content/concepts/scheduler.md b/docs/content/concepts/scheduler.md
new file mode 100644
index 0000000000..57a01f407c
--- /dev/null
+++ b/docs/content/concepts/scheduler.md
@@ -0,0 +1,114 @@
+---
+title: Scheduler
+keywords:
+  - scheduler
+  - tokens
+  - priority
+  - queuing
+  - actuators
+sidebar_position: 7
+---
+
+:::info See Also
+
+Scheduler [Reference](/reference/configuration/spec.md#scheduler)
+
+:::
+
+## Overview {#overview}
+
+Schedulers provide a mechanism for prioritizing requests based on importance
+when the service capacity is limited. Scheduling of requests is based on a
+[weighted fair queuing](https://en.wikipedia.org/wiki/Weighted_fair_queueing)
+algorithm that ensures equitable resource allocation across workloads, factoring
+in the [priorities](#priority) and [tokens](#tokens) (weight) of each request.
+
+Service capacity limits can be determined based on one of the following
+techniques:
+
+1. [Quota Scheduling](./request-prioritization/quota-scheduler.md): Global
+   [token buckets](https://en.wikipedia.org/wiki/Token_bucket) are used to track
+   the request rate quota. The limit is set based on a known quota, such as
+   third-party API rate limits or inter-service API quotas.
+2. [Concurrency Scheduling](./request-prioritization/concurrency-scheduler.md):
+   Global token counters are used to track the concurrency. The limit is set
+   based on the concurrent processing capacity of the service.
+3. [Load Scheduling](./request-prioritization/load-scheduler.md): Uses a token
+   bucket local to each agent, which gets adjusted based on the past token rate
+   at the agent. The limit is adjusted based on load at the service, such as
+   CPU, queue length or response latency.
+
+Each request to the scheduler seeks tokens from the underlying token bucket. If
+tokens are available, the request gets admitted. If tokens are not readily
+available, requests are queued, waiting either until tokens become accessible or
+until a [timeout](#queue-timeout) occurs.
+
+This diagram illustrates the working of a scheduler for workload prioritization. 
+
+![Scheduler](./assets/img/scheduler-light.svg#gh-light-mode-only)
+![Scheduler](./assets/img/scheduler-dark.svg#gh-dark-mode-only)
+
+### Workload {#workload}
+
+Workloads group requests based on common [_Flow Labels_](./flow-label.md).
+Developers can send the workload parameters as flow labels using Aperture SDKs.
+The label keys used to identify workloads are configurable. See the
+[\*\_label_key parameters](/reference/configuration/spec.md#scheduler).
+
+Alternatively, a list of
+[Workloads](/reference/configuration/spec.md#scheduler-workload) can be defined
+inside the Scheduler specification using [label matcher][label-matcher] rules.
+
+### Priority {#priority}
+
+Priority represents the importance of a request compared to the other requests
+in the queue. It can be any positive number, indicating the urgency level, with
+higher numbers denoting higher priority. The position of a flow in the queue is
+computed based on its virtual finish time using the following formula:
+
+$$
+inverted\_priority = {\frac {1} {priority}}
+$$
+
+$$
+virtual\_finish\_time = virtual\_time + \left(tokens \cdot inverted\_priority\right)
+$$
+
+### Tokens {#tokens}
+
+Tokens represent the cost for admitting a specific request. They can be defined
+either through request labels or through the workload definition inside a
+policy. If not specified, the default token value is assumed to be 1 for each
+request, thus representing the number of requests. Estimating tokens accurately
+for each request helps make flow control decisions fairer.
+
+Tokens are determined in the following order of precedence:
+
+- Specified in the flow labels.
+- Estimated tokens (see
+  [`workload_latency_based_tokens`](/reference/configuration/spec.md#load-scheduler)
+  setting).
+- Specified in the `Workload.tokens` setting.
+
+### Queue Timeout {#queue-timeout}
+
+The queue timeout is determined by the gRPC timeout provided on the
+[`flowcontrol.v1.Check`][flowcontrol-proto] call. When a request is made, it
+includes a timeout value that specifies the maximum duration the request can
+wait in the queue. If the request receives the necessary tokens within this
+timeout duration, it is admitted. If the timeout expires before the tokens
+become available, the request is rejected. Thus, the timeout prevents requests
+from waiting excessively long.
+
+Developers can set the gRPC timeout on each `startFlow` call. For middleware
+integrations, the gRPC timeout is configured statically for each integration,
+for example, through the Envoy filter.
+
+The timeout can also be configured using the `queue_timeout` parameter in the
+[workload parameters](/reference/configuration/spec#scheduler-workload-parameters).
+The smaller of the two timeouts is used. 
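+
+To make the formula in the [Priority](#priority) section concrete, the
+following minimal sketch shows how virtual finish times order queued requests.
+It is purely illustrative (names and values are invented for this example) and
+is not the Agent's actual scheduler implementation:
+
+```go
+package main
+
+import (
+	"fmt"
+	"sort"
+)
+
+// request is a queued flow with the attributes used by the formula.
+type request struct {
+	name     string
+	tokens   float64 // cost of admitting the request
+	priority float64 // urgency; must be positive
+}
+
+// virtualFinishTime computes virtual_time + tokens * (1 / priority).
+func virtualFinishTime(virtualTime float64, r request) float64 {
+	return virtualTime + r.tokens*(1/r.priority)
+}
+
+func main() {
+	const virtualTime = 0.0
+	queue := []request{
+		{name: "batch-export", tokens: 10, priority: 1},
+		{name: "checkout", tokens: 10, priority: 100},
+		{name: "search", tokens: 5, priority: 25},
+	}
+	// Requests with earlier virtual finish times are dequeued first.
+	sort.Slice(queue, func(i, j int) bool {
+		return virtualFinishTime(virtualTime, queue[i]) <
+			virtualFinishTime(virtualTime, queue[j])
+	})
+	for _, r := range queue {
+		fmt.Printf("%-12s %.2f\n", r.name, virtualFinishTime(virtualTime, r))
+	}
+}
+```
+
+Despite costing the same 10 tokens, `checkout` finishes at virtual time 0.10
+and is dequeued ahead of `batch-export` at 10.00, because its priority is 100
+times higher.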
+ +[label-matcher]: ./selector.md#label-matcher +[flowcontrol-proto]: + https://buf.build/fluxninja/aperture/docs/main:aperture.flowcontrol.check.v1 diff --git a/docs/content/concepts/scheduler/assets/img/load-scheduler-dark.svg b/docs/content/concepts/scheduler/assets/img/load-scheduler-dark.svg deleted file mode 100644 index f8aa65d7fb..0000000000 --- a/docs/content/concepts/scheduler/assets/img/load-scheduler-dark.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/content/concepts/scheduler/assets/img/load-scheduler-light.svg b/docs/content/concepts/scheduler/assets/img/load-scheduler-light.svg deleted file mode 100644 index 4f03d5094a..0000000000 --- a/docs/content/concepts/scheduler/assets/img/load-scheduler-light.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/content/concepts/scheduler/assets/img/quota-scheduler-dark.svg b/docs/content/concepts/scheduler/assets/img/quota-scheduler-dark.svg deleted file mode 100644 index b8990bb327..0000000000 --- a/docs/content/concepts/scheduler/assets/img/quota-scheduler-dark.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/content/concepts/scheduler/assets/img/quota-scheduler-light.svg b/docs/content/concepts/scheduler/assets/img/quota-scheduler-light.svg deleted file mode 100644 index b172f55973..0000000000 --- a/docs/content/concepts/scheduler/assets/img/quota-scheduler-light.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/content/concepts/scheduler/scheduler.md b/docs/content/concepts/scheduler/scheduler.md deleted file mode 100644 index 75d135bf05..0000000000 --- a/docs/content/concepts/scheduler/scheduler.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -title: Scheduler -keywords: - - scheduler - - tokens - - priority - - queuing - - actuators -sidebar_position: 6 ---- - -:::info See Also - -Scheduler [Reference](/reference/configuration/spec.md#scheduler) - -::: - -## Overview {#overview} - -Schedulers provide a mechanism for throttling and scheduling requests based on -importance when service resources are limited. The throttling is achieved -through [token buckets](https://en.wikipedia.org/wiki/Token_bucket). To gain -admittance, each request must obtain tokens from the bucket. When tokens are -depleted, incoming requests enter a queue, awaiting admittance based on a -[weighted fair queuing](https://en.wikipedia.org/wiki/Weighted_fair_queueing) -algorithm. This algorithm ensures equitable resource allocation across -workloads, factoring in the priority and weight (tokens) of each request. - -This diagram illustrates the working of a scheduler for workload prioritization. - -![Scheduler](./assets/img/scheduler-light.svg#gh-light-mode-only) -![Scheduler](./assets/img/scheduler-dark.svg#gh-dark-mode-only) - -Aperture offers two variants of scheduler: -[_Load Scheduler_](./load-scheduler.md) and -[_Quota Scheduler_](./quota-scheduler.md). While both use the same weighted fair -queuing-based scheduling algorithm, they differ in the throttling mechanism by -employing distinct types of token buckets. The _Load Scheduler_ uses a token -bucket local to each agent, which gets adjusted based on the past token rate at -the agent. This is useful for service protection scenarios since it provides a -robust mechanism to relatively adjust the token rate. The _Quota Scheduler_, -uses a centralized token bucket within an -[agent group](../advanced/agent-group.md). 
This is useful for scenarios -involving known limits, such as third-party API rate limits or inter-service API -quotas. - -### Workload {#workload} - -Workloads are groups of requests based on common -[_Flow Labels_](../flow-label.md). Workloads are expressed by [label -matcher][label-matcher] rules in the _Scheduler_ definition. Aperture Agents -schedule workloads based on their [priorities](#priority) and [tokens](#tokens). - -### Priority {#priority} - -Priority represents the importance of a request compared to the other requests -in the queue. It varies from 0 to an unlimited positive integer, indicating the -urgency level, with higher numbers denoting higher priority. The position of a -flow in the queue is computed based on its virtual finish time using the -following formula: - -$$ -inverted\_priority = {\frac {1} {priority}} -$$ - -$$ -virtual\_finish\_time = virtual\_time + \left(tokens \cdot inverted\_priority\right) -$$ - -To manage prioritized requests, the scheduler seeks tokens from the token -bucket. If tokens are available, the request gets admitted. In cases where -tokens are not readily available, requests are queued, waiting either until -tokens become accessible or until a timeout occurs - the latter being dependent -on the workload or [`flowcontrol.v1.Check`][flowcontrol-proto] call timeout. - -### Tokens {#tokens} - -Tokens represent the cost for admitting a specific request. Typically, tokens -are based on the estimated response time of a request. Estimating the number of -tokens for each request within a workload is critical for making effective flow -control decisions. - -Aperture can automatically estimate the tokens for each workload based on -historical latency measurements. See the `workload_latency_based_tokens` -[configuration](/reference/configuration/spec.md#load-scheduler-parameters) for -more details. The latency based token calculation is aligned with -[Little's Law](https://en.wikipedia.org/wiki/Little%27s_law), which relates -response times, arrival rate, and the system concurrency (number of in-flight -requests). - -Alternatively, tokens can also be represented as the number of requests instead -of response times. For example, when scheduling access to external APIs that -have strict rate limits (global quota). In this case, the number of tokens -represents the number of requests that can be made to the API within a given -time window. - -Tokens are determined in the following order of precedence: - -- Specified in the flow labels. -- Estimated tokens (see - [`workload_latency_based_tokens`](/reference/configuration/spec.md#load-scheduler) - setting). -- Specified in the `Workload.tokens` setting. - -### Queue Timeout {#queue-timeout} - -The queue timeout is determined by the gRPC timeout provided on the -[`flowcontrol.v1.Check`][flowcontrol-proto] call. When a request is made, it -includes a timeout value that specifies the maximum duration the request can -wait in the queue. If the request receives the necessary tokens within this -timeout duration, it is admitted. Otherwise, if the timeout expires before the -tokens are available, the request is rejected. - -The gRPC timeout on the [`flowcontrol.v1.Check`][flowcontrol-proto] call is set -in the Envoy filter and the SDK during initialization. It serves as an upper -bound on the queue timeout, preventing requests from waiting excessively long. 
- -[label-matcher]: ../selector.md#label-matcher -[flowcontrol-proto]: - https://buf.build/fluxninja/aperture/docs/main:aperture.flowcontrol.check.v1 diff --git a/docs/content/concepts/selector.md b/docs/content/concepts/selector.md index 66ca16ff3e..22e328711b 100644 --- a/docs/content/concepts/selector.md +++ b/docs/content/concepts/selector.md @@ -297,7 +297,7 @@ label_matcher: [label]: ./flow-label.md [flux-meter]: ./advanced/flux-meter.md -[load-scheduler]: ./scheduler/load-scheduler.md +[load-scheduler]: ./request-prioritization/load-scheduler.md [classifier]: ./advanced/classifier.md [label-matcher]: /reference/configuration/spec.md#label-matcher [gateway]: /aperture-for-infra/integrations/gateway/gateway.md diff --git a/docs/content/faq.md b/docs/content/faq.md index 582fab81ff..0f2fb9c90e 100644 --- a/docs/content/faq.md +++ b/docs/content/faq.md @@ -109,8 +109,8 @@ The following results were observed: | Istio Proxy | 1.81 mean, 2.11 max | 12.5 mean, 20.8 max | [rate-limiter]: /concepts/rate-limiter.md -[load-scheduler]: /concepts/scheduler/load-scheduler.md -[scheduler]: /concepts/scheduler/scheduler.md +[load-scheduler]: /concepts/request-prioritization/load-scheduler.md +[scheduler]: /concepts/scheduler.md [flux-meter]: /concepts/advanced/flux-meter.md [classifier]: /concepts/advanced/classifier.md [flow-label]: /concepts/flow-label.md diff --git a/docs/content/introduction.md b/docs/content/introduction.md index 55e9652897..753f123c37 100644 --- a/docs/content/introduction.md +++ b/docs/content/introduction.md @@ -50,25 +50,26 @@ To sign-up to Aperture Cloud, [click here][sign-up]. Create precise rate limiters controlling burst-capacity and fill-rate tailored to business-specific labels. Refer to the [Rate Limiting](guides/per-user-rate-limiting.md) guide for more details. -- 📊 [**API Quota Management**](concepts/scheduler/quota-scheduler.md): Maintain - compliance with external API quotas with a global token bucket and smart - request queuing. This feature regulates requests aimed at external services, - ensuring that the usage remains within prescribed rate limits and avoids - penalties or additional costs. Refer to the +- 📊 + [**API Quota Management**](concepts/request-prioritization/quota-scheduler.md): + Maintain compliance with external API quotas with a global token bucket and + smart request queuing. This feature regulates requests aimed at external + services, ensuring that the usage remains within prescribed rate limits and + avoids penalties or additional costs. Refer to the [API Quota Management](guides/api-quota-management.md) guide for more details. -- 🛡️ [**Adaptive Queuing**](concepts/scheduler/load-scheduler.md): Enhance - resource utilization and safeguard against abrupt service overloads with an - intelligent queue at the entry point of services. This queue dynamically - adjusts the rate of requests based on live service health, thereby mitigating - potential service disruptions and ensuring optimal performance under all load - conditions. Refer to the +- 🛡️ [**Adaptive Queuing**](concepts/request-prioritization/load-scheduler.md): + Enhance resource utilization and safeguard against abrupt service overloads + with an intelligent queue at the entry point of services. This queue + dynamically adjusts the rate of requests based on live service health, thereby + mitigating potential service disruptions and ensuring optimal performance + under all load conditions. 
Refer to the [Service Load Management](aperture-for-infra/guides/service-load-management/service-load-management.md) and [Database Load Management](aperture-for-infra/guides/database-load-management/database-load-management.md) guides for more details. -- 🎯 [**Workload Prioritization**](concepts/scheduler/scheduler.md): Safeguard - crucial user experience pathways and ensure prioritized access to external - APIs by strategically prioritizing workloads. With +- 🎯 [**Workload Prioritization**](concepts/scheduler.md): Safeguard crucial + user experience pathways and ensure prioritized access to external APIs by + strategically prioritizing workloads. With [weighted fair queuing](https://en.wikipedia.org/wiki/Weighted_fair_queueing), Aperture aligns resource distribution with business value and urgency of requests. Workload prioritization applies to API Quota Management and Adaptive diff --git a/docs/content/reference/blueprints/load-scheduling/average-latency.md b/docs/content/reference/blueprints/load-scheduling/average-latency.md index 7b1e414367..6f040faf5d 100644 --- a/docs/content/reference/blueprints/load-scheduling/average-latency.md +++ b/docs/content/reference/blueprints/load-scheduling/average-latency.md @@ -27,12 +27,12 @@ At a high level, this policy works as follows: prevents accepting all the traffic at once after an overload, which can again lead to an overload. - Load Scheduler: The accepted token rate at the service is throttled by a - [weighted-fair queuing scheduler](/concepts/scheduler/scheduler.md). The - output of the adjustments to accepted token rate made by gradient controller - and optimizer logic are translated to a load multiplier that is synchronized - with Aperture Agents through etcd. The load multiplier adjusts (increases or - decreases) the token bucket fill rates based on the incoming token rate - observed at each agent. + [weighted-fair queuing scheduler](/concepts/scheduler.md). The output of the + adjustments to accepted token rate made by gradient controller and optimizer + logic are translated to a load multiplier that is synchronized with Aperture + Agents through etcd. The load multiplier adjusts (increases or decreases) the + token bucket fill rates based on the incoming token rate observed at each + agent. The following PromQL query (with appropriate filters) is used as `SIGNAL` for the load scheduler: diff --git a/docs/content/reference/configuration/spec.md b/docs/content/reference/configuration/spec.md index e3c54071c7..86b8a53a05 100644 --- a/docs/content/reference/configuration/spec.md +++ b/docs/content/reference/configuration/spec.md @@ -5653,7 +5653,8 @@ active service protection :::info -See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). +See also +[_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). ::: @@ -8878,7 +8879,7 @@ workloads, the first matching one will be used. If none of workloads match, :::info See also -[workload definition in the concepts section](/concepts/scheduler/scheduler.md#workload). +[workload definition in the concepts section](/concepts/scheduler.md#workload). 
::: diff --git a/docs/content/reference/observability/prometheus-metrics/agent.md b/docs/content/reference/observability/prometheus-metrics/agent.md index b696fdd43d..7f84192292 100644 --- a/docs/content/reference/observability/prometheus-metrics/agent.md +++ b/docs/content/reference/observability/prometheus-metrics/agent.md @@ -56,7 +56,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. | Name | Example | Description | | -------------- | -------------------------------------------- | ----------------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that Flux Meter belongs to | +| agent_group | default | Agent Group of the policy that Load Scheduler belongs to | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -85,7 +85,37 @@ This document describes the Prometheus metrics generated by Aperture Agents. | Name | Example | Description | | --------------- | ---------------------------------------------- | ----------------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that FluxMeter belongs to | +| agent_group | default | Agent Group of the policy that Rate Limiter belongs to | +| instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | +| job | aperture-self | The configured job name that the target belongs to | +| process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | +| policy_name | service1-demo-app | Name of the policy. | +| policy_hash | 5kZjjSgDAtGWmLnDT67SmQhZdHVmz0+GvKcOGTfWMVo= | Hash of the policy used for checking the integrity of the policy. | +| component_id | 13 | Index of the component in order of specification in the policy. | +| decision_type | DECISION_TYPE_ACCEPTED, DECISION_TYPE_REJECTED | Whether the flow was accepted or not | +| limiter_dropped | true, false | Whether this particular limiter has dropped the request. | + + + +## Concurrency Limiter + +### Metrics + + + +| Name | Type | Labels | Unit | Description | +| --------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------- | --------------- | ------------------------------------------------------------------------- | +| concurrency_limiter_counter_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, decision_type, limiter_dropped | count (no unit) | A counter measuring the number of times Concurrency Limiter was triggered | + + + +### Labels + + + +| Name | Example | Description | +| --------------- | ---------------------------------------------- | ----------------------------------------------------------------- | +| agent_group | default | Agent Group of the policy that Concurrency Limiter belongs to | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -115,7 +145,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. 
| Name | Example | Description | | --------------- | ---------------------------------------------- | ----------------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that FluxMeter belongs to | +| agent_group | default | Agent Group of the policy that Sampler belongs to | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -145,7 +175,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. | Name | Example | Description | | ---------------- | -------------------------------------------- | ----------------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that FluxMeter belongs to | +| agent_group | default | Agent Group of the policy that Classifier belongs to | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -161,11 +191,12 @@ This document describes the Prometheus metrics generated by Aperture Agents. -| Name | Type | Labels | Unit | Description | -| -------------------------------- | ------- | ------------------------------------------------------- | --------------- | ----------------------------------------------- | -| flowcontrol_requests_total | Counter | agent_group, instance, job, process_uuid | count (no unit) | Total number of aperture check requests handled | -| flowcontrol_decisions_total | Counter | agent_group, instance, job, process_uuid, decision_type | count (no unit) | Number of aperture check decisions | -| flowcontrol_reject_reasons_total | Counter | agent_group, instance, job, process_uuid, reject_reason | count (no unit) | Number of reject reasons other than unspecified | +| Name | Type | Labels | Unit | Description | +| -------------------------------- | ------- | ---------------------------------------------------------------------- | --------------- | ----------------------------------------------- | +| flowcontrol_requests_total | Counter | control_point, agent_group, instance, job, process_uuid | count (no unit) | Total number of aperture check requests handled | +| flowcontrol_decisions_total | Counter | control_point, agent_group, instance, job, process_uuid, decision_type | count (no unit) | Number of aperture check decisions | +| flowcontrol_reject_reasons_total | Counter | control_point, agent_group, instance, job, process_uuid, reject_reason | count (no unit) | Number of reject reasons other than unspecified | +| flowcontrol_ends_total | Counter | control_point, agent_group, instance, job, process_uuid | count (no unit) | Total number of flow end calls handled | @@ -175,7 +206,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. 
| Name | Example | Description | | ------------- | ------------------------------------------------------------------------------ | -------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that FluxMeter belongs to | +| agent_group | default | Agent Group of the Aperture Agent | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -205,7 +236,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. | Name | Example | Description | | --------------------- | ------------------------------------ | --------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that FluxMeter belongs to | +| agent_group | default | Agent Group of the Aperture Agent | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | @@ -240,7 +271,7 @@ This document describes the Prometheus metrics generated by Aperture Agents. | Name | Example | Description | | --------------- | ---------------------------------------------- | ----------------------------------------------------------------- | -| agent_group | default | Agent Group of the policy that Flux Meter belongs to | +| agent_group | default | Agent Group of the policy that Scheduler belongs to | | instance | aperture-agent-cbfnp | Host instance of the Aperture Agent | | job | aperture-self | The configured job name that the target belongs to | | process_uuid | dc0e82af-6730-4f70-8228-ee91da53ac5f | Host instance's UUID | diff --git a/docs/gen/policy/policy.yaml b/docs/gen/policy/policy.yaml index 48e8095ba8..f10704bde2 100644 --- a/docs/gen/policy/policy.yaml +++ b/docs/gen/policy/policy.yaml @@ -2418,7 +2418,7 @@ definitions: description: |- :::info - See also [_Load Scheduler_ overview](/concepts/scheduler/load-scheduler.md). + See also [_Load Scheduler_ overview](/concepts/request-prioritization/load-scheduler.md). ::: @@ -3841,7 +3841,7 @@ definitions: :::info See also [workload definition in the concepts - section](/concepts/scheduler/scheduler.md#workload). + section](/concepts/scheduler.md#workload). 
::: diff --git a/pkg/policies/flowcontrol/service/check/metrics.go b/pkg/policies/flowcontrol/service/check/metrics.go index 5d256d2562..d43a36343a 100644 --- a/pkg/policies/flowcontrol/service/check/metrics.go +++ b/pkg/policies/flowcontrol/service/check/metrics.go @@ -52,8 +52,7 @@ type PrometheusMetrics struct { registry *prometheus.Registry // Flow control service metrics - // TODO: 3 gauges for 3 types of flowcontrol decisions - checkReceivedTotal prometheus.Counter + checkReceivedTotal prometheus.CounterVec checkDecision prometheus.CounterVec rejectReason prometheus.CounterVec flowEndTotal prometheus.CounterVec @@ -75,11 +74,11 @@ func (pm *PrometheusMetrics) allMetrics() []prometheus.Collector { func NewPrometheusMetrics(registry *prometheus.Registry) (*PrometheusMetrics, error) { pm := &PrometheusMetrics{ registry: registry, - checkReceivedTotal: prometheus.NewCounter( + checkReceivedTotal: *prometheus.NewCounterVec( prometheus.CounterOpts{ Name: metrics.FlowControlRequestsMetricName, Help: "Total number of aperture check requests handled", - }, + }, []string{metrics.ControlPointLabel, metrics.AgentGroupLabel}, ), checkDecision: *prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -119,7 +118,10 @@ func (pm *PrometheusMetrics) CheckResponse( controlPoint string, agentInfo *agentinfo.AgentInfo, ) { - pm.checkReceivedTotal.Inc() + pm.checkReceivedTotal.With(prometheus.Labels{ + metrics.ControlPointLabel: controlPoint, + metrics.AgentGroupLabel: agentInfo.GetAgentGroup(), + }).Inc() pm.checkDecision.With(prometheus.Labels{ metrics.ControlPointLabel: controlPoint, metrics.FlowControlCheckDecisionTypeLabel: decision.Enum().String(),
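
The CounterVec pattern adopted above can be exercised in isolation. Below is a
self-contained sketch: the metric name, help text, and label names mirror this
diff, while the registry setup and label values are illustrative assumptions.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// One counter series per (control_point, agent_group) pair, matching
	// the shape of checkReceivedTotal after this change.
	checks := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "flowcontrol_requests_total",
		Help: "Total number of aperture check requests handled",
	}, []string{"control_point", "agent_group"})

	prometheus.NewRegistry().MustRegister(checks)

	labels := prometheus.Labels{"control_point": "ingress", "agent_group": "default"}
	checks.With(labels).Inc()
	checks.With(labels).Inc()

	// testutil.ToFloat64 reads back the single matching series.
	fmt.Println(testutil.ToFloat64(checks.With(labels))) // 2
}
```

With the `control_point` label in place, per-control-point request totals can
be queried directly, for example with
`sum by (control_point) (rate(flowcontrol_requests_total[5m]))`.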