Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fairness #3134

Merged
merged 9 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ repos:
- id: codespell
args:
- "-L inport,atleast,iteraction"
- "--ignore-words=./.github/styles/Vocab/FluxNinja/accept.txt"
- "--ignore-words=./.github/styles/config/vocabularies/FluxNinja/accept.txt"
exclude: ^(.*\.svg|yarn.lock|go.sum|.*/go.sum|.circleci/.*|docs/content/reference/api/.*|api/gen/.*|sdks/aperture-java/lib/core/src/main/java/com/fluxninja/generated/.*|sdks/aperture-py/aperture_sdk/_gen/.*|blueprints/gen/.*|sdks/aperture-js/docs/.*|sdks/aperture-js/README.md|sdks/aperture-py/docs/.*|playground/resources/nginx/.*|\.github/.*|.*\.libsonnet|playground/resources/demo-ui/package-lock\.json|sdks/aperture-js/example/package-lock.json)$
- repo: https://github.com/DavidAnson/markdownlint-cli2
rev: v0.7.1
Expand Down
3 changes: 3 additions & 0 deletions api/aperture/policy/language/v1/flowcontrol.proto
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ message Scheduler {
// Key for a flow label that can be used to provide workloads for this request.
// If this parameter is not provided, the workloads for the flow will be determined by the matched workload's name in the policy.
string workload_label_key = 10;

// Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
string fairness_label_key = 11;
}

// _AIMD Load Scheduler_ uses a Gradient Controller to throttle the token rate based on the deviation of the signal from the setpoint.
Expand Down
4 changes: 2 additions & 2 deletions api/buf.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ deps:
- remote: buf.build
owner: envoyproxy
repository: envoy
commit: 39f01805b8754956a78bc77bda4c3761
digest: shake256:a7e7c7675618056879597d6205e394e85b63b71fd1766818042e6fd9b5c4fd623bede2784a9c635dced61fb40a22610812f164ed40c7a0cecbe50c0ecc76952d
commit: 209ef7e618764e45afca2d0e5aa39a89
digest: shake256:a0054a180db39da894083467f8bd98d47bbb81b7ae36658d1c7f7ce05a422964406745ce3a41b8ed13738c1a8e5fd36b3e5aed9ab94244edb913ae51f64f1851
- remote: buf.build
owner: envoyproxy
repository: protoc-gen-validate
Expand Down
1,146 changes: 579 additions & 567 deletions api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions blueprints/gen/jsonschema/_definitions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3595,6 +3595,10 @@
"description": "This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.\n\n",
"x-go-tag-default": "ServiceUnavailable"
},
"fairness_label_key": {
"description": "Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.",
"type": "string"
},
"priority_label_key": {
"description": "Key for a flow label that can be used to override the default priority for this flow.\nThe value associated with this key must be a valid number. Higher numbers mean higher priority.\nIf this parameter is not provided, the priority for the flow will be determined by the matched workload's priority.",
"type": "string"
Expand Down
6 changes: 6 additions & 0 deletions blueprints/gen/v1/scheduler.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
withDeniedResponseStatusCodeMixin(denied_response_status_code):: {
denied_response_status_code+: denied_response_status_code,
},
withFairnessLabelKey(fairness_label_key):: {
fairness_label_key: fairness_label_key,
},
withFairnessLabelKeyMixin(fairness_label_key):: {
fairness_label_key+: fairness_label_key,
},
withPriorityLabelKey(priority_label_key):: {
priority_label_key: priority_label_key,
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,26 +193,4 @@ function(datasourceName, policyName, componentID, extraFilters={})
),
];

local legendFormat = '{{ instance }} - {{ policy_name }}';
local row10 = [
barGaugePanel(
'WFQ Scheduler Flows',
datasourceName,
'avg(wfq_flows_total{%(filters)s})' % { filters: stringFilters },
x=0,
h=6,
w=12,
legendFormat=legendFormat
),
barGaugePanel(
'WFQ Scheduler Heap Requests',
datasourceName,
'avg(wfq_requests_total{%(filters)s})' % { filters: stringFilters },
x=12,
h=6,
w=12,
legendFormat=legendFormat
),
];

[row1, row2, row3, row4, row5, row6, row7, row8, row9, row10]
[row1, row2, row3, row4, row5, row6, row7, row8, row9]
Original file line number Diff line number Diff line change
Expand Up @@ -3890,6 +3890,9 @@ definitions:
This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.

x-go-tag-default: ServiceUnavailable
fairness_label_key:
description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
type: string
priority_label_key:
description: |-
Key for a flow label that can be used to override the default priority for this flow.
Expand Down
3 changes: 3 additions & 0 deletions docs/content/assets/openapiv2/aperture.swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4728,6 +4728,9 @@ definitions:
This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.

x-go-tag-default: ServiceUnavailable
fairness_label_key:
description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
type: string
priority_label_key:
description: |-
Key for a flow label that can be used to override the default priority for this flow.
Expand Down
14 changes: 14 additions & 0 deletions docs/content/reference/configuration/spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -8805,6 +8805,20 @@ Parameters to be used if none of workloads specified in `workloads` match.
This field allows you to override the default HTTP status code
(`503 Service Unavailable`) that is returned when a request is denied.

</dd>
<dt>fairness_label_key</dt>
<dd>

<!-- vale off -->

(string)

<!-- vale on -->

Key for a flow label that is used to enforce fairness among requests in a
workload. If not specified, requests within a workload of the same priority are
admitted in a FIFO manner.

</dd>
<dt>priority_label_key</dt>
<dd>
Expand Down
27 changes: 14 additions & 13 deletions docs/content/reference/observability/prometheus-metrics/agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -252,19 +252,20 @@ This document describes the Prometheus metrics generated by Aperture Agents.

<!-- vale off -->

| Name | Type | Labels | Unit | Description |
| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- | --------------- | --------------------------------------------------------------------------------- |
| wfq_flows_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the number of flows in the WFQScheduler |
| wfq_requests_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the number of queued requests in the WFQScheduler |
| token_bucket_lm_ratio | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | percentage | A gauge that tracks the load multiplier |
| token_bucket_fill_rate | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | tokens/s | A gauge that tracks the fill rate of token bucket |
| token_bucket_capacity_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the capacity of token bucket |
| token_bucket_available_tokens_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the number of tokens available in token bucket |
| workload_requests_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | A counter of workload requests |
| request_in_queue_duration_ms | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | ms | Metric used for grouping durations for requests by workload in queue of Scheduler |
| workload_preempted_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | token | Metric used for counting tokens preempted per request |
| workload_delayed_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | token | Metric used for counting tokens delayed per request |
| workload_on_time_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed |
| Name | Type | Labels | Unit | Description |
| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
| token_bucket_lm_ratio | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | percentage | A gauge that tracks the load multiplier |
| token_bucket_fill_rate | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | tokens/s | A gauge that tracks the fill rate of token bucket |
| token_bucket_capacity_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the capacity of token bucket |
| token_bucket_available_tokens_total | Gauge | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id | count (no unit) | A gauge that tracks the number of tokens available in token bucket |
| workload_requests_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | A counter of workload requests |
| request_in_queue_duration_ms | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | ms | Metric used for grouping durations for requests by workload in queue of Scheduler |
| workload_preempted_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | token | Metric used for counting tokens preempted per request measured end-to-end in the scheduler across all workloads. |
| workload_delayed_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index | token | Metric used for counting tokens delayed per request measured end-to-end in the scheduler across all workloads. |
| workload_on_time_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed measured end-to-end in the scheduler across all workloads. |
| fairness_preempted_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index | token | Metric used for counting tokens preempted per request measured at fairness queues within the same workload. |
| fairness_delayed_tokens | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index | token | Metric used for counting tokens delayed per request measured at fairness queues within the same workload. |
| fairness_on_time_total | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed measured at fairness queues within the same workload. |

<!-- vale on -->

Expand Down
3 changes: 3 additions & 0 deletions docs/gen/policy/policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3804,6 +3804,9 @@ definitions:
This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.

x-go-tag-default: ServiceUnavailable
fairness_label_key:
description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
type: string
priority_label_key:
description: |-
Key for a flow label that can be used to override the default priority for this flow.
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/buger/jsonparser v1.1.1
github.com/buraksezer/olric v0.0.0-00010101000000-000000000000
github.com/cenkalti/backoff/v4 v4.2.1
github.com/cespare/xxhash v1.1.0
github.com/charmbracelet/bubbletea v0.25.0
github.com/clarketm/json v1.17.1
github.com/containerd/cgroups v1.1.0
Expand Down
16 changes: 9 additions & 7 deletions pkg/metrics/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,18 @@ const (
WorkloadCounterMetricName = "workload_requests_total"
// RequestInQueueDurationMetricName - metric used for grouping durations for requests in queue of Scheduler.
RequestInQueueDurationMetricName = "request_in_queue_duration_ms"
// WorkloadPreemptedTokensMetricName - metric used for counting tokens preempted per request.
// WorkloadPreemptedTokensMetricName - metric used for counting tokens preempted per request measured end-to-end in the scheduler across all workloads.
WorkloadPreemptedTokensMetricName = "workload_preempted_tokens"
// WorkloadDelayedTokensMetricName - metric used for counting tokens delayed per request.
// WorkloadDelayedTokensMetricName - metric used for counting tokens delayed per request measured end-to-end in the scheduler across all workloads.
WorkloadDelayedTokensMetricName = "workload_delayed_tokens"
// WorkloadOnTimeMetricName - metric used for counting requests that are on time, neither preempted nor delayed.
// WorkloadOnTimeMetricName - metric used for counting requests that are on time, neither preempted nor delayed measured end-to-end in the scheduler across all workloads.
WorkloadOnTimeMetricName = "workload_on_time_total"
// FairnessPreemptedTokensMetricName - metric used for counting tokens preempted per request measured at fairness queues within the same workload.
FairnessPreemptedTokensMetricName = "fairness_preempted_tokens"
// FairnessDelayedTokensMetricName - metric used for counting tokens delayed per request measured at fairness queues within the same workload.
FairnessDelayedTokensMetricName = "fairness_delayed_tokens"
// FairnessOnTimeMetricName - metric used for counting requests that are on time, neither preempted nor delayed measured at fairness queues within the same workload.
FairnessOnTimeMetricName = "fairness_on_time_total"

// IncomingTokensMetricName - total work measured in tokens of all incoming requests.
IncomingTokensMetricName = "incoming_tokens_total"
Expand All @@ -120,10 +126,6 @@ const (
// RejectedTokensMetricName - total work measured in tokens of all rejected requests.
RejectedTokensMetricName = "rejected_tokens_total"

// WFQFlowsMetricName - weighted fair queuing number of flows gauge.
WFQFlowsMetricName = "wfq_flows_total"
// WFQRequestsMetricName - weighted fair queuing number of requests gauge.
WFQRequestsMetricName = "wfq_requests_total"
// TokenBucketLMMetricName - a gauge that tracks the load multiplier.
TokenBucketLMMetricName = "token_bucket_lm_ratio"
// TokenBucketFillRateMetricName - a gauge that tracks the fill rate of token bucket.
Expand Down
Loading