Fairness (#3134)

### Description of change

##### Checklist

- [ ] Tested in playground or other setup
- [ ] Screenshot (Grafana) from playground added to PR for 15+ minute run
- [ ] Documentation is changed or added
- [ ] Tests and/or benchmarks are included
- [ ] Breaking changes

## Summary by CodeRabbit

- **New Features**
  - Introduced `fairness_label_key` for enforcing fairness in workload scheduling.
  - Added new fairness-related metrics for observability.
- **Documentation**
  - Updated configuration specification with `fairness_label_key` details.
  - Revised observability documentation to reflect new metrics.
- **Refactor**
  - Enhanced the Scheduler interface with additional workload identification.
  - Updated scheduling logic to incorporate fairness in request handling.
- **Chores**
  - Adjusted pre-commit spell check configuration.
- **Bug Fixes**
  - Removed obsolete metrics related to previous scheduling mechanisms.

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: Harjot Gill <[email protected]>
fluxninja · Jan 10, 2024 · 243418a · 243418a
1 parent 31a34a9
commit 243418a
Show file tree

Hide file tree

Showing 25 changed files with 1,024 additions and 837 deletions.
diff --git a/.github/styles/Vocab/FluxNinja/accept.txt → .../config/vocabularies/FluxNinja/accept.txt b/.github/styles/Vocab/FluxNinja/accept.txt → .../config/vocabularies/FluxNinja/accept.txt
diff --git a/.github/styles/Vocab/FluxNinja/reject.txt → .../config/vocabularies/FluxNinja/reject.txt b/.github/styles/Vocab/FluxNinja/reject.txt → .../config/vocabularies/FluxNinja/reject.txt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -255,7 +255,7 @@ repos:
       - id: codespell
         args:
           - "-L inport,atleast,iteraction"
-          - "--ignore-words=./.github/styles/Vocab/FluxNinja/accept.txt"
+          - "--ignore-words=./.github/styles/config/vocabularies/FluxNinja/accept.txt"
         exclude: ^(.*\.svg|yarn.lock|go.sum|.*/go.sum|.circleci/.*|docs/content/reference/api/.*|api/gen/.*|sdks/aperture-java/lib/core/src/main/java/com/fluxninja/generated/.*|sdks/aperture-py/aperture_sdk/_gen/.*|blueprints/gen/.*|sdks/aperture-js/docs/.*|sdks/aperture-js/README.md|sdks/aperture-py/docs/.*|playground/resources/nginx/.*|\.github/.*|.*\.libsonnet|playground/resources/demo-ui/package-lock\.json|sdks/aperture-js/example/package-lock.json)$
   - repo: https://github.com/DavidAnson/markdownlint-cli2
     rev: v0.7.1

diff --git a/api/aperture/policy/language/v1/flowcontrol.proto b/api/aperture/policy/language/v1/flowcontrol.proto
@@ -455,6 +455,9 @@ message Scheduler {
   // Key for a flow label that can be used to provide workloads for this request.
   // If this parameter is not provided, the workloads for the flow will be determined by the matched workload's name in the policy.
   string workload_label_key = 10;
+
+  // Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
+  string fairness_label_key = 11;
 }
 
 // _AIMD Load Scheduler_ uses a Gradient Controller to throttle the token rate based on the deviation of the signal from the setpoint.

diff --git a/api/buf.lock b/api/buf.lock
@@ -9,8 +9,8 @@ deps:
   - remote: buf.build
     owner: envoyproxy
     repository: envoy
-    commit: 10791366aeb94287b5f8b62d1be8ec2e
-    digest: shake256:193299dfe4b65518327fccde2953e05969f23919eaabd68e8ad73bf3a88465620749671b1db7f76de02d38cba6d9a4b19ad947f7ab987de353c3333e683452ca
+    commit: 209ef7e618764e45afca2d0e5aa39a89
+    digest: shake256:a0054a180db39da894083467f8bd98d47bbb81b7ae36658d1c7f7ce05a422964406745ce3a41b8ed13738c1a8e5fd36b3e5aed9ab94244edb913ae51f64f1851
   - remote: buf.build
     owner: envoyproxy
     repository: protoc-gen-validate

diff --git a/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go b/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.go
diff --git a/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.validate.go b/api/gen/proto/go/aperture/policy/language/v1/flowcontrol.pb.validate.go
diff --git a/api/gen/proto/go/aperture/policy/language/v1/flowcontrol_vtproto.pb.go b/api/gen/proto/go/aperture/policy/language/v1/flowcontrol_vtproto.pb.go
diff --git a/blueprints/gen/jsonschema/_definitions.json b/blueprints/gen/jsonschema/_definitions.json
@@ -3595,6 +3595,10 @@
           "description": "This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.\n\n",
           "x-go-tag-default": "ServiceUnavailable"
         },
+        "fairness_label_key": {
+          "description": "Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.",
+          "type": "string"
+        },
         "priority_label_key": {
           "description": "Key for a flow label that can be used to override the default priority for this flow.\nThe value associated with this key must be a valid number. Higher numbers means higher priority.\nIf this parameter is not provided, the priority for the flow will be determined by the matched workload's priority.",
           "type": "string"

diff --git a/blueprints/gen/v1/scheduler.libsonnet b/blueprints/gen/v1/scheduler.libsonnet
@@ -19,6 +19,12 @@
   withDeniedResponseStatusCodeMixin(denied_response_status_code):: {
     denied_response_status_code+: denied_response_status_code,
   },
+  withFairnessLabelKey(fairness_label_key):: {
+    fairness_label_key: fairness_label_key,
+  },
+  withFairnessLabelKeyMixin(fairness_label_key):: {
+    fairness_label_key+: fairness_label_key,
+  },
   withPriorityLabelKey(priority_label_key):: {
     priority_label_key: priority_label_key,
   },

diff --git a/dashboards/grafana/dashboards/summary/scheduler/rows-fn.libsonnet b/dashboards/grafana/dashboards/summary/scheduler/rows-fn.libsonnet
@@ -193,26 +193,4 @@ function(datasourceName, policyName, componentID, extraFilters={})
     ),
   ];
 
-  local legendFormat = '{{ instance }} - {{ policy_name }}';
-  local row10 = [
-    barGaugePanel(
-      'WFQ Scheduler Flows',
-      datasourceName,
-      'avg(wfq_flows_total{%(filters)s})' % { filters: stringFilters },
-      x=0,
-      h=6,
-      w=12,
-      legendFormat=legendFormat
-    ),
-    barGaugePanel(
-      'WFQ Scheduler Heap Requests',
-      datasourceName,
-      'avg(wfq_requests_total{%(filters)s})' % { filters: stringFilters },
-      x=12,
-      h=6,
-      w=12,
-      legendFormat=legendFormat
-    ),
-  ];
-
-  [row1, row2, row3, row4, row5, row6, row7, row8, row9, row10]
+  [row1, row2, row3, row4, row5, row6, row7, row8, row9]
diff --git a/docs/content/assets/openapiv2/aperture-controller.swagger.yaml b/docs/content/assets/openapiv2/aperture-controller.swagger.yaml
@@ -3890,6 +3890,9 @@ definitions:
                     This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.
 
                 x-go-tag-default: ServiceUnavailable
+            fairness_label_key:
+                description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
+                type: string
             priority_label_key:
                 description: |-
                     Key for a flow label that can be used to override the default priority for this flow.

diff --git a/docs/content/assets/openapiv2/aperture.swagger.yaml b/docs/content/assets/openapiv2/aperture.swagger.yaml
@@ -4728,6 +4728,9 @@ definitions:
                     This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.
 
                 x-go-tag-default: ServiceUnavailable
+            fairness_label_key:
+                description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
+                type: string
             priority_label_key:
                 description: |-
                     Key for a flow label that can be used to override the default priority for this flow.

diff --git a/docs/content/reference/configuration/spec.md b/docs/content/reference/configuration/spec.md
@@ -8805,6 +8805,20 @@ Parameters to be used if none of workloads specified in `workloads` match.
 This field allows you to override the default HTTP status code
 (`503 Service Unavailable`) that is returned when a request is denied.
 
+</dd>
+<dt>fairness_label_key</dt>
+<dd>
+
+<!-- vale off -->
+
+(string)
+
+<!-- vale on -->
+
+Key for a flow label that is used to enforce fairness among requests in a
+workload. If not specified, requests within a workload of the same priority are
+admitted in a FIFO manner.
+
 </dd>
 <dt>priority_label_key</dt>
 <dd>

diff --git a/docs/content/reference/observability/prometheus-metrics/agent.md b/docs/content/reference/observability/prometheus-metrics/agent.md
@@ -252,19 +252,20 @@ This document describes the Prometheus metrics generated by Aperture Agents.
 
 <!-- vale off -->
 
-| Name                                | Type    | Labels                                                                                                                           | Unit            | Description                                                                       |
-| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- | --------------- | --------------------------------------------------------------------------------- |
-| wfq_flows_total                     | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the number of flows in the WFQScheduler                       |
-| wfq_requests_total                  | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the number of queued requests in the WFQScheduler             |
-| token_bucket_lm_ratio               | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | percentage      | A gauge that tracks the load multiplier                                           |
-| token_bucket_fill_rate              | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | tokens/s        | A gauge that tracks the fill rate of token bucket                                 |
-| token_bucket_capacity_total         | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the capacity of token bucket                                  |
-| token_bucket_available_tokens_total | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the number of tokens available in token bucket                |
-| workload_requests_total             | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | A counter of workload requests                                                    |
-| request_in_queue_duration_ms        | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | ms              | Metric used for grouping durations for requests by workload in queue of Scheduler |
-| workload_preempted_tokens           | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | token           | Metric used for counting tokens preempted per request                             |
-| workload_delayed_tokens             | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | token           | Metric used for counting tokens delayed per request                               |
-| workload_on_time_total              | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed |
+| Name                                | Type    | Labels                                                                                                                           | Unit            | Description                                                                                                                                  |
+| ----------------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------- | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------- |
+| token_bucket_lm_ratio               | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | percentage      | A gauge that tracks the load multiplier                                                                                                      |
+| token_bucket_fill_rate              | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | tokens/s        | A gauge that tracks the fill rate of token bucket                                                                                            |
+| token_bucket_capacity_total         | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the capacity of token bucket                                                                                             |
+| token_bucket_available_tokens_total | Gauge   | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id                                                 | count (no unit) | A gauge that tracks the number of tokens available in token bucket                                                                           |
+| workload_requests_total             | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | A counter of workload requests                                                                                                               |
+| request_in_queue_duration_ms        | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | ms              | Metric used for grouping durations for requests by workload in queue of Scheduler                                                            |
+| workload_preempted_tokens           | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | token           | Metric used for counting tokens preempted per request measured end-to-end in the scheduler across all workloads.                             |
+| workload_delayed_tokens             | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index                                 | token           | Metric used for counting tokens delayed per request measured end-to-end in the scheduler across all workloads.                               |
+| workload_on_time_total              | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, workload_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed measured end-to-end in the scheduler across all workloads. |
+| fairness_preempted_tokens           | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index                                 | token           | Metric used for counting tokens preempted per request measured at fairness queues within the same workload.                                  |
+| fairness_delayed_tokens             | Summary | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index                                 | token           | Metric used for counting tokens delayed per request measured at fairness queues within the same workload.                                    |
+| fairness_on_time_total              | Counter | agent_group, instance, job, process_uuid, policy_name, policy_hash, component_id, fairness_index, decision_type, limiter_dropped | count (no unit) | Metric used for counting requests that are on time, neither preempted nor delayed measured at fairness queues within the same workload.      |
 
 <!-- vale on -->
 

diff --git a/docs/gen/policy/policy.yaml b/docs/gen/policy/policy.yaml
@@ -3804,6 +3804,9 @@ definitions:
                     This field allows you to override the default HTTP status code (`503 Service Unavailable`) that is returned when a request is denied.
 
                 x-go-tag-default: ServiceUnavailable
+            fairness_label_key:
+                description: Key for a flow label that is used to enforce fairness among requests in a workload. If not specified, requests within a workload of the same priority are admitted in a FIFO manner.
+                type: string
             priority_label_key:
                 description: |-
                     Key for a flow label that can be used to override the default priority for this flow.

diff --git a/go.mod b/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/buger/jsonparser v1.1.1
 	github.com/buraksezer/olric v0.0.0-00010101000000-000000000000
 	github.com/cenkalti/backoff/v4 v4.2.1
+	github.com/cespare/xxhash v1.1.0
 	github.com/charmbracelet/bubbletea v0.25.0
 	github.com/clarketm/json v1.17.1
 	github.com/containerd/cgroups v1.1.0

diff --git a/pkg/metrics/schema.go b/pkg/metrics/schema.go
@@ -106,12 +106,18 @@ const (
 	WorkloadCounterMetricName = "workload_requests_total"
 	// RequestInQueueDurationMetricName - metric used for grouping durations for requests in queue of Scheduler.
 	RequestInQueueDurationMetricName = "request_in_queue_duration_ms"
-	// WorkloadPreemptedTokensMetricName - metric used for counting tokens preempted per request.
+	// WorkloadPreemptedTokensMetricName - metric used for counting tokens preempted per request measured end-to-end in the scheduler across all workloads.
 	WorkloadPreemptedTokensMetricName = "workload_preempted_tokens"
-	// WorkloadDelayedTokensMetricName - metric used for counting tokens delayed per request.
+	// WorkloadDelayedTokensMetricName - metric used for counting tokens delayed per request measured end-to-end in the scheduler across all workloads.
 	WorkloadDelayedTokensMetricName = "workload_delayed_tokens"
-	// WorkloadOnTimeMetricName - metric used for counting requests that are on time, neither preempted nor delayed.
+	// WorkloadOnTimeMetricName - metric used for counting requests that are on time (neither preempted nor delayed), measured end-to-end in the scheduler across all workloads.
 	WorkloadOnTimeMetricName = "workload_on_time_total"
+	// FairnessPreemptedTokensMetricName - metric used for counting tokens preempted per request measured at fairness queues within the same workload.
+	FairnessPreemptedTokensMetricName = "fairness_preempted_tokens"
+	// FairnessDelayedTokensMetricName - metric used for counting tokens delayed per request measured at fairness queues within the same workload.
+	FairnessDelayedTokensMetricName = "fairness_delayed_tokens"
+	// FairnessOnTimeMetricName - metric used for counting requests that are on time (neither preempted nor delayed), measured at fairness queues within the same workload.
+	FairnessOnTimeMetricName = "fairness_on_time_total"
 
 	// IncomingTokensMetricName - total work measured in tokens of all incoming requests.
 	IncomingTokensMetricName = "incoming_tokens_total"
@@ -120,10 +126,6 @@ const (
 	// RejectedTokensMetricName - total work measured in tokens of all rejected requests.
 	RejectedTokensMetricName = "rejected_tokens_total"
 
-	// WFQFlowsMetricName - weighted fair queuing number of flows gauge.
-	WFQFlowsMetricName = "wfq_flows_total"
-	// WFQRequestsMetricName - weighted fair queuing number of requests gauge.
-	WFQRequestsMetricName = "wfq_requests_total"
 	// TokenBucketLMMetricName - a gauge that tracks the load multiplier.
 	TokenBucketLMMetricName = "token_bucket_lm_ratio"
 	// TokenBucketFillRateMetricName - a gauge that tracks the fill rate of token bucket.