Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve panels in the playground grafana #299

Merged
merged 9 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion manifests/charts/istioconfig/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ agentservice:
port: 80
otlpPort: 4317

authzGrpcTimeout: 0.01s
authzGrpcTimeout: 0.5s
maxRequestBytes: 8192
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ local k = import 'github.com/jsonnet-libs/k8s-libsonnet/1.22/main.libsonnet';

local demoApp = import 'apps/demoapp/main.libsonnet';
local latencyGradientPolicy = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/policies/latency-gradient.libsonnet';
local aperture = import 'github.com/fluxninja/aperture/libsonnet/1.0/main.libsonnet';

local Workload = aperture.v1.SchedulerWorkload;
local LabelMatcher = aperture.v1.LabelMatcher;
local WorkloadWithLabelMatcher = aperture.v1.SchedulerWorkloadAndLabelMatcher;

local demoappMixin =
demoApp {
Expand Down Expand Up @@ -32,6 +37,22 @@ local policy = latencyGradientPolicy({
serviceSelector+: {
service: 'service1-demo-app.demoapp.svc.cluster.local',
},
concurrencyLimiter+: {
defaultWorkload: {
priority: 20,
timeout: '0.025s',
},
workloads: [
WorkloadWithLabelMatcher.new(
workload=Workload.withPriority(50) + Workload.withTimeout('0.025s'),
label_matcher=LabelMatcher.withMatchLabels({ 'request_header_user-type': 'guest' })
),
WorkloadWithLabelMatcher.new(
workload=Workload.withPriority(200) + Workload.withTimeout('0.025s'),
label_matcher=LabelMatcher.withMatchLabels({ 'request_header_user-type': 'subscriber' })
),
],
},
}).policy;

{
Expand Down
10 changes: 5 additions & 5 deletions manifests/k8s/tanka/jsonnetfile.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
"subdir": ""
}
},
"version": "f41e919200a8283c21d8d3669caab6505e72972d",
"sum": "EZe68l9CbQtOwVyJQd7XezZcwE8zXZtjNi+s5MN3WCg="
"version": "99977877059d086c9a4253b26778fc567f381e52",
"sum": "eF4lEXF1Wq8/jXEhIEPK+zjATFGjcju+guoad5sflrk="
},
{
"source": {
Expand All @@ -18,8 +18,8 @@
"subdir": "lib/1.0"
}
},
"version": "f41e919200a8283c21d8d3669caab6505e72972d",
"sum": "VZw1LdVqxkqL6mbWTHVPWfzcjrkkM8FyDkmv2HoJddc="
"version": "99977877059d086c9a4253b26778fc567f381e52",
"sum": "cQ5J2PMhqaw3NqkMAEml9ocF86HAvwgOS7pxVeBrvJY="
},
{
"source": {
Expand All @@ -28,7 +28,7 @@
"subdir": "libsonnet/1.0"
}
},
"version": "7ac610333b11916c02915943c5e9953c5ac111de",
"version": "5408d30c44d75de5fec6a4c7de66023e753eb00c",
"sum": "CEvwNmmpYPp6Yp1F0mqO1tTpTcaKUvnWge5Vh1G64k0="
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
local grafanaOperator = import 'github.com/jsonnet-libs/grafana-operator-libsonnet/4.3/main.libsonnet';
local kubernetesMixin = import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet';

local decisionDashboard = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/dashboards/decision.libsonnet';
local policyDashboard = import 'github.com/fluxninja/aperture-blueprints/lib/1.0/dashboards/latency-gradient.libsonnet';

local grafana = grafanaOperator.integreatly.v1alpha1.grafana;
local dashboard = grafanaOperator.integreatly.v1alpha1.grafanaDashboard;
Expand Down Expand Up @@ -40,7 +40,7 @@ local dashboards =
[
dashboard.new('example-dashboard') +
dashboard.metadata.withLabels({ 'fluxninja.com/grafana-instance': 'aperture-grafana' }) +
dashboard.spec.withJson(std.manifestJsonEx(decisionDashboard({
dashboard.spec.withJson(std.manifestJsonEx(policyDashboard({
policyName: 'service1-demo-app',
}).dashboard, indent=' ')) +
dashboard.spec.withDatasources({
Expand Down
8 changes: 4 additions & 4 deletions pkg/flowcontrol/common/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,25 @@ func NewPrometheusMetrics(registry *prometheus.Registry) (*PrometheusMetrics, er
registry: registry,
checkReceivedTotal: prometheus.NewCounter(
prometheus.CounterOpts{
Name: metrics.FlowControlCheckRequestsMetricName,
Name: metrics.FlowControlRequestsMetricName,
Help: "Total number of aperture check requests handled",
},
),
checkDecision: *prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metrics.FlowControlCheckDecisionsMetricName,
Name: metrics.FlowControlDecisionsMetricName,
Help: "Number of aperture check decisions",
}, []string{metrics.FlowControlCheckDecisionTypeLabel},
),
errorReason: *prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metrics.FlowControlCheckErrorReasonMetricName,
Name: metrics.FlowControlErrorReasonMetricName,
Help: "Number of error reasons other than unspecified",
}, []string{metrics.FlowControlCheckErrorReasonLabel},
),
rejectReason: *prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metrics.FlowControlCheckRejectReasonMetricName,
Name: metrics.FlowControlRejectReasonMetricName,
Help: "Number of reject reasons other than unspecified",
}, []string{metrics.FlowControlCheckRejectReasonLabel},
),
Expand Down
34 changes: 13 additions & 21 deletions pkg/metrics/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,22 @@ const (
WFQFlowsMetricName = "wfq_flows"
// WFQRequestsMetricName - weighted fair queuing number of requests gauge.
WFQRequestsMetricName = "wfq_requests"
// FlowControlCheckRequestsMetricName - counter for Check requests for flowcontrol.
FlowControlCheckRequestsMetricName = "flowcontrol_check_requests_total"
// FlowControlCheckDecisionsMetricName - counter for Check requests per decision type.
FlowControlCheckDecisionsMetricName = "flowcontrol_check_decisions_total"
// FlowControlCheckErrorReasonMetricName - metric for error reason on FCS Check requests.
FlowControlCheckErrorReasonMetricName = "flowcontrol_check_error_reason_total"
// FlowControlCheckRejectReasonMetricName - metric for reject reason on FCS Check requests.
FlowControlCheckRejectReasonMetricName = "flowcontrol_check_reject_reason_total"
// FlowControlRequestsMetricName - counter for Check requests for flowcontrol.
FlowControlRequestsMetricName = "flowcontrol_requests_count"
// FlowControlDecisionsMetricName - counter for Check requests per decision type.
FlowControlDecisionsMetricName = "flowcontrol_decisions_count"
// FlowControlErrorReasonMetricName - metric for error reason on FCS Check requests.
FlowControlErrorReasonMetricName = "flowcontrol_error_reason_count"
// FlowControlRejectReasonMetricName - metric for reject reason on FCS Check requests.
FlowControlRejectReasonMetricName = "flowcontrol_reject_reason_count"
// TokenBucketMetricName - a gauge that tracks the load shed factor.
TokenBucketMetricName = "token_bucket_lsf"
// TokenBucketFillRateMetricName - a gauge that tracks the fill rate of token bucket.
TokenBucketFillRateMetricName = "token_bucket_bucket_fill_rate"
TokenBucketFillRateMetricName = "token_bucket_fill_rate"
// TokenBucketCapacityMetricName - a gauge that tracks the capacity of token bucket.
TokenBucketCapacityMetricName = "token_bucket_bucket_capacity"
TokenBucketCapacityMetricName = "token_bucket_capacity"
// TokenBucketAvailableMetricName - a gauge that tracks the number of tokens available in token bucket.
TokenBucketAvailableMetricName = "token_bucket_available_tokens"
// GroupJobRegisteredMetricName - current number of group job registered.
GroupJobRegisteredMetricName = "group_job_registered_number"
// GroupJobScheduledMetricName - current number of group job scheduled.
GroupJobScheduledMetricName = "group_job_scheduled_number"
// GroupJobCompletedMetricName - total number of group job completed.
GroupJobCompletedMetricName = "group_job_completed_total"
// GroupJobLatencyMetricName - the latency of the group jobs.
GroupJobLatencyMetricName = "group_job_latency_seconds"

// PROMETHEUS LABELS.

Expand All @@ -75,11 +67,11 @@ const (
// ResponseStatusCodeLabel - label from response status code.
ResponseStatusCodeLabel = "response_status_code"
// FlowControlCheckDecisionTypeLabel - label for decision type dropped or accepted.
FlowControlCheckDecisionTypeLabel = "flowcontrol_check_decision_type"
FlowControlCheckDecisionTypeLabel = "decision_type"
// FlowControlCheckErrorReasonLabel - label for error reason on FCS Check request.
FlowControlCheckErrorReasonLabel = "flowcontrol_check_error_reason"
FlowControlCheckErrorReasonLabel = "error_reason"
// FlowControlCheckRejectReasonLabel - label for reject reason on FCS Check request.
FlowControlCheckRejectReasonLabel = "flowcontrol_check_reject_reason"
FlowControlCheckRejectReasonLabel = "reject_reason"

// DEFAULTS.

Expand Down
15 changes: 7 additions & 8 deletions pkg/otelcollector/metricsprocessor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,19 +225,19 @@ func (p *metricsProcessor) updateMetrics(
statusCodeStr := statusCode.StringVal()

for _, decision := range checkResponse.LimiterDecisions {
workload := ""
if cl := decision.GetConcurrencyLimiter(); cl != nil {
workload = cl.GetWorkloadIndex()
}
labels := map[string]string{
metrics.PolicyNameLabel: decision.PolicyName,
metrics.PolicyHashLabel: decision.PolicyHash,
metrics.ComponentIndexLabel: fmt.Sprintf("%d", decision.ComponentIndex),
metrics.DecisionTypeLabel: checkResponse.DecisionType.String(),
metrics.WorkloadIndexLabel: workload,
}
log.Trace().Msgf("labels: %v", labels)

workload := ""
if cl := decision.GetConcurrencyLimiter(); cl != nil {
workload = cl.GetWorkloadIndex()
}
err = p.updateMetricsForWorkload(labels, latency, workload)
err = p.updateMetricsForWorkload(labels, latency)
if err != nil {
return err
}
Expand All @@ -250,8 +250,7 @@ func (p *metricsProcessor) updateMetrics(
return nil
}

func (p *metricsProcessor) updateMetricsForWorkload(labels map[string]string, latency float64, workload string) error {
labels[metrics.WorkloadIndexLabel] = workload
func (p *metricsProcessor) updateMetricsForWorkload(labels map[string]string, latency float64) error {
latencyHistogram, err := p.workloadLatencyHistogram.GetMetricWith(labels)
if err != nil {
log.Warn().Err(err).Msg("Getting latency histogram")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,11 @@ func newLoadShedActuatorFactory(
errMulti = multierr.Append(errMulti, err)
}
if !prometheusRegistry.Unregister(f.tokenBucketFillRateGaugeVec) {
err := fmt.Errorf("failed to unregister token_bucket_bucket_fill_rate metric")
err := fmt.Errorf("failed to unregister token_bucket_fill_rate metric")
errMulti = multierr.Append(errMulti, err)
}
if !prometheusRegistry.Unregister(f.tokenBucketBucketCapacityGaugeVec) {
err := fmt.Errorf("failed to unregister token_bucket_bucket_capacity metric")
err := fmt.Errorf("failed to unregister token_bucket_capacity metric")
errMulti = multierr.Append(errMulti, err)
}
if !prometheusRegistry.Unregister(f.tokenBucketAvailableTokensGaugeVec) {
Expand Down Expand Up @@ -218,11 +218,11 @@ func (lsaFactory *loadShedActuatorFactory) newLoadShedActuator(conLimiter *concu
}
deleted = lsaFactory.tokenBucketFillRateGaugeVec.Delete(metricLabels)
if !deleted {
errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_bucket_fill_rate gauge from its metric vector"))
errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_fill_rate gauge from its metric vector"))
}
deleted = lsaFactory.tokenBucketBucketCapacityGaugeVec.Delete(metricLabels)
if !deleted {
errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_bucket_capacity gauge from its metric vector"))
errMulti = multierr.Append(errMulti, errors.New("failed to delete token_bucket_capacity gauge from its metric vector"))
}
deleted = lsaFactory.tokenBucketAvailableTokensGaugeVec.Delete(metricLabels)
if !deleted {
Expand Down
10 changes: 5 additions & 5 deletions tools/load_generator/scenarios/load_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import http from "k6/http";
import { check } from "k6";

export let vuStages = [
{ duration: "30s", target: 5 }, // simulate ramp-up of traffic from 0 to 5 users over 30 seconds
  { duration: "30s", target: 5 }, // stay at 5 users for 30 seconds
  { duration: "2m", target: 15 }, // ramp-up to 15 users over 2 minutes
  { duration: "1s", target: 5 }, // jump traffic from 0 to 5 users in 1 second
  { duration: "2m", target: 5 }, // stay at 5 users for 2 minutes
  { duration: "1m", target: 15 }, // ramp-up to 15 users over 1 minute
  { duration: "2m", target: 15 }, // stay at 15 users for 2 minutes (peak hour)
{ duration: "10s", target: 5 }, // ramp-down to 5 users in 10 seconds
  { duration: "2m", target: 5 }, // stay at 5 users for 2 minutes
  { duration: "1s", target: 5 }, // ramp-down to 5 users in 1 second
  { duration: "5m", target: 5 }, // stay at 5 users for 5 minutes
];

export let options = {
Expand Down