Truncate status in reporter #524

Merged · 5 commits · Sep 28, 2022
6 changes: 6 additions & 0 deletions changelog/v0.30.5/truncate-status.yaml
@@ -0,0 +1,6 @@
changelog:
- type: FIX
resolvesIssue: false
description: >-
Truncate size of statuses written to k8s / etcd to protect against large keys stored in k8s backend.
issueLink: https://github.com/solo-io/solo-kit/issues/523
46 changes: 46 additions & 0 deletions pkg/api/v2/reporter/reporter.go
@@ -2,6 +2,7 @@ package reporter

import (
"context"
"sort"
"strings"

"k8s.io/client-go/util/retry"
@@ -15,6 +16,12 @@ import (
"github.com/solo-io/solo-kit/pkg/errors"
)

const (
// 1024 bytes = 1kb (len and slicing on a Go string count bytes, not chars)
MaxStatusBytes = 1024
MaxStatusKeys = 100
)

type Report struct {
Warnings []string
Errors error
@@ -261,6 +268,7 @@ func (r *reporter) WriteReports(ctx context.Context, resourceErrs ResourceReport
return errors.Errorf("reporter: was passed resource of kind %v but no client to support it", kind)
}
status := r.StatusFromReport(report, subresourceStatuses)
status = trimStatus(status)
resourceStatus := r.statusClient.GetStatus(resource)

if status.Equal(resourceStatus) {
@@ -339,3 +347,41 @@ func (r *reporter) StatusFromReport(report Report, subresourceStatuses map[strin
Messages: messages,
}
}

func trimStatus(status *core.Status) *core.Status {
// truncate status reason to a kilobyte, with max 100 keys in subresource statuses
return trimStatusForMaxSize(status, MaxStatusBytes, MaxStatusKeys)
}

func trimStatusForMaxSize(status *core.Status, bytesPerKey, maxKeys int) *core.Status {
if status == nil {
return nil
}
if len(status.Reason) > bytesPerKey {
status.Reason = status.Reason[:bytesPerKey]
}

if len(status.SubresourceStatuses) > maxKeys {
// sort for idempotency
keys := make([]string, 0, len(status.SubresourceStatuses))
for key := range status.SubresourceStatuses {
keys = append(keys, key)
}
sort.Strings(keys)
trimmedSubresourceStatuses := make(map[string]*core.Status, maxKeys)
for _, key := range keys[:maxKeys] {
trimmedSubresourceStatuses[key] = status.SubresourceStatuses[key]
}
status.SubresourceStatuses = trimmedSubresourceStatuses
}

for key, childStatus := range status.SubresourceStatuses {
// halve both budgets at each level of nesting so total memory usage is bounded by a geometric series:
// (num_keys * bytes_per_key) + (num_keys/2 * bytes_per_key/2) + ...
// = 100 * 1024b + 50 * 512b + 25 * 256b + 12 * 128b + 6 * 64b + 3 * 32b + 1 * 16b ~= 136 kilobytes
//
// 2147483647 bytes is the k8s -> etcd limit on the grpc connection, so at ~136KB per status
// that is 2147483647 / ~136KB ~= 15788 resources at the limit before we see an issue
// https://github.com/solo-io/solo-projects/issues/4120
status.SubresourceStatuses[key] = trimStatusForMaxSize(childStatus, bytesPerKey/2, maxKeys/2)
}
return status
}
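The size bound claimed in the comment above can be sanity-checked with a quick standalone calculation. This is a minimal sketch (not part of the PR) that sums the same series the comment walks through, assuming the `MaxStatusBytes` and `MaxStatusKeys` constants from the diff and the same integer halving at each level of nesting:

```go
// Sketch: reproduce the worst-case per-status size bound from the
// trimStatusForMaxSize comment. Each nesting level halves both the
// per-key byte budget and the key budget, so the totals form a
// (roughly) geometric series.
package main

import "fmt"

func main() {
	bytesPerKey := 1024 // MaxStatusBytes
	keys := 100         // MaxStatusKeys

	total := 0
	for keys > 0 && bytesPerKey > 0 {
		total += keys * bytesPerKey // bytes contributed by this nesting level
		keys /= 2                   // integer halving, as in trimStatusForMaxSize
		bytesPerKey /= 2
	}

	// Prints 136432 bytes (~136 KB), matching the comment's estimate.
	// The closed form of the ideal series is (4/3)*100*1024 ~= 136533 bytes.
	fmt.Printf("worst-case status size: %d bytes\n", total)
}
```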
92 changes: 92 additions & 0 deletions pkg/api/v2/reporter/reporter_test.go
@@ -3,6 +3,7 @@ package reporter_test
import (
"context"
"fmt"
"strings"

"github.com/solo-io/go-utils/contextutils"

@@ -36,6 +37,7 @@ var _ = Describe("Reporter", func() {
fakeResourceClient = memory.NewResourceClient(memory.NewInMemoryResourceCache(), &v1.FakeResource{})
reporter = rep.NewReporter("test", statusClient, mockResourceClient, fakeResourceClient)
})

It("reports errors for resources", func() {
r1, err := mockResourceClient.Write(v1.NewMockResource("", "mocky"), clients.WriteOpts{})
Expect(err).NotTo(HaveOccurred())
@@ -96,6 +98,96 @@ var _ = Describe("Reporter", func() {
}))
})

It("truncates large errors", func() {
r1, err := mockResourceClient.Write(v1.NewMockResource("", "mocky"), clients.WriteOpts{})
Expect(err).NotTo(HaveOccurred())

var sb strings.Builder
for i := 0; i < rep.MaxStatusBytes+1; i++ {
sb.WriteString("a")
}

// an error larger than our max (1kb) that should be truncated
veryLargeError := sb.String()

trimmedErr := veryLargeError[:rep.MaxStatusBytes] // we expect to trim this to 1kb in parent. 1/2 more for each nested subresource
recursivelyTrimmedErr := veryLargeError[:rep.MaxStatusBytes/2] // we expect to trim this to 1kb/2
childRecursivelyTrimmedErr := veryLargeError[:rep.MaxStatusBytes/4] // we expect to trim this to 1kb/4

childSubresourceStatuses := map[string]*core.Status{}
for i := 0; i < rep.MaxStatusKeys+1; i++ { // we have numerous keys, and expect to trim to num(parentkeys)/2 (i.e. rep.MaxStatusKeys/2)
var sb strings.Builder
for j := 0; j <= i; j++ {
sb.WriteString("a")
}
childSubresourceStatuses[fmt.Sprintf("child-subresource-%s", sb.String())] = &core.Status{
State: core.Status_Warning,
Reason: veryLargeError,
ReportedBy: "test",
SubresourceStatuses: nil, // intentionally nil; only test recursive call once
}
}

subresourceStatuses := map[string]*core.Status{}
for i := 0; i < rep.MaxStatusKeys+1; i++ { // we have numerous keys, and expect to trim to 100 keys (rep.MaxStatusKeys)
var sb strings.Builder
for j := 0; j <= i; j++ {
sb.WriteString("a")
}
subresourceStatuses[fmt.Sprintf("parent-subresource-%s", sb.String())] = &core.Status{
State: core.Status_Warning,
Reason: veryLargeError,
ReportedBy: "test",
SubresourceStatuses: childSubresourceStatuses,
}
}

trimmedChildSubresourceStatuses := map[string]*core.Status{}
for i := 0; i < rep.MaxStatusKeys/2; i++ { // we expect to trim to num(parentkeys)/2 (i.e. rep.MaxStatusKeys/2)
var sb strings.Builder
for j := 0; j <= i; j++ {
sb.WriteString("a")
}
trimmedChildSubresourceStatuses[fmt.Sprintf("child-subresource-%s", sb.String())] = &core.Status{
State: core.Status_Warning,
Reason: childRecursivelyTrimmedErr,
ReportedBy: "test",
SubresourceStatuses: nil, // intentionally nil; only test recursive call once
}
}

trimmedSubresourceStatuses := map[string]*core.Status{}
for i := 0; i < rep.MaxStatusKeys; i++ { // we expect to trim to 100 keys (rep.MaxStatusKeys)
var sb strings.Builder
for j := 0; j <= i; j++ {
sb.WriteString("a")
}
trimmedSubresourceStatuses[fmt.Sprintf("parent-subresource-%s", sb.String())] = &core.Status{
State: core.Status_Warning,
Reason: recursivelyTrimmedErr,
ReportedBy: "test",
SubresourceStatuses: trimmedChildSubresourceStatuses,
}
}

resourceErrs := rep.ResourceReports{
r1.(*v1.MockResource): rep.Report{Errors: fmt.Errorf(veryLargeError)},
}
err = reporter.WriteReports(context.TODO(), resourceErrs, subresourceStatuses)
Expect(err).NotTo(HaveOccurred())

r1, err = mockResourceClient.Read(r1.GetMetadata().Namespace, r1.GetMetadata().Name, clients.ReadOpts{})
Expect(err).NotTo(HaveOccurred())

status := statusClient.GetStatus(r1.(*v1.MockResource))
Expect(status).To(Equal(&core.Status{
State: 2,
Reason: trimmedErr,
ReportedBy: "test",
SubresourceStatuses: trimmedSubresourceStatuses,
}))
})

It("handles conflict", func() {
r1, err := mockResourceClient.Write(v1.NewMockResource("", "mocky"), clients.WriteOpts{})
Expect(err).NotTo(HaveOccurred())