-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix MaxSilences limit causes incomplete updates of existing silences #3877
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -564,6 +564,13 @@ func (s *Silences) getSilence(id string) (*pb.Silence, bool) { | |
return msil.Silence, true | ||
} | ||
|
||
func (s *Silences) toMeshSilence(sil *pb.Silence) *pb.MeshSilence { | ||
return &pb.MeshSilence{ | ||
Silence: sil, | ||
ExpiresAt: sil.EndsAt.Add(s.retention), | ||
} | ||
} | ||
|
||
func (s *Silences) setSilence(sil *pb.Silence, now time.Time, skipValidate bool) error { | ||
sil.UpdatedAt = now | ||
|
||
|
@@ -573,10 +580,7 @@ func (s *Silences) setSilence(sil *pb.Silence, now time.Time, skipValidate bool) | |
} | ||
} | ||
|
||
msil := &pb.MeshSilence{ | ||
Silence: sil, | ||
ExpiresAt: sil.EndsAt.Add(s.retention), | ||
} | ||
msil := s.toMeshSilence(sil) | ||
b, err := marshalMeshSilence(msil) | ||
if err != nil { | ||
return err | ||
|
@@ -612,25 +616,27 @@ func (s *Silences) Set(sil *pb.Silence) error { | |
return ErrNotFound | ||
} | ||
|
||
if ok { | ||
if canUpdate(prev, sil, now) { | ||
return s.setSilence(sil, now, false) | ||
} | ||
if getState(prev, s.nowUTC()) != types.SilenceStateExpired { | ||
// We cannot update the silence, expire the old one. | ||
if err := s.expire(prev.Id); err != nil { | ||
return fmt.Errorf("expire previous silence: %w", err) | ||
} | ||
} | ||
if ok && canUpdate(prev, sil, now) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code should run before checking the |
||
return s.setSilence(sil, now, false) | ||
} | ||
|
||
// If we got here it's either a new silence or a replacing one. | ||
// If we got here it's either a new silence or a replacing one (which would | ||
// also create a new silence) so we need to make sure we have capacity for | ||
// the new silence. | ||
if s.limits.MaxSilences != nil { | ||
if m := s.limits.MaxSilences(); m > 0 && len(s.st)+1 > m { | ||
return fmt.Errorf("exceeded maximum number of silences: %d (limit: %d)", len(s.st), m) | ||
} | ||
} | ||
|
||
if ok && getState(prev, s.nowUTC()) != types.SilenceStateExpired { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The entire change here is moving the expiration of the existing silence to run after checking the |
||
// We cannot update the silence, expire the old one to leave a history of | ||
// the silence before modification. | ||
if err := s.expire(prev.Id); err != nil { | ||
return fmt.Errorf("expire previous silence: %w", err) | ||
} | ||
} | ||
|
||
uid, err := uuid.NewV4() | ||
if err != nil { | ||
return fmt.Errorf("generate uuid: %w", err) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ package silence | |
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"os" | ||
"runtime" | ||
"sort" | ||
|
@@ -470,32 +471,24 @@ func TestSilenceLimits(t *testing.T) { | |
EndsAt: time.Now().Add(5 * time.Minute), | ||
} | ||
require.NoError(t, s.Set(sil1)) | ||
require.NotEqual(t, "", sil1.Id) | ||
|
||
// Insert sil2 should fail because maximum number of silences | ||
// has been exceeded. | ||
// Insert sil2 should fail because maximum number of silences has been | ||
// exceeded. | ||
sil2 := &pb.Silence{ | ||
Matchers: []*pb.Matcher{{Name: "a", Pattern: "b"}}, | ||
Matchers: []*pb.Matcher{{Name: "c", Pattern: "d"}}, | ||
StartsAt: time.Now(), | ||
EndsAt: time.Now().Add(5 * time.Minute), | ||
} | ||
require.EqualError(t, s.Set(sil2), "exceeded maximum number of silences: 1 (limit: 1)") | ||
require.Equal(t, "", sil2.Id) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have removed all checks for |
||
|
||
// Expire sil1 and run the GC. This should allow sil2 to be | ||
// inserted. | ||
// Expire sil1 and run the GC. This should allow sil2 to be inserted. | ||
require.NoError(t, s.Expire(sil1.Id)) | ||
n, err := s.GC() | ||
require.NoError(t, err) | ||
require.Equal(t, 1, n) | ||
|
||
require.NoError(t, s.Set(sil2)) | ||
require.NotEqual(t, "", sil2.Id) | ||
|
||
// Should be able to update sil2 without hitting the limit. | ||
require.NoError(t, s.Set(sil2)) | ||
|
||
// Expire sil2. | ||
// Expire sil2 and run the GC. | ||
require.NoError(t, s.Expire(sil2.Id)) | ||
n, err = s.GC() | ||
require.NoError(t, err) | ||
|
@@ -505,26 +498,55 @@ func TestSilenceLimits(t *testing.T) { | |
sil3 := &pb.Silence{ | ||
Matchers: []*pb.Matcher{ | ||
{ | ||
Name: strings.Repeat("a", 2<<9), | ||
Pattern: strings.Repeat("b", 2<<9), | ||
Name: strings.Repeat("e", 2<<9), | ||
Pattern: strings.Repeat("f", 2<<9), | ||
}, | ||
{ | ||
Name: strings.Repeat("c", 2<<9), | ||
Pattern: strings.Repeat("d", 2<<9), | ||
Name: strings.Repeat("g", 2<<9), | ||
Pattern: strings.Repeat("h", 2<<9), | ||
}, | ||
}, | ||
CreatedBy: strings.Repeat("e", 2<<9), | ||
Comment: strings.Repeat("f", 2<<9), | ||
CreatedBy: strings.Repeat("i", 2<<9), | ||
Comment: strings.Repeat("j", 2<<9), | ||
StartsAt: time.Now(), | ||
EndsAt: time.Now().Add(5 * time.Minute), | ||
} | ||
err = s.Set(sil3) | ||
require.Error(t, err) | ||
// Do not check the exact size as it can change between consecutive runs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed this. |
||
// due to padding. | ||
require.Contains(t, err.Error(), "silence exceeded maximum size") | ||
// TODO: Once we fix https://github.com/prometheus/alertmanager/issues/3878 this should be require.Equal. | ||
require.NotEqual(t, "", sil3.Id) | ||
require.EqualError(t, s.Set(sil3), fmt.Sprintf("silence exceeded maximum size: %d bytes (limit: 4096 bytes)", s.toMeshSilence(sil3).Size())) | ||
|
||
// Should be able to insert sil4. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added additional tests here to cover the following cases:
|
||
sil4 := &pb.Silence{ | ||
Matchers: []*pb.Matcher{{Name: "k", Pattern: "l"}}, | ||
StartsAt: time.Now(), | ||
EndsAt: time.Now().Add(5 * time.Minute), | ||
} | ||
require.NoError(t, s.Set(sil4)) | ||
|
||
// Should be able to update sil4 without modifications. It is expected to | ||
// keep the same ID. | ||
sil5 := cloneSilence(sil4) | ||
require.NoError(t, s.Set(sil5)) | ||
require.Equal(t, sil4.Id, sil5.Id) | ||
|
||
// Should be able to update the comment. It is also expected to keep the | ||
// same ID. | ||
sil6 := cloneSilence(sil5) | ||
sil6.Comment = "m" | ||
require.NoError(t, s.Set(sil6)) | ||
require.Equal(t, sil5.Id, sil6.Id) | ||
|
||
// Should not be able to update the start and end time as this requires | ||
// sil6 to be expired and a new silence to be created. However, this would | ||
// exceed the maximum number of silences, which counts both active and | ||
// expired silences. | ||
sil7 := cloneSilence(sil6) | ||
sil7.StartsAt = time.Now().Add(5 * time.Minute) | ||
sil7.EndsAt = time.Now().Add(10 * time.Minute) | ||
require.EqualError(t, s.Set(sil7), "exceeded maximum number of silences: 1 (limit: 1)") | ||
|
||
// sil6 should not be expired because the update failed. | ||
sil6, err = s.QueryOne(QIDs(sil6.Id)) | ||
require.NoError(t, err) | ||
require.Equal(t, types.SilenceStateActive, getState(sil6, s.nowUTC())) | ||
} | ||
|
||
func TestSilenceNoLimits(t *testing.T) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've moved this to a function for two reasons:
It allows us to check the full error in unit tests. For example:
We will use it in the next PR where we fix the same bug but for maximum size limits and validation errors.