Skip to content

Commit

Permalink
Lifecycler: return the number of healthy instances in lifecycler's zo…
Browse files Browse the repository at this point in the history
…ne (#266)

**What this PR does:**
This PR adds the `HealthyInstancesInZoneCount()` method to `Lifecycler`, which returns the number of healthy instances in the ring that belong to the lifecycler's zone, updated during the last heartbeat period.
A test (`TestLifecycler_InstancesInZoneCount`) showing the expected behaviour of `HealthyInstancesInZoneCount()` has been added.

**Which issue(s) this PR fixes:**
This PR is a prerequisite for fixing issue 4208.
  • Loading branch information
duricanikolic authored Feb 15, 2023
1 parent 02a38c3 commit b6e015a
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
* `<prefix>_cache_operation_skipped_total{backend="[memcached|redis]",...}`
* `<prefix>_cache_operations_total{backend="[memcached|redis]",...}`
* `<prefix>_cache_requests_total{backend="[memcached|redis]",...}`
* [ENHANCEMENT] Lifecycler: Added `HealthyInstancesInZoneCount` method returning the number of healthy instances in the ring that are registered in lifecycler's zone, updated during the last heartbeat period. #266
* [BUGFIX] spanlogger: Support multiple tenant IDs. #59
* [BUGFIX] Memberlist: fixed corrupted packets when sending compound messages with more than 255 messages or messages bigger than 64KB. #85
* [BUGFIX] Ring: `ring_member_ownership_percent` and `ring_tokens_owned` metrics are not updated on scale down. #109
Expand Down
24 changes: 19 additions & 5 deletions ring/lifecycler.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,10 @@ type Lifecycler struct {
readySince time.Time

// Keeps stats updated at every heartbeat period
countersLock sync.RWMutex
healthyInstancesCount int
zonesCount int
countersLock sync.RWMutex
healthyInstancesCount int
healthyInstancesInZoneCount int
zonesCount int

lifecyclerMetrics *LifecyclerMetrics
logger log.Logger
Expand Down Expand Up @@ -383,6 +384,15 @@ func (i *Lifecycler) HealthyInstancesCount() int {
return i.healthyInstancesCount
}

// HealthyInstancesInZoneCount reports how many healthy instances in the ring are
// registered in the same zone as this lifecycler. The value is the one cached
// during the last heartbeat period; it is read under the counters read lock.
func (i *Lifecycler) HealthyInstancesInZoneCount() int {
	i.countersLock.RLock()
	inZone := i.healthyInstancesInZoneCount
	i.countersLock.RUnlock()

	return inZone
}

// ZonesCount returns the number of zones for which there's at least 1 instance registered
// in the ring.
func (i *Lifecycler) ZonesCount() int {
Expand Down Expand Up @@ -795,24 +805,28 @@ func (i *Lifecycler) changeState(ctx context.Context, state InstanceState) error

// updateCounters recomputes the lifecycler's cached ring statistics from ringDesc
// and stores them under countersLock: the number of instances that are healthy for
// Write operations, the number of those healthy instances registered in this
// lifecycler's zone (i.cfg.Zone), and the number of distinct zones present in the ring.
// When ringDesc is nil all three counters are set to 0.
func (i *Lifecycler) updateCounters(ringDesc *Desc) {
healthyInstancesCount := 0
// NOTE(review): zones is declared twice below, and further down it is assigned both
// struct{}{} and 0. This looks like the removed/added line pairs of a diff shown
// without +/- markers; only the map[string]int form is consistent with the
// zones[ingester.Zone]++ increment in the loop. Confirm before compiling.
zones := map[string]struct{}{}
// Maps each zone name to the number of healthy instances seen in that zone.
zones := map[string]int{}

if ringDesc != nil {
// Take a single timestamp so every instance is evaluated against the same instant.
now := time.Now()

for _, ingester := range ringDesc.Ingesters {
// NOTE(review): presumably the pre-change line paired with the check below — confirm.
zones[ingester.Zone] = struct{}{}
// Make sure every zone gets an entry even if none of its instances is healthy,
// so that len(zones) still counts it.
if _, ok := zones[ingester.Zone]; !ok {
zones[ingester.Zone] = 0
}

// Count the number of healthy instances for Write operation.
if ingester.IsHealthy(Write, i.cfg.RingConfig.HeartbeatTimeout, now) {
healthyInstancesCount++
zones[ingester.Zone]++
}
}
}

// Update counters
i.countersLock.Lock()
i.healthyInstancesCount = healthyInstancesCount
// A zone that is absent from the map yields the zero value, i.e. 0.
i.healthyInstancesInZoneCount = zones[i.cfg.Zone]
i.zonesCount = len(zones)
i.countersLock.Unlock()
}
Expand Down
107 changes: 107 additions & 0 deletions ring/lifecycler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,113 @@ func TestLifecycler_HealthyInstancesCount(t *testing.T) {
})
}

// TestLifecycler_InstancesInZoneCount starts one lifecycler per table entry, all
// sharing the same in-memory KV store, and after each start checks the values that
// the newly started lifecycler reports from HealthyInstancesInZoneCount() and
// ZonesCount() against the expectations listed for that entry.
func TestLifecycler_InstancesInZoneCount(t *testing.T) {
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
t.Cleanup(func() { assert.NoError(t, closer.Close()) })

var ringConfig Config
flagext.DefaultValues(&ringConfig)
ringConfig.KVStore.Mock = ringStore

// Entries are processed in order; the expectations of each entry are stated
// relative to the instances added by the entries before it.
instances := []struct {
zone string
healthy bool
expectedHealthyInstancesInZoneCount int
expectedZonesCount int
}{
{
zone: "zone-a",
healthy: true,
// after adding a healthy instance in zone-a, expectedHealthyInstancesInZoneCount in zone-a becomes 1
expectedHealthyInstancesInZoneCount: 1,
// after adding a healthy instance in zone-a, expectedZonesCount is 1
expectedZonesCount: 1,
},
{
zone: "zone-a",
healthy: false,
// after adding an unhealthy instance in zone-a, expectedHealthyInstancesInZoneCount in zone-a remains 1
expectedHealthyInstancesInZoneCount: 1,
// zone-a was already added, so expectedZonesCount remains 1
expectedZonesCount: 1,
},
{
zone: "zone-a",
healthy: true,
// after adding a healthy instance in zone-a, expectedHealthyInstancesInZoneCount in zone-a becomes 2
expectedHealthyInstancesInZoneCount: 2,
// zone-a was already added, so expectedZonesCount remains 1
expectedZonesCount: 1,
},
{
zone: "zone-b",
healthy: true,
// after adding a healthy instance in zone-b, expectedHealthyInstancesInZoneCount in zone-b becomes 1
expectedHealthyInstancesInZoneCount: 1,
// after adding a healthy instance in zone-b, expectedZonesCount becomes 2
expectedZonesCount: 2,
},
{
zone: "zone-c",
healthy: false,
// after adding an unhealthy instance in zone-c, expectedHealthyInstancesInZoneCount in zone-c remains 0
expectedHealthyInstancesInZoneCount: 0,
// after adding an unhealthy instance in zone-c, expectedZonesCount becomes 3
expectedZonesCount: 3,
},
{
zone: "zone-c",
healthy: true,
// after adding a healthy instance in zone-c, expectedHealthyInstancesInZoneCount in zone-c becomes 1
expectedHealthyInstancesInZoneCount: 1,
// zone-c was already added, so expectedZonesCount remains 3
expectedZonesCount: 3,
},
{
zone: "zone-b",
healthy: true,
// after adding a healthy instance in zone-b, expectedHealthyInstancesInZoneCount in zone-b becomes 2
expectedHealthyInstancesInZoneCount: 2,
// zone-b was already added, so expectedZonesCount remains 3
expectedZonesCount: 3,
},
}

// Running total of entries flagged healthy so far, across all zones; used as the
// target value when polling HealthyInstancesCount() below.
expectedHealthInstancesCounter := 0
for idx, instance := range instances {
ctx := context.Background()

// Register an instance to the ring.
cfg := testLifecyclerConfig(ringConfig, fmt.Sprintf("instance-%d", idx))
cfg.HeartbeatPeriod = 100 * time.Millisecond
joinWaitMs := 1000
// unhealthy instances join the ring after 1min (60000ms), which exceeds the 1000ms waiting time
joinAfterMs := 60000
if instance.healthy {
expectedHealthInstancesCounter++
// healthy instances join after 100ms, which is within the 1000ms timeout
joinAfterMs = 100
}
cfg.JoinAfter = time.Duration(joinAfterMs) * time.Millisecond
cfg.Zone = instance.zone

lifecycler, err := NewLifecycler(cfg, &nopFlushTransferer{}, "instance", ringKey, true, log.NewNopLogger(), nil)
require.NoError(t, err)
// Before the service is started the counter is expected to still be 0.
assert.Equal(t, 0, lifecycler.HealthyInstancesInZoneCount())

require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler))
// The deferred stop runs when the test function returns, not at the end of this
// iteration, so every lifecycler started here keeps running for the rest of the loop.
// NOTE(review): presumably intentional, so earlier instances stay in the ring — confirm.
defer services.StopAndAwaitTerminated(ctx, lifecycler) // nolint:errcheck

// Wait until joined.
test.Poll(t, time.Duration(joinWaitMs)*time.Millisecond, expectedHealthInstancesCounter, func() interface{} {
return lifecycler.HealthyInstancesCount()
})

require.Equal(t, instance.expectedHealthyInstancesInZoneCount, lifecycler.HealthyInstancesInZoneCount())
require.Equal(t, instance.expectedZonesCount, lifecycler.ZonesCount())
}
}

func TestLifecycler_ZonesCount(t *testing.T) {
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
t.Cleanup(func() { assert.NoError(t, closer.Close()) })
Expand Down

0 comments on commit b6e015a

Please sign in to comment.