From 1bb65d64b4ebb664806da30a8f899cfcb77cdee3 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 08:58:07 +0000 Subject: [PATCH] [8.13](backport #38561) [libbeat] Cache processor docs and memory fixes. (#38745) Change cache processor documentation from write_period to write_interval. Fix expires heap cleanup on partial file writes: popping the expiries heap completely on every partial write caused a panic on the following put operations at the time of expiry check. Fix expires infinite growth when large TTLs and recurring keys are cached: when TTLs are large and keys are recurrent, we kept track of an arbitrary amount of expiry heap entries for the same key, causing memory to grow. (cherry picked from commit b1e4abc4b052bbd8533b3622746fe527f285f156) --------- Co-authored-by: Marc Guasch --- CHANGELOG.next.asciidoc | 3 + libbeat/processors/cache/docs/cache.asciidoc | 2 +- libbeat/processors/cache/file_store.go | 4 +- libbeat/processors/cache/file_store_test.go | 103 +++++++++++++++++++ libbeat/processors/cache/mem_store.go | 20 ++-- libbeat/processors/cache/mem_store_test.go | 97 +++++++++++++++++ 6 files changed, 220 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 4ecb1a9827d..068315a7f47 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -53,6 +53,9 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Upgraded apache arrow library used in x-pack/libbeat/reader/parquet from v11 to v12.0.1 in order to fix cross-compilation issues {pull}35640[35640] - Fix panic when MaxRetryInterval is specified, but RetryInterval is not {pull}35820[35820] - Support build of projects outside of beats directory {pull}36126[36126] +- Change cache processor documentation from `write_period` to `write_interval`. {pull}38561[38561] +- Fix cache processor expiries heap cleanup on partial file writes. {pull}38561[38561] +- Fix cache processor expiries infinite growth when large a large TTL is used and recurring keys are cached. {pull}38561[38561] *Auditbeat* diff --git a/libbeat/processors/cache/docs/cache.asciidoc b/libbeat/processors/cache/docs/cache.asciidoc index 6c5ccf2d197..bdd9629dea2 100644 --- a/libbeat/processors/cache/docs/cache.asciidoc +++ b/libbeat/processors/cache/docs/cache.asciidoc @@ -54,7 +54,7 @@ One of `backend.memory.id` or `backend.file.id` must be provided. `backend.capacity`:: The number of elements that can be stored in the cache. `put` operations that would cause the capacity to be exceeded will result in evictions of the oldest elements. Values at or below zero indicate no limit. The capacity should not be lower than the number of elements that are expected to be referenced when processing the input as evicted elements are lost. The default is `0`, no limit. `backend.memory.id`:: The ID of a memory-based cache. Use the same ID across instance to reference the same cache. `backend.file.id`:: The ID of a file-based cache. Use the same ID across instance to reference the same cache. -`backend.file.write_period`:: The interval between periodic cache writes to the backing file. Valid time units are h, m, s, ms, us/µs and ns. Periodic writes are only made if `backend.file.write_period` is greater than zero. The contents are always written out to the backing file when the processor is closed. Default is zero, no periodic writes. +`backend.file.write_interval`:: The interval between periodic cache writes to the backing file. Valid time units are h, m, s, ms, us/µs and ns. Periodic writes are only made if `backend.file.write_interval` is greater than zero. The contents are always written out to the backing file when the processor is closed. Default is zero, no periodic writes. One of `put`, `get` or `delete` must be provided. diff --git a/libbeat/processors/cache/file_store.go b/libbeat/processors/cache/file_store.go index 1ab4ab21ae4..701410e1710 100644 --- a/libbeat/processors/cache/file_store.go +++ b/libbeat/processors/cache/file_store.go @@ -283,8 +283,8 @@ func (c *fileStore) writeState(final bool) { enc := json.NewEncoder(f) enc.SetEscapeHTML(false) now := time.Now() - for c.expiries.Len() != 0 { - e := c.expiries.pop() + for i := 0; i < c.expiries.Len(); i++ { + e := c.expiries[i] if e.Expires.Before(now) { // Don't write expired elements. continue diff --git a/libbeat/processors/cache/file_store_test.go b/libbeat/processors/cache/file_store_test.go index 22d43083e04..163899eca0b 100644 --- a/libbeat/processors/cache/file_store_test.go +++ b/libbeat/processors/cache/file_store_test.go @@ -352,6 +352,109 @@ var fileStoreTests = []struct { {Key: "three", Value: 3.0}, }, }, + { + name: "periodic_write", + cfg: config{ + Store: &storeConfig{ + File: &fileConfig{ID: "test"}, + Capacity: 1000, + Effort: 10, + }, + Get: &getConfig{}, + }, + want: &fileStore{path: "testdata/periodic_write", memStore: memStore{ + id: "test", + cache: map[string]*CacheEntry{}, + refs: 1, + // TTL, capacity and effort are set only by put. + ttl: -1, + cap: -1, + effort: -1, + }}, + steps: []fileStoreTestSteps{ + 0: { + doTo: func(s *fileStore) error { + putCfg := config{ + Store: &storeConfig{ + File: &fileConfig{ID: "test"}, + Capacity: 1000, + Effort: 10, + }, + Put: &putConfig{ + TTL: ptrTo(time.Second), + }, + } + s.add(putCfg) + return nil + }, + want: &fileStore{path: "testdata/periodic_write", memStore: memStore{ + id: "test", + cache: map[string]*CacheEntry{}, + refs: 2, + dirty: false, + ttl: time.Second, + cap: 1000, + effort: 10, + }}, + }, + 1: { + doTo: func(s *fileStore) error { + s.Put("one", 1) + s.Put("two", 2) + s.Put("three", 3) + return nil + }, + want: &fileStore{path: "testdata/periodic_write", memStore: memStore{ + id: "test", + cache: map[string]*CacheEntry{ + "one": {Key: "one", Value: int(1), index: 0}, + "two": {Key: "two", Value: int(2), index: 1}, + "three": {Key: "three", Value: int(3), index: 2}, + }, + expiries: expiryHeap{ + {Key: "one", Value: int(1), index: 0}, + {Key: "two", Value: int(2), index: 1}, + {Key: "three", Value: int(3), index: 2}, + }, + refs: 2, + dirty: true, + ttl: time.Second, + cap: 1000, + effort: 10, + }}, + }, + 2: { + doTo: func(s *fileStore) error { + s.writeState(false) + return nil + }, + want: &fileStore{path: "testdata/periodic_write", memStore: memStore{ + id: "test", + cache: map[string]*CacheEntry{ + "one": {Key: "one", Value: int(1), index: 0}, + "two": {Key: "two", Value: int(2), index: 1}, + "three": {Key: "three", Value: int(3), index: 2}, + }, + expiries: expiryHeap{ + {Key: "one", Value: int(1), index: 0}, + {Key: "two", Value: int(2), index: 1}, + {Key: "three", Value: int(3), index: 2}, + }, + refs: 2, + dirty: false, + ttl: time.Second, + cap: 1000, + effort: 10, + }}, + }, + }, + wantPersisted: []*CacheEntry{ + // Numeric values are float due to JSON round-trip. + {Key: "one", Value: 1.0}, + {Key: "two", Value: 2.0}, + {Key: "three", Value: 3.0}, + }, + }, } func TestFileStore(t *testing.T) { diff --git a/libbeat/processors/cache/mem_store.go b/libbeat/processors/cache/mem_store.go index a2fec841d27..09ea2c81f53 100644 --- a/libbeat/processors/cache/mem_store.go +++ b/libbeat/processors/cache/mem_store.go @@ -172,13 +172,21 @@ func (c *memStore) Put(key string, val any) error { defer c.mu.Unlock() now := time.Now() c.evictExpired(now) - e := &CacheEntry{ - Key: key, - Value: val, - Expires: now.Add(c.ttl), + // If the key is being overwritten we remove its previous expiry entry + // this will prevent expiries heap to grow with large TTLs and recurring keys. + if prev, found := c.cache[key]; found { + prev.Value = val + prev.Expires = now.Add(c.ttl) + heap.Fix(&c.expiries, prev.index) + } else { + e := &CacheEntry{ + Key: key, + Value: val, + Expires: now.Add(c.ttl), + } + c.cache[key] = e + heap.Push(&c.expiries, e) } - c.cache[key] = e - heap.Push(&c.expiries, e) c.dirty = true return nil } diff --git a/libbeat/processors/cache/mem_store_test.go b/libbeat/processors/cache/mem_store_test.go index 4a6cf500e05..d6ca53694ce 100644 --- a/libbeat/processors/cache/mem_store_test.go +++ b/libbeat/processors/cache/mem_store_test.go @@ -335,6 +335,103 @@ var memStoreTests = []struct { }, }, }, + { + name: "re-hit", + cfg: config{ + Store: &storeConfig{ + Memory: &memConfig{"test"}, + Capacity: 1000, + Effort: 10, + }, + Get: &getConfig{}, + }, + want: &memStore{ + id: "test", + cache: map[string]*CacheEntry{}, + refs: 1, + // TTL, capacity and effort are set only by put. + ttl: -1, + cap: -1, + effort: -1, + }, + steps: []memStoreTestSteps{ + 0: { + doTo: func(s *memStore) error { + putCfg := config{ + Store: &storeConfig{ + Memory: &memConfig{"test"}, + Capacity: 1000, + Effort: 10, + }, + Put: &putConfig{ + TTL: ptrTo(10 * time.Minute), + }, + } + s.add(putCfg) + return nil + }, + want: &memStore{ + id: "test", + cache: map[string]*CacheEntry{}, + refs: 2, + dirty: false, + ttl: 10 * time.Minute, + cap: 1000, + effort: 10, + }, + }, + 1: { + doTo: func(s *memStore) error { + s.Put("one", 1) + s.Put("two", 2) + s.Put("three", 3) + return nil + }, + want: &memStore{ + id: "test", + cache: map[string]*CacheEntry{ + "one": {Key: "one", Value: int(1), index: 0}, + "two": {Key: "two", Value: int(2), index: 1}, + "three": {Key: "three", Value: int(3), index: 2}, + }, + expiries: expiryHeap{ + {Key: "one", Value: int(1), index: 0}, + {Key: "two", Value: int(2), index: 1}, + {Key: "three", Value: int(3), index: 2}, + }, + refs: 2, + dirty: true, + ttl: 10 * time.Minute, + cap: 1000, + effort: 10, + }, + }, + 2: { + doTo: func(s *memStore) error { + s.Put("one", 1) + return nil + }, + want: &memStore{ + id: "test", + cache: map[string]*CacheEntry{ + "one": {Key: "one", Value: int(1), index: 1}, + "two": {Key: "two", Value: int(2), index: 0}, + "three": {Key: "three", Value: int(3), index: 2}, + }, + expiries: expiryHeap{ + {Key: "two", Value: int(2), index: 0}, + {Key: "one", Value: int(1), index: 1}, + {Key: "three", Value: int(3), index: 2}, + }, + refs: 2, + dirty: true, + ttl: 10 * time.Minute, + cap: 1000, + effort: 10, + }, + }, + }, + }, } func TestMemStore(t *testing.T) {