From da25ac2e7f7eef8c2daf1f8b6dae34e780c7fd96 Mon Sep 17 00:00:00 2001 From: ahrav Date: Thu, 16 May 2024 14:38:50 -0700 Subject: [PATCH] remove redundant chunking (#2855) --- pkg/engine/engine.go | 64 +++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 7c1e2ca1bb60..14228d133d67 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -611,43 +611,41 @@ func (e *Engine) detectorWorker(ctx context.Context) { // Reuse the same map to avoid allocations. const avgDetectorsPerChunk = 8 chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, avgDetectorsPerChunk) - for originalChunk := range e.ChunksChan() { - for chunk := range sources.Chunker(originalChunk) { - atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data))) - for _, decoder := range e.decoders { - decoded := decoder.FromChunk(chunk) - if decoded == nil { - ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk) - continue - } + for chunk := range e.ChunksChan() { + atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data))) + for _, decoder := range e.decoders { + decoded := decoder.FromChunk(chunk) + if decoded == nil { + ctx.Logger().V(4).Info("no decoder found for chunk", "chunk", chunk) + continue + } - matchingDetectors := e.ahoCorasickCore.PopulateMatchingDetectors(string(decoded.Chunk.Data), chunkSpecificDetectors) - if len(chunkSpecificDetectors) > 1 && !e.verificationOverlap { - wgVerificationOverlap.Add(1) - e.verificationOverlapChunksChan <- verificationOverlapChunk{ - chunk: *decoded.Chunk, - detectors: matchingDetectors, - decoder: decoded.DecoderType, - verificationOverlapWgDoneFn: wgVerificationOverlap.Done, - } - // Empty the map. - for k := range chunkSpecificDetectors { - delete(chunkSpecificDetectors, k) - } - continue + matchingDetectors := e.ahoCorasickCore.PopulateMatchingDetectors(string(decoded.Chunk.Data), chunkSpecificDetectors) + if len(chunkSpecificDetectors) > 1 && !e.verificationOverlap { + wgVerificationOverlap.Add(1) + e.verificationOverlapChunksChan <- verificationOverlapChunk{ + chunk: *decoded.Chunk, + detectors: matchingDetectors, + decoder: decoded.DecoderType, + verificationOverlapWgDoneFn: wgVerificationOverlap.Done, } - - for k, detector := range chunkSpecificDetectors { - decoded.Chunk.Verify = e.verify - wgDetect.Add(1) - e.detectableChunksChan <- detectableChunk{ - chunk: *decoded.Chunk, - detector: detector, - decoder: decoded.DecoderType, - wgDoneFn: wgDetect.Done, - } + // Empty the map. + for k := range chunkSpecificDetectors { delete(chunkSpecificDetectors, k) } + continue + } + + for k, detector := range chunkSpecificDetectors { + decoded.Chunk.Verify = e.verify + wgDetect.Add(1) + e.detectableChunksChan <- detectableChunk{ + chunk: *decoded.Chunk, + detector: detector, + decoder: decoded.DecoderType, + wgDoneFn: wgDetect.Done, + } + delete(chunkSpecificDetectors, k) } } atomic.AddUint64(&e.metrics.ChunksScanned, 1)