-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix circuit breaker leak in MultiTerms aggregation #79362
Conversation
Pinging @elastic/es-analytics-geo (Team:Analytics) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM in the sense that (1) the tests fail iff the fix is reverted and (2) the script I had to reproduce the problem no longer exhibits a leak.
DELETE /test
# 200 OK
# {
# "acknowledged": true
# }
PUT /test
{
"mappings": {
"properties": {
"field1": {
"type": "keyword"
},
"field2": {
"type": "keyword"
}
}
},
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
# 200 OK
# {
# "shards_acknowledged": true,
# "acknowledged": true,
# "index": "test"
# }
POST /test/_bulk
{"index":{}}
{"field1":"aaa","field2":"bb"}
{"index":{}}
{"field1":"aaa","field2":"cc"}
{"index":{}}
{"field1":"aaa","field2":"dd"}
{"index":{}}
{"field1":"aaa","field2":"ee"}
{"index":{}}
{"field1":"aaa","field2":"ff"}
{"index":{}}
{"field1":"aaa","field2":"gg"}
{"index":{}}
{"field1":"aaa","field2":"hh"}
{"index":{}}
{"field1":"aaa","field2":"jj"}
{"index":{}}
{"field1":"aaa","field2":"kk"}
{"index":{}}
{"field1":"aaa","field2":"ll"}
# 200 OK
# {
# "took": 87,
# "items": [
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "PR50k3wBIyZAxwBsvzs1",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 0
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Ph50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 1
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Px50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 2
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "QB50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 3
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "QR50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 4
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Qh50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 5
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Qx50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 6
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "RB50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 7
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "RR50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 8
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Rh50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 9
# }
# }
# ],
# "errors": false
# }
POST /test/_refresh
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "0b",
# "overhead": 1.0,
# "estimated_size_in_bytes": 0,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
POST /_cache/clear
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /test/_search
{
"size": 0,
"aggs": {
"test": {
"multi_terms": {
"size": 1,
"terms": [
{
"field": "field1"
},
{
"field": "field2"
}
]
}
}
}
}
# 200 OK
# {
# "aggregations": {
# "test": {
# "buckets": [
# {
# "doc_count": 1,
# "key_as_string": "aaa|bb",
# "key": [
# "aaa",
# "bb"
# ]
# }
# ],
# "doc_count_error_upper_bound": 0,
# "sum_other_doc_count": 9
# }
# },
# "took": 26,
# "_shards": {
# "skipped": 0,
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "timed_out": false,
# "hits": {
# "max_score": null,
# "total": {
# "value": 10,
# "relation": "eq"
# },
# "hits": []
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "744b",
# "overhead": 1.0,
# "estimated_size_in_bytes": 744,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
POST /_cache/clear
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /test/_search
{
"size": 0,
"aggs": {
"test": {
"multi_terms": {
"size": 1,
"terms": [
{
"field": "field1"
},
{
"field": "field2"
}
]
}
}
}
}
# 200 OK
# {
# "aggregations": {
# "test": {
# "buckets": [
# {
# "doc_count": 1,
# "key_as_string": "aaa|bb",
# "key": [
# "aaa",
# "bb"
# ]
# }
# ],
# "doc_count_error_upper_bound": 0,
# "sum_other_doc_count": 9
# }
# },
# "took": 3,
# "_shards": {
# "skipped": 0,
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "timed_out": false,
# "hits": {
# "max_score": null,
# "total": {
# "value": 10,
# "relation": "eq"
# },
# "hits": []
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "1.4kb",
# "overhead": 1.0,
# "estimated_size_in_bytes": 1488,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
Leaving it to Igor for a final look tho, this isn't code which which I'm familiar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍
I do think it'd be good to randomize whether or not we pre-allocate AggregatorTestCase
. I'll bet we'd have caught this earlier if we did that.
@nik9000 I added logic in AggregatorTestCase to randomise preallocation. |
@@ -471,7 +471,7 @@ protected ScriptService getMockScriptService() { | |||
indexSettings, | |||
query, | |||
breakerService, | |||
builder.bytesToPreallocate(), | |||
randomBoolean() ? 0 : builder.bytesToPreallocate(), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice
@@ -105,7 +105,7 @@ protected CategorizeTextAggregator( | |||
@Override | |||
protected void doClose() { | |||
super.doClose(); | |||
Releasables.close(this.analyzer, this.bytesRefHash); | |||
Releasables.close(this.analyzer, this.bytesRefHash, bucketOrds); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@benwtrent looks like you had a leak that I didn't catch. I'm quite thankful for leak detecting test cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for fixing CategorizeTextAggregator.java as well!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great catch! Thanks!
@elasticmachine update branch |
@elasticmachine run elasticsearch-ci/part-1 |
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.
💔 Backport failed
You can use sqren/backport to manually backport by running |
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.
* upstream/master: (34 commits) Add extensionName() to security extension (elastic#79329) More robust and consistent allowAll indicesAccessControl (elastic#79415) Fix circuit breaker leak in MultiTerms aggregation (elastic#79362) guard geoline aggregation from parents aggegator that emit empty buckets (elastic#79129) Vector tiles: increase the size of the envelope used to clip geometries (elastic#79030) Revert "[ML] Add queue_capacity setting to start deployment API (elastic#79369)" (elastic#79374) Convert token service license object to LicensedFeature (elastic#79284) [TEST] Fix ShardPathTests for MDP (elastic#79393) Fix fleet search API with no checkpints (elastic#79400) Reduce BWC version for transient settings (elastic#79396) EQL: Rename a test class for eclipse (elastic#79254) Use search_coordination threadpool in field caps (elastic#79378) Use query param instead of a system property for opting in for new cluster health response code (elastic#79351) Add new kNN search endpoint (elastic#79013) Disable BWC tests Convert auditing license object to LicensedFeature (elastic#79280) Update BWC versions after backport of elastic#78551 Enable InstantiatingObjectParser to pass context as a first argument (elastic#79206) Move xcontent filtering tests (elastic#79298) Update links to Fleet/Agent docs (elastic#79303) ...
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.