From 26b04c1ab9b3f106dcac5ecaadc7917d9f111207 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 18 Nov 2020 10:34:48 +0000 Subject: [PATCH] [ML] Truncate long audit messages (#64849) Truncate ML audit messages at 8191 characters including the appended "... (truncated)" text. --- .../notifications/AbstractAuditMessage.java | 41 ++++++++++- .../AbstractAuditMessageTests.java | 71 +++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessage.java index 0948b8bce3104..1b57609189b7a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessage.java @@ -28,6 +28,14 @@ public abstract class AbstractAuditMessage implements ToXContentObject { public static final ParseField NODE_NAME = new ParseField("node_name"); public static final ParseField JOB_TYPE = new ParseField("job_type"); + private static final String TRUNCATED_SUFFIX = "... (truncated)"; + /** + * The max length of an audit message in characters is 32766 / 4 = 8191 + * where 32766 is the limit in bytes Lucene sets for a term field + * and 4 is the max number of bytes required to represent a UTF8 character. 
+ */ + public static final int MAX_AUDIT_MESSAGE_CHARS = 8191; + protected static final ConstructingObjectParser createParser( String name, AbstractAuditMessageFactory messageFactory, ParseField resourceField) { @@ -88,7 +96,13 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par if (resourceId != null) { builder.field(getResourceField(), resourceId); } - builder.field(MESSAGE.getPreferredName(), message); + + if (message.length() > MAX_AUDIT_MESSAGE_CHARS) { + assert message.length() > MAX_AUDIT_MESSAGE_CHARS : "Audit message is unexpectedly large"; + builder.field(MESSAGE.getPreferredName(), truncateMessage(message, MAX_AUDIT_MESSAGE_CHARS)); + } else { + builder.field(MESSAGE.getPreferredName(), message); + } builder.field(LEVEL.getPreferredName(), level); builder.field(TIMESTAMP.getPreferredName(), timestamp.getTime()); if (nodeName != null) { @@ -134,4 +148,29 @@ public boolean equals(Object obj) { * @return resource id field name used when storing a new message */ protected abstract String getResourceField(); + + /** + * Truncate the message and append {@value #TRUNCATED_SUFFIX} so + * that the resulting string does not exceed {@code maxLength} characters + * + * {@code message} must be at least {@code maxLength} long + * + * @param message The message to truncate. 
Must have length of at least maxLength + * @param maxLength The length to truncate to + * @return The truncated string ending in {@value #TRUNCATED_SUFFIX} + */ + static String truncateMessage(String message, int maxLength) { + StringBuilder sb = new StringBuilder(maxLength); + sb.append(message, 0, maxLength - TRUNCATED_SUFFIX.length()); + int lastWhitespace = sb.lastIndexOf(" "); + if (lastWhitespace < 0) { + // no space char + lastWhitespace = maxLength - TRUNCATED_SUFFIX.length(); + } else { + lastWhitespace++; // point to next char which is a non-space char + } + sb.replace(lastWhitespace, lastWhitespace + TRUNCATED_SUFFIX.length(), TRUNCATED_SUFFIX); + sb.delete(lastWhitespace + TRUNCATED_SUFFIX.length(), sb.length()); + return sb.toString(); + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessageTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessageTests.java index 275ae604e6946..e88909bb433a1 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessageTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/common/notifications/AbstractAuditMessageTests.java @@ -6,13 +6,20 @@ package org.elasticsearch.xpack.core.common.notifications; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.test.AbstractXContentTestCase; +import java.io.IOException; import java.util.Date; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static 
org.hamcrest.Matchers.lessThanOrEqualTo; public class AbstractAuditMessageTests extends AbstractXContentTestCase { @@ -79,6 +86,70 @@ public void testNewError() { assertThat(message.getNodeName(), equalTo(NODE_NAME)); } + public void testLongMessageIsTruncated() throws IOException { + AbstractAuditMessage longMessage = new AbstractAuditMessage( + randomBoolean() ? null : randomAlphaOfLength(10), + "thisis17charslong".repeat(490), + randomFrom(Level.values()), + new Date(), + randomBoolean() ? null : randomAlphaOfLengthBetween(1, 20) + ) { + @Override + public String getJobType() { + return "unused"; + } + + @Override + protected String getResourceField() { + return "unused"; + } + }; + + assertThat(longMessage.getMessage().length(), greaterThan(AbstractAuditMessage.MAX_AUDIT_MESSAGE_CHARS)); + + // serialise the message and check the new message is truncated + XContentType xContentType = randomFrom(XContentType.values()); + BytesReference originalXContent = XContentHelper.toXContent(longMessage, xContentType, randomBoolean()); + XContentParser parser = createParser(XContentFactory.xContent(xContentType), originalXContent); + AbstractAuditMessage parsed = doParseInstance(parser); + assertThat(parsed.getMessage().length(), equalTo(AbstractAuditMessage.MAX_AUDIT_MESSAGE_CHARS)); + } + + public void testTruncateString() { + String message = "a short message short message short message short message short message"; + String truncated = AbstractAuditMessage.truncateMessage(message, 20); + assertEquals("a ... (truncated)", truncated); + assertThat(truncated.length(), lessThanOrEqualTo(20)); + + truncated = AbstractAuditMessage.truncateMessage(message, 23); + assertEquals("a short ... (truncated)", truncated); + assertThat(truncated.length(), lessThanOrEqualTo(23)); + + truncated = AbstractAuditMessage.truncateMessage(message, 31); + assertEquals("a short message ... 
(truncated)", truncated); + assertThat(truncated.length(), lessThanOrEqualTo(31)); + + truncated = AbstractAuditMessage.truncateMessage(message, 32); + assertEquals("a short message ... (truncated)", truncated); + assertThat(truncated.length(), lessThanOrEqualTo(32)); + } + + public void testTruncateString_noSpaceChar() { + String message = "ashortmessageshortmessageshortmessageshortmessageshortmessage"; + String truncated = AbstractAuditMessage.truncateMessage(message, 20); + assertEquals("ashor... (truncated)", truncated); + assertEquals(20, truncated.length()); + truncated = AbstractAuditMessage.truncateMessage(message, 25); + assertEquals("ashortmess... (truncated)", truncated); + assertEquals(25, truncated.length()); + } + + public void testTruncateString_tabsInsteadOfSpaces() { + String truncated = AbstractAuditMessage.truncateMessage("a\tshort\tmessage\tshort\tmessage", 25); + assertEquals("a\tshort\tme... (truncated)", truncated); + assertEquals(25, truncated.length()); + } + @Override protected TestAuditMessage doParseInstance(XContentParser parser) { return TestAuditMessage.PARSER.apply(parser, null);