Skip to content

Commit

Permalink
[ML] Truncate long audit messages (elastic#64849)
Browse files Browse the repository at this point in the history
Truncate ML audit messages at 8191 characters including the appended
"... (truncated)" text.
  • Loading branch information
davidkyle authored Nov 18, 2020
1 parent ba5f454 commit 26b04c1
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ public abstract class AbstractAuditMessage implements ToXContentObject {
public static final ParseField NODE_NAME = new ParseField("node_name");
public static final ParseField JOB_TYPE = new ParseField("job_type");

private static final String TRUNCATED_SUFFIX = "... (truncated)";
/**
* The max length of an audit message in characters is 32766 / 4 = 8191 (rounded down)
* where 32766 is the limit in bytes Lucene sets for a term field
* and 4 is the max number of bytes required to represent a UTF-8 encoded character.
*/
public static final int MAX_AUDIT_MESSAGE_CHARS = 8191;

protected static final <T extends AbstractAuditMessage> ConstructingObjectParser<T, Void> createParser(
String name, AbstractAuditMessageFactory<T> messageFactory, ParseField resourceField) {

Expand Down Expand Up @@ -88,7 +96,13 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par
if (resourceId != null) {
builder.field(getResourceField(), resourceId);
}
builder.field(MESSAGE.getPreferredName(), message);

if (message.length() > MAX_AUDIT_MESSAGE_CHARS) {
assert message.length() > MAX_AUDIT_MESSAGE_CHARS : "Audit message is unexpectedly large";
builder.field(MESSAGE.getPreferredName(), truncateMessage(message, MAX_AUDIT_MESSAGE_CHARS));
} else {
builder.field(MESSAGE.getPreferredName(), message);
}
builder.field(LEVEL.getPreferredName(), level);
builder.field(TIMESTAMP.getPreferredName(), timestamp.getTime());
if (nodeName != null) {
Expand Down Expand Up @@ -134,4 +148,29 @@ public boolean equals(Object obj) {
* @return resource id field name used when storing a new message
*/
protected abstract String getResourceField();

/**
 * Truncate the message and append {@value #TRUNCATED_SUFFIX} so
 * that the resulting string does not exceed {@code maxLength} characters.
 * <p>
 * The message is cut just after the last space character that still leaves
 * room for the suffix. If there is no such space the message is cut at
 * exactly {@code maxLength} minus the suffix length. Only the space character
 * is treated as a break point; tabs and other whitespace are not.
 * A cut is never made between the two halves of a surrogate pair.
 * <p>
 * A message that already fits in {@code maxLength} characters is returned unchanged.
 *
 * @param message The message to truncate
 * @param maxLength The maximum length of the returned string. When the message
 *                  has to be truncated this must be at least the length of
 *                  {@value #TRUNCATED_SUFFIX}
 * @return The message itself if it is no longer than {@code maxLength},
 *         otherwise the truncated string ending in {@value #TRUNCATED_SUFFIX}
 * @throws IllegalArgumentException if the message must be truncated but
 *                                  {@code maxLength} cannot hold the suffix
 */
static String truncateMessage(String message, int maxLength) {
    if (message.length() <= maxLength) {
        // nothing to cut, the message already satisfies the limit
        return message;
    }

    int prefixLength = maxLength - TRUNCATED_SUFFIX.length();
    if (prefixLength < 0) {
        throw new IllegalArgumentException(
            "maxLength [" + maxLength + "] is too small to hold the truncation suffix [" + TRUNCATED_SUFFIX + "]");
    }

    // look for the last space inside the part of the message that can be kept
    int lastSpace = message.lastIndexOf(' ', prefixLength - 1);
    // keep the space itself when one is found, otherwise keep the whole prefix
    int cut = lastSpace < 0 ? prefixLength : lastSpace + 1;

    // message is longer than maxLength so charAt(cut) is always in range here.
    // Do not leave a dangling high surrogate in front of the suffix.
    if (cut > 0 && Character.isHighSurrogate(message.charAt(cut - 1)) && Character.isLowSurrogate(message.charAt(cut))) {
        cut--;
    }

    return new StringBuilder(cut + TRUNCATED_SUFFIX.length())
        .append(message, 0, cut)
        .append(TRUNCATED_SUFFIX)
        .toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@
package org.elasticsearch.xpack.core.common.notifications;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.test.AbstractXContentTestCase;

import java.io.IOException;
import java.util.Date;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class AbstractAuditMessageTests extends AbstractXContentTestCase<AbstractAuditMessageTests.TestAuditMessage> {

Expand Down Expand Up @@ -79,6 +86,70 @@ public void testNewError() {
assertThat(message.getNodeName(), equalTo(NODE_NAME));
}

/**
 * Builds an audit message longer than {@link AbstractAuditMessage#MAX_AUDIT_MESSAGE_CHARS},
 * serialises it to a randomly chosen XContent type and checks that the message
 * read back has been truncated to the maximum length.
 */
public void testLongMessageIsTruncated() throws IOException {
    AbstractAuditMessage longMessage = new AbstractAuditMessage(
        randomBoolean() ? null : randomAlphaOfLength(10),
        "thisis17charslong".repeat(490),
        randomFrom(Level.values()),
        new Date(),
        randomBoolean() ? null : randomAlphaOfLengthBetween(1, 20)
    ) {
        @Override
        public String getJobType() {
            return "unused";
        }

        @Override
        protected String getResourceField() {
            return "unused";
        }
    };

    // sanity check: 17 * 490 chars must be over the limit for the test to be meaningful
    assertThat(longMessage.getMessage().length(), greaterThan(AbstractAuditMessage.MAX_AUDIT_MESSAGE_CHARS));

    // serialise the message and check the new message is truncated
    XContentType xContentType = randomFrom(XContentType.values());
    BytesReference originalXContent = XContentHelper.toXContent(longMessage, xContentType, randomBoolean());
    // the parser is a closeable resource, make sure it is released even if an assertion fails
    try (XContentParser parser = createParser(XContentFactory.xContent(xContentType), originalXContent)) {
        AbstractAuditMessage parsed = doParseInstance(parser);
        // the source message contains no spaces so the result is exactly the maximum length
        assertThat(parsed.getMessage().length(), equalTo(AbstractAuditMessage.MAX_AUDIT_MESSAGE_CHARS));
        assertTrue(parsed.getMessage().endsWith("... (truncated)"));
    }
}

/**
 * Truncates a message containing spaces to several maximum lengths and checks,
 * for each one, the exact result and that it does not exceed the limit.
 */
public void testTruncateString() {
    final String original = "a short message short message short message short message short message";

    // limits[i] is the maxLength passed in, expectations[i] the string that must come back
    final int[] limits = { 20, 23, 31, 32 };
    final String[] expectations = {
        "a ... (truncated)",
        "a short ... (truncated)",
        "a short message ... (truncated)",
        "a short message ... (truncated)"
    };

    for (int i = 0; i < limits.length; i++) {
        String shortened = AbstractAuditMessage.truncateMessage(original, limits[i]);
        assertEquals(expectations[i], shortened);
        assertThat(shortened.length(), lessThanOrEqualTo(limits[i]));
    }
}

/**
 * Truncates a message without any space character and checks that the
 * result is exactly as long as the requested maximum.
 */
public void testTruncateString_noSpaceChar() {
    final String unbroken = "ashortmessageshortmessageshortmessageshortmessageshortmessage";

    final int[] limits = { 20, 25 };
    final String[] expectations = { "ashor... (truncated)", "ashortmess... (truncated)" };

    for (int i = 0; i < limits.length; i++) {
        String shortened = AbstractAuditMessage.truncateMessage(unbroken, limits[i]);
        assertEquals(expectations[i], shortened);
        assertEquals(limits[i], shortened.length());
    }
}

/**
 * Checks the result of truncating a tab separated message: the expected
 * string is cut in the middle of a word rather than at a tab.
 */
public void testTruncateString_tabsInsteadOfSpaces() {
    final int limit = 25;
    final String tabSeparated = "a\tshort\tmessage\tshort\tmessage";

    String shortened = AbstractAuditMessage.truncateMessage(tabSeparated, limit);

    assertEquals("a\tshort\tme... (truncated)", shortened);
    assertEquals(limit, shortened.length());
}

@Override
protected TestAuditMessage doParseInstance(XContentParser parser) {
return TestAuditMessage.PARSER.apply(parser, null);
Expand Down

0 comments on commit 26b04c1

Please sign in to comment.