Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #1310: Count JSON tokens #1296

Merged
merged 13 commits into from
Jun 18, 2024
3 changes: 3 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ a pure JSON library.
#1277: Add back Java 22 optimisation in FastDoubleParser
#1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
(contributed by @zhangOranges)
#1310: Add new `StreamReadConstraints` (`maxTokenCount`) to limit maximum number
of Tokens allowed per document
(implemented by @pjfanning)

2.17.2 (not yet released)

Expand Down
12 changes: 12 additions & 0 deletions src/main/java/com/fasterxml/jackson/core/JsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,18 @@ public void setCurrentValue(Object v) {
assignCurrentValue(v);
}

/**
* Get an approximate count of the number of tokens that have been read.
* The count is likely to be updated only if {@link StreamReadConstraints.Builder#maxTokenCount(long)}
* has been used to set a limit on the number of tokens that can be read.
*
* @return the number of tokens that have been read (-1 if the count is not available)
* @since 2.18
*/
public long getTokenCount() {
cowtowncoder marked this conversation as resolved.
Show resolved Hide resolved
return -1L;
}

/*
/**********************************************************
/* Buffer handling
Expand Down
112 changes: 107 additions & 5 deletions src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ public class StreamReadConstraints
*/
public static final long DEFAULT_MAX_DOC_LEN = -1L;

/**
* Default setting for maximum token count:
* see {@link Builder#maxTokenCount} for details.
*/
public static final long DEFAULT_MAX_TOKEN_COUNT = -1L;

/**
* @since 2.16
*/
Expand Down Expand Up @@ -74,6 +80,7 @@ public class StreamReadConstraints

protected final int _maxNestingDepth;
protected final long _maxDocLen;
protected final long _maxTokenCount;

protected final int _maxNumLen;
protected final int _maxStringLen;
Expand Down Expand Up @@ -112,6 +119,7 @@ public static void overrideDefaultStreamReadConstraints(final StreamReadConstrai

public static final class Builder {
private long maxDocLen;
private long maxTokenCount;
private int maxNestingDepth;
private int maxNumLen;
private int maxStringLen;
Expand Down Expand Up @@ -156,6 +164,31 @@ public Builder maxDocumentLength(long maxDocLen) {
return this;
}

/**
 * Configures the maximum number of tokens a single document may contain.
 * Any value of {@code 0} or less disables the limit.
 *
 * <p>
 * A token is a single unit of input, such as a number, a string, an object
 * start or end, or an array start or end.
 * </p>
 *
 * @param maxTokenCount the maximum allowed token count if positive; otherwise
 *   ({@code 0} or negative number) means "unlimited".
 *
 * @return this builder
 *
 * @since 2.18
 */
public Builder maxTokenCount(long maxTokenCount) {
    // "Unlimited" is always stored as -1L, whichever non-positive value was passed
    this.maxTokenCount = (maxTokenCount > 0L) ? maxTokenCount : -1L;
    return this;
}

/**
* Sets the maximum number length (in chars or bytes, depending on input context).
* The default is 1000.
Expand Down Expand Up @@ -220,14 +253,15 @@ public Builder maxNameLength(final int maxNameLen) {
}

Builder() {
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN,
this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, DEFAULT_MAX_TOKEN_COUNT,
DEFAULT_MAX_NUM_LEN, DEFAULT_MAX_STRING_LEN, DEFAULT_MAX_NAME_LEN);
}

Builder(final int maxNestingDepth, final long maxDocLen,
Builder(final int maxNestingDepth, final long maxDocLen, final long maxTokenCount,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
this.maxNestingDepth = maxNestingDepth;
this.maxDocLen = maxDocLen;
this.maxTokenCount = maxTokenCount;
this.maxNumLen = maxNumLen;
this.maxStringLen = maxStringLen;
this.maxNameLen = maxNameLen;
Expand All @@ -236,14 +270,15 @@ public Builder maxNameLength(final int maxNameLen) {
Builder(StreamReadConstraints src) {
    // Copy every limit from the existing constraints via the all-args constructor
    this(src._maxNestingDepth, src._maxDocLen, src._maxTokenCount,
            src._maxNumLen, src._maxStringLen, src._maxNameLen);
}

public StreamReadConstraints build() {
return new StreamReadConstraints(maxNestingDepth, maxDocLen,
maxNumLen, maxStringLen, maxNameLen);
maxNumLen, maxStringLen, maxNameLen, maxTokenCount);
}
}

Expand All @@ -257,7 +292,7 @@ public StreamReadConstraints build() {
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen) {
this(maxNestingDepth, maxDocLen,
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN);
maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN, DEFAULT_MAX_TOKEN_COUNT);
}

/**
Expand All @@ -269,13 +304,30 @@ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
*
* @since 2.16
*/
@Deprecated // since 2.18
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
// Delegate to the full constructor; DEFAULT_MAX_TOKEN_COUNT (-1L) leaves the token count unlimited
this(maxNestingDepth, maxDocLen, maxNumLen, maxStringLen, maxNameLen, DEFAULT_MAX_TOKEN_COUNT);
}

/**
* @param maxNestingDepth Maximum input document nesting to allow
* @param maxDocLen Maximum input document length to allow
* @param maxNumLen Maximum number representation length to allow
* @param maxStringLen Maximum String value length to allow
* @param maxNameLen Maximum Object property name length to allow
* @param maxTokenCount Maximum number of tokens to allow
*
* @since 2.18
*/
protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen,
final int maxNumLen, final int maxStringLen, final int maxNameLen) {
final int maxNumLen, final int maxStringLen, final int maxNameLen, final long maxTokenCount) {
_maxNestingDepth = maxNestingDepth;
_maxDocLen = maxDocLen;
_maxNumLen = maxNumLen;
_maxStringLen = maxStringLen;
_maxNameLen = maxNameLen;
_maxTokenCount = maxTokenCount;
}

public static Builder builder() {
Expand Down Expand Up @@ -337,6 +389,31 @@ public boolean hasMaxDocumentLength() {
return _maxDocLen > 0L;
}

/**
* Accessor for maximum token count.
* See {@link Builder#maxTokenCount(long)} for details.
*
* @return Maximum allowed token count; a value of {@code 0} or less means
*   no limit is enforced (see {@link #hasMaxTokenCount()})
* @since 2.18
*/
public long getMaxTokenCount() {
return _maxTokenCount;
}

/**
* Convenience method, basically same as:
*<pre>
* getMaxTokenCount() &gt; 0L
*</pre>
*
* @return {@code true} if this constraints instance has a limit for maximum
* token count to enforce; {@code false} otherwise.
* @since 2.18
*/
public boolean hasMaxTokenCount() {
// Only strictly positive values count as a limit; 0 and negatives mean "unlimited"
return _maxTokenCount > 0L;
}

/**
* Accessor for maximum length of numbers to decode.
* see {@link Builder#maxNumberLength(int)} for details.
Expand Down Expand Up @@ -419,6 +496,31 @@ public void validateDocumentLength(long len) throws StreamConstraintsException
}
}

/**
 * Convenience method that can be used to verify that the
 * token count does not exceed the maximum specified by this
 * constraints object (if any): if it does, a
 * {@link StreamConstraintsException}
 * is thrown.
 * If no maximum is configured ({@link #hasMaxTokenCount()} is {@code false}),
 * no exception is thrown.
 *
 * @param count Current token count for processed document content
 *
 * @throws StreamConstraintsException If token count exceeds maximum
 *
 * @since 2.18
 */
public void validateTokenCount(long count) throws StreamConstraintsException
{
    // The "is a limit configured" check is evaluated second, so the common
    // within-limit path still costs a single comparison. It keeps the
    // "unlimited" marker (-1L) from being reported as a violation when a
    // caller has not pre-checked hasMaxTokenCount().
    if ((count > _maxTokenCount) && (_maxTokenCount > 0L)) {
        throw _constructException(
                "Token count (%d) exceeds the maximum allowed (%d, from %s)",
                count, _maxTokenCount,
                _constrainRef("getMaxTokenCount"));
    }
}

/*
/**********************************************************************
/* Convenience methods for validation, token lengths
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,20 @@ public abstract class ParserMinimalBase extends JsonParser
*/
protected JsonToken _currToken;

/**
* Current count of tokens
*
* @since 2.18
*/
protected long _tokenCount;

/**
* Whether or not to track the token count due to a {@link StreamReadConstraints} maxTokenCount &gt; 0.
*
* @since 2.18
*/
protected final boolean _hasMaxTokenCount;

/**
* Last cleared token, if any: that is, value that was in
* effect when {@link #clearCurrentToken} was called.
Expand All @@ -175,6 +189,7 @@ public abstract class ParserMinimalBase extends JsonParser
protected ParserMinimalBase() {
super();
_streamReadConstraints = StreamReadConstraints.defaults();
_hasMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

@Deprecated // since 2.18
Expand All @@ -186,12 +201,14 @@ protected ParserMinimalBase(int features) {
protected ParserMinimalBase(StreamReadConstraints src) {
super();
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
_hasMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

// @since 2.18
protected ParserMinimalBase(int features, StreamReadConstraints src) {
super(features);
_streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src;
_hasMaxTokenCount = _streamReadConstraints.hasMaxTokenCount();
}

// NOTE: had base impl in 2.3 and before; but shouldn't
Expand Down Expand Up @@ -327,6 +344,11 @@ public JsonParser skipChildren() throws IOException

// public abstract JsonLocation getCurrentLocation();

// Returns the running counter held in _tokenCount; _updateToken() advances it
// only while _hasMaxTokenCount is true.
@Override // since 2.18
public long getTokenCount() {
return _tokenCount;
}

/*
/**********************************************************
/* Public API, token state overrides
Expand Down Expand Up @@ -827,9 +849,11 @@ protected final void _wrapError(String msg, Throwable t) throws JsonParseExcepti

/**
 * Records {@code token} as the current token and, when a token-count limit
 * is being tracked, increments the running count and validates it.
 *
 * @param token Token to make current
 * @return The same {@code token} that was passed in
 * @throws StreamConstraintsException if validation of the incremented count fails
 */
protected final JsonToken _updateToken(final JsonToken token) throws StreamConstraintsException {
    _currToken = token;
    if (!_hasMaxTokenCount) {
        // No limit configured: skip counting entirely
        return token;
    }
    _tokenCount++;
    _streamReadConstraints.validateTokenCount(_tokenCount);
    return token;
}

/**
 * Clears the current token.
 *
 * @return Always {@code null}, the new value of the current token
 */
protected final JsonToken _updateTokenToNull() {
    _currToken = null;
    return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
import com.fasterxml.jackson.core.testsupport.AsyncReaderWrapper;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

// [core#1047]: Add max-name-length constraints
Expand All @@ -20,6 +21,10 @@ class LargeDocReadTest extends AsyncTestBase
.streamReadConstraints(StreamReadConstraints.builder().maxDocumentLength(10_000L).build())
.build();

private final JsonFactory JSON_F_MAX_TOKENS_1K = JsonFactory.builder()
.streamReadConstraints(StreamReadConstraints.builder().maxTokenCount(1_000L).build())
.build();

// Test name that is below default max name
@Test
void largeNameBytes() throws Exception {
Expand Down Expand Up @@ -83,6 +88,18 @@ void largeNameWithSmallLimitAsync() throws Exception
}
}

// The factory caps parsing at 1,000 tokens, so reading is expected to fail on token 1,001
@Test
void tokenLimitBytes() throws Exception {
    final int generatorArg = StreamReadConstraints.defaults().getMaxNameLength() - 100;
    final String doc = generateJSON(generatorArg);
    final String expectedMsg =
            "Token count (1001) exceeds the maximum allowed (1000, from `StreamReadConstraints.getMaxTokenCount()`)";
    try (JsonParser p = createParserUsingStream(JSON_F_MAX_TOKENS_1K, doc, "UTF-8")) {
        consumeTokens(p);
        fail("expected StreamConstraintsException");
    } catch (StreamConstraintsException e) {
        assertEquals(expectedMsg, e.getMessage());
    }
}

private void consumeTokens(JsonParser p) throws IOException {
while (p.nextToken() != null) {
;
Expand Down
Loading