diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index ab06c101d3..728102e847 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -35,6 +35,9 @@ a pure JSON library. #1277: Add back Java 22 optimisation in FastDoubleParser #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding (contributed by @zhangOranges) +#1310: Add new `StreamReadConstraints` (`maxTokenCount`) to limit maximum number + of Tokens allowed per document + (implemented by @pjfanning) 2.17.2 (not yet released) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonParser.java b/src/main/java/com/fasterxml/jackson/core/JsonParser.java index 4265d48437..b0a38a4909 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonParser.java @@ -781,6 +781,18 @@ public JsonLocation currentTokenLocation() { return getTokenLocation(); } + /** + * Get an approximate count of the number of tokens that have been read. + * This count is likely to be only updated if {@link StreamReadConstraints.Builder#maxTokenCount(long)} + * has been used to set a limit on the number of tokens that can be read. + * + * @return the number of tokens that have been read (-1 if the count is not available) + * @since 2.18 + */ + public long currentTokenCount() { + return -1L; + } + /** * Deprecated alias for {@link #currentLocation()} (removed from Jackson 3.0). * diff --git a/src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java b/src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java index c7586d2d3c..b460ab4152 100644 --- a/src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java +++ b/src/main/java/com/fasterxml/jackson/core/StreamReadConstraints.java @@ -43,6 +43,12 @@ public class StreamReadConstraints */ public static final long DEFAULT_MAX_DOC_LEN = -1L; + /** + * Default setting for maximum token count: + * see {@link Builder#maxTokenCount} for details. 
+ */ + public static final long DEFAULT_MAX_TOKEN_COUNT = -1L; + /** * @since 2.16 */ @@ -74,6 +80,7 @@ public class StreamReadConstraints protected final int _maxNestingDepth; protected final long _maxDocLen; + protected final long _maxTokenCount; protected final int _maxNumLen; protected final int _maxStringLen; @@ -112,6 +119,7 @@ public static void overrideDefaultStreamReadConstraints(final StreamReadConstrai public static final class Builder { private long maxDocLen; + private long maxTokenCount; private int maxNestingDepth; private int maxNumLen; private int maxStringLen; @@ -156,6 +164,31 @@ public Builder maxDocumentLength(long maxDocLen) { return this; } + /** + * Sets the maximum allowed token count (for positive values over 0) or + * indicate that any count is acceptable ({@code 0} or negative number). + * + *

+ * A token is a single unit of input, such as a property name, a number, a string, + * an object start or end, or an array start or end. + *

+ * + * @param maxTokenCount the maximum allowed token count if positive number above 0; otherwise + * ({@code 0} or negative number) means "unlimited". + * + * @return this builder + * + * @since 2.18 + */ + public Builder maxTokenCount(long maxTokenCount) { + // Negative values and 0 mean "unlimited", mark with -1L + if (maxTokenCount <= 0L) { + maxTokenCount = -1L; + } + this.maxTokenCount = maxTokenCount; + return this; + } + /** * Sets the maximum number length (in chars or bytes, depending on input context). * The default is 1000. @@ -220,14 +253,15 @@ public Builder maxNameLength(final int maxNameLen) { } Builder() { - this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, + this(DEFAULT_MAX_DEPTH, DEFAULT_MAX_DOC_LEN, DEFAULT_MAX_TOKEN_COUNT, DEFAULT_MAX_NUM_LEN, DEFAULT_MAX_STRING_LEN, DEFAULT_MAX_NAME_LEN); } - Builder(final int maxNestingDepth, final long maxDocLen, + Builder(final int maxNestingDepth, final long maxDocLen, final long maxTokenCount, final int maxNumLen, final int maxStringLen, final int maxNameLen) { this.maxNestingDepth = maxNestingDepth; this.maxDocLen = maxDocLen; + this.maxTokenCount = maxTokenCount; this.maxNumLen = maxNumLen; this.maxStringLen = maxStringLen; this.maxNameLen = maxNameLen; @@ -236,6 +270,7 @@ public Builder maxNameLength(final int maxNameLen) { Builder(StreamReadConstraints src) { maxNestingDepth = src._maxNestingDepth; maxDocLen = src._maxDocLen; + maxTokenCount = src._maxTokenCount; maxNumLen = src._maxNumLen; maxStringLen = src._maxStringLen; maxNameLen = src._maxNameLen; @@ -243,7 +278,7 @@ public Builder maxNameLength(final int maxNameLen) { public StreamReadConstraints build() { return new StreamReadConstraints(maxNestingDepth, maxDocLen, - maxNumLen, maxStringLen, maxNameLen); + maxNumLen, maxStringLen, maxNameLen, maxTokenCount); } } @@ -257,7 +292,7 @@ public StreamReadConstraints build() { protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen, final int maxNumLen, final int maxStringLen) { 
this(maxNestingDepth, maxDocLen, - maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN); + maxNumLen, maxStringLen, DEFAULT_MAX_NAME_LEN, DEFAULT_MAX_TOKEN_COUNT); } /** @@ -269,13 +304,30 @@ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen, * * @since 2.16 */ + @Deprecated // since 2.18 + protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen, + final int maxNumLen, final int maxStringLen, final int maxNameLen) { + this(maxNestingDepth, maxDocLen, maxNumLen, maxStringLen, maxNameLen, DEFAULT_MAX_TOKEN_COUNT); + } + + /** + * @param maxNestingDepth Maximum input document nesting to allow + * @param maxDocLen Maximum input document length to allow + * @param maxNumLen Maximum number representation length to allow + * @param maxStringLen Maximum String value length to allow + * @param maxNameLen Maximum Object property name length to allow + * @param maxTokenCount Maximum number of tokens to allow + * + * @since 2.18 + */ protected StreamReadConstraints(final int maxNestingDepth, final long maxDocLen, - final int maxNumLen, final int maxStringLen, final int maxNameLen) { + final int maxNumLen, final int maxStringLen, final int maxNameLen, final long maxTokenCount) { _maxNestingDepth = maxNestingDepth; _maxDocLen = maxDocLen; _maxNumLen = maxNumLen; _maxStringLen = maxStringLen; _maxNameLen = maxNameLen; + _maxTokenCount = maxTokenCount; } public static Builder builder() { @@ -337,6 +389,31 @@ public boolean hasMaxDocumentLength() { return _maxDocLen > 0L; } + /** + * Accessor for maximum token count. + * see {@link Builder#maxTokenCount(long)} for details. + * + * @return Maximum allowed token count + * @since 2.18 + */ + public long getMaxTokenCount() { + return _maxTokenCount; + } + + /** + * Convenience method, basically same as: + *
+     *  {@code getMaxTokenCount() > 0L}
+     *
+ * + * @return {@code true} if this constraints instance has a limit for maximum + * token count to enforce; {@code false} otherwise. + * @since 2.18 + */ + public boolean hasMaxTokenCount() { + return _maxTokenCount > 0L; + } + /** * Accessor for maximum length of numbers to decode. * see {@link Builder#maxNumberLength(int)} for details. @@ -419,6 +496,31 @@ public void validateDocumentLength(long len) throws StreamConstraintsException } } + /** + * Convenience method that can be used to verify that the + * token count does not exceed the maximum specified by this + * constraints object (if any): if it does, a + * {@link StreamConstraintsException} + * is thrown. + * + * @param count Current token count for processed document content + * + * @throws StreamConstraintsException If token count exceeds maximum + * + * @since 2.18 + */ + public void validateTokenCount(long count) throws StreamConstraintsException + { + // for performance reasons, it is assumed that users check hasMaxTokenCount() + // before calling this method - if no limit is set (marker value -1L), any non-negative count is rejected + if (count > _maxTokenCount) { + throw _constructException( + "Token count (%d) exceeds the maximum allowed (%d, from %s)", + count, _maxTokenCount, + _constrainRef("getMaxTokenCount")); + } + } + /* /********************************************************************** /* Convenience methods for validation, token lengths diff --git a/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java b/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java index 91925848e7..9e73d7b12d 100644 --- a/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java +++ b/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java @@ -159,6 +159,20 @@ public abstract class ParserMinimalBase extends JsonParser */ protected JsonToken _currToken; + /** + * Current count of tokens, if tracked (see {@link #_trackMaxTokenCount}) + * + * @since 2.18 + */ + 
protected long _tokenCount; + + /** + * Whether or not to track the token count due to a {@link StreamReadConstraints} maxTokenCount > 0. + * + * @since 2.18 + */ + protected final boolean _trackMaxTokenCount; + /** * Last cleared token, if any: that is, value that was in * effect when {@link #clearCurrentToken} was called. @@ -175,6 +189,7 @@ public abstract class ParserMinimalBase extends JsonParser protected ParserMinimalBase() { super(); _streamReadConstraints = StreamReadConstraints.defaults(); + _trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount(); } @Deprecated // since 2.18 @@ -186,12 +201,14 @@ protected ParserMinimalBase(int features) { protected ParserMinimalBase(StreamReadConstraints src) { super(); _streamReadConstraints = (src == null) ? StreamReadConstraints.defaults() : src; + _trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount(); } // @since 2.18 protected ParserMinimalBase(int features, StreamReadConstraints src) { super(features); _streamReadConstraints = (src == null) ? 
StreamReadConstraints.defaults() : src; + _trackMaxTokenCount = _streamReadConstraints.hasMaxTokenCount(); } // NOTE: had base impl in 2.3 and before; but shouldn't @@ -311,9 +328,6 @@ public JsonParser skipChildren() throws IOException */ protected abstract void _handleEOF() throws JsonParseException; - //public JsonToken getCurrentToken() - //public boolean hasCurrentToken() - @Deprecated // since 2.17 -- still need to implement @Override public abstract String getCurrentName() throws IOException; @@ -327,6 +341,11 @@ public JsonParser skipChildren() throws IOException // public abstract JsonLocation getCurrentLocation(); + @Override // since 2.18 + public long currentTokenCount() { + return _tokenCount; + } + /* /********************************************************** /* Public API, token state overrides @@ -827,9 +846,11 @@ protected final void _wrapError(String msg, Throwable t) throws JsonParseExcepti protected final JsonToken _updateToken(final JsonToken token) throws StreamConstraintsException { _currToken = token; + if (_trackMaxTokenCount) { + _streamReadConstraints.validateTokenCount(++_tokenCount); + } return token; } - protected final JsonToken _updateTokenToNull() { return (_currToken = null); } diff --git a/src/main/java/com/fasterxml/jackson/core/util/JsonParserDelegate.java b/src/main/java/com/fasterxml/jackson/core/util/JsonParserDelegate.java index f69e85002a..950ccfa3d9 100644 --- a/src/main/java/com/fasterxml/jackson/core/util/JsonParserDelegate.java +++ b/src/main/java/com/fasterxml/jackson/core/util/JsonParserDelegate.java @@ -155,6 +155,9 @@ public boolean requiresCustomCodec() { @Override public JsonLocation currentLocation() { return delegate.currentLocation(); } @Override public JsonLocation currentTokenLocation() { return delegate.currentTokenLocation(); } + @Override // since 2.18 + public long currentTokenCount() { return delegate.currentTokenCount(); } + @Override @Deprecated public JsonToken getCurrentToken() { return 
delegate.getCurrentToken(); } diff --git a/src/test/java/com/fasterxml/jackson/core/constraints/LargeDocReadTest.java b/src/test/java/com/fasterxml/jackson/core/constraints/LargeDocReadTest.java index b184ee87b2..14c4a84a9d 100644 --- a/src/test/java/com/fasterxml/jackson/core/constraints/LargeDocReadTest.java +++ b/src/test/java/com/fasterxml/jackson/core/constraints/LargeDocReadTest.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.core.exc.StreamConstraintsException; import com.fasterxml.jackson.core.testsupport.AsyncReaderWrapper; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.fail; // [core#1047]: Add max-name-length constraints @@ -20,6 +21,10 @@ class LargeDocReadTest extends AsyncTestBase .streamReadConstraints(StreamReadConstraints.builder().maxDocumentLength(10_000L).build()) .build(); + private final JsonFactory JSON_F_MAX_TOKENS_1K = JsonFactory.builder() + .streamReadConstraints(StreamReadConstraints.builder().maxTokenCount(1_000L).build()) + .build(); + // Test name that is below default max name @Test void largeNameBytes() throws Exception { @@ -83,6 +88,18 @@ void largeNameWithSmallLimitAsync() throws Exception } } + @Test + void tokenLimitBytes() throws Exception { + final String doc = generateJSON(StreamReadConstraints.defaults().getMaxNameLength() - 100); + try (JsonParser p = createParserUsingStream(JSON_F_MAX_TOKENS_1K, doc, "UTF-8")) { + consumeTokens(p); + fail("expected StreamConstraintsException"); + } catch (StreamConstraintsException e) { + assertEquals("Token count (1001) exceeds the maximum allowed (1000, from `StreamReadConstraints.getMaxTokenCount()`)", + e.getMessage()); + } + } + private void consumeTokens(JsonParser p) throws IOException { while (p.nextToken() != null) { ; diff --git a/src/test/java/com/fasterxml/jackson/core/constraints/TokenCountTest.java b/src/test/java/com/fasterxml/jackson/core/constraints/TokenCountTest.java new file mode 100644 index 
0000000000..787a33e37b --- /dev/null +++ b/src/test/java/com/fasterxml/jackson/core/constraints/TokenCountTest.java @@ -0,0 +1,165 @@ +package com.fasterxml.jackson.core.constraints; + +import java.nio.ByteBuffer; + +import com.fasterxml.jackson.core.JUnit5TestBase; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.core.json.async.NonBlockingByteBufferJsonParser; +import com.fasterxml.jackson.core.json.async.NonBlockingJsonParser; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Set of basic unit tests for verifying that the token count + * functionality works as expected. + */ +public class TokenCountTest extends JUnit5TestBase { + private final static JsonFactory JSON_FACTORY = JsonFactory.builder() + .streamReadConstraints(StreamReadConstraints.builder().maxTokenCount(Long.MAX_VALUE).build()) + .build(); + private final static String ARRAY_DOC = a2q("{ 'nums': [1,2,3,4,5,6,7,8,9,10] }"); + private final static String SHORT_ARRAY_DOC = a2q("{ 'nums': [1,2,3] }"); + + @Test + void arrayDoc() throws Exception + { + for (int mode : ALL_MODES) { + _testArrayDoc(mode); + } + } + + @Test + void arrayDocNonBlockingArray() throws Exception + { + final byte[] input = ARRAY_DOC.getBytes("UTF-8"); + try (NonBlockingJsonParser p = (NonBlockingJsonParser) JSON_FACTORY.createNonBlockingByteArrayParser()) { + p.feedInput(input, 0, input.length); + p.endOfInput(); + _testArrayDoc(p); + } + } + + @Test + void arrayDocNonBlockingBuffer() throws Exception + { + final byte[] input = ARRAY_DOC.getBytes("UTF-8"); + try (NonBlockingByteBufferJsonParser p = (NonBlockingByteBufferJsonParser) JSON_FACTORY.createNonBlockingByteBufferParser()) { + p.feedInput(ByteBuffer.wrap(input, 0, input.length)); + p.endOfInput(); + _testArrayDoc(p); + } + } + + @Test + void shortArrayDoc() throws Exception + { 
+ for (int mode : ALL_MODES) { + _testShortArrayDoc(mode); + } + } + + @Test + void shortArrayDocNonBlockingArray() throws Exception + { + final byte[] input = SHORT_ARRAY_DOC.getBytes("UTF-8"); + try (NonBlockingJsonParser p = (NonBlockingJsonParser) JSON_FACTORY.createNonBlockingByteArrayParser()) { + p.feedInput(input, 0, input.length); + p.endOfInput(); + _testShortArrayDoc(p); + } + } + + @Test + void shortArrayDocNonBlockingBuffer() throws Exception + { + final byte[] input = SHORT_ARRAY_DOC.getBytes("UTF-8"); + try (NonBlockingByteBufferJsonParser p = (NonBlockingByteBufferJsonParser) + JSON_FACTORY.createNonBlockingByteBufferParser()) { + p.feedInput(ByteBuffer.wrap(input, 0, input.length)); + p.endOfInput(); + _testShortArrayDoc(p); + } + } + + @Test + void sampleDoc() throws Exception + { + for (int mode : ALL_MODES) { + _testSampleDoc(mode); + } + } + + @Test + void sampleDocNonBlockingArray() throws Exception + { + final byte[] input = SAMPLE_DOC_JSON_SPEC.getBytes("UTF-8"); + try (NonBlockingJsonParser p = (NonBlockingJsonParser) JSON_FACTORY.createNonBlockingByteArrayParser()) { + p.feedInput(input, 0, input.length); + p.endOfInput(); + _testSampleDoc(p); + } + } + + @Test + void sampleDocNonBlockingBuffer() throws Exception + { + final byte[] input = SAMPLE_DOC_JSON_SPEC.getBytes("UTF-8"); + try (NonBlockingByteBufferJsonParser p = (NonBlockingByteBufferJsonParser) + JSON_FACTORY.createNonBlockingByteBufferParser()) { + p.feedInput(ByteBuffer.wrap(input, 0, input.length)); + p.endOfInput(); + _testSampleDoc(p); + } + } + + private void _testArrayDoc(int mode) throws Exception + { + try (JsonParser p = createParser(JSON_FACTORY, mode, ARRAY_DOC)) { + _testArrayDoc(p); + } + } + + private void _testArrayDoc(JsonParser p) throws Exception + { + assertEquals(0, p.currentTokenCount()); + consumeTokens(p); + assertEquals(15, p.currentTokenCount()); + } + + private void _testShortArrayDoc(int mode) throws Exception + { + try (JsonParser p = 
createParser(JSON_FACTORY, mode, SHORT_ARRAY_DOC)) { + _testShortArrayDoc(p); + } + } + + private void _testShortArrayDoc(JsonParser p) throws Exception + { + assertEquals(0, p.currentTokenCount()); + consumeTokens(p); + assertEquals(8, p.currentTokenCount()); + } + + private void _testSampleDoc(int mode) throws Exception + { + try (JsonParser p = createParser(JSON_FACTORY, mode, SAMPLE_DOC_JSON_SPEC)) { + _testSampleDoc(p); + } + } + + private void _testSampleDoc(JsonParser p) throws Exception + { + assertEquals(0, p.currentTokenCount()); + consumeTokens(p); + assertEquals(27, p.currentTokenCount()); + } + + private void consumeTokens(JsonParser p) throws Exception { + while (p.nextToken() != null) { + ; + } + } +}