-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Fix BytesConsumed when reading JSON numbers within a multi-segment ReadOnlySequence via Utf8JsonReader #40303
Changes from 4 commits
fa7a4fe
f54b5c0
5e8a26d
a420295
abd5363
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -66,7 +66,9 @@ public Utf8JsonReader(ReadOnlySequence<byte> jsonData, bool isFinalBlock, JsonRe | |
{ | ||
_currentPosition = jsonData.Start; | ||
_nextPosition = _currentPosition; | ||
if (_buffer.Length == 0) | ||
|
||
bool firstSegmentIsEmpty = _buffer.Length == 0; | ||
if (firstSegmentIsEmpty) | ||
{ | ||
// Once we find a non-empty segment, we need to set current position to it. | ||
// Therefore, track the next position in a copy before it gets advanced to the next segment. | ||
|
@@ -84,7 +86,15 @@ public Utf8JsonReader(ReadOnlySequence<byte> jsonData, bool isFinalBlock, JsonRe | |
} | ||
} | ||
|
||
_isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: true) && isFinalBlock; // Don't re-order to avoid short-circuiting | ||
// If firstSegmentIsEmpty is true, | ||
// only check if we have reached the last segment but do not advance _nextPosition. The while loop above already advanced it. | ||
// Otherwise, we would end up skipping a segment (i.e. advance = false). | ||
// If firstSegmentIsEmpty is false, | ||
// make sure to advance _nextPosition so that it is no longer the same as _currentPosition (i.e. advance = true). | ||
_isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: !firstSegmentIsEmpty) && isFinalBlock; // Don't re-order to avoid short-circuiting | ||
|
||
Debug.Assert(!_nextPosition.Equals(_currentPosition)); | ||
|
||
_isMultiSegment = true; | ||
} | ||
} | ||
|
@@ -1309,6 +1319,7 @@ private bool TryGetNumberMultiSegment(ReadOnlySpan<byte> data, out int consumed) | |
|
||
private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byte> data, ref int i) | ||
{ | ||
Debug.Assert(i == 0); | ||
byte nextByte = data[i]; | ||
|
||
if (nextByte == '-') | ||
|
@@ -1329,7 +1340,8 @@ private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byt | |
} | ||
return ConsumeNumberResult.NeedMoreData; | ||
} | ||
_totalConsumed++; | ||
Debug.Assert(i == 1); | ||
_totalConsumed += i; | ||
HasValueSequence = true; | ||
i = 0; | ||
data = _buffer; | ||
|
@@ -1347,9 +1359,10 @@ private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byt | |
private ConsumeNumberResult ConsumeZeroMultiSegment(ref ReadOnlySpan<byte> data, ref int i) | ||
{ | ||
Debug.Assert(data[i] == (byte)'0'); | ||
Debug.Assert(i == 0 || i == 1); | ||
i++; | ||
_bytePositionInLine++; | ||
byte nextByte = default; | ||
byte nextByte; | ||
if (i < data.Length) | ||
{ | ||
nextByte = data[i]; | ||
|
@@ -1377,7 +1390,7 @@ private ConsumeNumberResult ConsumeZeroMultiSegment(ref ReadOnlySpan<byte> data, | |
return ConsumeNumberResult.NeedMoreData; | ||
} | ||
|
||
_totalConsumed++; | ||
_totalConsumed += i; | ||
HasValueSequence = true; | ||
i = 0; | ||
data = _buffer; | ||
|
@@ -1522,6 +1535,7 @@ private ConsumeNumberResult ConsumeSignMultiSegment(ref ReadOnlySpan<byte> data, | |
} | ||
return ConsumeNumberResult.NeedMoreData; | ||
} | ||
_totalConsumed += i; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The responsibility of updating this value seems to be in weird places. Why isn't it updated along with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably. I wanted to minimize a refactor like this in this PR (particularly because we want this fix in 3.0). I think some of this logic can be refactored to be clearer (which includes what you mentioned). I had tried to keep the structure of single-segment and multi-segment code similar (and the code has several exit points due to re-entrancy and partial data), which is partially why we have this as a side effect. |
||
HasValueSequence = true; | ||
i = 0; | ||
data = _buffer; | ||
|
@@ -1547,7 +1561,7 @@ private ConsumeNumberResult ConsumeSignMultiSegment(ref ReadOnlySpan<byte> data, | |
} | ||
return ConsumeNumberResult.NeedMoreData; | ||
} | ||
_totalConsumed++; | ||
_totalConsumed += i; | ||
HasValueSequence = true; | ||
i = 0; | ||
data = _buffer; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,108 @@ public static void InitialStateMultiSegment() | |
Assert.False(json.Read()); | ||
} | ||
|
||
[Fact] | ||
public static void EmptyJsonMultiSegmentIsInvalid() | ||
{ | ||
ReadOnlyMemory<byte> dataMemory = Array.Empty<byte>(); | ||
|
||
var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, 0)); | ||
ReadOnlyMemory<byte> secondMem = dataMemory.Slice(0, 0); | ||
BufferSegment<byte> secondSegment = firstSegment.Append(secondMem); | ||
|
||
var sequence = new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length); | ||
|
||
Assert.ThrowsAny<JsonException>(() => | ||
{ | ||
var json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default); | ||
|
||
Assert.Equal(0, json.BytesConsumed); | ||
Assert.Equal(0, json.TokenStartIndex); | ||
Assert.Equal(0, json.CurrentDepth); | ||
Assert.Equal(JsonTokenType.None, json.TokenType); | ||
Assert.NotEqual(default, json.Position); | ||
Assert.False(json.HasValueSequence); | ||
Assert.True(json.ValueSpan.SequenceEqual(default)); | ||
Assert.True(json.ValueSequence.IsEmpty); | ||
|
||
Assert.Equal(64, json.CurrentState.Options.MaxDepth); | ||
Assert.False(json.CurrentState.Options.AllowTrailingCommas); | ||
Assert.Equal(JsonCommentHandling.Disallow, json.CurrentState.Options.CommentHandling); | ||
|
||
json.Read(); // this should throw | ||
}); | ||
} | ||
|
||
[Theory] | ||
[InlineData("2e2", 200)] | ||
[InlineData("2e+2", 200)] | ||
[InlineData("123e-01", 12.3)] | ||
[InlineData("0", 0)] | ||
[InlineData("0.1", 0.1)] | ||
[InlineData("-0", 0)] | ||
[InlineData("-0.1", -0.1)] | ||
[InlineData(" 2e2 ", 200)] | ||
[InlineData(" 2e+2 ", 200)] | ||
[InlineData(" 123e-01 ", 12.3)] | ||
[InlineData(" 0 ", 0)] | ||
[InlineData(" 0.1 ", 0.1)] | ||
[InlineData(" -0 ", 0)] | ||
[InlineData(" -0.1 ", -0.1)] | ||
[InlineData("[2e2]", 200)] | ||
[InlineData("[2e+2]", 200)] | ||
[InlineData("[123e-01]", 12.3)] | ||
[InlineData("[0]", 0)] | ||
[InlineData("[0.1]", 0.1)] | ||
[InlineData("[-0]", 0)] | ||
[InlineData("[-0.1]", -0.1)] | ||
[InlineData("{\"foo\": 2e2}", 200)] | ||
[InlineData("{\"foo\": 2e+2}", 200)] | ||
[InlineData("{\"foo\": 123e-01}", 12.3)] | ||
[InlineData("{\"foo\": 0}", 0)] | ||
[InlineData("{\"foo\": 0.1}", 0.1)] | ||
[InlineData("{\"foo\": -0}", 0)] | ||
[InlineData("{\"foo\": -0.1}", -0.1)] | ||
public static void ReadJsonWithNumberVariousSegmentSizes(string input, double expectedValue) | ||
{ | ||
byte[] utf8 = Encoding.UTF8.GetBytes(input); | ||
|
||
var jsonReader = new Utf8JsonReader(utf8); | ||
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue); | ||
|
||
ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8, 1); | ||
jsonReader = new Utf8JsonReader(sequence); | ||
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue); | ||
|
||
for (int splitLocation = 0; splitLocation < utf8.Length; splitLocation++) | ||
{ | ||
sequence = JsonTestHelper.CreateSegments(utf8, splitLocation); | ||
jsonReader = new Utf8JsonReader(sequence); | ||
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue); | ||
} | ||
|
||
for (int firstSplit = 0; firstSplit < utf8.Length; firstSplit++) | ||
{ | ||
for (int secondSplit = firstSplit; secondSplit < utf8.Length; secondSplit++) | ||
{ | ||
sequence = JsonTestHelper.CreateSegments(utf8, firstSplit, secondSplit); | ||
jsonReader = new Utf8JsonReader(sequence); | ||
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue); | ||
} | ||
} | ||
} | ||
|
||
private static void ReadDoubleHelper(ref Utf8JsonReader jsonReader, int expectedBytesConsumed, double expectedValue) | ||
{ | ||
while (jsonReader.Read()) | ||
{ | ||
if (jsonReader.TokenType == JsonTokenType.Number) | ||
{ | ||
Assert.Equal(expectedValue, jsonReader.GetDouble()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there anything tighter you can assert here? Assert.Equals(jsonReader.GetValueSpanOrSegmentLength(), jsonReader.BytesConsumed - jsonReader.TokenStartIndex); maybe? (fake method as shorthand for the inevitable if) |
||
} | ||
} | ||
Assert.Equal(expectedBytesConsumed, jsonReader.BytesConsumed); | ||
} | ||
|
||
[Fact] | ||
public static void InitialStateSimpleCtorMultiSegment() | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why += i rather than ++ if we guarantee i == 1?
Similarly, earlier on, there's a
i >= data.Length
check... should that just bedata.Length == 1
? data.Length can't be 0 there, and i can't be > 1 (if the asserts are to be believed).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One of the original buggy cases came about because in certain methods I was doing
_totalConsumed++
because I had reasoned about i being 1. However, I was mistaken and_totalConsumed += i
was correct (for all cases). So, for consistency, I decided to keep this the same across all methods and add aDebug.Assert
for this particular method since it is the only case where i is guaranteed to be 1 (it is the first one called). I can change it to++
if you still think the consistency isn't worth it.