Skip to content

Commit

Permalink
Fix BytesConsumed when reading JSON numbers within a multi-segment Re…
Browse files Browse the repository at this point in the history
…adOnlySequence via Utf8JsonReader (dotnet#40303)

* Add tests for reading numbers when data is within a multi-segment
ReadOnlySequence.

* Test rename and add debug.assert.

* Test clean up - remove unnecessary/expensive Debug.Assert.

* Test and code cleanup.

* Address feedback - add stronger test condition for token length.
  • Loading branch information
ahsonkhan committed Aug 19, 2019
1 parent 961b953 commit c576e34
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ public Utf8JsonReader(ReadOnlySequence<byte> jsonData, bool isFinalBlock, JsonRe
{
_currentPosition = jsonData.Start;
_nextPosition = _currentPosition;
if (_buffer.Length == 0)

bool firstSegmentIsEmpty = _buffer.Length == 0;
if (firstSegmentIsEmpty)
{
// Once we find a non-empty segment, we need to set current position to it.
// Therefore, track the next position in a copy before it gets advanced to the next segment.
Expand All @@ -84,7 +86,15 @@ public Utf8JsonReader(ReadOnlySequence<byte> jsonData, bool isFinalBlock, JsonRe
}
}

_isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: true) && isFinalBlock; // Don't re-order to avoid short-circuiting
// If firstSegmentIsEmpty is true,
// only check if we have reached the last segment but do not advance _nextPosition. The while loop above already advanced it.
// Otherwise, we would end up skipping a segment (i.e. advance = false).
// If firstSegmentIsEmpty is false,
// make sure to advance _nextPosition so that it is no longer the same as _currentPosition (i.e. advance = true).
_isLastSegment = !jsonData.TryGet(ref _nextPosition, out _, advance: !firstSegmentIsEmpty) && isFinalBlock; // Don't re-order to avoid short-circuiting

Debug.Assert(!_nextPosition.Equals(_currentPosition));

_isMultiSegment = true;
}
}
Expand Down Expand Up @@ -1310,6 +1320,7 @@ private bool TryGetNumberMultiSegment(ReadOnlySpan<byte> data, out int consumed)

private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byte> data, ref int i)
{
Debug.Assert(i == 0);
byte nextByte = data[i];

if (nextByte == '-')
Expand All @@ -1330,7 +1341,8 @@ private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byt
}
return ConsumeNumberResult.NeedMoreData;
}
_totalConsumed++;
Debug.Assert(i == 1);
_totalConsumed += i;
HasValueSequence = true;
i = 0;
data = _buffer;
Expand All @@ -1348,9 +1360,10 @@ private ConsumeNumberResult ConsumeNegativeSignMultiSegment(ref ReadOnlySpan<byt
private ConsumeNumberResult ConsumeZeroMultiSegment(ref ReadOnlySpan<byte> data, ref int i)
{
Debug.Assert(data[i] == (byte)'0');
Debug.Assert(i == 0 || i == 1);
i++;
_bytePositionInLine++;
byte nextByte = default;
byte nextByte;
if (i < data.Length)
{
nextByte = data[i];
Expand Down Expand Up @@ -1378,7 +1391,7 @@ private ConsumeNumberResult ConsumeZeroMultiSegment(ref ReadOnlySpan<byte> data,
return ConsumeNumberResult.NeedMoreData;
}

_totalConsumed++;
_totalConsumed += i;
HasValueSequence = true;
i = 0;
data = _buffer;
Expand Down Expand Up @@ -1523,6 +1536,7 @@ private ConsumeNumberResult ConsumeSignMultiSegment(ref ReadOnlySpan<byte> data,
}
return ConsumeNumberResult.NeedMoreData;
}
_totalConsumed += i;
HasValueSequence = true;
i = 0;
data = _buffer;
Expand All @@ -1548,7 +1562,7 @@ private ConsumeNumberResult ConsumeSignMultiSegment(ref ReadOnlySpan<byte> data,
}
return ConsumeNumberResult.NeedMoreData;
}
_totalConsumed++;
_totalConsumed += i;
HasValueSequence = true;
i = 0;
data = _buffer;
Expand Down
40 changes: 35 additions & 5 deletions src/System.Text.Json/tests/JsonTestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text.Json.Tests;
Expand Down Expand Up @@ -151,20 +152,49 @@ public static ReadOnlySequence<byte> CreateSegments(byte[] data)
return new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);
}

public static ReadOnlySequence<byte> GetSequence(byte[] _dataUtf8, int segmentSize)
public static ReadOnlySequence<byte> CreateSegments(byte[] data, int splitLocation)
{
int numberOfSegments = _dataUtf8.Length / segmentSize + 1;
Debug.Assert(splitLocation <= data.Length);

ReadOnlyMemory<byte> dataMemory = data;

var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, splitLocation));
ReadOnlyMemory<byte> secondMem = dataMemory.Slice(splitLocation);
BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);

return new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);
}

public static ReadOnlySequence<byte> CreateSegments(byte[] data, int firstSplit, int secondSplit)
{
Debug.Assert(firstSplit <= data.Length && secondSplit <= data.Length && firstSplit <= secondSplit);

ReadOnlyMemory<byte> dataMemory = data;

var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, firstSplit));
ReadOnlyMemory<byte> secondMem = dataMemory.Slice(firstSplit, secondSplit - firstSplit);
BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);

ReadOnlyMemory<byte> thirdMem = dataMemory.Slice(secondSplit);
BufferSegment<byte> thirdSegment = secondSegment.Append(thirdMem);

return new ReadOnlySequence<byte>(firstSegment, 0, thirdSegment, thirdMem.Length);
}

public static ReadOnlySequence<byte> GetSequence(byte[] dataUtf8, int segmentSize)
{
int numberOfSegments = dataUtf8.Length / segmentSize + 1;
byte[][] buffers = new byte[numberOfSegments][];

for (int j = 0; j < numberOfSegments - 1; j++)
{
buffers[j] = new byte[segmentSize];
Array.Copy(_dataUtf8, j * segmentSize, buffers[j], 0, segmentSize);
Array.Copy(dataUtf8, j * segmentSize, buffers[j], 0, segmentSize);
}

int remaining = _dataUtf8.Length % segmentSize;
int remaining = dataUtf8.Length % segmentSize;
buffers[numberOfSegments - 1] = new byte[remaining];
Array.Copy(_dataUtf8, _dataUtf8.Length - remaining, buffers[numberOfSegments - 1], 0, remaining);
Array.Copy(dataUtf8, dataUtf8.Length - remaining, buffers[numberOfSegments - 1], 0, remaining);

return BufferFactory.Create(buffers);
}
Expand Down
105 changes: 105 additions & 0 deletions src/System.Text.Json/tests/Utf8JsonReaderTests.MultiSegment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,111 @@ public static void InitialStateMultiSegment()
Assert.False(json.Read());
}

[Fact]
public static void EmptyJsonMultiSegmentIsInvalid()
{
ReadOnlyMemory<byte> dataMemory = Array.Empty<byte>();

var firstSegment = new BufferSegment<byte>(dataMemory.Slice(0, 0));
ReadOnlyMemory<byte> secondMem = dataMemory.Slice(0, 0);
BufferSegment<byte> secondSegment = firstSegment.Append(secondMem);

var sequence = new ReadOnlySequence<byte>(firstSegment, 0, secondSegment, secondMem.Length);

Assert.ThrowsAny<JsonException>(() =>
{
var json = new Utf8JsonReader(sequence, isFinalBlock: true, state: default);

Assert.Equal(0, json.BytesConsumed);
Assert.Equal(0, json.TokenStartIndex);
Assert.Equal(0, json.CurrentDepth);
Assert.Equal(JsonTokenType.None, json.TokenType);
Assert.NotEqual(default, json.Position);
Assert.False(json.HasValueSequence);
Assert.True(json.ValueSpan.SequenceEqual(default));
Assert.True(json.ValueSequence.IsEmpty);

Assert.Equal(64, json.CurrentState.Options.MaxDepth);
Assert.False(json.CurrentState.Options.AllowTrailingCommas);
Assert.Equal(JsonCommentHandling.Disallow, json.CurrentState.Options.CommentHandling);

json.Read(); // this should throw
});
}

[Theory]
[InlineData("2e2", 200, 3)]
[InlineData("2e+2", 200, 4)]
[InlineData("123e-01", 12.3, 7)]
[InlineData("0", 0, 1)]
[InlineData("0.1", 0.1, 3)]
[InlineData("-0", 0, 2)]
[InlineData("-0.1", -0.1, 4)]
[InlineData(" 2e2 ", 200, 3)]
[InlineData(" 2e+2 ", 200, 4)]
[InlineData(" 123e-01 ", 12.3, 7)]
[InlineData(" 0 ", 0, 1)]
[InlineData(" 0.1 ", 0.1, 3)]
[InlineData(" -0 ", 0, 2)]
[InlineData(" -0.1 ", -0.1, 4)]
[InlineData("[2e2]", 200, 3)]
[InlineData("[2e+2]", 200, 4)]
[InlineData("[123e-01]", 12.3, 7)]
[InlineData("[0]", 0, 1)]
[InlineData("[0.1]", 0.1, 3)]
[InlineData("[-0]", 0, 2)]
[InlineData("[-0.1]", -0.1, 4)]
[InlineData("{\"foo\": 2e2}", 200, 3)]
[InlineData("{\"foo\": 2e+2}", 200, 4)]
[InlineData("{\"foo\": 123e-01}", 12.3, 7)]
[InlineData("{\"foo\": 0}", 0, 1)]
[InlineData("{\"foo\": 0.1}", 0.1, 3)]
[InlineData("{\"foo\": -0}", 0, 2)]
[InlineData("{\"foo\": -0.1}", -0.1, 4)]
public static void ReadJsonWithNumberVariousSegmentSizes(string input, double expectedValue, long expectedTokenLength)
{
byte[] utf8 = Encoding.UTF8.GetBytes(input);

var jsonReader = new Utf8JsonReader(utf8);
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);

ReadOnlySequence<byte> sequence = JsonTestHelper.GetSequence(utf8, 1);
jsonReader = new Utf8JsonReader(sequence);
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);

for (int splitLocation = 0; splitLocation < utf8.Length; splitLocation++)
{
sequence = JsonTestHelper.CreateSegments(utf8, splitLocation);
jsonReader = new Utf8JsonReader(sequence);
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
}

for (int firstSplit = 0; firstSplit < utf8.Length; firstSplit++)
{
for (int secondSplit = firstSplit; secondSplit < utf8.Length; secondSplit++)
{
sequence = JsonTestHelper.CreateSegments(utf8, firstSplit, secondSplit);
jsonReader = new Utf8JsonReader(sequence);
ReadDoubleHelper(ref jsonReader, utf8.Length, expectedValue, expectedTokenLength);
}
}
}

private static void ReadDoubleHelper(ref Utf8JsonReader jsonReader, int expectedBytesConsumed, double expectedValue, long expectedTokenLength)
{
while (jsonReader.Read())
{
if (jsonReader.TokenType == JsonTokenType.Number)
{
long tokenLength = jsonReader.HasValueSequence ? jsonReader.ValueSequence.Length : jsonReader.ValueSpan.Length;
Assert.Equal(expectedTokenLength, tokenLength);
Assert.Equal(tokenLength, jsonReader.BytesConsumed - jsonReader.TokenStartIndex);
Assert.Equal(expectedValue, jsonReader.GetDouble());
}
}
Assert.Equal(expectedBytesConsumed, jsonReader.BytesConsumed);
}

[Fact]
public static void InitialStateSimpleCtorMultiSegment()
{
Expand Down

0 comments on commit c576e34

Please sign in to comment.