From cd0fa34e1ab2a7792293be09aff40f2bf2d3d856 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 26 May 2022 16:58:27 -0700 Subject: [PATCH 1/3] Fix StreamReader EOF handling and improve perf --- .../tests/StreamReader/StreamReaderTests.cs | 153 +++++++++ .../src/System/IO/StreamReader.cs | 301 ++++++++++-------- 2 files changed, 323 insertions(+), 131 deletions(-) diff --git a/src/libraries/System.IO/tests/StreamReader/StreamReaderTests.cs b/src/libraries/System.IO/tests/StreamReader/StreamReaderTests.cs index b8098fdd3ac0a..2d0a6c7679052 100644 --- a/src/libraries/System.IO/tests/StreamReader/StreamReaderTests.cs +++ b/src/libraries/System.IO/tests/StreamReader/StreamReaderTests.cs @@ -676,5 +676,158 @@ public void StreamReader_WithOptionalArguments() Assert.False(tempStream.CanRead); } } + + [Fact] + public void Read_ShortStream_PerformsFinalFlushCorrectly() + { + MemoryStream memStream = new MemoryStream(new byte[] { 0x61 /* 'a' */, 0xF0 }); + + // First, use ReadToEnd API. + + memStream.Position = 0; + StreamReader reader = new StreamReader(memStream, Encoding.UTF8); + Assert.Equal("a\uFFFD", reader.ReadToEnd()); + + // Next, use Read() API. + + memStream.Position = 0; + reader = new StreamReader(memStream, Encoding.UTF8); + Assert.Equal('a', reader.Read()); + Assert.Equal('\uFFFD', reader.Read()); + Assert.Equal(-1, reader.Read()); + + // Next, use Read(Span) API. + + StringBuilder builder = new StringBuilder(); + Span destBuffer = new char[1024]; + memStream.Position = 0; + reader = new StreamReader(memStream, Encoding.UTF8); + int charsRead; + while ((charsRead = reader.Read(destBuffer)) > 0) + { + builder.Append(destBuffer.Slice(0, charsRead)); + } + Assert.Equal("a\uFFFD", builder.ToString()); + + // Finally, use ReadLine API. + + memStream.Position = 0; + reader = new StreamReader(memStream, Encoding.UTF8); + Assert.Equal("a\uFFFD", reader.ReadLine()); + Assert.Null(reader.ReadLine()); + } + + [Fact] + public void Read_LongStreamIntoShortBuffer_PerformsFinalFlushCorrectly() + { + MemoryStream memStream = new MemoryStream(); + memStream.Write(Enumerable.Repeat((byte)'a', 32 * 1024).ToArray()); + memStream.WriteByte(0xF0); + string expected = new string('a', 32 * 1024) + "\uFFFD"; + + // First, use ReadToEnd API. + + memStream.Position = 0; + StreamReader reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + Assert.Equal(expected, reader.ReadToEnd()); + + // Next, use Read() API. + + memStream.Position = 0; + reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + for (int i = 0; i < 32 * 1024; i++) + { + Assert.Equal('a', reader.Read()); + } + Assert.Equal('\uFFFD', reader.Read()); + Assert.Equal(-1, reader.Read()); + + // Next, use Read(Span) API. + + StringBuilder builder = new StringBuilder(); + Span destBuffer = new char[47]; // prime number, because why not + memStream.Position = 0; + reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + int charsRead; + while ((charsRead = reader.Read(destBuffer)) > 0) + { + builder.Append(destBuffer.Slice(0, charsRead)); + } + Assert.Equal(expected, builder.ToString()); + + // Finally, use ReadLine API. + + memStream.Position = 0; + reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + Assert.Equal(expected, reader.ReadLine()); + Assert.Null(reader.ReadLine()); + } + + [Fact] + public async Task ReadAsync_ShortStream_PerformsFinalFlushCorrectly() + { + MemoryStream memStream = new MemoryStream(new byte[] { 0x61 /* 'a' */, 0xF0 }); + + // First, use ReadToEndAsync API. + + memStream.Position = 0; + StreamReader reader = new StreamReader(memStream, Encoding.UTF8); + Assert.Equal("a\uFFFD", await reader.ReadToEndAsync()); + + // Next, use ReadAsync(Memory) API. + + StringBuilder builder = new StringBuilder(); + Memory destBuffer = new char[1024]; + memStream.Position = 0; + reader = new StreamReader(memStream, Encoding.UTF8); + int charsRead; + while ((charsRead = await reader.ReadAsync(destBuffer)) > 0) + { + builder.Append(destBuffer.Slice(0, charsRead)); + } + Assert.Equal("a\uFFFD", builder.ToString()); + + // Finally, use ReadLineAsync API. + + memStream.Position = 0; + reader = new StreamReader(memStream, Encoding.UTF8); + Assert.Equal("a\uFFFD", await reader.ReadLineAsync()); + Assert.Null(await reader.ReadLineAsync()); + } + + [Fact] + public async Task ReadAsync_LongStreamIntoShortBuffer_PerformsFinalFlushCorrectly() + { + MemoryStream memStream = new MemoryStream(); + memStream.Write(Enumerable.Repeat((byte)'a', 32 * 1024).ToArray()); + memStream.WriteByte(0xF0); + string expected = new string('a', 32 * 1024) + "\uFFFD"; + + // First, use ReadToEndAsync API. + + memStream.Position = 0; + StreamReader reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + Assert.Equal(expected, await reader.ReadToEndAsync()); + + // Next, use Read(Memory) API. + + StringBuilder builder = new StringBuilder(); + Memory destBuffer = new char[47]; // prime number, because why not + memStream.Position = 0; + reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + int charsRead; + while ((charsRead = await reader.ReadAsync(destBuffer)) > 0) + { + builder.Append(destBuffer.Slice(0, charsRead)); + } + Assert.Equal(expected, builder.ToString()); + + // Finally, use ReadLineAsync API. + + memStream.Position = 0; + reader = new StreamReader(memStream, encoding: Encoding.UTF8, bufferSize: 32); + Assert.Equal(expected, await reader.ReadLineAsync()); + Assert.Null(await reader.ReadLineAsync()); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs index b0d39ede93eea..8da0cbddcdf25 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs @@ -167,7 +167,14 @@ public StreamReader(Stream stream, Encoding? encoding = null, bool detectEncodin _maxCharsPerBuffer = encoding.GetMaxCharCount(bufferSize); _charBuffer = new char[_maxCharsPerBuffer]; _detectEncoding = detectEncodingFromByteOrderMarks; - _checkPreamble = encoding.Preamble.Length > 0; + + // If the preamble length is larger than the byte buffer length, + // we'll never match it and will enter an infinite loop. This + // should never happen in practice, but just in case, we'll skip + // the preamble check for absurdly long preambles. + int preambleLength = encoding.Preamble.Length; + _checkPreamble = preambleLength > 0 && preambleLength <= bufferSize; + _closable = !leaveOpen; } @@ -462,16 +469,15 @@ public override int ReadBlock(Span buffer) private void CompressBuffer(int n) { Debug.Assert(_byteLen >= n, "CompressBuffer was called with a number of bytes greater than the current buffer length. Are two threads using this StreamReader at the same time?"); - Buffer.BlockCopy(_byteBuffer, n, _byteBuffer, 0, _byteLen - n); + _ = _byteBuffer.Length; // allow JIT to prove object is not null + new ReadOnlySpan(_byteBuffer, n, _byteLen - n).CopyTo(_byteBuffer); _byteLen -= n; } private void DetectEncoding() { - if (_byteLen < 2) - { - return; - } + Debug.Assert(_byteLen >= 2, "Caller should've validated that at least 2 bytes were available."); + _detectEncoding = false; bool changedEncoding = false; if (_byteBuffer[0] == 0xFE && _byteBuffer[1] == 0xFF) @@ -541,28 +547,31 @@ private bool IsPreamble() { if (!_checkPreamble) { - return _checkPreamble; + return false; } - ReadOnlySpan preamble = _encoding.Preamble; + return IsPreambleWorker(); // move this call out of the hot path + bool IsPreambleWorker() + { + Debug.Assert(_checkPreamble); + ReadOnlySpan preamble = _encoding.Preamble; - Debug.Assert(_bytePos <= preamble.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?"); - int len = (_byteLen >= (preamble.Length)) ? (preamble.Length - _bytePos) : (_byteLen - _bytePos); + Debug.Assert(_bytePos < preamble.Length, "_compressPreamble was called with the current bytePos greater than the preamble buffer length. Are two threads using this StreamReader at the same time?"); + int len = Math.Min(_byteLen, preamble.Length); - for (int i = 0; i < len; i++, _bytePos++) - { - if (_byteBuffer[_bytePos] != preamble[_bytePos]) + for (int i = _bytePos; i < len; i++) { - _bytePos = 0; - _checkPreamble = false; - break; + if (_byteBuffer[i] != preamble[i]) + { + _bytePos = 0; // preamble match failed; back up to beginning of buffer + _checkPreamble = false; + return false; + } } - } + _bytePos = len; // we've matched all bytes up to this point - Debug.Assert(_bytePos <= preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?"); + Debug.Assert(_bytePos <= preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?"); - if (_checkPreamble) - { if (_bytePos == preamble.Length) { // We have a match @@ -571,9 +580,9 @@ private bool IsPreamble() _checkPreamble = false; _detectEncoding = false; } - } - return _checkPreamble; + return _checkPreamble; + } } internal virtual int ReadBuffer() @@ -586,6 +595,8 @@ internal virtual int ReadBuffer() _byteLen = 0; } + bool eofReached = false; + do { if (_checkPreamble) @@ -596,16 +607,8 @@ internal virtual int ReadBuffer() if (len == 0) { - // EOF but we might have buffered bytes from previous - // attempt to detect preamble that needs to be decoded now - if (_byteLen > 0) - { - _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen); - // Need to zero out the byteLen after we consume these bytes so that we don't keep infinitely hitting this code path - _bytePos = _byteLen = 0; - } - - return _charLen; + eofReached = true; + break; } _byteLen += len; @@ -616,9 +619,10 @@ internal virtual int ReadBuffer() _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length); Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class."); - if (_byteLen == 0) // We're at EOF + if (_byteLen == 0) { - return _charLen; + eofReached = true; + break; } } @@ -642,8 +646,22 @@ internal virtual int ReadBuffer() DetectEncoding(); } - _charLen += _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, _charLen); + Debug.Assert(_charPos == 0 && _charLen == 0, "We shouldn't be trying to decode more data if we made progress in an earlier iteration."); + _charLen = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, 0, flush: false); } while (_charLen == 0); + + if (eofReached) + { + // EOF has been reached - perform final flush. + // We need to reset _bytePos and _byteLen just in case we hadn't + // finished processing the preamble before we reached EOF. + + Debug.Assert(_charPos == 0 && _charLen == 0, "We shouldn't be looking for EOF unless we have an empty char buffer."); + _charLen = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, 0, flush: true); + _bytePos = 0; + _byteLen = 0; + } + return _charLen; } @@ -665,6 +683,7 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) _byteLen = 0; } + bool eofReached = false; int charsRead = 0; // As a perf optimization, we can decode characters DIRECTLY into a @@ -692,23 +711,8 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) if (len == 0) { - // EOF but we might have buffered bytes from previous - // attempt to detect preamble that needs to be decoded now - if (_byteLen > 0) - { - if (readToUserBuffer) - { - charsRead = _decoder.GetChars(new ReadOnlySpan(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false); - _charLen = 0; // StreamReader's buffer is empty. - } - else - { - charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead); - _charLen += charsRead; // Number of chars in StreamReader's buffer. - } - } - - return charsRead; + eofReached = true; + break; } _byteLen += len; @@ -716,13 +720,12 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) else { Debug.Assert(_bytePos == 0, "bytePos can be non zero only when we are trying to _checkPreamble. Are two threads using this StreamReader at the same time?"); - _byteLen = _stream.Read(_byteBuffer, 0, _byteBuffer.Length); - Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! This is a bug in your stream class."); - if (_byteLen == 0) // EOF + if (_byteLen == 0) { + eofReached = true; break; } } @@ -750,19 +753,39 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) readToUserBuffer = userBuffer.Length >= _maxCharsPerBuffer; } - _charPos = 0; + Debug.Assert(charsRead == 0 && _charPos == 0 && _charLen == 0, "We shouldn't be trying to decode more data if we made progress in an earlier iteration."); if (readToUserBuffer) { - charsRead += _decoder.GetChars(new ReadOnlySpan(_byteBuffer, 0, _byteLen), userBuffer.Slice(charsRead), flush: false); - _charLen = 0; // StreamReader's buffer is empty. + charsRead = _decoder.GetChars(new ReadOnlySpan(_byteBuffer, 0, _byteLen), userBuffer, flush: false); } else { - charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, charsRead); - _charLen += charsRead; // Number of chars in StreamReader's buffer. + charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, 0, flush: false); + _charLen = charsRead; // Number of chars in StreamReader's buffer. } } while (charsRead == 0); + if (eofReached) + { + // EOF has been reached - perform final flush. + // We need to reset _bytePos and _byteLen just in case we hadn't + // finished processing the preamble before we reached EOF. + + Debug.Assert(charsRead == 0 && _charPos == 0 && _charLen == 0, "We shouldn't be looking for EOF unless we have an empty char buffer."); + + if (readToUserBuffer) + { + charsRead = _decoder.GetChars(new ReadOnlySpan(_byteBuffer, 0, _byteLen), userBuffer, flush: true); + } + else + { + charsRead = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, 0, flush: true); + _charLen = charsRead; // Number of chars in StreamReader's buffer. + } + _bytePos = 0; + _byteLen = 0; + } + _isBlocked &= charsRead < userBuffer.Length; return charsRead; @@ -791,42 +814,50 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) StringBuilder? sb = null; do { - int i = _charPos; - do + // Look for '\r' or \'n'. + ReadOnlySpan charBufferSpan = _charBuffer.AsSpan(_charPos, _charLen - _charPos); + Debug.Assert(!charBufferSpan.IsEmpty, "ReadBuffer returned > 0 but didn't bump _charLen?"); + + int idxOfNewline = charBufferSpan.IndexOfAny('\r', '\n'); + if (idxOfNewline >= 0) { - char ch = _charBuffer[i]; - // Note the following common line feed chars: - // \n - UNIX \r\n - DOS \r - Mac - if (ch == '\r' || ch == '\n') + string retVal; + if (sb is null) { - string s; - if (sb != null) - { - sb.Append(_charBuffer, _charPos, i - _charPos); - s = sb.ToString(); - } - else - { - s = new string(_charBuffer, _charPos, i - _charPos); - } - _charPos = i + 1; - if (ch == '\r' && (_charPos < _charLen || ReadBuffer() > 0)) + retVal = new string(charBufferSpan.Slice(0, idxOfNewline)); + } + else + { + sb.Append(charBufferSpan.Slice(0, idxOfNewline)); + retVal = StringBuilderCache.GetStringAndRelease(sb); + } + + char matchedChar = charBufferSpan[idxOfNewline]; + _charPos += idxOfNewline + 1; + + // If we found '\r', consume any immediately following '\n'. + if (matchedChar == '\r') + { + if (_charPos < _charLen || ReadBuffer() > 0) { if (_charBuffer[_charPos] == '\n') { _charPos++; } } - return s; } - i++; - } while (i < _charLen); - i = _charLen - _charPos; - sb ??= new StringBuilder(i + 80); - sb.Append(_charBuffer, _charPos, i); + return retVal; + } + + // We didn't find '\r' or '\n'. Add it to the StringBuilder + // and loop until we reach a newline or EOF. + + sb ??= StringBuilderCache.Acquire(charBufferSpan.Length + 80); + sb.Append(charBufferSpan); } while (ReadBuffer() > 0); - return sb.ToString(); + + return StringBuilderCache.GetStringAndRelease(sb); } public override Task ReadLineAsync() => @@ -887,57 +918,56 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) } StringBuilder? sb = null; - do { char[] tmpCharBuffer = _charBuffer; int tmpCharLen = _charLen; int tmpCharPos = _charPos; - int i = tmpCharPos; - do - { - char ch = tmpCharBuffer[i]; + // Look for '\r' or \'n'. + Debug.Assert(tmpCharPos < tmpCharLen, "ReadBuffer returned > 0 but didn't bump _charLen?"); - // Note the following common line feed chars: - // \n - UNIX \r\n - DOS \r - Mac - if (ch == '\r' || ch == '\n') + int idxOfNewline = tmpCharBuffer.AsSpan(tmpCharPos, tmpCharLen - tmpCharPos).IndexOfAny('\r', '\n'); + if (idxOfNewline >= 0) + { + string retVal; + if (sb is null) { - string s; - - if (sb != null) - { - sb.Append(tmpCharBuffer, tmpCharPos, i - tmpCharPos); - s = sb.ToString(); - } - else - { - s = new string(tmpCharBuffer, tmpCharPos, i - tmpCharPos); - } + retVal = new string(tmpCharBuffer, tmpCharPos, idxOfNewline); + } + else + { + sb.Append(tmpCharBuffer, tmpCharPos, idxOfNewline); + retVal = StringBuilderCache.GetStringAndRelease(sb); + } - _charPos = tmpCharPos = i + 1; + tmpCharPos += idxOfNewline; + char matchedChar = tmpCharBuffer[tmpCharPos++]; + _charPos = tmpCharPos; - if (ch == '\r' && (tmpCharPos < tmpCharLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0)) + // If we found '\r', consume any immediately following '\n'. + if (matchedChar == '\r') + { + if (tmpCharPos < tmpCharLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0) { - tmpCharPos = _charPos; - if (_charBuffer[tmpCharPos] == '\n') + if (_charBuffer[_charPos] == '\n') { - _charPos = ++tmpCharPos; + _charPos++; } } - - return s; } - i++; - } while (i < tmpCharLen); + return retVal; + } + + // We didn't find '\r' or '\n'. Add it to the StringBuilder + // and loop until we reach a newline or EOF. - i = tmpCharLen - tmpCharPos; - sb ??= new StringBuilder(i + 80); - sb.Append(tmpCharBuffer, tmpCharPos, i); + sb ??= StringBuilderCache.Acquire(tmpCharLen - tmpCharPos + 80); + sb.Append(tmpCharBuffer, tmpCharPos, tmpCharLen - tmpCharPos); } while (await ReadBufferAsync(cancellationToken).ConfigureAwait(false) > 0); - return sb.ToString(); + return StringBuilderCache.GetStringAndRelease(sb); } public override Task ReadToEndAsync() => ReadToEndAsync(default); @@ -1291,27 +1321,22 @@ private async ValueTask ReadBufferAsync(CancellationToken cancellationToken { _byteLen = 0; } + + bool eofReached = false; + do { if (_checkPreamble) { Debug.Assert(_bytePos <= _encoding.Preamble.Length, "possible bug in _compressPreamble. Are two threads using this StreamReader at the same time?"); int tmpBytePos = _bytePos; - int len = await tmpStream.ReadAsync(new Memory(tmpByteBuffer, tmpBytePos, tmpByteBuffer.Length - tmpBytePos), cancellationToken).ConfigureAwait(false); + int len = await tmpStream.ReadAsync(tmpByteBuffer.AsMemory(tmpBytePos), cancellationToken).ConfigureAwait(false); Debug.Assert(len >= 0, "Stream.Read returned a negative number! This is a bug in your stream class."); if (len == 0) { - // EOF but we might have buffered bytes from previous - // attempt to detect preamble that needs to be decoded now - if (_byteLen > 0) - { - _charLen += _decoder.GetChars(tmpByteBuffer, 0, _byteLen, _charBuffer, _charLen); - // Need to zero out the _byteLen after we consume these bytes so that we don't keep infinitely hitting this code path - _bytePos = 0; _byteLen = 0; - } - - return _charLen; + eofReached = true; + break; } _byteLen += len; @@ -1322,9 +1347,10 @@ private async ValueTask ReadBufferAsync(CancellationToken cancellationToken _byteLen = await tmpStream.ReadAsync(new Memory(tmpByteBuffer), cancellationToken).ConfigureAwait(false); Debug.Assert(_byteLen >= 0, "Stream.Read returned a negative number! Bug in stream class."); - if (_byteLen == 0) // We're at EOF + if (_byteLen == 0) { - return _charLen; + eofReached = true; + break; } } @@ -1348,9 +1374,22 @@ private async ValueTask ReadBufferAsync(CancellationToken cancellationToken DetectEncoding(); } - _charLen += _decoder.GetChars(tmpByteBuffer, 0, _byteLen, _charBuffer, _charLen); + Debug.Assert(_charPos == 0 && _charLen == 0, "We shouldn't be trying to decode more data if we made progress in an earlier iteration."); + _charLen = _decoder.GetChars(tmpByteBuffer, 0, _byteLen, _charBuffer, 0, flush: false); } while (_charLen == 0); + if (eofReached) + { + // EOF has been reached - perform final flush. + // We need to reset _bytePos and _byteLen just in case we hadn't + // finished processing the preamble before we reached EOF. + + Debug.Assert(_charPos == 0 && _charLen == 0, "We shouldn't be looking for EOF unless we have an empty char buffer."); + _charLen = _decoder.GetChars(_byteBuffer, 0, _byteLen, _charBuffer, 0, flush: true); + _bytePos = 0; + _byteLen = 0; + } + return _charLen; } From 56380932ced17fe5357a069d9e49c682e10aa8bd Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 4 Nov 2022 22:01:30 -0400 Subject: [PATCH 2/3] Address PR feedback --- .../src/System/IO/StreamReader.cs | 100 +++++++++++------- 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs index 8da0cbddcdf25..fda51281c6007 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Buffers.Binary; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Text; @@ -469,8 +471,9 @@ public override int ReadBlock(Span buffer) private void CompressBuffer(int n) { Debug.Assert(_byteLen >= n, "CompressBuffer was called with a number of bytes greater than the current buffer length. Are two threads using this StreamReader at the same time?"); - _ = _byteBuffer.Length; // allow JIT to prove object is not null - new ReadOnlySpan(_byteBuffer, n, _byteLen - n).CopyTo(_byteBuffer); + byte[] byteBuffer = _byteBuffer; + _ = byteBuffer.Length; // allow JIT to prove object is not null + new ReadOnlySpan(byteBuffer, n, _byteLen - n).CopyTo(byteBuffer); _byteLen -= n; } @@ -478,20 +481,22 @@ private void DetectEncoding() { Debug.Assert(_byteLen >= 2, "Caller should've validated that at least 2 bytes were available."); + byte[] byteBuffer = _byteBuffer; _detectEncoding = false; bool changedEncoding = false; - if (_byteBuffer[0] == 0xFE && _byteBuffer[1] == 0xFF) + + ushort firstTwoBytes = BinaryPrimitives.ReadUInt16LittleEndian(byteBuffer); + if (firstTwoBytes == 0xFFFE) { // Big Endian Unicode - _encoding = Encoding.BigEndianUnicode; CompressBuffer(2); changedEncoding = true; } - else if (_byteBuffer[0] == 0xFF && _byteBuffer[1] == 0xFE) + else if (firstTwoBytes == 0xFEFF) { // Little Endian Unicode, or possibly little endian UTF32 - if (_byteLen < 4 || _byteBuffer[2] != 0 || _byteBuffer[3] != 0) + if (_byteLen < 4 || byteBuffer[2] != 0 || byteBuffer[3] != 0) { _encoding = Encoding.Unicode; CompressBuffer(2); @@ -504,15 +509,14 @@ private void DetectEncoding() changedEncoding = true; } } - else if (_byteLen >= 3 && _byteBuffer[0] == 0xEF && _byteBuffer[1] == 0xBB && _byteBuffer[2] == 0xBF) + else if (_byteLen >= 3 && firstTwoBytes == 0xBBEF && byteBuffer[2] == 0xBF) { // UTF-8 _encoding = Encoding.UTF8; CompressBuffer(3); changedEncoding = true; } - else if (_byteLen >= 4 && _byteBuffer[0] == 0 && _byteBuffer[1] == 0 && - _byteBuffer[2] == 0xFE && _byteBuffer[3] == 0xFF) + else if (_byteLen >= 4 && firstTwoBytes == 0 && byteBuffer[2] == 0xFE && byteBuffer[3] == 0xFF) { // Big Endian UTF32 _encoding = new UTF32Encoding(bigEndian: true, byteOrderMark: true); @@ -529,7 +533,7 @@ private void DetectEncoding() if (changedEncoding) { _decoder = _encoding.GetDecoder(); - int newMaxCharsPerBuffer = _encoding.GetMaxCharCount(_byteBuffer.Length); + int newMaxCharsPerBuffer = _encoding.GetMaxCharCount(byteBuffer.Length); if (newMaxCharsPerBuffer > _maxCharsPerBuffer) { _charBuffer = new char[newMaxCharsPerBuffer]; @@ -811,7 +815,7 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) } } - StringBuilder? sb = null; + var vsb = new ValueStringBuilder(stackalloc char[256]); do { // Look for '\r' or \'n'. @@ -822,14 +826,14 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) if (idxOfNewline >= 0) { string retVal; - if (sb is null) + if (vsb.Length == 0) { retVal = new string(charBufferSpan.Slice(0, idxOfNewline)); } else { - sb.Append(charBufferSpan.Slice(0, idxOfNewline)); - retVal = StringBuilderCache.GetStringAndRelease(sb); + retVal = string.Concat(vsb.AsSpan(), charBufferSpan.Slice(0, idxOfNewline)); + vsb.Dispose(); } char matchedChar = charBufferSpan[idxOfNewline]; @@ -853,11 +857,10 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) // We didn't find '\r' or '\n'. Add it to the StringBuilder // and loop until we reach a newline or EOF. - sb ??= StringBuilderCache.Acquire(charBufferSpan.Length + 80); - sb.Append(charBufferSpan); + vsb.Append(charBufferSpan); } while (ReadBuffer() > 0); - return StringBuilderCache.GetStringAndRelease(sb); + return vsb.ToString(); } public override Task ReadLineAsync() => @@ -917,38 +920,40 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) return null; } - StringBuilder? sb = null; + string retVal; + char[]? arrayPoolBuffer = null; + int arrayPoolBufferPos = 0; + do { - char[] tmpCharBuffer = _charBuffer; - int tmpCharLen = _charLen; - int tmpCharPos = _charPos; + char[] charBuffer = _charBuffer; + int charLen = _charLen; + int charPos = _charPos; // Look for '\r' or \'n'. - Debug.Assert(tmpCharPos < tmpCharLen, "ReadBuffer returned > 0 but didn't bump _charLen?"); + Debug.Assert(charPos < charLen, "ReadBuffer returned > 0 but didn't bump _charLen?"); - int idxOfNewline = tmpCharBuffer.AsSpan(tmpCharPos, tmpCharLen - tmpCharPos).IndexOfAny('\r', '\n'); + int idxOfNewline = charBuffer.AsSpan(charPos, charLen - charPos).IndexOfAny('\r', '\n'); if (idxOfNewline >= 0) { - string retVal; - if (sb is null) + if (arrayPoolBuffer is null) { - retVal = new string(tmpCharBuffer, tmpCharPos, idxOfNewline); + retVal = new string(charBuffer, charPos, idxOfNewline); } else { - sb.Append(tmpCharBuffer, tmpCharPos, idxOfNewline); - retVal = StringBuilderCache.GetStringAndRelease(sb); + retVal = string.Concat(arrayPoolBuffer.AsSpan(0, arrayPoolBufferPos), charBuffer.AsSpan(charPos, idxOfNewline)); + ArrayPool.Shared.Return(arrayPoolBuffer); } - tmpCharPos += idxOfNewline; - char matchedChar = tmpCharBuffer[tmpCharPos++]; - _charPos = tmpCharPos; + charPos += idxOfNewline; + char matchedChar = charBuffer[charPos++]; + _charPos = charPos; // If we found '\r', consume any immediately following '\n'. if (matchedChar == '\r') { - if (tmpCharPos < tmpCharLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0) + if (charPos < charLen || (await ReadBufferAsync(cancellationToken).ConfigureAwait(false)) > 0) { if (_charBuffer[_charPos] == '\n') { @@ -960,14 +965,35 @@ private int ReadBuffer(Span userBuffer, out bool readToUserBuffer) return retVal; } - // We didn't find '\r' or '\n'. Add it to the StringBuilder + // We didn't find '\r' or '\n'. Add the read data to the pooled buffer // and loop until we reach a newline or EOF. + if (arrayPoolBuffer is null) + { + arrayPoolBuffer = ArrayPool.Shared.Rent(charLen - charPos + 80); + } + else if ((arrayPoolBuffer.Length - arrayPoolBufferPos) < (charLen - charPos)) + { + char[] newBuffer = ArrayPool.Shared.Rent(checked(arrayPoolBufferPos + charLen - charPos)); + arrayPoolBuffer.AsSpan(0, arrayPoolBufferPos).CopyTo(newBuffer); + ArrayPool.Shared.Return(arrayPoolBuffer); + arrayPoolBuffer = newBuffer; + } + charBuffer.AsSpan(charPos, charLen - charPos).CopyTo(arrayPoolBuffer.AsSpan(arrayPoolBufferPos)); + arrayPoolBufferPos += charLen - charPos; + } + while (await ReadBufferAsync(cancellationToken).ConfigureAwait(false) > 0); - sb ??= StringBuilderCache.Acquire(tmpCharLen - tmpCharPos + 80); - sb.Append(tmpCharBuffer, tmpCharPos, tmpCharLen - tmpCharPos); - } while (await ReadBufferAsync(cancellationToken).ConfigureAwait(false) > 0); + if (arrayPoolBuffer is not null) + { + retVal = new string(arrayPoolBuffer, 0, arrayPoolBufferPos); + ArrayPool.Shared.Return(arrayPoolBuffer); + } + else + { + retVal = string.Empty; + } - return StringBuilderCache.GetStringAndRelease(sb); + return retVal; } public override Task ReadToEndAsync() => ReadToEndAsync(default); From ff4b2451f6826aef886016fb9ab3ea21a5ada0b1 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 5 Nov 2022 09:36:21 -0400 Subject: [PATCH 3/3] Remove erroneous asserts --- .../src/System/IO/StreamReader.cs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs index fda51281c6007..472e0c4f91060 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/StreamReader.cs @@ -1226,20 +1226,12 @@ internal override async ValueTask ReadAsyncInternal(Memory buffer, Ca _charPos = 0; if (readToUserBuffer) { - n += _decoder.GetChars(new ReadOnlySpan(tmpByteBuffer, 0, _byteLen), buffer.Span.Slice(charsRead), flush: false); - - // Why did the bytes yield no chars? - Debug.Assert(n > 0); - + n = _decoder.GetChars(new ReadOnlySpan(tmpByteBuffer, 0, _byteLen), buffer.Span.Slice(charsRead), flush: false); _charLen = 0; // StreamReader's buffer is empty. } else { n = _decoder.GetChars(tmpByteBuffer, 0, _byteLen, _charBuffer, 0); - - // Why did the bytes yield no chars? - Debug.Assert(n > 0); - _charLen += n; // Number of chars in StreamReader's buffer. } } while (n == 0);