Skip to content

Commit

Permalink
Fix File.ReadAllBytes{Async} for virtual file system files (dotnet/co…
Browse files Browse the repository at this point in the history
…refx#28388)

Some file systems, like procfs, can return 0 for a file's length, even though it actually contains on-demand computed contents.  This breaks File.ReadAllBytes{Async}, which currently assumes that a length of 0 means the file is empty.  This commit fixes it by changing the assumption to mean that a length of 0 means we can't depend on the Length and instead need to read until EOF.

Commit migrated from dotnet/corefx@77bf407
  • Loading branch information
stephentoub authored Mar 23, 2018
1 parent 88ffbb9 commit 0b2f137
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 13 deletions.
109 changes: 96 additions & 13 deletions src/libraries/System.IO.FileSystem/src/System/IO/File.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace System.IO
// routines such as Delete, etc.
public static class File
{
private const int MaxByteArrayLength = 0x7FFFFFC7;
private static Encoding s_UTF8NoBOM;

internal const int DefaultBufferSize = 4096;
Expand Down Expand Up @@ -324,7 +325,15 @@ public static byte[] ReadAllBytes(string path)
{
long fileLength = fs.Length;
if (fileLength > int.MaxValue)
{
throw new IOException(SR.IO_FileTooLong2GB);
}
else if (fileLength == 0)
{
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content.
// Thus we need to assume 0 doesn't mean empty.
return ReadAllBytesUnknownLength(fs);
}

int index = 0;
int count = (int)fileLength;
Expand All @@ -341,6 +350,50 @@ public static byte[] ReadAllBytes(string path)
}
}

private static byte[] ReadAllBytesUnknownLength(FileStream fs)
{
byte[] rentedArray = null;
Span<byte> buffer = stackalloc byte[512];
try
{
int bytesRead = 0;
while (true)
{
if (bytesRead == buffer.Length)
{
uint newLength = (uint)buffer.Length * 2;
if (newLength > MaxByteArrayLength)
{
newLength = (uint)Math.Max(MaxByteArrayLength, buffer.Length + 1);
}

byte[] tmp = ArrayPool<byte>.Shared.Rent((int)newLength);
buffer.CopyTo(tmp);
if (rentedArray != null)
{
ArrayPool<byte>.Shared.Return(rentedArray);
}
buffer = rentedArray = tmp;
}

Debug.Assert(bytesRead < buffer.Length);
int n = fs.Read(buffer.Slice(bytesRead));
if (n == 0)
{
return buffer.Slice(0, bytesRead).ToArray();
}
bytesRead += n;
}
}
finally
{
if (rentedArray != null)
{
ArrayPool<byte>.Shared.Return(rentedArray);
}
}
}

public static void WriteAllBytes(string path, byte[] bytes)
{
if (path == null)
Expand Down Expand Up @@ -709,31 +762,23 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding
return Task.FromCanceled<byte[]>(cancellationToken);
}

FileStream fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize,
var fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream
FileOptions.Asynchronous | FileOptions.SequentialScan);

bool returningInternalTask = false;
try
{
long fileLength = fs.Length;
if (cancellationToken.IsCancellationRequested)
{
return Task.FromCanceled<byte[]>(cancellationToken);
}

if (fileLength > int.MaxValue)
{
return Task.FromException<byte[]>(new IOException(SR.IO_FileTooLong2GB));
}

if (fileLength == 0)
{
return Task.FromResult(Array.Empty<byte>());
}

returningInternalTask = true;
return InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken);
return fileLength > 0 ?
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken);
}
finally
{
Expand Down Expand Up @@ -765,6 +810,44 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c
}
}

private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
{
byte[] rentedArray = ArrayPool<byte>.Shared.Rent(512);
try
{
int bytesRead = 0;
while (true)
{
if (bytesRead == rentedArray.Length)
{
uint newLength = (uint)rentedArray.Length * 2;
if (newLength > MaxByteArrayLength)
{
newLength = (uint)Math.Max(MaxByteArrayLength, rentedArray.Length + 1);
}

byte[] tmp = ArrayPool<byte>.Shared.Rent((int)newLength);
Buffer.BlockCopy(rentedArray, 0, tmp, 0, bytesRead);
ArrayPool<byte>.Shared.Return(rentedArray);
rentedArray = tmp;
}

Debug.Assert(bytesRead < rentedArray.Length);
int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false);
if (n == 0)
{
return rentedArray.AsSpan().Slice(0, bytesRead).ToArray();
}
bytesRead += n;
}
}
finally
{
fs.Dispose();
ArrayPool<byte>.Shared.Return(rentedArray);
}
}

public static Task WriteAllBytesAsync(string path, byte[] bytes, CancellationToken cancellationToken = default(CancellationToken))
{
if (path == null)
Expand Down
53 changes: 53 additions & 0 deletions src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using Xunit;

namespace System.IO.Tests
Expand Down Expand Up @@ -120,5 +121,57 @@ public void WriteToReadOnlyFile()
File.SetAttributes(path, FileAttributes.Normal);
}
}

[Fact]
public void EmptyFile_ReturnsEmptyArray()
{
string path = GetTestFilePath();
File.Create(path).Dispose();
Assert.Equal(0, File.ReadAllBytes(path).Length);
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/cmdline")]
[InlineData("/proc/version")]
[InlineData("/proc/filesystems")]
public void ProcFs_EqualsReadAllText(string path)
{
byte[] bytes = null;
string text = null;

const int NumTries = 3; // some of these could theoretically change between reads, so allow retries just in case
for (int i = 1; i <= NumTries; i++)
{
try
{
bytes = File.ReadAllBytes(path);
text = File.ReadAllText(path);
Assert.Equal(text, Encoding.UTF8.GetString(bytes));
}
catch when (i < NumTries) { }
}
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
public void ReadAllBytes_ProcFs_Uptime_ContainsTwoNumbers()
{
string text = Encoding.UTF8.GetString(File.ReadAllBytes("/proc/uptime"));
string[] parts = text.Split(new [] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
Assert.Equal(2, parts.Length);
Assert.True(double.TryParse(parts[0].Trim(), out _));
Assert.True(double.TryParse(parts[1].Trim(), out _));
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/meminfo")]
[InlineData("/proc/stat")]
[InlineData("/proc/cpuinfo")]
public void ProcFs_NotEmpty(string path)
{
Assert.InRange(File.ReadAllBytes(path).Length, 1, int.MaxValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -134,5 +134,57 @@ public async Task WriteToReadOnlyFileAsync()
File.SetAttributes(path, FileAttributes.Normal);
}
}

[Fact]
public async Task EmptyFile_ReturnsEmptyArray()
{
string path = GetTestFilePath();
File.Create(path).Dispose();
Assert.Equal(0, (await File.ReadAllBytesAsync(path)).Length);
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/cmdline")]
[InlineData("/proc/version")]
[InlineData("/proc/filesystems")]
public async Task ProcFs_EqualsReadAllText(string path)
{
byte[] bytes = null;
string text = null;

const int NumTries = 3; // some of these could theoretically change between reads, so allow retries just in case
for (int i = 1; i <= NumTries; i++)
{
try
{
bytes = await File.ReadAllBytesAsync(path);
text = await File.ReadAllTextAsync(path);
Assert.Equal(text, Encoding.UTF8.GetString(bytes));
}
catch when (i < NumTries) { }
}
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
public async Task ReadAllBytes_ProcFs_Uptime_ContainsTwoNumbers()
{
string text = Encoding.UTF8.GetString(await File.ReadAllBytesAsync("/proc/uptime"));
string[] parts = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);
Assert.Equal(2, parts.Length);
Assert.True(double.TryParse(parts[0].Trim(), out _));
Assert.True(double.TryParse(parts[1].Trim(), out _));
}

[Theory]
[PlatformSpecific(TestPlatforms.Linux)]
[InlineData("/proc/meminfo")]
[InlineData("/proc/stat")]
[InlineData("/proc/cpuinfo")]
public async Task ProcFs_NotEmpty(string path)
{
Assert.InRange((await File.ReadAllBytesAsync(path)).Length, 1, int.MaxValue);
}
}
}

0 comments on commit 0b2f137

Please sign in to comment.