From 7be7de1c29a01505b89b84b624578ebb210fcbf9 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Wed, 1 Sep 2021 17:06:54 +0200 Subject: [PATCH 01/11] Fix the hash function: remove unnecessary allocations. --- src/Tasks/Hash.cs | 101 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 22 deletions(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 81699764e51..2f0c9a80ef8 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -18,8 +18,6 @@ namespace Microsoft.Build.Tasks /// public class Hash : TaskExtension { - private const char ItemSeparatorCharacter = '\u2028'; - /// /// Items from which to generate a hash. /// @@ -37,61 +35,120 @@ public class Hash : TaskExtension [Output] public string HashResult { get; set; } + private static readonly char ItemSeparatorCharacter = '\u2028'; + + private static readonly Encoding encoding = Encoding.UTF8; + + private static readonly byte[] ItemSeparatorCharacterBytes = encoding.GetBytes(new char[] { ItemSeparatorCharacter }); + + private const int sha1BufferSize = 512; + + private const int maxInputChunkLength = 256; + /// /// Execute the task. /// public override bool Execute() { - if (ItemsToHash?.Length > 0) + if (ItemsToHash != null && ItemsToHash.Length > 0) { using (var sha1 = SHA1.Create()) { - var concatenatedItemStringSize = ComputeStringSize(ItemsToHash); + // Buffer in which bytes of the strings are to be stored until their number reachs the limit size. + // Once the limit is reached, the sha1.TransformBlock is be run on all the bytes of this buffer. + byte[] sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); - var hashStringSize = sha1.HashSize; + int maxItemStringSize = encoding.GetMaxByteCount(Math.Min(ComputeMaxItemSpecLength(ItemsToHash), maxInputChunkLength)); + byte[] bytesBuffer = System.Buffers.ArrayPool.Shared.Rent(maxItemStringSize); - using (var stringBuilder = new ReuseableStringBuilder(Math.Max(concatenatedItemStringSize, hashStringSize))) + int bufferLength = 0; + for (int i = 0; i < ItemsToHash.Length; i++) { - foreach (var item in ItemsToHash) + string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; + + // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer. + int itemSpecLength = itemSpec.Length; + int itemSpecPosition = 0; + while (itemSpecLength > 0) { - string itemSpec = item.ItemSpec; - stringBuilder.Append(IgnoreCase ? itemSpec.ToUpperInvariant() : itemSpec); - stringBuilder.Append(ItemSeparatorCharacter); + int charsToProcess = Math.Min(itemSpecLength, maxInputChunkLength); + int bytesNumber = encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, bytesBuffer, 0); + itemSpecPosition += charsToProcess; + itemSpecLength -= charsToProcess; + + AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, bytesBuffer, bytesNumber); } - var hash = sha1.ComputeHash(Encoding.UTF8.GetBytes(stringBuilder.ToString())); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, ItemSeparatorCharacterBytes, ItemSeparatorCharacterBytes.Length); + } - stringBuilder.Clear(); + sha1.TransformFinalBlock(sha1Buffer, 0, bufferLength); - foreach (var b in hash) + using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) + { + foreach (var b in sha1.Hash) { stringBuilder.Append(b.ToString("x2")); } - HashResult = stringBuilder.ToString(); } } } - return true; } - private int ComputeStringSize(ITaskItem[] itemsToHash) + /// + /// Add bytes to the sha1 buffer. Once the limit size is reached, sha1.TransformBlock is called and the buffer is flushed. + /// + /// Hashing algorithm sha1. + /// The sha1 buffer which stores bytes of the strings. Bytes should be added to this buffer. + /// Number of used bytes of the sha1 buffer. + /// The size of sha1 buffer. + /// Bytes buffer which contains bytes to be written to sha1 buffer. + /// Amount of bytes that are to be added to sha1 buffer. + /// + private bool AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferLength, int sha1BufferSize, byte[] bytesBuffer, int bytesNumber) { - if (itemsToHash.Length == 0) + int bytesProcessedNumber = 0; + while (sha1BufferLength + bytesNumber >= sha1BufferSize) { - return 0; + int sha1BufferFreeSpace = sha1BufferSize - sha1BufferLength; + + if (sha1BufferLength == 0) + { + // If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer. + // Pass the bytes to TransformBlock right away. + sha1.TransformBlock(bytesBuffer, 0, sha1BufferSize, null, 0); + } + else + { + Array.Copy(bytesBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferLength, sha1BufferFreeSpace); + sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0); + sha1BufferLength = 0; + } + + bytesProcessedNumber += sha1BufferFreeSpace; + bytesNumber -= sha1BufferFreeSpace; } - var totalItemSize = 0; + Array.Copy(bytesBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferLength, bytesNumber); + sha1BufferLength += bytesNumber; + return true; + } + + private int ComputeMaxItemSpecLength(ITaskItem[] itemsToHash) + { + int maxItemSpecLength = 0; foreach (var item in itemsToHash) { - totalItemSize += item.ItemSpec.Length; + if (item.ItemSpec.Length > maxItemSpecLength) + { + maxItemSpecLength = item.ItemSpec.Length; + } } - // Add one ItemSeparatorCharacter per item - return totalItemSize + itemsToHash.Length; + return maxItemSpecLength; } } } From 21e03ac254cfb0f45800aee77bd49736dc0fa455 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Wed, 22 Dec 2021 21:17:55 +0100 Subject: [PATCH 02/11] Addressing review comments. --- src/Tasks/Hash.cs | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 2f0c9a80ef8..cde42a3f6d8 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -58,8 +58,7 @@ public override bool Execute() // Once the limit is reached, the sha1.TransformBlock is be run on all the bytes of this buffer. byte[] sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); - int maxItemStringSize = encoding.GetMaxByteCount(Math.Min(ComputeMaxItemSpecLength(ItemsToHash), maxInputChunkLength)); - byte[] bytesBuffer = System.Buffers.ArrayPool.Shared.Rent(maxItemStringSize); + byte[] bytesBuffer = System.Buffers.ArrayPool.Shared.Rent(encoding.GetMaxByteCount(maxInputChunkLength)); int bufferLength = 0; for (int i = 0; i < ItemsToHash.Length; i++) @@ -106,8 +105,7 @@ public override bool Execute() /// The size of sha1 buffer. /// Bytes buffer which contains bytes to be written to sha1 buffer. /// Amount of bytes that are to be added to sha1 buffer. - /// - private bool AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferLength, int sha1BufferSize, byte[] bytesBuffer, int bytesNumber) + private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferLength, int sha1BufferSize, byte[] bytesBuffer, int bytesNumber) { int bytesProcessedNumber = 0; while (sha1BufferLength + bytesNumber >= sha1BufferSize) @@ -133,22 +131,6 @@ private bool AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1Buff Array.Copy(bytesBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferLength, bytesNumber); sha1BufferLength += bytesNumber; - - return true; - } - - private int ComputeMaxItemSpecLength(ITaskItem[] itemsToHash) - { - int maxItemSpecLength = 0; - foreach (var item in itemsToHash) - { - if (item.ItemSpec.Length > maxItemSpecLength) - { - maxItemSpecLength = item.ItemSpec.Length; - } - } - - return maxItemSpecLength; } } } From 01d37cfc3a9c547278db7fbb46ed1af3041a9329 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Sat, 1 Jan 2022 17:37:20 +0100 Subject: [PATCH 03/11] Fix bug. --- src/Tasks/Hash.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index cde42a3f6d8..73c12c470ba 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -116,7 +116,7 @@ private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1Buff { // If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer. // Pass the bytes to TransformBlock right away. - sha1.TransformBlock(bytesBuffer, 0, sha1BufferSize, null, 0); + sha1.TransformBlock(bytesBuffer, bytesProcessedNumber, sha1BufferSize, null, 0); } else { From affd674f40a49c358d6868667c752bd99a26b3a8 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Mon, 3 Jan 2022 17:25:45 +0100 Subject: [PATCH 04/11] Addressing review comments. --- src/Tasks/Hash.cs | 52 ++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 73c12c470ba..81cb199b21a 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -35,11 +35,11 @@ public class Hash : TaskExtension [Output] public string HashResult { get; set; } - private static readonly char ItemSeparatorCharacter = '\u2028'; + private static readonly char s_itemSeparatorCharacter = '\u2028'; - private static readonly Encoding encoding = Encoding.UTF8; + private static readonly Encoding s_encoding = Encoding.UTF8; - private static readonly byte[] ItemSeparatorCharacterBytes = encoding.GetBytes(new char[] { ItemSeparatorCharacter }); + private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { s_itemSeparatorCharacter }); private const int sha1BufferSize = 512; @@ -50,15 +50,15 @@ public class Hash : TaskExtension /// public override bool Execute() { - if (ItemsToHash != null && ItemsToHash.Length > 0) + if (ItemsToHash?.Length > 0) { using (var sha1 = SHA1.Create()) { - // Buffer in which bytes of the strings are to be stored until their number reachs the limit size. + // Buffer in which bytes of the strings are to be stored until their number reaches the limit size. // Once the limit is reached, the sha1.TransformBlock is be run on all the bytes of this buffer. byte[] sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); - byte[] bytesBuffer = System.Buffers.ArrayPool.Shared.Rent(encoding.GetMaxByteCount(maxInputChunkLength)); + byte[] byteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(maxInputChunkLength)); int bufferLength = 0; for (int i = 0; i < ItemsToHash.Length; i++) @@ -66,19 +66,15 @@ public override bool Execute() string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer. - int itemSpecLength = itemSpec.Length; - int itemSpecPosition = 0; - while (itemSpecLength > 0) + for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += maxInputChunkLength) { - int charsToProcess = Math.Min(itemSpecLength, maxInputChunkLength); - int bytesNumber = encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, bytesBuffer, 0); - itemSpecPosition += charsToProcess; - itemSpecLength -= charsToProcess; + int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, maxInputChunkLength); + int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, byteBuffer, 0); - AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, bytesBuffer, bytesNumber); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, byteBuffer, byteCount); } - AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, ItemSeparatorCharacterBytes, ItemSeparatorCharacterBytes.Length); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); } sha1.TransformFinalBlock(sha1Buffer, 0, bufferLength); @@ -101,36 +97,36 @@ public override bool Execute() /// /// Hashing algorithm sha1. /// The sha1 buffer which stores bytes of the strings. Bytes should be added to this buffer. - /// Number of used bytes of the sha1 buffer. + /// Number of used bytes of the sha1 buffer. /// The size of sha1 buffer. - /// Bytes buffer which contains bytes to be written to sha1 buffer. - /// Amount of bytes that are to be added to sha1 buffer. - private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferLength, int sha1BufferSize, byte[] bytesBuffer, int bytesNumber) + /// Bytes buffer which contains bytes to be written to sha1 buffer. + /// Amount of bytes that are to be added to sha1 buffer. + private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount) { int bytesProcessedNumber = 0; - while (sha1BufferLength + bytesNumber >= sha1BufferSize) + while (sha1BufferPosition + byteCount >= sha1BufferSize) { - int sha1BufferFreeSpace = sha1BufferSize - sha1BufferLength; + int sha1BufferFreeSpace = sha1BufferSize - sha1BufferPosition; - if (sha1BufferLength == 0) + if (sha1BufferPosition == 0) { // If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer. // Pass the bytes to TransformBlock right away. - sha1.TransformBlock(bytesBuffer, bytesProcessedNumber, sha1BufferSize, null, 0); + sha1.TransformBlock(byteBuffer, bytesProcessedNumber, sha1BufferSize, null, 0); } else { - Array.Copy(bytesBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferLength, sha1BufferFreeSpace); + Array.Copy(byteBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferPosition, sha1BufferFreeSpace); sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0); - sha1BufferLength = 0; + sha1BufferPosition = 0; } bytesProcessedNumber += sha1BufferFreeSpace; - bytesNumber -= sha1BufferFreeSpace; + byteCount -= sha1BufferFreeSpace; } - Array.Copy(bytesBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferLength, bytesNumber); - sha1BufferLength += bytesNumber; + Array.Copy(byteBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferPosition, byteCount); + sha1BufferPosition += byteCount; } } } From dd1939ddb8f597edd5bbb9622b79afe8d253f0d0 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Mon, 3 Jan 2022 17:37:46 +0100 Subject: [PATCH 05/11] Addressing review comments - 2. --- src/Tasks/Hash.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 81cb199b21a..a633e5b332e 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -60,7 +60,7 @@ public override bool Execute() byte[] byteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(maxInputChunkLength)); - int bufferLength = 0; + int sha1BufferPosition = 0; for (int i = 0; i < ItemsToHash.Length; i++) { string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; @@ -71,13 +71,13 @@ public override bool Execute() int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, maxInputChunkLength); int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, byteBuffer, 0); - AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, byteBuffer, byteCount); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, byteBuffer, byteCount); } - AddBytesToSha1Buffer(sha1, sha1Buffer, ref bufferLength, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); } - sha1.TransformFinalBlock(sha1Buffer, 0, bufferLength); + sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition); using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) { From 15850f0ec467550601342379a352a04b6776b730 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Tue, 11 Jan 2022 10:13:46 +0100 Subject: [PATCH 06/11] Addressing review comments - 3. --- src/Tasks/Hash.cs | 78 ++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index a633e5b332e..07cb5213d97 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -18,6 +18,20 @@ namespace Microsoft.Build.Tasks /// public class Hash : TaskExtension { + private static readonly char s_itemSeparatorCharacter = '\u2028'; + + private static readonly Encoding s_encoding = Encoding.UTF8; + + private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { s_itemSeparatorCharacter }); + + // Size of buffer where bytes of the strings are stored until sha1.TransformBlock is be run on them. + // It is needed to get a balance between amount of costly sha1.TransformBlock calls and amount of allocated memory. + private const int sha1BufferSize = 512; + + // Size of chunks in which ItemSpecs would be cut. + // String of size 169 gives no more than ~512 bytes in utf8 encoding. + private const int maxInputChunkLength = 169; + /// /// Items from which to generate a hash. /// @@ -35,16 +49,6 @@ public class Hash : TaskExtension [Output] public string HashResult { get; set; } - private static readonly char s_itemSeparatorCharacter = '\u2028'; - - private static readonly Encoding s_encoding = Encoding.UTF8; - - private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { s_itemSeparatorCharacter }); - - private const int sha1BufferSize = 512; - - private const int maxInputChunkLength = 256; - /// /// Execute the task. /// @@ -56,36 +60,54 @@ public override bool Execute() { // Buffer in which bytes of the strings are to be stored until their number reaches the limit size. // Once the limit is reached, the sha1.TransformBlock is be run on all the bytes of this buffer. - byte[] sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); + byte[] sha1Buffer = null; - byte[] byteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(maxInputChunkLength)); + // Buffer in which bytes of items' ItemSpec are to be stored. + byte[] byteBuffer = null; - int sha1BufferPosition = 0; - for (int i = 0; i < ItemsToHash.Length; i++) + try { - string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; + sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); + byteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(maxInputChunkLength)); - // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer. - for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += maxInputChunkLength) + int sha1BufferPosition = 0; + for (int i = 0; i < ItemsToHash.Length; i++) { - int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, maxInputChunkLength); - int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, byteBuffer, 0); + string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; - AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, byteBuffer, byteCount); - } + // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer. + for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += maxInputChunkLength) + { + int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, maxInputChunkLength); + int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, byteBuffer, 0); - AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); - } + AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, byteBuffer, byteCount); + } - sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition); + AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); + } - using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) + sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition); + + using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) + { + foreach (var b in sha1.Hash) + { + stringBuilder.Append(b.ToString("x2")); + } + HashResult = stringBuilder.ToString(); + } + } + finally { - foreach (var b in sha1.Hash) + if (sha1Buffer != null) + { + System.Buffers.ArrayPool.Shared.Return(sha1Buffer); + } + if (byteBuffer != null) { - stringBuilder.Append(b.ToString("x2")); + System.Buffers.ArrayPool.Shared.Return(byteBuffer); } - HashResult = stringBuilder.ToString(); } } } From f086dba400a5476470972618be56af230cead02f Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Tue, 11 Jan 2022 11:27:50 +0100 Subject: [PATCH 07/11] Add unit tests. --- src/Tasks.UnitTests/Hash_Tests.cs | 95 +++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/Tasks.UnitTests/Hash_Tests.cs b/src/Tasks.UnitTests/Hash_Tests.cs index e2b64378d47..7f6a08ec4ad 100644 --- a/src/Tasks.UnitTests/Hash_Tests.cs +++ b/src/Tasks.UnitTests/Hash_Tests.cs @@ -43,6 +43,100 @@ public void HashTaskEmptyInputTest() Assert.Null(zeroLengthItemsHash); } + [Fact] + public void HashTaskLargeInputCountTest() + { + // This hash was pre-computed. If the implementation changes it may need to be adjusted. + var expectedHash = "8a996bbcb5e481981c2fba7ac408e20d0b4360a5"; + + ITaskItem[] itemsToHash = new ITaskItem[1000]; + for (int i = 0; i < itemsToHash.Length; i++) + { + itemsToHash[i] = new TaskItem($"Item{i}"); + } + + var actualHash = ExecuteHashTask(itemsToHash); + Assert.Equal(expectedHash, actualHash); + } + + [Fact] + public void HashTaskLargeInputSizeTest() + { + // This hash was pre-computed. If the implementation changes it may need to be adjusted. + var expectedHash = "0509142dd3d3a733f30a52a0eec37cd727d46122"; + + string[] array = new string[1000]; + for (int i = 0; i < array.Length; i++) + { + array[i] = $"Item{i}"; + } + ITaskItem[] itemsToHash = new ITaskItem[] { new TaskItem(string.Join("", array)) }; + + var actualHash = ExecuteHashTask(itemsToHash); + Assert.Equal(expectedHash, actualHash); + } + + [Fact] + public void HashTaskLargeInputCountAndSizeTest() + { + // This hash was pre-computed. If the implementation changes it may need to be adjusted. + var expectedHash = "be23ae7b09f1e14fa5a17de87ddae2c3ec62f967"; + + int inputSize = 1000; + string[][] array = new string[inputSize][]; + for (int i = 0; i < array.Length; i++) + { + array[i] = new string[inputSize]; + for (int j = 0; j < array[i].Length; j++) + { + array[i][j] = $"Item{i}{j}"; + } + } + + ITaskItem[] itemsToHash = new ITaskItem[inputSize]; + for (int i = 0; i < itemsToHash.Length; i++) + { + itemsToHash[i] = new TaskItem(string.Join("", array[i])); + } + + var actualHash = ExecuteHashTask(itemsToHash); + + Assert.Equal(expectedHash, actualHash); + } + +#pragma warning disable CA5350 + [Fact] + public void HashTaskDifferentInputSizesTest() + { + int maxInputSize = 2000; + string input = ""; + using (var sha1 = System.Security.Cryptography.SHA1.Create()) + { + using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) + { + for (int i = 0; i < maxInputSize; i++) + { + input += "a"; + ITaskItem[] itemsToHash = new ITaskItem[] { new TaskItem(input) }; + + var actualHash = ExecuteHashTask(itemsToHash); + + byte[] hash = sha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input+'\u2028')); + stringBuilder.Clear(); + foreach (var b in hash) + { + stringBuilder.Append(b.ToString("x2")); + } + string expectedHash = stringBuilder.ToString(); + + Assert.Equal(expectedHash, actualHash); + } + } + } + + } +#pragma warning restore CA5350 + [Fact] public void HashTaskIgnoreCaseTest() { @@ -88,5 +182,6 @@ private string ExecuteHashTask(ITaskItem[] items, bool ignoreCase = false) return hashTask.HashResult; } + } } From 19c60f4854fbcdc239d14ed237fdccdb90c4c95b Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Wed, 12 Jan 2022 13:07:17 +0100 Subject: [PATCH 08/11] Fix build error. --- src/Tasks/Hash.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 07cb5213d97..4f34fa794b4 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -18,7 +18,7 @@ namespace Microsoft.Build.Tasks /// public class Hash : TaskExtension { - private static readonly char s_itemSeparatorCharacter = '\u2028'; + private const char s_itemSeparatorCharacter = '\u2028'; private static readonly Encoding s_encoding = Encoding.UTF8; From 67b0a901ad69fead6762ed8d1e554f7ba825e810 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Wed, 12 Jan 2022 17:07:09 +0100 Subject: [PATCH 09/11] Addressing review comments - 4. --- src/Tasks.UnitTests/Hash_Tests.cs | 2 ++ src/Tasks/Hash.cs | 48 +++++++++++++++---------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/Tasks.UnitTests/Hash_Tests.cs b/src/Tasks.UnitTests/Hash_Tests.cs index 7f6a08ec4ad..02cad2ed8b3 100644 --- a/src/Tasks.UnitTests/Hash_Tests.cs +++ b/src/Tasks.UnitTests/Hash_Tests.cs @@ -105,6 +105,8 @@ public void HashTaskLargeInputCountAndSizeTest() } #pragma warning disable CA5350 + // This test verifies that hash computes correctly for various numbers of characters. + // We would like to process edge of the buffer use cases regardless on the size of the buffer. [Fact] public void HashTaskDifferentInputSizesTest() { diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 4f34fa794b4..06176070dd1 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -5,7 +5,6 @@ using System.Security.Cryptography; using System.Text; using Microsoft.Build.Framework; -using Microsoft.Build.Shared; namespace Microsoft.Build.Tasks { @@ -18,19 +17,17 @@ namespace Microsoft.Build.Tasks /// public class Hash : TaskExtension { - private const char s_itemSeparatorCharacter = '\u2028'; - + private const char ItemSeparatorCharacter = '\u2028'; private static readonly Encoding s_encoding = Encoding.UTF8; - - private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { s_itemSeparatorCharacter }); + private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { ItemSeparatorCharacter }); // Size of buffer where bytes of the strings are stored until sha1.TransformBlock is be run on them. // It is needed to get a balance between amount of costly sha1.TransformBlock calls and amount of allocated memory. - private const int sha1BufferSize = 512; + private const int Sha1BufferSize = 512; // Size of chunks in which ItemSpecs would be cut. - // String of size 169 gives no more than ~512 bytes in utf8 encoding. - private const int maxInputChunkLength = 169; + // We have chosen this length so itemSpecChunkByteBuffer rented from ArrayPool will be close but not bigger than 512. + private const int MaxInputChunkLength = 169; /// /// Items from which to generate a hash. @@ -63,12 +60,12 @@ public override bool Execute() byte[] sha1Buffer = null; // Buffer in which bytes of items' ItemSpec are to be stored. - byte[] byteBuffer = null; + byte[] itemSpecChunkByteBuffer = null; try { - sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(sha1BufferSize); - byteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(maxInputChunkLength)); + sha1Buffer = System.Buffers.ArrayPool.Shared.Rent(Sha1BufferSize); + itemSpecChunkByteBuffer = System.Buffers.ArrayPool.Shared.Rent(s_encoding.GetMaxByteCount(MaxInputChunkLength)); int sha1BufferPosition = 0; for (int i = 0; i < ItemsToHash.Length; i++) @@ -76,15 +73,15 @@ public override bool Execute() string itemSpec = IgnoreCase ? ItemsToHash[i].ItemSpec.ToUpperInvariant() : ItemsToHash[i].ItemSpec; // Slice the itemSpec string into chunks of reasonable size and add them to sha1 buffer. - for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += maxInputChunkLength) + for (int itemSpecPosition = 0; itemSpecPosition < itemSpec.Length; itemSpecPosition += MaxInputChunkLength) { - int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, maxInputChunkLength); - int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, byteBuffer, 0); + int charsToProcess = Math.Min(itemSpec.Length - itemSpecPosition, MaxInputChunkLength); + int byteCount = s_encoding.GetBytes(itemSpec, itemSpecPosition, charsToProcess, itemSpecChunkByteBuffer, 0); - AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, byteBuffer, byteCount); + sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, itemSpecChunkByteBuffer, byteCount); } - AddBytesToSha1Buffer(sha1, sha1Buffer, ref sha1BufferPosition, sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); + sha1BufferPosition = AddBytesToSha1Buffer(sha1, sha1Buffer, sha1BufferPosition, Sha1BufferSize, s_itemSeparatorCharacterBytes, s_itemSeparatorCharacterBytes.Length); } sha1.TransformFinalBlock(sha1Buffer, 0, sha1BufferPosition); @@ -104,9 +101,9 @@ public override bool Execute() { System.Buffers.ArrayPool.Shared.Return(sha1Buffer); } - if (byteBuffer != null) + if (itemSpecChunkByteBuffer != null) { - System.Buffers.ArrayPool.Shared.Return(byteBuffer); + System.Buffers.ArrayPool.Shared.Return(itemSpecChunkByteBuffer); } } } @@ -123,9 +120,10 @@ public override bool Execute() /// The size of sha1 buffer. /// Bytes buffer which contains bytes to be written to sha1 buffer. /// Amount of bytes that are to be added to sha1 buffer. - private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount) + /// Updated sha1BufferPosition. + private int AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, int sha1BufferPosition, int sha1BufferSize, byte[] byteBuffer, int byteCount) { - int bytesProcessedNumber = 0; + int bytesProcessed = 0; while (sha1BufferPosition + byteCount >= sha1BufferSize) { int sha1BufferFreeSpace = sha1BufferSize - sha1BufferPosition; @@ -134,21 +132,23 @@ private void AddBytesToSha1Buffer(SHA1 sha1, byte[] sha1Buffer, ref int sha1Buff { // If sha1 buffer is empty and bytes number is big enough there is no need to copy bytes to sha1 buffer. // Pass the bytes to TransformBlock right away. - sha1.TransformBlock(byteBuffer, bytesProcessedNumber, sha1BufferSize, null, 0); + sha1.TransformBlock(byteBuffer, bytesProcessed, sha1BufferSize, null, 0); } else { - Array.Copy(byteBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferPosition, sha1BufferFreeSpace); + Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, sha1BufferFreeSpace); sha1.TransformBlock(sha1Buffer, 0, sha1BufferSize, null, 0); sha1BufferPosition = 0; } - bytesProcessedNumber += sha1BufferFreeSpace; + bytesProcessed += sha1BufferFreeSpace; byteCount -= sha1BufferFreeSpace; } - Array.Copy(byteBuffer, bytesProcessedNumber, sha1Buffer, sha1BufferPosition, byteCount); + Array.Copy(byteBuffer, bytesProcessed, sha1Buffer, sha1BufferPosition, byteCount); sha1BufferPosition += byteCount; + + return sha1BufferPosition; } } } From 1fac1821bca1b050db49ee0e27a8d58dc14ef840 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Thu, 13 Jan 2022 15:20:05 +0100 Subject: [PATCH 10/11] Update unit tests. --- src/Tasks.UnitTests/Hash_Tests.cs | 43 ++++++++----------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/src/Tasks.UnitTests/Hash_Tests.cs b/src/Tasks.UnitTests/Hash_Tests.cs index 02cad2ed8b3..2ad2e4e84e1 100644 --- a/src/Tasks.UnitTests/Hash_Tests.cs +++ b/src/Tasks.UnitTests/Hash_Tests.cs @@ -76,34 +76,6 @@ public void HashTaskLargeInputSizeTest() Assert.Equal(expectedHash, actualHash); } - [Fact] - public void HashTaskLargeInputCountAndSizeTest() - { - // This hash was pre-computed. If the implementation changes it may need to be adjusted. - var expectedHash = "be23ae7b09f1e14fa5a17de87ddae2c3ec62f967"; - - int inputSize = 1000; - string[][] array = new string[inputSize][]; - for (int i = 0; i < array.Length; i++) - { - array[i] = new string[inputSize]; - for (int j = 0; j < array[i].Length; j++) - { - array[i][j] = $"Item{i}{j}"; - } - } - - ITaskItem[] itemsToHash = new ITaskItem[inputSize]; - for (int i = 0; i < itemsToHash.Length; i++) - { - itemsToHash[i] = new TaskItem(string.Join("", array[i])); - } - - var actualHash = ExecuteHashTask(itemsToHash); - - Assert.Equal(expectedHash, actualHash); - } - #pragma warning disable CA5350 // This test verifies that hash computes correctly for various numbers of characters. // We would like to process edge of the buffer use cases regardless on the size of the buffer. @@ -116,14 +88,21 @@ public void HashTaskDifferentInputSizesTest() { using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) { + MockEngine mockEngine = new(); for (int i = 0; i < maxInputSize; i++) { input += "a"; - ITaskItem[] itemsToHash = new ITaskItem[] { new TaskItem(input) }; - var actualHash = ExecuteHashTask(itemsToHash); - - byte[] hash = sha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input+'\u2028')); + Hash hashTask = new() + { + BuildEngine = mockEngine, + ItemsToHash = new ITaskItem[] { new TaskItem(input) }, + IgnoreCase = false + }; + Assert.True(hashTask.Execute()); + string actualHash = hashTask.HashResult; + + byte[] hash = sha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input + '\u2028')); stringBuilder.Clear(); foreach (var b in hash) { From e38f7b9fe059366a8f94012d6f885642bce5d523 Mon Sep 17 00:00:00 2001 From: AR-May <67507805+AR-May@users.noreply.github.com> Date: Thu, 13 Jan 2022 15:43:02 +0100 Subject: [PATCH 11/11] Small fixes: codestyle and etc. --- src/Tasks.UnitTests/Hash_Tests.cs | 46 ++++++++++++++----------------- src/Tasks/Hash.cs | 4 +-- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/Tasks.UnitTests/Hash_Tests.cs b/src/Tasks.UnitTests/Hash_Tests.cs index 2ad2e4e84e1..9d0aed2e921 100644 --- a/src/Tasks.UnitTests/Hash_Tests.cs +++ b/src/Tasks.UnitTests/Hash_Tests.cs @@ -86,35 +86,32 @@ public void HashTaskDifferentInputSizesTest() string input = ""; using (var sha1 = System.Security.Cryptography.SHA1.Create()) { - using (var stringBuilder = new ReuseableStringBuilder(sha1.HashSize)) + var stringBuilder = new System.Text.StringBuilder(sha1.HashSize); + MockEngine mockEngine = new(); + for (int i = 0; i < maxInputSize; i++) { - MockEngine mockEngine = new(); - for (int i = 0; i < maxInputSize; i++) + input += "a"; + + Hash hashTask = new() + { + BuildEngine = mockEngine, + ItemsToHash = new ITaskItem[] { new TaskItem(input) }, + IgnoreCase = false + }; + Assert.True(hashTask.Execute()); + string actualHash = hashTask.HashResult; + + byte[] hash = sha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input + '\u2028')); + stringBuilder.Clear(); + foreach (var b in hash) { - input += "a"; - - Hash hashTask = new() - { - BuildEngine = mockEngine, - ItemsToHash = new ITaskItem[] { new TaskItem(input) }, - IgnoreCase = false - }; - Assert.True(hashTask.Execute()); - string actualHash = hashTask.HashResult; - - byte[] hash = sha1.ComputeHash(System.Text.Encoding.UTF8.GetBytes(input + '\u2028')); - stringBuilder.Clear(); - foreach (var b in hash) - { - stringBuilder.Append(b.ToString("x2")); - } - string expectedHash = stringBuilder.ToString(); - - Assert.Equal(expectedHash, actualHash); + stringBuilder.Append(b.ToString("x2")); } + string expectedHash = stringBuilder.ToString(); + + Assert.Equal(expectedHash, actualHash); } } - } #pragma warning restore CA5350 @@ -163,6 +160,5 @@ private string ExecuteHashTask(ITaskItem[] items, bool ignoreCase = false) return hashTask.HashResult; } - } } diff --git a/src/Tasks/Hash.cs b/src/Tasks/Hash.cs index 06176070dd1..b4beb876015 100644 --- a/src/Tasks/Hash.cs +++ b/src/Tasks/Hash.cs @@ -21,7 +21,7 @@ public class Hash : TaskExtension private static readonly Encoding s_encoding = Encoding.UTF8; private static readonly byte[] s_itemSeparatorCharacterBytes = s_encoding.GetBytes(new char[] { ItemSeparatorCharacter }); - // Size of buffer where bytes of the strings are stored until sha1.TransformBlock is be run on them. + // Size of buffer where bytes of the strings are stored until sha1.TransformBlock is to be run on them. // It is needed to get a balance between amount of costly sha1.TransformBlock calls and amount of allocated memory. private const int Sha1BufferSize = 512; @@ -56,7 +56,7 @@ public override bool Execute() using (var sha1 = SHA1.Create()) { // Buffer in which bytes of the strings are to be stored until their number reaches the limit size. - // Once the limit is reached, the sha1.TransformBlock is be run on all the bytes of this buffer. + // Once the limit is reached, the sha1.TransformBlock is to be run on all the bytes of this buffer. byte[] sha1Buffer = null; // Buffer in which bytes of items' ItemSpec are to be stored.