From b47ba443be41fe6ba65494a03327a9609d5fce56 Mon Sep 17 00:00:00 2001 From: David Justo Date: Tue, 2 Jan 2024 22:09:08 -0800 Subject: [PATCH 1/5] add simple log --- src/DurableTask.Netherite/StorageLayer/Faster/FasterKV.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/FasterKV.cs b/src/DurableTask.Netherite/StorageLayer/Faster/FasterKV.cs index 00d26c87..2cfd1d85 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/FasterKV.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/FasterKV.cs @@ -255,8 +255,9 @@ public override Task FindCheckpointAsync(bool logIsEmpty) } // recover Faster - this.blobManager.TraceHelper.FasterProgress($"Recovering FasterKV"); + this.blobManager.TraceHelper.FasterProgress($"Recovering FasterKV - Entering fht.RecoverAsync"); await this.fht.RecoverAsync(this.partition.Settings.FasterTuningParameters?.NumPagesToPreload ?? 1, true, -1, this.terminationToken); + this.blobManager.TraceHelper.FasterProgress($"Recovering FasterKV - Returned from fht.RecoverAsync"); this.mainSession = this.CreateASession($"main-{this.blobManager.IncarnationTimestamp:o}", false); for (int i = 0; i < this.querySessions.Length; i++) { From 6b91ad32e4ddf75e5bb414de6a1d799700aeb804 Mon Sep 17 00:00:00 2001 From: David Justo Date: Thu, 4 Jan 2024 10:29:45 -0800 Subject: [PATCH 2/5] add detail column to FasterAzureStorageAccessCompleted --- .../StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs | 4 +++- .../StorageLayer/Faster/AzureBlobs/StorageOperations.cs | 4 ++-- .../StorageLayer/Faster/FasterTraceHelper.cs | 8 ++++---- src/DurableTask.Netherite/Tracing/EtwSource.cs | 6 +++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs index b963a914..37198458 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs @@ -498,6 +498,8 @@ unsafe Task ReadFromBlobUnsafeAsync(BlobUtilsV12.PageBlobClients blob, long sour async Task ReadFromBlobAsync(UnmanagedMemoryStream stream, BlobUtilsV12.PageBlobClients blob, long sourceAddress, uint readLength, long id) { + uint originalReadLength = readLength; + long originalSourceAddress = sourceAddress; using (stream) { long offset = 0; @@ -510,7 +512,7 @@ await this.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.DownloadStreamingAsync", "ReadFromDevice", - $"id={id} readLength={length} sourceAddress={sourceAddress + offset}", + $"id={id} readLength={length} sourceAddress={sourceAddress + offset} originalReadLength={originalReadLength} originalSourceAddress={originalSourceAddress}", blob.Default.Name, 1000 + (int)length / 1000, true, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs index 952aec51..477e37ea 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs @@ -75,7 +75,7 @@ public async Task PerformWithRetriesAsync( this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {data}"); } - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts, data); return; } @@ -171,7 +171,7 @@ public void PerformWithRetries( stopwatch.Stop(); this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} size={size} {data} "); - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts, data); if (stopwatch.ElapsedMilliseconds > expectedLatencyBound) { diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs index 63d0bb86..af312e7e 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs @@ -146,13 +146,13 @@ public void FasterStorageProgress(string details) } } - public void FasterAzureStorageAccessCompleted(string intent, long size, string operation, string target, double latency, int attempt) + public void FasterAzureStorageAccessCompleted(string intent, long size, string operation, string target, double latency, int attempt, string details) { if (this.logLevelLimit <= LogLevel.Debug) { - this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} size={size} operation={operation} target={target} latency={latency} attempt={attempt}", - this.partitionId, intent, size, operation, target, latency, attempt); - EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, size, operation, target, latency, attempt, TraceUtils.AppName, TraceUtils.ExtensionVersion); + this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} size={size} operation={operation} target={target} latency={latency} attempt={attempt} details={details}", + this.partitionId, intent, size, operation, target, latency, attempt, details); + EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, size, operation, target, latency, attempt, details, TraceUtils.AppName, TraceUtils.ExtensionVersion); } } diff --git a/src/DurableTask.Netherite/Tracing/EtwSource.cs b/src/DurableTask.Netherite/Tracing/EtwSource.cs index 9b90bebb..d13f5744 100644 --- a/src/DurableTask.Netherite/Tracing/EtwSource.cs +++ b/src/DurableTask.Netherite/Tracing/EtwSource.cs @@ -378,11 +378,11 @@ public void FasterStorageProgress(string Account, string TaskHub, int PartitionI this.WriteEvent(265, Account, TaskHub, PartitionId, Details, AppName, ExtensionVersion); } - [Event(266, Level = EventLevel.Verbose, Version = 1)] - public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent, long Size, string Operation, string Target, double Latency, int Attempt, string AppName, string ExtensionVersion) + [Event(266, Level = EventLevel.Verbose, Version = 2)] + public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent, long Size, string Operation, string Target, double Latency, int Attempt, string Details, string AppName, string ExtensionVersion) { SetCurrentThreadActivityId(serviceInstanceId); - this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Size, Operation, Target, Latency, Attempt, AppName, ExtensionVersion); + this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Size, Operation, Target, Latency, Attempt, Details, AppName, ExtensionVersion); } [Event(267, Level = EventLevel.Warning, Version = 1)] From 1befd9383212f8edb9116a62bb1ec95121927243 Mon Sep 17 00:00:00 2001 From: David Justo Date: Thu, 4 Jan 2024 10:38:52 -0800 Subject: [PATCH 3/5] refactor read range log --- .../StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs index 37198458..1d8bee82 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs @@ -498,8 +498,9 @@ unsafe Task ReadFromBlobUnsafeAsync(BlobUtilsV12.PageBlobClients blob, long sour async Task ReadFromBlobAsync(UnmanagedMemoryStream stream, BlobUtilsV12.PageBlobClients blob, long sourceAddress, uint readLength, long id) { - uint originalReadLength = readLength; - long originalSourceAddress = sourceAddress; + long readRangeStart = sourceAddress; + long readRangeEnd = readRangeStart + readLength; + string operationReadRange = $"[{readRangeStart}, {readRangeEnd}]"; using (stream) { long offset = 0; @@ -512,7 +513,7 @@ await this.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.DownloadStreamingAsync", "ReadFromDevice", - $"id={id} readLength={length} sourceAddress={sourceAddress + offset} originalReadLength={originalReadLength} originalSourceAddress={originalSourceAddress}", + $"id={id} readLength={length} sourceAddress={sourceAddress + offset} operationReadRange={operationReadRange}", blob.Default.Name, 1000 + (int)length / 1000, true, From 785fb2315b848a5b74d6499aed9c8ac69454b40a Mon Sep 17 00:00:00 2001 From: sebastianburckhardt Date: Thu, 4 Jan 2024 15:31:24 -0800 Subject: [PATCH 4/5] pass just the position parameter instead of printing full details for FasterStorageAccessCompleted --- .../Faster/AzureBlobs/AzureStorageDevice.cs | 11 ++++- .../Faster/AzureBlobs/BlobEntry.cs | 1 + .../Faster/AzureBlobs/BlobManager.cs | 47 ++++++++++++------- .../Faster/AzureBlobs/StorageOperations.cs | 6 ++- .../StorageLayer/Faster/FasterTraceHelper.cs | 8 ++-- .../Tracing/EtwSource.cs | 6 +-- 6 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs index 1d8bee82..b3517e11 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs @@ -115,6 +115,7 @@ await this.BlobManager.PerformWithRetriesAsync( this.underLease, "BlobContainerClient.GetBlobsAsync", "RecoverDevice", + 0, $"continuationToken={continuationToken}", this.pageBlobDirectory.ToString(), 2000, @@ -295,6 +296,7 @@ public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAs this.underLease, "BlobBaseClient.DeleteAsync", "DeleteDeviceSegment", + 0, "", entry.PageBlob.Default.Name, 5000, @@ -330,6 +332,7 @@ Task Delete(BlobEntry entry) this.underLease, "BlobBaseClient.DeleteAsync", "DeleteDevice", + 0, "", entry.PageBlob.Default.Name, 5000, @@ -448,13 +451,15 @@ async Task WritePortionToBlobAsync(UnmanagedMemoryStream stream, BlobEntry blobE { using (stream) { + var position = destinationAddress + offset; long originalStreamPosition = stream.Position; await this.BlobManager.PerformWithRetriesAsync( BlobManager.AsynchronousStorageWriteMaxConcurrency, true, "PageBlobClient.UploadPagesAsync", "WriteToDevice", - $"id={id} length={length} destinationAddress={destinationAddress + offset}", + position, + $"id={id} position={position} length={length}", blobEntry.PageBlob.Default.Name, 1000 + (int)length / 1000, true, @@ -506,6 +511,7 @@ async Task ReadFromBlobAsync(UnmanagedMemoryStream stream, BlobUtilsV12.PageBlob long offset = 0; while (readLength > 0) { + var position = sourceAddress + offset; var length = Math.Min(readLength, MAX_DOWNLOAD_SIZE); await this.BlobManager.PerformWithRetriesAsync( @@ -513,7 +519,8 @@ await this.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.DownloadStreamingAsync", "ReadFromDevice", - $"id={id} readLength={length} sourceAddress={sourceAddress + offset} operationReadRange={operationReadRange}", + position, + $"id={id} position={position} length={length} operationReadRange={operationReadRange}", blob.Default.Name, 1000 + (int)length / 1000, true, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs index 54fd8542..b9d9002f 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs @@ -67,6 +67,7 @@ await this.azureStorageDevice.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.CreateAsync", "CreateDevice", + 0, "", pageBlob.Default.Name, 3000, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs index e26089e3..58e94df9 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs @@ -585,6 +585,7 @@ await this.PerformWithRetriesAsync( false, "CloudBlockBlob.UploadFromByteArrayAsync", "CreateCommitLog", + 0, "", this.eventLogCommitBlob.Default.Name, 2000, @@ -797,6 +798,7 @@ await this.PerformWithRetriesAsync( true, "BlobContainerClient.GetBlobsAsync", "RemoveObsoleteCheckpoints", + 0, "", directory.Prefix, 1000, @@ -834,6 +836,7 @@ async Task DeleteCheckpointDirectory(IEnumerable blobsToDelete) false, "BlobUtils.ForceDeleteAsync", "DeleteCheckpointDirectory", + 0, "", blobName, 1000, @@ -882,6 +885,7 @@ void ILogCommitManager.Commit(long beginAddress, long untilAddress, byte[] commi false, "BlockBlobClient.Upload", "WriteCommitLogMetadata", + 0, "", this.eventLogCommitBlob.Default.Name, 1000, @@ -960,6 +964,7 @@ byte[] ILogCommitManager.GetCommitMetadata(long commitNum) false, "BlobClient.DownloadTo", "ReadCommitLogMetadata", + 0, "", this.eventLogCommitBlob.Name, 1000, @@ -1067,6 +1072,7 @@ await this.PerformWithRetriesAsync( requireLease: true, "BlockBlobClient.DownloadContentAsync", "FindCheckpointsAsync", + 0, "", checkpointCompletedBlob.Name, 1000, @@ -1115,23 +1121,24 @@ void ICheckpointManager.CommitIndexCheckpoint(Guid indexToken, byte[] commitMeta var metaFileBlob = partDir.GetBlockBlobClient(this.GetIndexCheckpointMetaBlobName(indexToken)); this.PerformWithRetries( - false, - "BlockBlobClient.OpenWrite", - "WriteIndexCheckpointMetadata", - $"token={indexToken} size={commitMetadata.Length}", - metaFileBlob.Name, - 1000, - true, - (numAttempts) => - { - var client = metaFileBlob.WithRetries; - using var blobStream = client.OpenWrite(overwrite: true); - using var writer = new BinaryWriter(blobStream); - writer.Write(commitMetadata.Length); - writer.Write(commitMetadata); - writer.Flush(); - return (commitMetadata.Length, true); - }); + false, + "BlockBlobClient.OpenWrite", + "WriteIndexCheckpointMetadata", + 0, + $"token={indexToken} size={commitMetadata.Length}", + metaFileBlob.Name, + 1000, + true, + (numAttempts) => + { + var client = metaFileBlob.WithRetries; + using var blobStream = client.OpenWrite(overwrite: true); + using var writer = new BinaryWriter(blobStream); + writer.Write(commitMetadata.Length); + writer.Write(commitMetadata); + writer.Flush(); + return (commitMetadata.Length, true); + }); this.CheckpointInfo.IndexToken = indexToken; this.StorageTracer?.FasterStorageProgress($"StorageOpReturned ICheckpointManager.CommitIndexCheckpoint, target={metaFileBlob.Name}"); @@ -1155,6 +1162,7 @@ void ICheckpointManager.CommitLogCheckpoint(Guid logToken, byte[] commitMetadata false, "BlockBlobClient.OpenWrite", "WriteHybridLogCheckpointMetadata", + 0, $"token={logToken}", metaFileBlob.Name, 1000, @@ -1198,6 +1206,7 @@ byte[] ICheckpointManager.GetIndexCheckpointMetadata(Guid indexToken) false, "BlockBlobClient.OpenRead", "ReadIndexCheckpointMetadata", + 0, "", metaFileBlob.Name, 1000, @@ -1235,6 +1244,7 @@ byte[] ICheckpointManager.GetLogCheckpointMetadata(Guid logToken, DeltaLog delta false, "BlockBlobClient.OpenRead", "ReadLogCheckpointMetadata", + 0, "", metaFileBlob.Name, 1000, @@ -1362,6 +1372,7 @@ await this.PerformWithRetriesAsync( false, "BlockBlobClient.UploadAsync", "WriteSingletons", + 0, "", singletonsBlob.Name, 1000 + singletons.Length / 5000, @@ -1395,6 +1406,7 @@ await this.PerformWithRetriesAsync( true, "BlobBaseClient.DownloadToAsync", "ReadSingletons", + 0, "", singletonsBlob.Name, 20000, @@ -1428,6 +1440,7 @@ await this.PerformWithRetriesAsync( true, "BlockBlobClient.UploadAsync", "WriteCheckpointMetadata", + 0, "", checkpointCompletedBlob.Name, 1000, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs index 477e37ea..7f29d089 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs @@ -21,6 +21,7 @@ public async Task PerformWithRetriesAsync( bool requireLease, string name, string intent, + long position, string data, string target, int expectedLatencyBound, @@ -75,7 +76,7 @@ public async Task PerformWithRetriesAsync( this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {data}"); } - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts, data); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, position, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); return; } @@ -134,6 +135,7 @@ public void PerformWithRetries( bool requireLease, string name, string intent, + long position, string data, string target, int expectedLatencyBound, @@ -171,7 +173,7 @@ public void PerformWithRetries( stopwatch.Stop(); this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} size={size} {data} "); - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts, data); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, position, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); if (stopwatch.ElapsedMilliseconds > expectedLatencyBound) { diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs index af312e7e..480ab694 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs @@ -146,13 +146,13 @@ public void FasterStorageProgress(string details) } } - public void FasterAzureStorageAccessCompleted(string intent, long size, string operation, string target, double latency, int attempt, string details) + public void FasterAzureStorageAccessCompleted(string intent, long position, long size, string operation, string target, double latency, int attempt) { if (this.logLevelLimit <= LogLevel.Debug) { - this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} size={size} operation={operation} target={target} latency={latency} attempt={attempt} details={details}", - this.partitionId, intent, size, operation, target, latency, attempt, details); - EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, size, operation, target, latency, attempt, details, TraceUtils.AppName, TraceUtils.ExtensionVersion); + this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} position={position} size={size} operation={operation} target={target} latency={latency} attempt={attempt}", + this.partitionId, intent, position, size, operation, target, latency, attempt); + EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, position, size, operation, target, latency, attempt, TraceUtils.AppName, TraceUtils.ExtensionVersion); } } diff --git a/src/DurableTask.Netherite/Tracing/EtwSource.cs b/src/DurableTask.Netherite/Tracing/EtwSource.cs index d13f5744..ee00531d 100644 --- a/src/DurableTask.Netherite/Tracing/EtwSource.cs +++ b/src/DurableTask.Netherite/Tracing/EtwSource.cs @@ -378,11 +378,11 @@ public void FasterStorageProgress(string Account, string TaskHub, int PartitionI this.WriteEvent(265, Account, TaskHub, PartitionId, Details, AppName, ExtensionVersion); } - [Event(266, Level = EventLevel.Verbose, Version = 2)] - public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent, long Size, string Operation, string Target, double Latency, int Attempt, string Details, string AppName, string ExtensionVersion) + [Event(266, Level = EventLevel.Verbose, Version = 3)] + public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent, long Position, long Size, string Operation, string Target, double Latency, int Attempt, string AppName, string ExtensionVersion) { SetCurrentThreadActivityId(serviceInstanceId); - this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Size, Operation, Target, Latency, Attempt, Details, AppName, ExtensionVersion); + this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Position, Size, Operation, Target, Latency, Attempt, AppName, ExtensionVersion); } [Event(267, Level = EventLevel.Warning, Version = 1)] From 8be561ac84d0c0f9f0b712e7202a72575e16c3d3 Mon Sep 17 00:00:00 2001 From: sebastianburckhardt Date: Mon, 12 Feb 2024 14:43:47 -0800 Subject: [PATCH 5/5] put position and id information into details --- .../Faster/AzureBlobs/AzureStorageDevice.cs | 7 +---- .../Faster/AzureBlobs/BlobEntry.cs | 1 - .../Faster/AzureBlobs/BlobManager.cs | 13 --------- .../Faster/AzureBlobs/StorageOperations.cs | 28 +++++++++---------- .../StorageLayer/Faster/FasterTraceHelper.cs | 8 +++--- .../Tracing/EtwSource.cs | 4 +-- 6 files changed, 20 insertions(+), 41 deletions(-) diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs index b3517e11..0dbe9c6c 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/AzureStorageDevice.cs @@ -115,7 +115,6 @@ await this.BlobManager.PerformWithRetriesAsync( this.underLease, "BlobContainerClient.GetBlobsAsync", "RecoverDevice", - 0, $"continuationToken={continuationToken}", this.pageBlobDirectory.ToString(), 2000, @@ -296,8 +295,7 @@ public override void RemoveSegmentAsync(int segment, AsyncCallback callback, IAs this.underLease, "BlobBaseClient.DeleteAsync", "DeleteDeviceSegment", - 0, - "", + $"id={id}", entry.PageBlob.Default.Name, 5000, true, @@ -332,7 +330,6 @@ Task Delete(BlobEntry entry) this.underLease, "BlobBaseClient.DeleteAsync", "DeleteDevice", - 0, "", entry.PageBlob.Default.Name, 5000, @@ -458,7 +455,6 @@ await this.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.UploadPagesAsync", "WriteToDevice", - position, $"id={id} position={position} length={length}", blobEntry.PageBlob.Default.Name, 1000 + (int)length / 1000, @@ -519,7 +515,6 @@ await this.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.DownloadStreamingAsync", "ReadFromDevice", - position, $"id={id} position={position} length={length} operationReadRange={operationReadRange}", blob.Default.Name, 1000 + (int)length / 1000, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs index b9d9002f..54fd8542 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobEntry.cs @@ -67,7 +67,6 @@ await this.azureStorageDevice.BlobManager.PerformWithRetriesAsync( true, "PageBlobClient.CreateAsync", "CreateDevice", - 0, "", pageBlob.Default.Name, 3000, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs index 58e94df9..b0fd8e77 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/BlobManager.cs @@ -585,7 +585,6 @@ await this.PerformWithRetriesAsync( false, "CloudBlockBlob.UploadFromByteArrayAsync", "CreateCommitLog", - 0, "", this.eventLogCommitBlob.Default.Name, 2000, @@ -798,7 +797,6 @@ await this.PerformWithRetriesAsync( true, "BlobContainerClient.GetBlobsAsync", "RemoveObsoleteCheckpoints", - 0, "", directory.Prefix, 1000, @@ -836,7 +834,6 @@ async Task DeleteCheckpointDirectory(IEnumerable blobsToDelete) false, "BlobUtils.ForceDeleteAsync", "DeleteCheckpointDirectory", - 0, "", blobName, 1000, @@ -885,7 +882,6 @@ void ILogCommitManager.Commit(long beginAddress, long untilAddress, byte[] commi false, "BlockBlobClient.Upload", "WriteCommitLogMetadata", - 0, "", this.eventLogCommitBlob.Default.Name, 1000, @@ -964,7 +960,6 @@ byte[] ILogCommitManager.GetCommitMetadata(long commitNum) false, "BlobClient.DownloadTo", "ReadCommitLogMetadata", - 0, "", this.eventLogCommitBlob.Name, 1000, @@ -1072,7 +1067,6 @@ await this.PerformWithRetriesAsync( requireLease: true, "BlockBlobClient.DownloadContentAsync", "FindCheckpointsAsync", - 0, "", checkpointCompletedBlob.Name, 1000, @@ -1124,7 +1118,6 @@ void ICheckpointManager.CommitIndexCheckpoint(Guid indexToken, byte[] commitMeta false, "BlockBlobClient.OpenWrite", "WriteIndexCheckpointMetadata", - 0, $"token={indexToken} size={commitMetadata.Length}", metaFileBlob.Name, 1000, @@ -1162,7 +1155,6 @@ void ICheckpointManager.CommitLogCheckpoint(Guid logToken, byte[] commitMetadata false, "BlockBlobClient.OpenWrite", "WriteHybridLogCheckpointMetadata", - 0, $"token={logToken}", metaFileBlob.Name, 1000, @@ -1206,7 +1198,6 @@ byte[] ICheckpointManager.GetIndexCheckpointMetadata(Guid indexToken) false, "BlockBlobClient.OpenRead", "ReadIndexCheckpointMetadata", - 0, "", metaFileBlob.Name, 1000, @@ -1244,7 +1235,6 @@ byte[] ICheckpointManager.GetLogCheckpointMetadata(Guid logToken, DeltaLog delta false, "BlockBlobClient.OpenRead", "ReadLogCheckpointMetadata", - 0, "", metaFileBlob.Name, 1000, @@ -1372,7 +1362,6 @@ await this.PerformWithRetriesAsync( false, "BlockBlobClient.UploadAsync", "WriteSingletons", - 0, "", singletonsBlob.Name, 1000 + singletons.Length / 5000, @@ -1406,7 +1395,6 @@ await this.PerformWithRetriesAsync( true, "BlobBaseClient.DownloadToAsync", "ReadSingletons", - 0, "", singletonsBlob.Name, 20000, @@ -1440,7 +1428,6 @@ await this.PerformWithRetriesAsync( true, "BlockBlobClient.UploadAsync", "WriteCheckpointMetadata", - 0, "", checkpointCompletedBlob.Name, 1000, diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs index 7f29d089..a07acbb1 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/AzureBlobs/StorageOperations.cs @@ -21,8 +21,7 @@ public async Task PerformWithRetriesAsync( bool requireLease, string name, string intent, - long position, - string data, + string details, string target, int expectedLatencyBound, bool isCritical, @@ -60,7 +59,7 @@ public async Task PerformWithRetriesAsync( this.PartitionErrorHandler.Token.ThrowIfCancellationRequested(); - this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) started attempt {numAttempts}; target={target} {data}"); + this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) started attempt {numAttempts}; target={target} {details}"); stopwatch.Restart(); @@ -69,14 +68,14 @@ public async Task PerformWithRetriesAsync( long size = await operationAsync(numAttempts).ConfigureAwait(false); stopwatch.Stop(); - this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} {data}"); + this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} {details}"); if (stopwatch.ElapsedMilliseconds > expectedLatencyBound) { - this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {data}"); + this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {details}"); } - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, position, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, details, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); return; } @@ -92,7 +91,7 @@ public async Task PerformWithRetriesAsync( if (BlobUtils.IsTimeout(e)) { - this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) timed out on attempt {numAttempts} after {stopwatch.Elapsed.TotalSeconds:F1}s, retrying now; target={target} {data}"); + this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) timed out on attempt {numAttempts} after {stopwatch.Elapsed.TotalSeconds:F1}s, retrying now; target={target} {details}"); } else { @@ -104,7 +103,7 @@ public async Task PerformWithRetriesAsync( } catch (Azure.RequestFailedException ex) when (BlobUtilsV12.PreconditionFailed(ex) && readETagAsync != null) { - this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) failed precondition on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} {data}"); + this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) failed precondition on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} {details}"); mustReadETagFirst = true; continue; } @@ -135,8 +134,7 @@ public void PerformWithRetries( bool requireLease, string name, string intent, - long position, - string data, + string details, string target, int expectedLatencyBound, bool isCritical, @@ -158,7 +156,7 @@ public void PerformWithRetries( this.PartitionErrorHandler.Token.ThrowIfCancellationRequested(); - this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) started attempt {numAttempts}; target={target} {data}"); + this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) started attempt {numAttempts}; target={target} {details}"); stopwatch.Restart(); this.FaultInjector?.StorageAccess(this, name, intent, target); @@ -171,13 +169,13 @@ public void PerformWithRetries( } stopwatch.Stop(); - this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} size={size} {data} "); + this.StorageTracer?.FasterStorageProgress($"storage operation {name} ({intent}) succeeded on attempt {numAttempts}; target={target} latencyMs={stopwatch.Elapsed.TotalMilliseconds:F1} size={size} {details} "); - this.TraceHelper.FasterAzureStorageAccessCompleted(intent, position, size, name, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); + this.TraceHelper.FasterAzureStorageAccessCompleted(intent, size, name, details, target, stopwatch.Elapsed.TotalMilliseconds, numAttempts); if (stopwatch.ElapsedMilliseconds > expectedLatencyBound) { - this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {data}"); + this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) took {stopwatch.Elapsed.TotalSeconds:F1}s on attempt {numAttempts}, which is excessive; {details}"); } return; @@ -193,7 +191,7 @@ public void PerformWithRetries( stopwatch.Stop(); if (BlobUtils.IsTimeout(e)) { - this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) timed out on attempt {numAttempts} after {stopwatch.Elapsed.TotalSeconds:F1}s, retrying now; target={target} {data}"); + this.TraceHelper.FasterPerfWarning($"storage operation {name} ({intent}) timed out on attempt {numAttempts} after {stopwatch.Elapsed.TotalSeconds:F1}s, retrying now; target={target} {details}"); } else { diff --git a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs index 480ab694..e42650f9 100644 --- a/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs +++ b/src/DurableTask.Netherite/StorageLayer/Faster/FasterTraceHelper.cs @@ -146,13 +146,13 @@ public void FasterStorageProgress(string details) } } - public void FasterAzureStorageAccessCompleted(string intent, long position, long size, string operation, string target, double latency, int attempt) + public void FasterAzureStorageAccessCompleted(string intent, long size, string operation, string details, string target, double latency, int attempt) { if (this.logLevelLimit <= LogLevel.Debug) { - this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} position={position} size={size} operation={operation} target={target} latency={latency} attempt={attempt}", - this.partitionId, intent, position, size, operation, target, latency, attempt); - EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, position, size, operation, target, latency, attempt, TraceUtils.AppName, TraceUtils.ExtensionVersion); + this.logger.LogDebug("Part{partition:D2} storage access completed intent={intent} size={size} operation={operation} {details} target={target} latency={latency} attempt={attempt}", + this.partitionId, intent, size, operation, details, target, latency, attempt); + EtwSource.Log.FasterAzureStorageAccessCompleted(this.account, this.taskHub, this.partitionId, intent, size, operation, details, target, latency, attempt, TraceUtils.AppName, TraceUtils.ExtensionVersion); } } diff --git a/src/DurableTask.Netherite/Tracing/EtwSource.cs b/src/DurableTask.Netherite/Tracing/EtwSource.cs index ee00531d..2d5cf6d9 100644 --- a/src/DurableTask.Netherite/Tracing/EtwSource.cs +++ b/src/DurableTask.Netherite/Tracing/EtwSource.cs @@ -379,10 +379,10 @@ public void FasterStorageProgress(string Account, string TaskHub, int PartitionI } [Event(266, Level = EventLevel.Verbose, Version = 3)] - public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent, long Position, long Size, string Operation, string Target, double Latency, int Attempt, string AppName, string ExtensionVersion) + public void FasterAzureStorageAccessCompleted(string Account, string TaskHub, int PartitionId, string Intent,long Size, string Operation, string Details, string Target, double Latency, int Attempt, string AppName, string ExtensionVersion) { SetCurrentThreadActivityId(serviceInstanceId); - this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Position, Size, Operation, Target, Latency, Attempt, AppName, ExtensionVersion); + this.WriteEvent(266, Account, TaskHub, PartitionId, Intent, Size, Operation, Details, Target, Latency, Attempt, AppName, ExtensionVersion); } [Event(267, Level = EventLevel.Warning, Version = 1)]