From 390e49ff96f4587f3c6041169e0ee63ae72e1b39 Mon Sep 17 00:00:00 2001 From: Heejae Chang Date: Mon, 1 Jun 2015 20:01:50 -0700 Subject: [PATCH] make formatting performance better this performance improvement is particularly for devdiv bug # 1089540 this makes the file in the bug to be formatted in several seconds compared to several minutes on my machine. there were several issues. each one fixed by #1, use concurrency on gathering operations. #2, don't use too much time to split work to chunks if that requires more work than actually formatting. #3, don't blindly set beginning of a file as inseparable start point for certain formatting options. ... but these don't actually address the most impactful root cause of this perf issues. which is perf issue of GetPrevious/GetNextToken API in compiler. (https://github.com/dotnet/roslyn/issues/3244) formatter internally uses GetDescendantTokens to get all tokens at once and cache them which takes less than 1 second for the entire file (2M bytes size) in the bug. and use the cache internally. but certain part of formatter (Rule Provider) can't use that internal cache, so it has to use the GetPrevious/GetNextToken to move around tokens, which in this particular bug, takes more than 40 seconds on my machine. and that is not even for entire file. (less than 1/12 of tokens) I opened a bug to compiler team, hopely so that we can get better perf on those APIs. in this PR, I mitigated the issue either by making more things to run concurrently or by changing logic which requires those APIs. --- .../Formatting/Context/FormattingContext.cs | 44 ++++--- .../AbstractFormatEngine.Partitioner.cs | 111 +++++++++++------- .../Formatting/Engine/AbstractFormatEngine.cs | 38 +++--- .../Core/Portable/Log/FunctionId.cs | 3 +- 4 files changed, 123 insertions(+), 73 deletions(-) diff --git a/src/Workspaces/Core/Portable/Formatting/Context/FormattingContext.cs b/src/Workspaces/Core/Portable/Formatting/Context/FormattingContext.cs index e812a7160aa8d..3eaf74b97ce7b 100644 --- a/src/Workspaces/Core/Portable/Formatting/Context/FormattingContext.cs +++ b/src/Workspaces/Core/Portable/Formatting/Context/FormattingContext.cs @@ -198,7 +198,7 @@ public void AddIndentBlockOperation(IndentBlockOperation operation) // relative indentation case where indentation depends on other token if (operation.IsRelativeIndentation) { - var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? 0 : operation.BaseToken.FullSpan.Start; + var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? _tokenStream.FirstTokenOfBaseTokenLine(operation.BaseToken).FullSpan.Start : operation.BaseToken.FullSpan.Start; var relativeIndentationGetter = new Lazy(() => { var indentationDelta = operation.IndentationDeltaOrPosition * this.OptionSet.GetOption(FormattingOptions.IndentationSize, _language); @@ -409,25 +409,37 @@ public IEnumerable GetAllRelativeIndentBlockOperations() return _relativeIndentationTree.GetIntersectingInOrderIntervals(this.TreeData.StartPosition, this.TreeData.EndPosition, this).Select(i => i.Operation); } - public SyntaxToken GetEndTokenForRelativeIndentationSpan(SyntaxToken token) + public bool TryGetEndTokenForRelativeIndentationSpan(SyntaxToken token, int maxChainDepth, out SyntaxToken endToken, CancellationToken cancellationToken) { - var span = token.Span; - var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0); - if (indentationData == null) - { - // this means the given token is not inside of inseparable regions - return token; - } + endToken = default(SyntaxToken); - // recursively find the end token outside of inseparable regions - var nextToken = indentationData.EndToken.GetNextToken(includeZeroWidth: true); - if (nextToken.RawKind == 0) + var depth = 0; + while (true) { - // reached end of tree - return default(SyntaxToken); - } + cancellationToken.ThrowIfCancellationRequested(); + + if (depth++ > maxChainDepth) + { + return false; + } + + var span = token.Span; + var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0); + if (indentationData == null) + { + // this means the given token is not inside of inseparable regions + endToken = token; + return true; + } - return GetEndTokenForRelativeIndentationSpan(nextToken); + // recursively find the end token outside of inseparable regions + token = indentationData.EndToken.GetNextToken(includeZeroWidth: true); + if (token.RawKind == 0) + { + // reached end of tree + return true; + } + } } private AnchorData GetAnchorData(SyntaxToken token) diff --git a/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.Partitioner.cs b/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.Partitioner.cs index 42afb2618277e..9f7b013bc2699 100644 --- a/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.Partitioner.cs +++ b/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.Partitioner.cs @@ -2,7 +2,8 @@ using System; using System.Collections.Generic; -using Microsoft.CodeAnalysis.Text; +using System.Threading; +using Microsoft.CodeAnalysis.Internal.Log; using Roslyn.Utilities; namespace Microsoft.CodeAnalysis.Formatting @@ -28,60 +29,88 @@ public Partitioner(FormattingContext context, TokenStream tokenStream, TokenPair _operationPairs = operationPairs; } - public List> GetPartitions(int partitionCount) + public List> GetPartitions(int partitionCount, CancellationToken cancellationToken) { - Contract.ThrowIfFalse(partitionCount > 0); - - var list = new List>(); - - // too small items in the list. give out one list - int perPartition = _operationPairs.Length / partitionCount; - if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition) + using (Logger.LogBlock(FunctionId.Formatting_Partitions, cancellationToken)) { - list.Add(GetOperationPairsFromTo(0, _operationPairs.Length)); - return list; - } + Contract.ThrowIfFalse(partitionCount > 0); - // split items up to the partition count with about same number of items if possible - // this algorithm has one problem. if there is an operation that marked whole tree - // as inseparable region, then it wouldn't go into the inseparable regions to find - // local parts that could run concurrently; which means everything will run - // synchronously. - var currentOperationIndex = 0; - while (currentOperationIndex < _operationPairs.Length) - { - var nextPartitionStartOperationIndex = Math.Min(currentOperationIndex + perPartition, _operationPairs.Length); - if (nextPartitionStartOperationIndex >= _operationPairs.Length) + var list = new List>(); + + // too small items in the list. give out one list + int perPartition = _operationPairs.Length / partitionCount; + if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition) { - // reached end of operation pairs - list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); - break; + list.Add(GetOperationPairsFromTo(0, _operationPairs.Length)); + return list; } - var nextToken = _context.GetEndTokenForRelativeIndentationSpan(_operationPairs[nextPartitionStartOperationIndex].Token1); - if (nextToken.RawKind == 0) + // split items up to the partition count with about same number of items if possible + // this algorithm has one problem. if there is an operation that marked whole tree + // as inseparable region, then it wouldn't go into the inseparable regions to find + // local parts that could run concurrently; which means everything will run + // synchronously. + var currentOperationIndex = 0; + while (currentOperationIndex < _operationPairs.Length) { - // reached the last token in the tree - list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); - break; + int nextPartitionStartOperationIndex; + if (!TryGetNextPartitionIndex(currentOperationIndex, perPartition, out nextPartitionStartOperationIndex)) + { + // reached end of operation pairs + list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); + break; + } + + var nextToken = GetNextPartitionToken(nextPartitionStartOperationIndex, perPartition, cancellationToken); + if (nextToken.RawKind == 0) + { + // reached the last token in the tree + list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); + break; + } + + var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken); + if (nextTokenWithIndex.IndexInStream < 0) + { + // first token for next partition is out side of valid token stream + list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); + break; + } + + Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream); + Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length); + + list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream)); + currentOperationIndex = nextTokenWithIndex.IndexInStream; } - var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken); - if (nextTokenWithIndex.IndexInStream < 0) + return list; + } + } + + private SyntaxToken GetNextPartitionToken(int index, int perPartition, CancellationToken cancellationToken) + { + while (true) + { + SyntaxToken nextToken; + if (_context.TryGetEndTokenForRelativeIndentationSpan(_operationPairs[index].Token1, 10, out nextToken, cancellationToken)) { - // first token for next partition is out side of valid token stream - list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length)); - break; + return nextToken; } - Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream); - Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length); - - list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream)); - currentOperationIndex = nextTokenWithIndex.IndexInStream; + // we couldn't determine how to split chunks in short time period. make partition bigger + if (!TryGetNextPartitionIndex(index, perPartition, out index)) + { + // reached end of operation pairs + return default(SyntaxToken); + } } + } - return list; + private bool TryGetNextPartitionIndex(int index, int perPartition, out int nextIndex) + { + nextIndex = Math.Min(index + perPartition, _operationPairs.Length); + return nextIndex < _operationPairs.Length; } private IEnumerable GetOperationPairsFromTo(int from, int to) diff --git a/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.cs b/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.cs index 82609c600af6a..ac6f211cbe903 100644 --- a/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.cs +++ b/src/Workspaces/Core/Portable/Formatting/Engine/AbstractFormatEngine.cs @@ -211,26 +211,34 @@ protected virtual NodeOperations CreateNodeOperationTasks(CancellationToken canc private List AddOperations(List nodes, Action, SyntaxNode> addOperations, CancellationToken cancellationToken) { - var operations = new List(); - var list = new List(); - - for (int i = 0; i < nodes.Count; i++) + using (var localOperations = new ThreadLocal>(() => new List(), trackAllValues: true)) + using (var localList = new ThreadLocal>(() => new List(), trackAllValues: false)) { - cancellationToken.ThrowIfCancellationRequested(); - addOperations(list, nodes[i]); - - foreach (var element in list) + // find out which executor we want to use. + var taskExecutor = nodes.Count > (1000 * Environment.ProcessorCount) ? TaskExecutor.Concurrent : TaskExecutor.Synchronous; + taskExecutor.ForEach(nodes, n => { - if (element != null) + cancellationToken.ThrowIfCancellationRequested(); + + var list = localList.Value; + addOperations(list, n); + + foreach (var element in list) { - operations.Add(element); + if (element != null) + { + localOperations.Value.Add(element); + } } - } - list.Clear(); - } + list.Clear(); + }, cancellationToken); - return operations; + var operations = new List(localOperations.Values.Sum(v => v.Count)); + operations.AddRange(localOperations.Values.SelectMany(v => v)); + + return operations; + } } private Task CreateTokenOperationTask( @@ -441,7 +449,7 @@ private void ApplySpaceAndWrappingOperations( var partitioner = new Partitioner(context, tokenStream, tokenOperations); // always create task 1 more than current processor count - var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1); + var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1, cancellationToken); var tasks = new Task[partitions.Count]; for (int i = 0; i < partitions.Count; i++) diff --git a/src/Workspaces/Core/Portable/Log/FunctionId.cs b/src/Workspaces/Core/Portable/Log/FunctionId.cs index 3dbdffa6b2307..95350f5a8d114 100644 --- a/src/Workspaces/Core/Portable/Log/FunctionId.cs +++ b/src/Workspaces/Core/Portable/Log/FunctionId.cs @@ -118,6 +118,7 @@ internal enum FunctionId Formatting_AggregateCreateFormattedRoot, Formatting_CreateTextChanges, Formatting_CreateFormattedRoot, + Formatting_Partitions, SmartIndentation_Start, SmartIndentation_OpenCurly, @@ -303,6 +304,6 @@ internal enum FunctionId VirtualMemory_MemoryLow, Extension_Exception, - WorkCoordinator_WaitForHigherPriorityOperationsAsync, + WorkCoordinator_WaitForHigherPriorityOperationsAsync } }