Skip to content

Commit

Permalink
Merge pull request #3247 from heejaechang/formatperf
Browse files Browse the repository at this point in the history
make formatting performance better

...

this performance improvement is particularly for devdiv bug # 1089540

this makes the file in the bug to be formatted in several seconds compared to several minutes on my machine.

there were several issues. each one fixed by

#1, use concurrency on gathering operations.
#2, don't use too much time to split work to chunks if that requires more work than actually formatting.
#3, don't blindly set beginning of a file as inseparable start point for certain formatting options.

...

but these don't actually address the most impactful root cause of this perf issues. which is perf issue of GetPrevious/GetNextToken API in compiler.
(#3244)

formatter internally uses GetDescendantTokens to get all tokens at once and cache them which takes less than 1 second for the entire file (2M bytes size) in the bug. and use the cache internally.

but certain part of formatter (Rule Provider) can't use that internal cache, so it has to use the GetPrevious/GetNextToken to move around tokens, which in this particular bug, takes more than 40 seconds on my machine. and that is not even for entire file. (less than 1/12 of tokens)

I opened a bug to compiler team, hopely so that we can get better perf on those APIs.

in this PR, I mitigated the issue either by making more things to run concurrently or by changing logic which requires those APIs.
  • Loading branch information
heejaechang committed Jun 4, 2015
2 parents c3fdfb1 + e952752 commit 0382e3e
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ public void AddIndentBlockOperation(IndentBlockOperation operation)
// relative indentation case where indentation depends on other token
if (operation.IsRelativeIndentation)
{
var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? 0 : operation.BaseToken.FullSpan.Start;
var inseparableRegionStartingPosition = operation.Option.IsOn(IndentBlockOption.RelativeToFirstTokenOnBaseTokenLine) ? _tokenStream.FirstTokenOfBaseTokenLine(operation.BaseToken).FullSpan.Start : operation.BaseToken.FullSpan.Start;
var relativeIndentationGetter = new Lazy<int>(() =>
{
var indentationDelta = operation.IndentationDeltaOrPosition * this.OptionSet.GetOption(FormattingOptions.IndentationSize, _language);
Expand Down Expand Up @@ -409,25 +409,37 @@ public IEnumerable<IndentBlockOperation> GetAllRelativeIndentBlockOperations()
return _relativeIndentationTree.GetIntersectingInOrderIntervals(this.TreeData.StartPosition, this.TreeData.EndPosition, this).Select(i => i.Operation);
}

public SyntaxToken GetEndTokenForRelativeIndentationSpan(SyntaxToken token)
public bool TryGetEndTokenForRelativeIndentationSpan(SyntaxToken token, int maxChainDepth, out SyntaxToken endToken, CancellationToken cancellationToken)
{
var span = token.Span;
var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0);
if (indentationData == null)
{
// this means the given token is not inside of inseparable regions
return token;
}
endToken = default(SyntaxToken);

// recursively find the end token outside of inseparable regions
var nextToken = indentationData.EndToken.GetNextToken(includeZeroWidth: true);
if (nextToken.RawKind == 0)
var depth = 0;
while (true)
{
// reached end of tree
return default(SyntaxToken);
}
cancellationToken.ThrowIfCancellationRequested();

if (depth++ > maxChainDepth)
{
return false;
}

var span = token.Span;
var indentationData = _relativeIndentationTree.GetSmallestContainingInterval(span.Start, 0);
if (indentationData == null)
{
// this means the given token is not inside of inseparable regions
endToken = token;
return true;
}

return GetEndTokenForRelativeIndentationSpan(nextToken);
// recursively find the end token outside of inseparable regions
token = indentationData.EndToken.GetNextToken(includeZeroWidth: true);
if (token.RawKind == 0)
{
// reached end of tree
return true;
}
}
}

private AnchorData GetAnchorData(SyntaxToken token)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

using System;
using System.Collections.Generic;
using Microsoft.CodeAnalysis.Text;
using System.Threading;
using Microsoft.CodeAnalysis.Internal.Log;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.Formatting
Expand All @@ -28,60 +29,88 @@ public Partitioner(FormattingContext context, TokenStream tokenStream, TokenPair
_operationPairs = operationPairs;
}

public List<IEnumerable<TokenPairWithOperations>> GetPartitions(int partitionCount)
public List<IEnumerable<TokenPairWithOperations>> GetPartitions(int partitionCount, CancellationToken cancellationToken)
{
Contract.ThrowIfFalse(partitionCount > 0);

var list = new List<IEnumerable<TokenPairWithOperations>>();

// too small items in the list. give out one list
int perPartition = _operationPairs.Length / partitionCount;
if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition)
using (Logger.LogBlock(FunctionId.Formatting_Partitions, cancellationToken))
{
list.Add(GetOperationPairsFromTo(0, _operationPairs.Length));
return list;
}
Contract.ThrowIfFalse(partitionCount > 0);

// split items up to the partition count with about same number of items if possible
// this algorithm has one problem. if there is an operation that marked whole tree
// as inseparable region, then it wouldn't go into the inseparable regions to find
// local parts that could run concurrently; which means everything will run
// synchronously.
var currentOperationIndex = 0;
while (currentOperationIndex < _operationPairs.Length)
{
var nextPartitionStartOperationIndex = Math.Min(currentOperationIndex + perPartition, _operationPairs.Length);
if (nextPartitionStartOperationIndex >= _operationPairs.Length)
var list = new List<IEnumerable<TokenPairWithOperations>>();

// too small items in the list. give out one list
int perPartition = _operationPairs.Length / partitionCount;
if (perPartition < 10 || partitionCount <= 1 || _operationPairs.Length < MinimumItemsPerPartition)
{
// reached end of operation pairs
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
list.Add(GetOperationPairsFromTo(0, _operationPairs.Length));
return list;
}

var nextToken = _context.GetEndTokenForRelativeIndentationSpan(_operationPairs[nextPartitionStartOperationIndex].Token1);
if (nextToken.RawKind == 0)
// split items up to the partition count with about same number of items if possible
// this algorithm has one problem. if there is an operation that marked whole tree
// as inseparable region, then it wouldn't go into the inseparable regions to find
// local parts that could run concurrently; which means everything will run
// synchronously.
var currentOperationIndex = 0;
while (currentOperationIndex < _operationPairs.Length)
{
// reached the last token in the tree
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
int nextPartitionStartOperationIndex;
if (!TryGetNextPartitionIndex(currentOperationIndex, perPartition, out nextPartitionStartOperationIndex))
{
// reached end of operation pairs
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
}

var nextToken = GetNextPartitionToken(nextPartitionStartOperationIndex, perPartition, cancellationToken);
if (nextToken.RawKind == 0)
{
// reached the last token in the tree
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
}

var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken);
if (nextTokenWithIndex.IndexInStream < 0)
{
// first token for next partition is out side of valid token stream
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
}

Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream);
Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length);

list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream));
currentOperationIndex = nextTokenWithIndex.IndexInStream;
}

var nextTokenWithIndex = _tokenStream.GetTokenData(nextToken);
if (nextTokenWithIndex.IndexInStream < 0)
return list;
}
}

private SyntaxToken GetNextPartitionToken(int index, int perPartition, CancellationToken cancellationToken)
{
while (true)
{
SyntaxToken nextToken;
if (_context.TryGetEndTokenForRelativeIndentationSpan(_operationPairs[index].Token1, 10, out nextToken, cancellationToken))
{
// first token for next partition is out side of valid token stream
list.Add(GetOperationPairsFromTo(currentOperationIndex, _operationPairs.Length));
break;
return nextToken;
}

Contract.ThrowIfFalse(currentOperationIndex < nextTokenWithIndex.IndexInStream);
Contract.ThrowIfFalse(nextTokenWithIndex.IndexInStream <= _operationPairs.Length);

list.Add(GetOperationPairsFromTo(currentOperationIndex, nextTokenWithIndex.IndexInStream));
currentOperationIndex = nextTokenWithIndex.IndexInStream;
// we couldn't determine how to split chunks in short time period. make partition bigger
if (!TryGetNextPartitionIndex(index, perPartition, out index))
{
// reached end of operation pairs
return default(SyntaxToken);
}
}
}

return list;
private bool TryGetNextPartitionIndex(int index, int perPartition, out int nextIndex)
{
nextIndex = Math.Min(index + perPartition, _operationPairs.Length);
return nextIndex < _operationPairs.Length;
}

private IEnumerable<TokenPairWithOperations> GetOperationPairsFromTo(int from, int to)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,26 +211,34 @@ protected virtual NodeOperations CreateNodeOperationTasks(CancellationToken canc

private List<T> AddOperations<T>(List<SyntaxNode> nodes, Action<List<T>, SyntaxNode> addOperations, CancellationToken cancellationToken)
{
var operations = new List<T>();
var list = new List<T>();

for (int i = 0; i < nodes.Count; i++)
using (var localOperations = new ThreadLocal<List<T>>(() => new List<T>(), trackAllValues: true))
using (var localList = new ThreadLocal<List<T>>(() => new List<T>(), trackAllValues: false))
{
cancellationToken.ThrowIfCancellationRequested();
addOperations(list, nodes[i]);

foreach (var element in list)
// find out which executor we want to use.
var taskExecutor = nodes.Count > (1000 * Environment.ProcessorCount) ? TaskExecutor.Concurrent : TaskExecutor.Synchronous;
taskExecutor.ForEach(nodes, n =>
{
if (element != null)
cancellationToken.ThrowIfCancellationRequested();

var list = localList.Value;
addOperations(list, n);

foreach (var element in list)
{
operations.Add(element);
if (element != null)
{
localOperations.Value.Add(element);
}
}
}

list.Clear();
}
list.Clear();
}, cancellationToken);

return operations;
var operations = new List<T>(localOperations.Values.Sum(v => v.Count));
operations.AddRange(localOperations.Values.SelectMany(v => v));

return operations;
}
}

private Task<TokenPairWithOperations[]> CreateTokenOperationTask(
Expand Down Expand Up @@ -441,7 +449,7 @@ private void ApplySpaceAndWrappingOperations(
var partitioner = new Partitioner(context, tokenStream, tokenOperations);

// always create task 1 more than current processor count
var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1);
var partitions = partitioner.GetPartitions(this.TaskExecutor == TaskExecutor.Synchronous ? 1 : Environment.ProcessorCount + 1, cancellationToken);

var tasks = new Task[partitions.Count];
for (int i = 0; i < partitions.Count; i++)
Expand Down
3 changes: 2 additions & 1 deletion src/Workspaces/Core/Portable/Log/FunctionId.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ internal enum FunctionId
Formatting_AggregateCreateFormattedRoot,
Formatting_CreateTextChanges,
Formatting_CreateFormattedRoot,
Formatting_Partitions,

SmartIndentation_Start,
SmartIndentation_OpenCurly,
Expand Down Expand Up @@ -303,6 +304,6 @@ internal enum FunctionId
VirtualMemory_MemoryLow,
Extension_Exception,

WorkCoordinator_WaitForHigherPriorityOperationsAsync,
WorkCoordinator_WaitForHigherPriorityOperationsAsync
}
}

0 comments on commit 0382e3e

Please sign in to comment.