Skip to content

Commit

Permalink
Sql-formatter optimization (#368)
Browse files Browse the repository at this point in the history
* Optimize sql-formatter (1) - avoid unnecessary string allocation when tokenizing and formatting

* Optimize sql-formatter (2) - remove RegexOptions.Compiled

* cleanup

* cleanup

* revert method access level to internal in TokenHelper

* add regex default match timeout & compiled option to static regex fields
  • Loading branch information
akarboush authored Feb 12, 2022
1 parent ff0b23b commit cc455ff
Show file tree
Hide file tree
Showing 19 changed files with 375 additions and 345 deletions.
312 changes: 169 additions & 143 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/Formatter.cs

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/InlineBlock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ internal sealed class InlineBlock
/// </summary>
/// <param name="tokens">Array of all tokens</param>
/// <param name="index">Current token position</param>
internal void BeginIfPossible(IReadOnlyList<Token> tokens, int index)
internal void BeginIfPossible(IReadOnlyList<Token> tokens, int index, ReadOnlySpan<char> valueSpan)
{
if (_level == 0 && IsInlineBlock(tokens, index))
if (_level == 0 && IsInlineBlock(tokens, index, valueSpan))
{
_level = 1;
}
Expand Down Expand Up @@ -59,15 +59,15 @@ internal bool IsActive()
/// Check if this should be an inline parentheses block.
/// Examples are "NOW()", "COUNT(*)", "int(10)", "key(`somecolumn`)", "DECIMAL(7,2)"
/// </summary>
private bool IsInlineBlock(IReadOnlyList<Token> tokens, int index)
private bool IsInlineBlock(IReadOnlyList<Token> tokens, int index, ReadOnlySpan<char> valueSpan)
{
int length = 0;
int level = 0;

for (int i = index; i < tokens.Count; i++)
{
Token token = tokens[i];
length += token.Value.Length;
length += token.Length;

// Overran max length
if (length > InlineMaxLength)
Expand All @@ -88,7 +88,7 @@ private bool IsInlineBlock(IReadOnlyList<Token> tokens, int index)
}
}

if (IsForbiddenToken(token))
if (IsForbiddenToken(token, valueSpan))
{
return false;
}
Expand All @@ -99,14 +99,14 @@ private bool IsInlineBlock(IReadOnlyList<Token> tokens, int index)
/// <summary>
/// Reserved words that cause newlines, comments, and semicolons are not allowed inside an inline parentheses block
/// </summary>
private bool IsForbiddenToken(Token token)
private bool IsForbiddenToken(Token token, ReadOnlySpan<char> valueSpan)
{
return
token.Type == TokenType.ReservedTopLevel
|| token.Type == TokenType.ReservedNewLine
// || token.Type == TokenType.LineComment
|| token.Type == TokenType.BlockComment
|| string.Equals(token.Value, ";", StringComparison.Ordinal);
|| (token.Length == 1 && valueSpan[0] == ';');
}
}
}
11 changes: 6 additions & 5 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/Params.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,20 @@ public Params(IReadOnlyDictionary<string, string>? parameters)
_params = parameters;
}

internal string? Get(Token token)
internal string? Get(string key)
{
if (_params is null)
{
return token.Value;
return null;
}

if (!string.IsNullOrEmpty(token.Key))
if (key is not null && key.Length != 0)
{
return _params[token.Key!];
_params.TryGetValue(key, out string? paramValue);
return paramValue;
}

return _params.Values.ToArray()[_index++];
return _params.ElementAtOrDefault(_index++).Value ?? null;
}
}
}
25 changes: 13 additions & 12 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/RegexFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ namespace DevToys.Helpers.SqlFormatter.Core
{
internal static class RegexFactory
{
private static readonly Regex SpecialCharacterRegex = new(@"[.*+?^${}()|[\]\\]", RegexOptions.Compiled);
internal readonly static TimeSpan DefaultMatchTimeout = TimeSpan.FromSeconds(1);
private static readonly Regex SpecialCharacterRegex = new(@"[.*+?^${}()|[\]\\]", RegexOptions.Compiled, DefaultMatchTimeout);
private static readonly Dictionary<string, string> Patterns = new Dictionary<string, string>()
{
{ "``", "((`[^`]*($|`))+)" },
Expand All @@ -29,33 +30,33 @@ internal static Regex CreateOperatorRegex(IEnumerable<string> multiLetterOperato
IOrderedEnumerable<string> sortedOperators = SortByLengthDesc(multiLetterOperators);
IEnumerable<string> escapedOperators = sortedOperators.Select(item => EscapeSpecialCharacters(item));
string operators = string.Join("|", escapedOperators);
return new Regex(@$"^({operators}|.)", RegexOptions.Compiled);
return new Regex(@$"^({operators}|.)", RegexOptions.None, DefaultMatchTimeout);
}

internal static Regex CreateLineCommentRegex(string[] lineCommentTypes)
{
return new Regex("^((?:" + string.Join('|', lineCommentTypes.Select(item => EscapeSpecialCharacters(item))) + ").*?)(?:\\r\\n|\\r|\\n|$)", RegexOptions.Compiled | RegexOptions.Singleline);
return new Regex($"^((?:{string.Join('|', lineCommentTypes.Select(item => EscapeSpecialCharacters(item)))}).*?)(?:\\r\\n|\\r|\\n|$)", RegexOptions.Singleline, DefaultMatchTimeout);
}

internal static Regex CreateReservedWordRegex(string[] reservedWords)
{
if (reservedWords.Length == 0)
{
return new Regex(@"^\b$", RegexOptions.Compiled);
return new Regex(@"^\b$", RegexOptions.None, DefaultMatchTimeout);
}

string reservedWordsPattern = string.Join('|', SortByLengthDesc(reservedWords)).Replace(" ", "\\s+");
return new Regex(@$"^({reservedWordsPattern})\b", RegexOptions.IgnoreCase | RegexOptions.Compiled);
return new Regex(@$"^({reservedWordsPattern})\b", RegexOptions.IgnoreCase, DefaultMatchTimeout);
}

internal static Regex CreateWordRegex(string[] specialCharacters)
{
return new Regex(@"^([\p{L}\p{M}\p{Nd}\p{Pc}\p{Cf}\p{Cs}\p{Co}" + string.Join(string.Empty, specialCharacters) + "]+)", RegexOptions.Compiled);
return new Regex(@"^([\p{L}\p{M}\p{Nd}\p{Pc}\p{Cf}\p{Cs}\p{Co}" + $"{string.Join(string.Empty, specialCharacters)}]+)", RegexOptions.None, DefaultMatchTimeout);
}

internal static Regex CreateStringRegex(string[] stringTypes)
{
return new Regex("^(" + CreateStringPattern(stringTypes) + ")", RegexOptions.Compiled);
return new Regex($"^({CreateStringPattern(stringTypes)})", RegexOptions.None, DefaultMatchTimeout);
}

/// <summary>
Expand All @@ -74,21 +75,21 @@ internal static string CreateStringPattern(string[] stringTypes)
return string.Join('|', stringTypes.Select(item => Patterns[item]));
}

internal static Regex? CreatePlaceholderRegex(string[] types, string pattern)
internal static Regex? CreatePlaceholderRegex(char[] types, string pattern)
{
if (types is null || types.Length == 0)
{
return null;
}

string typesRegex = string.Join('|', types.Select(item => EscapeSpecialCharacters(item)));
string typesRegex = string.Join('|', types.Select(item => EscapeSpecialCharacters(item.ToString())));

return new Regex("^((?:" + typesRegex + ")(?:" + pattern + "))", RegexOptions.Compiled);
return new Regex($"^((?:{typesRegex})(?:{pattern}))", RegexOptions.None, DefaultMatchTimeout);
}

internal static Regex CreateParenRegex(string[] parens)
{
return new Regex("^(" + string.Join('|', parens.Select(item => EscapeParen(item))) + ")", RegexOptions.IgnoreCase | RegexOptions.Compiled);
return new Regex($"^({string.Join('|', parens.Select(item => EscapeParen(item)))})", RegexOptions.IgnoreCase, DefaultMatchTimeout);
}

private static string EscapeParen(string paren)
Expand All @@ -101,7 +102,7 @@ private static string EscapeParen(string paren)
else
{
// longer word
return "\\b" + paren + "\\b";
return $"\\b{paren}\\b";
}
}

Expand Down
19 changes: 9 additions & 10 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/Token.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@

namespace DevToys.Helpers.SqlFormatter.Core
{
internal sealed class Token
internal struct Token
{
internal string Value { get; }
internal readonly int Index { get; }
internal readonly int Length { get; }
internal int PrecedingWitespaceLength { get; set; }
internal readonly TokenType Type { get; }

internal TokenType Type { get; }

internal string? WhitespaceBefore { get; set; }

internal string? Key { get; set; }

public Token(string value, TokenType type)
public Token(int index, int length, TokenType type, int precedingWitespaceLength = 0)
{
Value = value;
Index = index;
Length = length;
Type = type;
PrecedingWitespaceLength = precedingWitespaceLength;
}
}
}
43 changes: 18 additions & 25 deletions src/dev/impl/DevToys/Helpers/SqlFormatter/Core/TokenHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,53 +7,46 @@ namespace DevToys.Helpers.SqlFormatter.Core
{
internal static class TokenHelper
{
private static readonly TimeSpan TimeOut = TimeSpan.FromMilliseconds(500);
private static readonly Regex AndRegex = new("^AND$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex BetweenRegex = new("^BETWEEN$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex LimitRegex = new("^LIMIT$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex SetRegex = new("^SET$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex ByRegex = new("^BY$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex WindowRegex = new("^WINDOW$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);
private static readonly Regex EndRegex = new("^END$", RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeOut);

internal static bool IsAnd(Token? token)
internal static bool IsAnd(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.ReservedNewLine, AndRegex);
return IsToken(token.Type, TokenType.ReservedNewLine, tokenValueSpan, "AND".AsSpan());
}

internal static bool isBetween(Token? token)
internal static bool IsBetween(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.Reserved, BetweenRegex);
return IsToken(token.Type, TokenType.Reserved, tokenValueSpan, "BETWEEN".AsSpan());
}

internal static bool isLimit(Token? token)
internal static bool IsLimit(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.ReservedTopLevel, LimitRegex);
return IsToken(token.Type, TokenType.ReservedTopLevel, tokenValueSpan, "LIMIT".AsSpan());
}

internal static bool isSet(Token? token)
internal static bool IsSet(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.ReservedTopLevel, SetRegex);
return IsToken(token.Type, TokenType.ReservedTopLevel, tokenValueSpan, "SET".AsSpan());
}

internal static bool isBy(Token? token)
internal static bool IsBy(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.Reserved, ByRegex);
return IsToken(token.Type, TokenType.Reserved, tokenValueSpan, "BY".AsSpan());
}

internal static bool isWindow(Token? token)
internal static bool IsWindow(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.ReservedTopLevel, WindowRegex);
return IsToken(token.Type, TokenType.ReservedTopLevel, tokenValueSpan, "WINDOW".AsSpan());
}

internal static bool isEnd(Token? token)
internal static bool IsEnd(this Token token, ReadOnlySpan<char> tokenValueSpan)
{
return IsToken(token, TokenType.CloseParen, EndRegex);
return IsToken(token.Type, TokenType.CloseParen, tokenValueSpan, "END".AsSpan());
}

private static bool IsToken(Token? token, TokenType type, Regex regex)
private static bool IsToken(TokenType type, TokenType otherType,
ReadOnlySpan<char> tokenValueSpan, ReadOnlySpan<char> otherSpan)
{
return token?.Type == type && regex.IsMatch(token?.Value);
return type == otherType &&
tokenValueSpan.Equals(otherSpan, StringComparison.OrdinalIgnoreCase);
}
}
}
Loading

0 comments on commit cc455ff

Please sign in to comment.