Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix emoji performance issue when multiple pipelines are needed #308

Merged
merged 1 commit into from
Feb 20, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 38 additions & 42 deletions src/Markdig/Extensions/Emoji/EmojiParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ public class EmojiParser : InlineParser
{
private static readonly Dictionary<string, string> EmojiToUnicodeDefault;
private static readonly Dictionary<string, string> SmileyToEmojiDefault;
private static readonly CompactPrefixTree<string> EmojiPrefixTreeDefault;
private static readonly CompactPrefixTree<string> EmojiSmileyPrefixTreeDefault;
private static readonly char[] EmojiOpeningCharactersDefault;
private static readonly char[] EmojiSmileyOpeningCharactersDefault;

private CompactPrefixTree<string> _emojiPrefixTree;

Expand Down Expand Up @@ -55,45 +59,9 @@ public Dictionary<string, string> SmileyToEmoji
}

/// <summary>
/// Prepares the parser for use by assigning <c>OpeningCharacters</c> and
/// <c>_emojiPrefixTree</c>, choosing the smiley-aware variants when
/// <c>EnableSmiley</c> is set.
/// </summary>
public override void Initialize()
{
// NOTE(review): this span is a rendered diff hunk without +/- markers. The statements from
// here down to the `};` line look like the previous implementation (tree rebuilt on every
// call); the short block after it looks like the replacement. Confirm against the hunk header.
// Don't allocate a new dictionary if we don't need it
var emojiToUnicode = _emojiToUnicode ?? EmojiToUnicodeDefault;

if (EnableSmiley)
{
// Don't allocate a new dictionary if we don't need it
var smileyToEmoji = _smileyToEmoji ?? SmileyToEmojiDefault;

int jointCount = emojiToUnicode.Count + smileyToEmoji.Count;
// Count * 2 seems to be a good fit for the data set
_emojiPrefixTree = new CompactPrefixTree<string>(jointCount, jointCount * 2);
foreach (var emoji in emojiToUnicode)
_emojiPrefixTree.Add(emoji);

// This is not the best data set for the prefix tree as it will have to check the first character linearly
// A work-around would require a bunch of substrings / removing the leading ':' from emojis, neither one is pretty
// This way we sacrifice a few microseconds for not introducing breaking changes, emojis aren't all that common anyhow

// ':' is always an opening character; the first character of every smiley is added below.
var firstChars = new HashSet<char> { ':' };

foreach (var smiley in smileyToEmoji)
{
// A smiley maps to an emoji name; that name must exist in the emoji dictionary so the
// smiley can be stored in the tree with the emoji's unicode value.
// NOTE(review): ArgumentException(string, string) treats the second argument as the
// parameter name, so the "{0}" placeholder in these messages is never substituted.
if (!emojiToUnicode.TryGetValue(smiley.Value, out string unicode))
throw new ArgumentException("Invalid smiley target: {0} is not present in the emoji dictionary", smiley.Value);

firstChars.Add(smiley.Key[0]);

// TryAdd returning false is treated as a duplicate key and reported as an error.
if (!_emojiPrefixTree.TryAdd(smiley.Key, unicode))
throw new ArgumentException("Smiley {0} is already present in the Emoji dictionary", smiley.Key);
}

OpeningCharacters = new List<char>(firstChars).ToArray();
}
else
{
// Without smileys every match starts with ':' and the tree holds the emoji dictionary only.
OpeningCharacters = new[] { ':' };
_emojiPrefixTree = new CompactPrefixTree<string>(emojiToUnicode);
};
{
// Reuse the statically built default arrays and prefix trees instead of building new ones here.
// NOTE(review): unlike the body above, this block never reads _emojiToUnicode or
// _smileyToEmoji — confirm that custom dictionaries are intentionally no longer honored.
OpeningCharacters = EnableSmiley ? EmojiSmileyOpeningCharactersDefault : EmojiOpeningCharactersDefault;
_emojiPrefixTree = EnableSmiley ? EmojiSmileyPrefixTreeDefault : EmojiPrefixTreeDefault;
}

public override bool Match(InlineProcessor processor, ref StringSlice slice)
Expand Down Expand Up @@ -127,8 +95,8 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice)
slice.Start += match.Key.Length;

return true;
}
}
#region Emojis and Smileys
static EmojiParser()
{
Expand Down Expand Up @@ -1091,7 +1059,35 @@ static EmojiParser()
{"<=", ":custom_arrow_left_strong:" },
{"=>", ":custom_arrow_right_strong:" },
{"<=>", ":custom_arrow_left_right_strong:" },
};
};

// Build Emoji and Smiley CompactPrefixTree
// These shared defaults are built once, during type initialization, so that
// Initialize() can assign them directly instead of rebuilding a tree per parser.

// Emoji-only tree, used when smileys are disabled.
EmojiPrefixTreeDefault = new CompactPrefixTree<string>(EmojiToUnicodeDefault);

int jointCount = EmojiToUnicodeDefault.Count + SmileyToEmojiDefault.Count;
// Count * 2 seems to be a good fit for the data set
EmojiSmileyPrefixTreeDefault = new CompactPrefixTree<string>(jointCount, jointCount * 2);

// This is not the best data set for the prefix tree as it will have to check the first character linearly
// A work-around would require a bunch of substrings / removing the leading ':' from emojis, neither one is pretty
// This way we sacrifice a few microseconds for not introducing breaking changes, emojis aren't all that common anyhow

// ':' is always an opening character; the first character of every smiley is collected as well.
var firstChars = new HashSet<char> { ':' };
foreach (var emoji in EmojiToUnicodeDefault)
{
    EmojiSmileyPrefixTreeDefault.Add(emoji);
}

foreach (var smiley in SmileyToEmojiDefault)
{
    // A smiley maps to an emoji name; resolve that name to its unicode value so the
    // smiley can be stored in the joint tree alongside the emojis.
    if (!EmojiToUnicodeDefault.TryGetValue(smiley.Value, out string unicode))
    {
        // The (message, paramName) overload does not format "{0}", so build the message explicitly.
        throw new ArgumentException($"Invalid smiley target: {smiley.Value} is not present in the emoji dictionary");
    }

    firstChars.Add(smiley.Key[0]);

    // TryAdd returning false is treated as a duplicate key and reported as an error.
    if (!EmojiSmileyPrefixTreeDefault.TryAdd(smiley.Key, unicode))
    {
        throw new ArgumentException($"Smiley {smiley.Key} is already present in the Emoji dictionary");
    }
}

EmojiOpeningCharactersDefault = new[] { ':' };
EmojiSmileyOpeningCharactersDefault = new List<char>(firstChars).ToArray();
}
#endregion
}
Expand Down