From 1681f77c9c77da1126c85d2c3d2fcd4fa94393ae Mon Sep 17 00:00:00 2001 From: Julius Hardt Date: Tue, 1 May 2018 15:14:50 +0200 Subject: [PATCH] Avoid substring allocations in WebUtility.HtmlDecode (#29402) * Avoid substring allocations in WebUtility.HtmlDecode * Update changes to HtmlDecode based on feedback - Use regular Dictionary instead of LowLevelDictionary - Use AsSpan overload instead of AsSpan() and Slice - Avoid shift by variable amount - Use helper method to generate keys to make the code easier to maintain * Assert that entity length is <= 8 in ToUInt64Key - Add assert to ToUInt64Key - Replace default with 0 --- .../src/System/Net/WebUtility.cs | 552 +++++++++--------- 1 file changed, 290 insertions(+), 262 deletions(-) diff --git a/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs b/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs index e968f3cbd7ba..1c0c0570fac9 100644 --- a/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs +++ b/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs @@ -196,11 +196,11 @@ private static void HtmlDecode(string value, StringBuilder output) uint parsedValue; if (value[entityOffset + 1] == 'x' || value[entityOffset + 1] == 'X') { - parsedSuccessfully = uint.TryParse(value.Substring(entityOffset + 2, entityLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out parsedValue); + parsedSuccessfully = uint.TryParse(value.AsSpan(entityOffset + 2, entityLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out parsedValue); } else { - parsedSuccessfully = uint.TryParse(value.Substring(entityOffset + 1, entityLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedValue); + parsedSuccessfully = uint.TryParse(value.AsSpan(entityOffset + 1, entityLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedValue); } if (parsedSuccessfully) @@ -231,7 +231,7 @@ private static void HtmlDecode(string value, StringBuilder output) 
} else { - string entity = value.Substring(entityOffset, entityLength); + ReadOnlySpan<char> entity = value.AsSpan(entityOffset, entityLength); i = index; // already looked at everything until semicolon char entityChar = HtmlEntities.Lookup(entity); @@ -773,269 +773,297 @@ static HtmlEntities() private const int Count = 253; // maps entity strings => unicode chars - private static readonly LowLevelDictionary<string, char> s_lookupTable = - new LowLevelDictionary<string, char>(Count, StringComparer.Ordinal) + private static readonly Dictionary<ulong, char> s_lookupTable = + new Dictionary<ulong, char>(Count) { - ["quot"] = '\x0022', - ["amp"] = '\x0026', - ["apos"] = '\x0027', - ["lt"] = '\x003c', - ["gt"] = '\x003e', - ["nbsp"] = '\x00a0', - ["iexcl"] = '\x00a1', - ["cent"] = '\x00a2', - ["pound"] = '\x00a3', - ["curren"] = '\x00a4', - ["yen"] = '\x00a5', - ["brvbar"] = '\x00a6', - ["sect"] = '\x00a7', - ["uml"] = '\x00a8', - ["copy"] = '\x00a9', - ["ordf"] = '\x00aa', - ["laquo"] = '\x00ab', - ["not"] = '\x00ac', - ["shy"] = '\x00ad', - ["reg"] = '\x00ae', - ["macr"] = '\x00af', - ["deg"] = '\x00b0', - ["plusmn"] = '\x00b1', - ["sup2"] = '\x00b2', - ["sup3"] = '\x00b3', - ["acute"] = '\x00b4', - ["micro"] = '\x00b5', - ["para"] = '\x00b6', - ["middot"] = '\x00b7', - ["cedil"] = '\x00b8', - ["sup1"] = '\x00b9', - ["ordm"] = '\x00ba', - ["raquo"] = '\x00bb', - ["frac14"] = '\x00bc', - ["frac12"] = '\x00bd', - ["frac34"] = '\x00be', - ["iquest"] = '\x00bf', - ["Agrave"] = '\x00c0', - ["Aacute"] = '\x00c1', - ["Acirc"] = '\x00c2', - ["Atilde"] = '\x00c3', - ["Auml"] = '\x00c4', - ["Aring"] = '\x00c5', - ["AElig"] = '\x00c6', - ["Ccedil"] = '\x00c7', - ["Egrave"] = '\x00c8', - ["Eacute"] = '\x00c9', - ["Ecirc"] = '\x00ca', - ["Euml"] = '\x00cb', - ["Igrave"] = '\x00cc', - ["Iacute"] = '\x00cd', - ["Icirc"] = '\x00ce', - ["Iuml"] = '\x00cf', - ["ETH"] = '\x00d0', - ["Ntilde"] = '\x00d1', - ["Ograve"] = '\x00d2', - ["Oacute"] = '\x00d3', - ["Ocirc"] = '\x00d4', - ["Otilde"] = '\x00d5', - ["Ouml"] = '\x00d6', - ["times"] = 
'\x00d7', - ["Oslash"] = '\x00d8', - ["Ugrave"] = '\x00d9', - ["Uacute"] = '\x00da', - ["Ucirc"] = '\x00db', - ["Uuml"] = '\x00dc', - ["Yacute"] = '\x00dd', - ["THORN"] = '\x00de', - ["szlig"] = '\x00df', - ["agrave"] = '\x00e0', - ["aacute"] = '\x00e1', - ["acirc"] = '\x00e2', - ["atilde"] = '\x00e3', - ["auml"] = '\x00e4', - ["aring"] = '\x00e5', - ["aelig"] = '\x00e6', - ["ccedil"] = '\x00e7', - ["egrave"] = '\x00e8', - ["eacute"] = '\x00e9', - ["ecirc"] = '\x00ea', - ["euml"] = '\x00eb', - ["igrave"] = '\x00ec', - ["iacute"] = '\x00ed', - ["icirc"] = '\x00ee', - ["iuml"] = '\x00ef', - ["eth"] = '\x00f0', - ["ntilde"] = '\x00f1', - ["ograve"] = '\x00f2', - ["oacute"] = '\x00f3', - ["ocirc"] = '\x00f4', - ["otilde"] = '\x00f5', - ["ouml"] = '\x00f6', - ["divide"] = '\x00f7', - ["oslash"] = '\x00f8', - ["ugrave"] = '\x00f9', - ["uacute"] = '\x00fa', - ["ucirc"] = '\x00fb', - ["uuml"] = '\x00fc', - ["yacute"] = '\x00fd', - ["thorn"] = '\x00fe', - ["yuml"] = '\x00ff', - ["OElig"] = '\x0152', - ["oelig"] = '\x0153', - ["Scaron"] = '\x0160', - ["scaron"] = '\x0161', - ["Yuml"] = '\x0178', - ["fnof"] = '\x0192', - ["circ"] = '\x02c6', - ["tilde"] = '\x02dc', - ["Alpha"] = '\x0391', - ["Beta"] = '\x0392', - ["Gamma"] = '\x0393', - ["Delta"] = '\x0394', - ["Epsilon"] = '\x0395', - ["Zeta"] = '\x0396', - ["Eta"] = '\x0397', - ["Theta"] = '\x0398', - ["Iota"] = '\x0399', - ["Kappa"] = '\x039a', - ["Lambda"] = '\x039b', - ["Mu"] = '\x039c', - ["Nu"] = '\x039d', - ["Xi"] = '\x039e', - ["Omicron"] = '\x039f', - ["Pi"] = '\x03a0', - ["Rho"] = '\x03a1', - ["Sigma"] = '\x03a3', - ["Tau"] = '\x03a4', - ["Upsilon"] = '\x03a5', - ["Phi"] = '\x03a6', - ["Chi"] = '\x03a7', - ["Psi"] = '\x03a8', - ["Omega"] = '\x03a9', - ["alpha"] = '\x03b1', - ["beta"] = '\x03b2', - ["gamma"] = '\x03b3', - ["delta"] = '\x03b4', - ["epsilon"] = '\x03b5', - ["zeta"] = '\x03b6', - ["eta"] = '\x03b7', - ["theta"] = '\x03b8', - ["iota"] = '\x03b9', - ["kappa"] = '\x03ba', - ["lambda"] = '\x03bb', - ["mu"] 
= '\x03bc', - ["nu"] = '\x03bd', - ["xi"] = '\x03be', - ["omicron"] = '\x03bf', - ["pi"] = '\x03c0', - ["rho"] = '\x03c1', - ["sigmaf"] = '\x03c2', - ["sigma"] = '\x03c3', - ["tau"] = '\x03c4', - ["upsilon"] = '\x03c5', - ["phi"] = '\x03c6', - ["chi"] = '\x03c7', - ["psi"] = '\x03c8', - ["omega"] = '\x03c9', - ["thetasym"] = '\x03d1', - ["upsih"] = '\x03d2', - ["piv"] = '\x03d6', - ["ensp"] = '\x2002', - ["emsp"] = '\x2003', - ["thinsp"] = '\x2009', - ["zwnj"] = '\x200c', - ["zwj"] = '\x200d', - ["lrm"] = '\x200e', - ["rlm"] = '\x200f', - ["ndash"] = '\x2013', - ["mdash"] = '\x2014', - ["lsquo"] = '\x2018', - ["rsquo"] = '\x2019', - ["sbquo"] = '\x201a', - ["ldquo"] = '\x201c', - ["rdquo"] = '\x201d', - ["bdquo"] = '\x201e', - ["dagger"] = '\x2020', - ["Dagger"] = '\x2021', - ["bull"] = '\x2022', - ["hellip"] = '\x2026', - ["permil"] = '\x2030', - ["prime"] = '\x2032', - ["Prime"] = '\x2033', - ["lsaquo"] = '\x2039', - ["rsaquo"] = '\x203a', - ["oline"] = '\x203e', - ["frasl"] = '\x2044', - ["euro"] = '\x20ac', - ["image"] = '\x2111', - ["weierp"] = '\x2118', - ["real"] = '\x211c', - ["trade"] = '\x2122', - ["alefsym"] = '\x2135', - ["larr"] = '\x2190', - ["uarr"] = '\x2191', - ["rarr"] = '\x2192', - ["darr"] = '\x2193', - ["harr"] = '\x2194', - ["crarr"] = '\x21b5', - ["lArr"] = '\x21d0', - ["uArr"] = '\x21d1', - ["rArr"] = '\x21d2', - ["dArr"] = '\x21d3', - ["hArr"] = '\x21d4', - ["forall"] = '\x2200', - ["part"] = '\x2202', - ["exist"] = '\x2203', - ["empty"] = '\x2205', - ["nabla"] = '\x2207', - ["isin"] = '\x2208', - ["notin"] = '\x2209', - ["ni"] = '\x220b', - ["prod"] = '\x220f', - ["sum"] = '\x2211', - ["minus"] = '\x2212', - ["lowast"] = '\x2217', - ["radic"] = '\x221a', - ["prop"] = '\x221d', - ["infin"] = '\x221e', - ["ang"] = '\x2220', - ["and"] = '\x2227', - ["or"] = '\x2228', - ["cap"] = '\x2229', - ["cup"] = '\x222a', - ["int"] = '\x222b', - ["there4"] = '\x2234', - ["sim"] = '\x223c', - ["cong"] = '\x2245', - ["asymp"] = '\x2248', - ["ne"] = 
'\x2260', - ["equiv"] = '\x2261', - ["le"] = '\x2264', - ["ge"] = '\x2265', - ["sub"] = '\x2282', - ["sup"] = '\x2283', - ["nsub"] = '\x2284', - ["sube"] = '\x2286', - ["supe"] = '\x2287', - ["oplus"] = '\x2295', - ["otimes"] = '\x2297', - ["perp"] = '\x22a5', - ["sdot"] = '\x22c5', - ["lceil"] = '\x2308', - ["rceil"] = '\x2309', - ["lfloor"] = '\x230a', - ["rfloor"] = '\x230b', - ["lang"] = '\x2329', - ["rang"] = '\x232a', - ["loz"] = '\x25ca', - ["spades"] = '\x2660', - ["clubs"] = '\x2663', - ["hearts"] = '\x2665', - ["diams"] = '\x2666', + [ToUInt64Key("quot")] = '\x0022', + [ToUInt64Key("amp")] = '\x0026', + [ToUInt64Key("apos")] = '\x0027', + [ToUInt64Key("lt")] = '\x003c', + [ToUInt64Key("gt")] = '\x003e', + [ToUInt64Key("nbsp")] = '\x00a0', + [ToUInt64Key("iexcl")] = '\x00a1', + [ToUInt64Key("cent")] = '\x00a2', + [ToUInt64Key("pound")] = '\x00a3', + [ToUInt64Key("curren")] = '\x00a4', + [ToUInt64Key("yen")] = '\x00a5', + [ToUInt64Key("brvbar")] = '\x00a6', + [ToUInt64Key("sect")] = '\x00a7', + [ToUInt64Key("uml")] = '\x00a8', + [ToUInt64Key("copy")] = '\x00a9', + [ToUInt64Key("ordf")] = '\x00aa', + [ToUInt64Key("laquo")] = '\x00ab', + [ToUInt64Key("not")] = '\x00ac', + [ToUInt64Key("shy")] = '\x00ad', + [ToUInt64Key("reg")] = '\x00ae', + [ToUInt64Key("macr")] = '\x00af', + [ToUInt64Key("deg")] = '\x00b0', + [ToUInt64Key("plusmn")] = '\x00b1', + [ToUInt64Key("sup2")] = '\x00b2', + [ToUInt64Key("sup3")] = '\x00b3', + [ToUInt64Key("acute")] = '\x00b4', + [ToUInt64Key("micro")] = '\x00b5', + [ToUInt64Key("para")] = '\x00b6', + [ToUInt64Key("middot")] = '\x00b7', + [ToUInt64Key("cedil")] = '\x00b8', + [ToUInt64Key("sup1")] = '\x00b9', + [ToUInt64Key("ordm")] = '\x00ba', + [ToUInt64Key("raquo")] = '\x00bb', + [ToUInt64Key("frac14")] = '\x00bc', + [ToUInt64Key("frac12")] = '\x00bd', + [ToUInt64Key("frac34")] = '\x00be', + [ToUInt64Key("iquest")] = '\x00bf', + [ToUInt64Key("Agrave")] = '\x00c0', + [ToUInt64Key("Aacute")] = '\x00c1', + [ToUInt64Key("Acirc")] = 
'\x00c2', + [ToUInt64Key("Atilde")] = '\x00c3', + [ToUInt64Key("Auml")] = '\x00c4', + [ToUInt64Key("Aring")] = '\x00c5', + [ToUInt64Key("AElig")] = '\x00c6', + [ToUInt64Key("Ccedil")] = '\x00c7', + [ToUInt64Key("Egrave")] = '\x00c8', + [ToUInt64Key("Eacute")] = '\x00c9', + [ToUInt64Key("Ecirc")] = '\x00ca', + [ToUInt64Key("Euml")] = '\x00cb', + [ToUInt64Key("Igrave")] = '\x00cc', + [ToUInt64Key("Iacute")] = '\x00cd', + [ToUInt64Key("Icirc")] = '\x00ce', + [ToUInt64Key("Iuml")] = '\x00cf', + [ToUInt64Key("ETH")] = '\x00d0', + [ToUInt64Key("Ntilde")] = '\x00d1', + [ToUInt64Key("Ograve")] = '\x00d2', + [ToUInt64Key("Oacute")] = '\x00d3', + [ToUInt64Key("Ocirc")] = '\x00d4', + [ToUInt64Key("Otilde")] = '\x00d5', + [ToUInt64Key("Ouml")] = '\x00d6', + [ToUInt64Key("times")] = '\x00d7', + [ToUInt64Key("Oslash")] = '\x00d8', + [ToUInt64Key("Ugrave")] = '\x00d9', + [ToUInt64Key("Uacute")] = '\x00da', + [ToUInt64Key("Ucirc")] = '\x00db', + [ToUInt64Key("Uuml")] = '\x00dc', + [ToUInt64Key("Yacute")] = '\x00dd', + [ToUInt64Key("THORN")] = '\x00de', + [ToUInt64Key("szlig")] = '\x00df', + [ToUInt64Key("agrave")] = '\x00e0', + [ToUInt64Key("aacute")] = '\x00e1', + [ToUInt64Key("acirc")] = '\x00e2', + [ToUInt64Key("atilde")] = '\x00e3', + [ToUInt64Key("auml")] = '\x00e4', + [ToUInt64Key("aring")] = '\x00e5', + [ToUInt64Key("aelig")] = '\x00e6', + [ToUInt64Key("ccedil")] = '\x00e7', + [ToUInt64Key("egrave")] = '\x00e8', + [ToUInt64Key("eacute")] = '\x00e9', + [ToUInt64Key("ecirc")] = '\x00ea', + [ToUInt64Key("euml")] = '\x00eb', + [ToUInt64Key("igrave")] = '\x00ec', + [ToUInt64Key("iacute")] = '\x00ed', + [ToUInt64Key("icirc")] = '\x00ee', + [ToUInt64Key("iuml")] = '\x00ef', + [ToUInt64Key("eth")] = '\x00f0', + [ToUInt64Key("ntilde")] = '\x00f1', + [ToUInt64Key("ograve")] = '\x00f2', + [ToUInt64Key("oacute")] = '\x00f3', + [ToUInt64Key("ocirc")] = '\x00f4', + [ToUInt64Key("otilde")] = '\x00f5', + [ToUInt64Key("ouml")] = '\x00f6', + [ToUInt64Key("divide")] = '\x00f7', + 
[ToUInt64Key("oslash")] = '\x00f8', + [ToUInt64Key("ugrave")] = '\x00f9', + [ToUInt64Key("uacute")] = '\x00fa', + [ToUInt64Key("ucirc")] = '\x00fb', + [ToUInt64Key("uuml")] = '\x00fc', + [ToUInt64Key("yacute")] = '\x00fd', + [ToUInt64Key("thorn")] = '\x00fe', + [ToUInt64Key("yuml")] = '\x00ff', + [ToUInt64Key("OElig")] = '\x0152', + [ToUInt64Key("oelig")] = '\x0153', + [ToUInt64Key("Scaron")] = '\x0160', + [ToUInt64Key("scaron")] = '\x0161', + [ToUInt64Key("Yuml")] = '\x0178', + [ToUInt64Key("fnof")] = '\x0192', + [ToUInt64Key("circ")] = '\x02c6', + [ToUInt64Key("tilde")] = '\x02dc', + [ToUInt64Key("Alpha")] = '\x0391', + [ToUInt64Key("Beta")] = '\x0392', + [ToUInt64Key("Gamma")] = '\x0393', + [ToUInt64Key("Delta")] = '\x0394', + [ToUInt64Key("Epsilon")] = '\x0395', + [ToUInt64Key("Zeta")] = '\x0396', + [ToUInt64Key("Eta")] = '\x0397', + [ToUInt64Key("Theta")] = '\x0398', + [ToUInt64Key("Iota")] = '\x0399', + [ToUInt64Key("Kappa")] = '\x039a', + [ToUInt64Key("Lambda")] = '\x039b', + [ToUInt64Key("Mu")] = '\x039c', + [ToUInt64Key("Nu")] = '\x039d', + [ToUInt64Key("Xi")] = '\x039e', + [ToUInt64Key("Omicron")] = '\x039f', + [ToUInt64Key("Pi")] = '\x03a0', + [ToUInt64Key("Rho")] = '\x03a1', + [ToUInt64Key("Sigma")] = '\x03a3', + [ToUInt64Key("Tau")] = '\x03a4', + [ToUInt64Key("Upsilon")] = '\x03a5', + [ToUInt64Key("Phi")] = '\x03a6', + [ToUInt64Key("Chi")] = '\x03a7', + [ToUInt64Key("Psi")] = '\x03a8', + [ToUInt64Key("Omega")] = '\x03a9', + [ToUInt64Key("alpha")] = '\x03b1', + [ToUInt64Key("beta")] = '\x03b2', + [ToUInt64Key("gamma")] = '\x03b3', + [ToUInt64Key("delta")] = '\x03b4', + [ToUInt64Key("epsilon")] = '\x03b5', + [ToUInt64Key("zeta")] = '\x03b6', + [ToUInt64Key("eta")] = '\x03b7', + [ToUInt64Key("theta")] = '\x03b8', + [ToUInt64Key("iota")] = '\x03b9', + [ToUInt64Key("kappa")] = '\x03ba', + [ToUInt64Key("lambda")] = '\x03bb', + [ToUInt64Key("mu")] = '\x03bc', + [ToUInt64Key("nu")] = '\x03bd', + [ToUInt64Key("xi")] = '\x03be', + [ToUInt64Key("omicron")] = 
'\x03bf', + [ToUInt64Key("pi")] = '\x03c0', + [ToUInt64Key("rho")] = '\x03c1', + [ToUInt64Key("sigmaf")] = '\x03c2', + [ToUInt64Key("sigma")] = '\x03c3', + [ToUInt64Key("tau")] = '\x03c4', + [ToUInt64Key("upsilon")] = '\x03c5', + [ToUInt64Key("phi")] = '\x03c6', + [ToUInt64Key("chi")] = '\x03c7', + [ToUInt64Key("psi")] = '\x03c8', + [ToUInt64Key("omega")] = '\x03c9', + [ToUInt64Key("thetasym")] = '\x03d1', + [ToUInt64Key("upsih")] = '\x03d2', + [ToUInt64Key("piv")] = '\x03d6', + [ToUInt64Key("ensp")] = '\x2002', + [ToUInt64Key("emsp")] = '\x2003', + [ToUInt64Key("thinsp")] = '\x2009', + [ToUInt64Key("zwnj")] = '\x200c', + [ToUInt64Key("zwj")] = '\x200d', + [ToUInt64Key("lrm")] = '\x200e', + [ToUInt64Key("rlm")] = '\x200f', + [ToUInt64Key("ndash")] = '\x2013', + [ToUInt64Key("mdash")] = '\x2014', + [ToUInt64Key("lsquo")] = '\x2018', + [ToUInt64Key("rsquo")] = '\x2019', + [ToUInt64Key("sbquo")] = '\x201a', + [ToUInt64Key("ldquo")] = '\x201c', + [ToUInt64Key("rdquo")] = '\x201d', + [ToUInt64Key("bdquo")] = '\x201e', + [ToUInt64Key("dagger")] = '\x2020', + [ToUInt64Key("Dagger")] = '\x2021', + [ToUInt64Key("bull")] = '\x2022', + [ToUInt64Key("hellip")] = '\x2026', + [ToUInt64Key("permil")] = '\x2030', + [ToUInt64Key("prime")] = '\x2032', + [ToUInt64Key("Prime")] = '\x2033', + [ToUInt64Key("lsaquo")] = '\x2039', + [ToUInt64Key("rsaquo")] = '\x203a', + [ToUInt64Key("oline")] = '\x203e', + [ToUInt64Key("frasl")] = '\x2044', + [ToUInt64Key("euro")] = '\x20ac', + [ToUInt64Key("image")] = '\x2111', + [ToUInt64Key("weierp")] = '\x2118', + [ToUInt64Key("real")] = '\x211c', + [ToUInt64Key("trade")] = '\x2122', + [ToUInt64Key("alefsym")] = '\x2135', + [ToUInt64Key("larr")] = '\x2190', + [ToUInt64Key("uarr")] = '\x2191', + [ToUInt64Key("rarr")] = '\x2192', + [ToUInt64Key("darr")] = '\x2193', + [ToUInt64Key("harr")] = '\x2194', + [ToUInt64Key("crarr")] = '\x21b5', + [ToUInt64Key("lArr")] = '\x21d0', + [ToUInt64Key("uArr")] = '\x21d1', + [ToUInt64Key("rArr")] = '\x21d2', + 
[ToUInt64Key("dArr")] = '\x21d3', + [ToUInt64Key("hArr")] = '\x21d4', + [ToUInt64Key("forall")] = '\x2200', + [ToUInt64Key("part")] = '\x2202', + [ToUInt64Key("exist")] = '\x2203', + [ToUInt64Key("empty")] = '\x2205', + [ToUInt64Key("nabla")] = '\x2207', + [ToUInt64Key("isin")] = '\x2208', + [ToUInt64Key("notin")] = '\x2209', + [ToUInt64Key("ni")] = '\x220b', + [ToUInt64Key("prod")] = '\x220f', + [ToUInt64Key("sum")] = '\x2211', + [ToUInt64Key("minus")] = '\x2212', + [ToUInt64Key("lowast")] = '\x2217', + [ToUInt64Key("radic")] = '\x221a', + [ToUInt64Key("prop")] = '\x221d', + [ToUInt64Key("infin")] = '\x221e', + [ToUInt64Key("ang")] = '\x2220', + [ToUInt64Key("and")] = '\x2227', + [ToUInt64Key("or")] = '\x2228', + [ToUInt64Key("cap")] = '\x2229', + [ToUInt64Key("cup")] = '\x222a', + [ToUInt64Key("int")] = '\x222b', + [ToUInt64Key("there4")] = '\x2234', + [ToUInt64Key("sim")] = '\x223c', + [ToUInt64Key("cong")] = '\x2245', + [ToUInt64Key("asymp")] = '\x2248', + [ToUInt64Key("ne")] = '\x2260', + [ToUInt64Key("equiv")] = '\x2261', + [ToUInt64Key("le")] = '\x2264', + [ToUInt64Key("ge")] = '\x2265', + [ToUInt64Key("sub")] = '\x2282', + [ToUInt64Key("sup")] = '\x2283', + [ToUInt64Key("nsub")] = '\x2284', + [ToUInt64Key("sube")] = '\x2286', + [ToUInt64Key("supe")] = '\x2287', + [ToUInt64Key("oplus")] = '\x2295', + [ToUInt64Key("otimes")] = '\x2297', + [ToUInt64Key("perp")] = '\x22a5', + [ToUInt64Key("sdot")] = '\x22c5', + [ToUInt64Key("lceil")] = '\x2308', + [ToUInt64Key("rceil")] = '\x2309', + [ToUInt64Key("lfloor")] = '\x230a', + [ToUInt64Key("rfloor")] = '\x230b', + [ToUInt64Key("lang")] = '\x2329', + [ToUInt64Key("rang")] = '\x232a', + [ToUInt64Key("loz")] = '\x25ca', + [ToUInt64Key("spades")] = '\x2660', + [ToUInt64Key("clubs")] = '\x2663', + [ToUInt64Key("hearts")] = '\x2665', + [ToUInt64Key("diams")] = '\x2666', }; - public static char Lookup(string entity) + public static char Lookup(ReadOnlySpan<char> entity) { - char theChar; - s_lookupTable.TryGetValue(entity, out 
theChar); - return theChar; + // To avoid an allocation, keys of type "ulong" are used in the lookup table. + // Since all entity strings comprise 8 characters or less and are ASCII-only, they "fit" into an ulong (8 bytes). + if (entity.Length <= 8) + { + s_lookupTable.TryGetValue(ToUInt64Key(entity), out char result); + return result; + } + else + { + // Currently, there are no entities that are longer than 8 characters. + return (char)0; + } + } + + private static ulong ToUInt64Key(ReadOnlySpan<char> entity) + { + // The ulong key is the reversed single-byte character representation of the actual entity string. + Debug.Assert(entity.Length <= 8); + + ulong key = 0; + for (int i = 0; i < entity.Length; i++) + { + if (entity[i] > 0xFF) + { + return 0; + } + + key = (key << 8) | entity[i]; + } + + return key; } } }