Skip to content

Commit

Permalink
Disable optimization which sometimes results in incorrect case sensit…
Browse files Browse the repository at this point in the history
…ivity in FrozenCollections (#94667)

* Add failing tests

* Fix incorrect case sensitivity in FrozenDictionary and FrozenSet for some cases

fixes #93974

* When hashing the entire string, case sensitivity of hash and equals should be the same

* Address code review comments

* Only ignore case insensitivity if entire string is ASCII non-letters

* Code review comments

* Undo some new lines

* Fixed tests - incorrect leftover from previous PR
  • Loading branch information
andrewjsaid authored and eiriktsarpalis committed Nov 14, 2023
1 parent 4c6ce46 commit 5fd9852
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static AnalysisResults Analyze(
AnalysisResults results;
if (minLength == 0 || !TryUseSubstring(uniqueStrings, ignoreCase, minLength, maxLength, out results))
{
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, static (s, _, _) => s.AsSpan());
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, isSubstring: false, static (s, _, _) => s.AsSpan());
}

return results;
Expand Down Expand Up @@ -71,7 +71,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, index, count,
uniqueStrings, ignoreCase, minLength, maxLength, index, count, isSubstring: true,
static (string s, int index, int count) => s.AsSpan(index, count));
return true;
}
Expand All @@ -96,7 +96,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, isSubstring: true,
static (string s, int index, int count) => s.AsSpan(s.Length + index, count));
return true;
}
Expand All @@ -110,7 +110,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
}

private static AnalysisResults CreateAnalysisResults(
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getSubstringSpan)
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, bool isSubstring, GetSpan getSubstringSpan)
{
// Start off by assuming all strings are ASCII
bool allAsciiIfIgnoreCase = true;
Expand All @@ -120,11 +120,11 @@ private static AnalysisResults CreateAnalysisResults(
// substrings are ASCII, so we check each.
if (ignoreCase)
{
// Further, if the ASCII substrings don't contain any letters, then we can
// Further, if the ASCII keys (in their entirety) don't contain any letters, then we can
// actually perform the comparison as case-sensitive even if case-insensitive
// was requested, as there's nothing that would compare equally to the substring
// other than the substring itself.
bool canSwitchIgnoreCaseToCaseSensitive = true;
bool canSwitchIgnoreCaseHashToCaseSensitive = !isSubstring;

foreach (string s in uniqueStrings)
{
Expand All @@ -135,20 +135,20 @@ private static AnalysisResults CreateAnalysisResults(
if (!IsAllAscii(substring))
{
allAsciiIfIgnoreCase = false;
canSwitchIgnoreCaseToCaseSensitive = false;
canSwitchIgnoreCaseHashToCaseSensitive = false;
break;
}

// All substrings so far are still ASCII only. If this one contains any ASCII
// letters, mark that we can't switch to case-sensitive.
if (canSwitchIgnoreCaseToCaseSensitive && ContainsAnyLetters(substring))
if (canSwitchIgnoreCaseHashToCaseSensitive && ContainsAnyLetters(substring))
{
canSwitchIgnoreCaseToCaseSensitive = false;
canSwitchIgnoreCaseHashToCaseSensitive = false;
}
}

// If we can switch to case-sensitive, do so.
if (canSwitchIgnoreCaseToCaseSensitive)
if (canSwitchIgnoreCaseHashToCaseSensitive)
{
ignoreCase = false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,15 @@ public static IEnumerable<object[]> StringStringData() =>
Enumerable.Range(0, 100).Select(i => $"{i:D2}ABCDEFGH\U0001F600").ToArray(), // left justified substring non-ascii
Enumerable.Range(0, 100).Select(i => $"ABCDEFGH\U0001F600{i:D2}").ToArray(), // right justified substring non-ascii
Enumerable.Range(0, 20).Select(i => i.ToString("D2")).Select(s => (char)(s[0] + 128) + "" + (char)(s[1] + 128)).ToArray(), // left-justified non-ascii

Enumerable.Range(0, 10).Select(i => $"{i}ABCDefgh").ToArray(), // left justified single char ascii, mixed casing
Enumerable.Range(0, 10).Select(i => $"ABCDefgh{i}").ToArray(), // right justified single char ascii, mixed casing
Enumerable.Range(0, 100).Select(i => $"{i:D2}ABCDefgh").ToArray(), // left justified substring ascii, mixed casing
Enumerable.Range(0, 100).Select(i => $"ABCDefgh{i:D2}").ToArray(), // right justified substring ascii, mixed casing
Enumerable.Range(0, 10).Select(i => $"{i}ABCDefgh\U0001F600").ToArray(), // left justified single char non-ascii, mixed casing
Enumerable.Range(0, 10).Select(i => $"ABCDefgh\U0001F600{i}").ToArray(), // right justified single char non-ascii, mixed casing
Enumerable.Range(0, 100).Select(i => $"{i:D2}ABCDefgh\U0001F600").ToArray(), // left justified substring non-ascii, mixed casing
Enumerable.Range(0, 100).Select(i => $"ABCDefgh\U0001F600{i:D2}").ToArray(), // right justified substring non-ascii, mixed casing
}
select new object[] { keys.ToDictionary(i => i, i => i, comparer) };

Expand Down Expand Up @@ -191,6 +200,23 @@ private static void FrozenDictionaryWorker<TKey, TValue>(Dictionary<TKey, TValue
Assert.True(frozen.TryGetValue(pair.Key, out TValue value));
Assert.Equal(pair.Value, value);
}

if (typeof(TKey) == typeof(string) && ReferenceEquals(frozen.Comparer, StringComparer.OrdinalIgnoreCase))
{
foreach (KeyValuePair<TKey, TValue> pair in source)
{
TKey keyUpper = (TKey)(object)((string)(object)pair.Key).ToUpper();
bool isValidTest = frozen.Comparer.Equals(pair.Key, keyUpper);
if (isValidTest)
{
Assert.Equal(pair.Value, frozen.GetValueRefOrNullRef(keyUpper));
Assert.Equal(pair.Value, frozen[keyUpper]);
Assert.True(frozen.TryGetValue(keyUpper, out TValue value));
Assert.Equal(pair.Value, value);
}
}
}

foreach (KeyValuePair<TKey, TValue> pair in frozen)
{
Assert.True(source.TryGetValue(pair.Key, out TValue value));
Expand Down Expand Up @@ -269,6 +295,22 @@ private void FrozenSetWorker<TKey, TValue>(Dictionary<TKey, TValue> source)
Assert.True(frozen.TryGetValue(pair.Key, out TKey actualKey));
Assert.Equal(pair.Key, actualKey);
}

if (typeof(TKey) == typeof(string) && ReferenceEquals(frozen.Comparer, StringComparer.OrdinalIgnoreCase))
{
foreach (KeyValuePair<TKey, TValue> pair in source)
{
TKey keyUpper = (TKey)(object)((string)(object)pair.Key).ToUpper();
bool isValidTest = frozen.Comparer.Equals(pair.Key, keyUpper);
if (isValidTest)
{
Assert.True(frozen.Contains(keyUpper));
Assert.True(frozen.TryGetValue(keyUpper, out TKey actualKey));
Assert.Equal(pair.Key, actualKey);
}
}
}

foreach (TKey key in frozen)
{
Assert.True(source.TryGetValue(key, out _));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ public static void LeftHandCaseInsensitive()
Assert.False(r.AllAsciiIfIgnoreCase);
Assert.Equal(7, r.HashIndex);
Assert.Equal(1, r.HashCount);

r = RunAnalysis(new[] { "1abc", "2abc", "3abc", "4abc", "5abc", "6abc" }, true);
Assert.False(r.RightJustifiedSubstring);
Assert.True(r.IgnoreCase);
Assert.True(r.AllAsciiIfIgnoreCase);
Assert.Equal(0, r.HashIndex);
Assert.Equal(1, r.HashCount);

}

[Fact]
Expand Down

0 comments on commit 5fd9852

Please sign in to comment.