Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[browser][non-icu] HybridGlobalization checking for prefix/suffix #85093

Merged
merged 11 commits into from
Apr 22, 2023
Merged
14 changes: 14 additions & 0 deletions docs/design/features/hybrid-globalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,17 @@ hiraganaBig.localeCompare(katakanaSmall, "en-US", { sensitivity: "base" }) // 0;
`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace`

`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase`


**String starts with / ends with**

Affected public APIs:
- CompareInfo.IsPrefix
- CompareInfo.IsSuffix
- String.StartsWith
- String.EndsWith

The Web API does not expose locale-sensitive `endsWith`/`startsWith` functions. As a workaround, both strings are normalized and weightless characters are removed. The resulting strings are cut to the same length and then compared. This approach has the same compare option limitations as described under **String comparison**. Because the strings are normalized before they are cut, the match length cannot be calculated on the original strings. Methods that calculate this information throw `PlatformNotSupportedException`:

- [CompareInfo.IsPrefix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.isprefix?view=net-8.0#system-globalization-compareinfo-isprefix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))
- [CompareInfo.IsSuffix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.issuffix?view=net-8.0#system-globalization-compareinfo-issuffix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ internal static unsafe partial class JsGlobalization
{
// Interop declaration with no managed body: the InternalCall attribute means the
// implementation is supplied by the runtime.
// Each string is passed as a (char*, length) pair, together with a culture name and
// the CompareOptions to apply.
// NOTE(review): presumably returns a negative/zero/positive ordering result and reports
// failures through exceptionMessage instead of throwing - confirm against the native/JS side.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int CompareString(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);

// Interop declaration with no managed body: the InternalCall attribute means the
// implementation is supplied by the runtime.
// Takes two strings as (char*, length) pairs, a culture name and CompareOptions, and
// returns a bool.
// NOTE(review): presumably str1 is the source and str2 the candidate prefix, and
// exceptionMessage carries any error raised by the implementation - confirm with callers.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool StartsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);

// Interop declaration with no managed body: the InternalCall attribute means the
// implementation is supplied by the runtime.
// Takes two strings as (char*, length) pairs, a culture name and CompareOptions, and
// returns a bool.
// NOTE(review): presumably str1 is the source and str2 the candidate suffix, and
// exceptionMessage carries any error raised by the implementation - confirm with callers.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIsPrefixTests
public class CompareInfoIsPrefixTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;

public static IEnumerable<object[]> IsPrefix_TestData()
{
// Empty strings
Expand All @@ -31,7 +25,8 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "dz", "d", CompareOptions.None, true, 1 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.Ordinal, true, 1 };

// Turkish
Expand All @@ -56,7 +51,7 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true, 1 };
yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true, 1 };
Expand All @@ -76,16 +71,20 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };

// Ignore symbols
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser) // only a few symbols are ignored
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };

// Platform differences
bool useNls = PlatformDetection.IsNlsGlobalization;
if (useNls)
if (useNls || PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
}
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, true, 1 };
}
Expand All @@ -106,12 +105,18 @@ public static IEnumerable<object[]> IsPrefix_TestData()
}

// Prefixes where matched length does not equal value string length
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 };
}
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
}
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
}

[Theory]
Expand Down Expand Up @@ -140,8 +145,11 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa
valueBoundedMemory.MakeReadonly();

Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
}
}

[Fact]
Expand All @@ -150,7 +158,7 @@ public void IsPrefix_UnassignedUnicode()
bool result = PlatformDetection.IsNlsGlobalization ? true : false;
int expectedMatchLength = (result) ? 6 : 0;
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIsSuffixTests
public class CompareInfoIsSuffixTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;
private static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;

public static IEnumerable<object[]> IsSuffix_TestData()
{
// Empty strings
Expand All @@ -32,12 +25,16 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "dz", "z", CompareOptions.None, true, 1 };
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, false, 0 };
yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.Ordinal, true, 1 };

// Slovak
yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, false, 0 };
yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, false, 0 };
yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, false, 0 };
}
yield return new object[] { s_slovakCompare, "chh", "H", CompareOptions.IgnoreCase, true, 1 };

// Turkish
Expand All @@ -62,7 +59,7 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "Exhibit \u00C0", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308o", "o", CompareOptions.None, true, 1 };
Expand All @@ -83,18 +80,22 @@ public static IEnumerable<object[]> IsSuffix_TestData()
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };

// Ignore symbols
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.None, false, 0 };

// NULL character
yield return new object[] { s_invariantCompare, "a\u0000b", "a\u0000b", CompareOptions.None, true, 3 };
yield return new object[] { s_invariantCompare, "a\u0000b", "b\u0000b", CompareOptions.None, false, 0 };

// Platform differences
if (PlatformDetection.IsNlsGlobalization)
if (PlatformDetection.IsNlsGlobalization || PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "foobardzsdzs", "rddzs", CompareOptions.None, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "foobardzsdzs", "rddzs", CompareOptions.None, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
}
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.IgnoreCase, true, 1 };
} else
Expand All @@ -106,12 +107,15 @@ public static IEnumerable<object[]> IsSuffix_TestData()
}

// Suffixes where matched length does not equal value string length
yield return new object[] { s_invariantCompare, "xyzdz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
yield return new object[] { s_invariantCompare, "xyz\u01F3", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
yield return new object[] { s_germanCompare, "xyz Strasse", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_germanCompare, "xyz Strasse", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "xyz Strasse", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "xyzdz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
yield return new object[] { s_invariantCompare, "xyz\u01F3", "dz", supportedIgnoreNonSpaceOption, true, 1 };
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
yield return new object[] { s_germanCompare, "xyz Strasse", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
}
yield return new object[] { s_germanCompare, "xyz stra\u00DFe", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
}

[Theory]
Expand Down Expand Up @@ -140,8 +144,11 @@ public void IsSuffix(CompareInfo compareInfo, string source, string value, Compa
valueBoundedMemory.MakeReadonly();

Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
}
}

[Fact]
Expand All @@ -151,7 +158,7 @@ public void IsSuffix_UnassignedUnicode()
int expectedMatchLength = (result) ? 6 : 0;

IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
IsSuffix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class CompareInfoTestsBase
protected static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
protected static CompareInfo s_japaneseCompare = new CultureInfo("ja-JP").CompareInfo;
protected static CompareInfo s_slovakCompare = new CultureInfo("sk-SK").CompareInfo;
protected static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;
protected static CompareOptions supportedIgnoreNonSpaceOption =
PlatformDetection.IsHybridGlobalizationOnBrowser ?
CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType :
Expand Down
Loading