Skip to content

Commit

Permalink
[browser][non-icu] HybridGlobalization checking for prefix/suffix (#…
Browse files Browse the repository at this point in the history
…85093)

* Implementation.

* HG does not belong to legacy code.

* No need to create new instance when existing one is exported.

* TextEncoder's behavior varies between hosts.

* Nit

* Cutting prevents us from using IgnoreSymbols.

* Fixed asserts.

* Fix.

* Match platform with behavior.

* Missing changes to prev commit.
  • Loading branch information
ilonatommy authored Apr 22, 2023
1 parent 7c61b98 commit fa5d12e
Show file tree
Hide file tree
Showing 14 changed files with 357 additions and 81 deletions.
17 changes: 17 additions & 0 deletions docs/design/features/hybrid-globalization.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,20 @@ hiraganaBig.localeCompare(katakanaSmall, "en-US", { sensitivity: "base" }) // 0;
`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace`

`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase`


**String starts with / ends with**

Affected public APIs:
- CompareInfo.IsPrefix
- CompareInfo.IsSuffix
- String.StartsWith
- String.EndsWith

Web API does not expose locale-sensitive endsWith/startsWith function. As a workaround, both strings get normalized and weightless characters are removed. Resulting strings are cut to the same length and comparison is performed. This approach, beyond having the same compare option limitations as described under **String comparison**, has additional limitations connected with the workaround used. Because we are normalizing strings to be able to cut them, we cannot calculate the match length on the original strings. Methods that calculate this information throw PlatformNotSupported exception:

- [CompareInfo.IsPrefix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.isprefix?view=net-8.0#system-globalization-compareinfo-isprefix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))
- [CompareInfo.IsSuffix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.issuffix?view=net-8.0#system-globalization-compareinfo-issuffix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))

- `IgnoreSymbols`
Only comparisons that do not skip character types are allowed. E.g. `IgnoreSymbols` skips symbol-chars in comparison/indexing. All `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`.
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ internal static unsafe partial class JsGlobalization
{
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int CompareString(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool StartsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,8 @@

namespace System.Globalization.Tests
{
public class CompareInfoIsPrefixTests
public class CompareInfoIsPrefixTests : CompareInfoTestsBase
{
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;

public static IEnumerable<object[]> IsPrefix_TestData()
{
// Empty strings
Expand All @@ -31,7 +25,8 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "dz", "d", CompareOptions.None, true, 1 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.Ordinal, true, 1 };

// Turkish
Expand All @@ -56,7 +51,7 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true, 1 };
yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true, 1 };
Expand All @@ -76,42 +71,57 @@ public static IEnumerable<object[]> IsPrefix_TestData()
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };

// Ignore symbols
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };
}

// Platform differences
bool useNls = PlatformDetection.IsNlsGlobalization;
if (useNls)
// in HybridGlobalization on Browser we use TextEncoder that is not supported for v8 and the manual decoding works like NLS
bool behavesLikeNls = PlatformDetection.IsNlsGlobalization ||
(PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsBrowserDomSupportedOrNodeJS);
if (behavesLikeNls)
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
}
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, true, 1 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, true, 1 };
}
else
{
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, false, 0 };
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, false, 0 };
}

// ICU bugs
// UInt16 overflow: https://unicode-org.atlassian.net/browse/ICU-20832 fixed in https://github.com/unicode-org/icu/pull/840 (ICU 65)
if (useNls || PlatformDetection.ICUVersion.Major >= 65)
if (PlatformDetection.IsNlsGlobalization || PlatformDetection.ICUVersion.Major >= 65)
{
yield return new object[] { s_frenchCompare, "b", new string('a', UInt16.MaxValue + 1), CompareOptions.None, false, 0 };
}

// Prefixes where matched length does not equal value string length
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 };
}
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
}
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
}

[Theory]
Expand Down Expand Up @@ -140,8 +150,11 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa
valueBoundedMemory.MakeReadonly();

Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
{
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
Assert.Equal(expectedMatchLength, actualMatchLength);
}
}

[Fact]
Expand All @@ -150,7 +163,7 @@ public void IsPrefix_UnassignedUnicode()
bool result = PlatformDetection.IsNlsGlobalization ? true : false;
int expectedMatchLength = (result) ? 6 : 0;
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
}

[Fact]
Expand Down
Loading

0 comments on commit fa5d12e

Please sign in to comment.