Skip to content

Commit

Permalink
[browser][non-icu] HybridGlobalization normalization. (#85510)
Browse files Browse the repository at this point in the history
* Normalization.

* Missing measurement.

* Trying to fix trimming error.

* Applied @pavelsavara's idea.

* Code review improvement.

* Feedback: standardize the exceptions.

* Revert changes from #85516 to clean the CI.

* Addressed review feedback.
  • Loading branch information
ilonatommy authored May 24, 2023
1 parent ce689d9 commit 7a11915
Show file tree
Hide file tree
Showing 21 changed files with 403 additions and 147 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ internal static partial class Interop
internal static unsafe partial class JsGlobalization
{
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int CompareString(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
internal static extern unsafe int CompareString(in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, out int exceptionalResult, out object result);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool StartsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
internal static extern unsafe bool StartsWith(in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, out int exceptionalResult, out object result);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
internal static extern unsafe bool EndsWith(in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, out int exceptionalResult, out object result);

[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int IndexOf(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, bool fromBeginning);
internal static extern unsafe int IndexOf(in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options, bool fromBeginning, out int exceptionalResult, out object result);
}
}
17 changes: 17 additions & 0 deletions src/libraries/Common/src/Interop/Browser/Interop.Normalization.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;
using System.Text;

// Interop declarations for string normalization; this file lives under Interop/Browser.
internal static partial class Interop
{
// Every method below is marked InternalCall: it has no managed body and is supplied by the runtime.
// NOTE(review): the class name suggests the implementation is JavaScript-backed - confirm against the runtime-side bindings.
internal static unsafe partial class JsGlobalization
{
// Presumably reports whether 'source' is already in 'normalizationForm'.
// The managed caller (Normalization.JsIsNormalized) treats a return value of 1 as "normalized".
// A non-zero 'exceptionalResult' is treated by that caller as failure; it then casts 'result'
// to string and uses it as the exception message.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int IsNormalized(NormalizationForm normalizationForm, in string source, out int exceptionalResult, out object result);

// Presumably writes the normalized form of 'source' into 'dstBuffer' (capacity 'dstBufferCapacity', in chars - TODO confirm units).
// The managed caller (Normalization.JsNormalize) interprets the return value as the length of the
// normalized text: if it exceeds the capacity passed in, the caller retries once with a buffer of that size.
// 'exceptionalResult' / 'result' follow the same failure convention as IsNormalized above.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe int NormalizeString(NormalizationForm normalizationForm, in string source, char* dstBuffer, int dstBufferCapacity, out int exceptionalResult, out object result);
}
}
4 changes: 2 additions & 2 deletions src/libraries/Common/src/Interop/Browser/Interop.TextInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ internal static partial class Interop
internal static unsafe partial class JsGlobalization
{
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe void ChangeCaseInvariant(out string exceptionMessage, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper);
internal static extern unsafe void ChangeCaseInvariant(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper, out int exceptionalResult, out object result);
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern unsafe void ChangeCase(out string exceptionMessage, in string culture, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper);
internal static extern unsafe void ChangeCase(in string culture, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper, out int exceptionalResult, out object result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Builds only for the browser flavor of the current .NET target, with HybridGlobalization switched on. -->
  <PropertyGroup>
    <TargetFrameworks>$(NetCoreAppCurrent)-browser</TargetFrameworks>
    <TestRuntime>true</TestRuntime>
    <HybridGlobalization>true</HybridGlobalization>
  </PropertyGroup>

  <!-- Sources are pulled in from the sibling Normalization directory rather than duplicated here. -->
  <ItemGroup>
    <Compile Include="..\Normalization\NormalizationAll.cs" />
    <Compile Include="..\Normalization\StringNormalizationTests.cs" />
  </ItemGroup>

  <!-- Data files from the same directory, embedded under fixed logical resource names. -->
  <ItemGroup>
    <EmbeddedResource Include="..\Normalization\Data\win8.txt" LogicalName="NormalizationDataWin8" />
    <EmbeddedResource Include="..\Normalization\Data\win7.txt" LogicalName="NormalizationDataWin7" />
  </ItemGroup>
</Project>
6 changes: 3 additions & 3 deletions src/libraries/System.Globalization/System.Globalization.sln
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Microsoft Visual Studio Solution File, Format Version 12.00
#
#
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Private.CoreLib", "..\..\coreclr\System.Private.CoreLib\System.Private.CoreLib.csproj", "{E269F8BB-F629-4C96-B9B2-03A00D8B1BFB}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestUtilities.Unicode", "..\Common\tests\TestUtilities.Unicode\TestUtilities.Unicode.csproj", "{79613DED-481D-44EF-BB89-7AC6BD53026B}"
Expand Down Expand Up @@ -34,9 +34,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ref", "ref", "{A93AFF96-DB2
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "gen", "gen", "{0378EF1C-9838-4AD0-867D-506FB02F8BBB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Hybrid.IOS.Tests", "tests\Hybrid\Hybrid.IOS.Tests.csproj", "{16D9996B-A4E1-440B-8D74-C9ED3715158D}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "System.Globalization.Hybrid.IOS.Tests", "tests\Hybrid\System.Globalization.Hybrid.IOS.Tests.csproj", "{16D9996B-A4E1-440B-8D74-C9ED3715158D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Hybrid.WASM.Tests", "tests\Hybrid\Hybrid.WASM.Tests.csproj", "{CAA35471-75A3-41A8-B09D-0CC9822A8E3B}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "System.Globalization.Hybrid.WASM.Tests", "tests\Hybrid\System.Globalization.Hybrid.WASM.Tests.csproj", "{CAA35471-75A3-41A8-B09D-0CC9822A8E3B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,12 @@ public void ToLower_Netcore(string name, string str, string expected)
[Fact]
public void ToLower_InvalidSurrogates()
{
bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS;

// Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged.
foreach (string cultureName in new string[] { "", "en-US", "fr" })
{
ToLower(cultureName, "\uD83C\uD83C", usesTextDecoder ? "\uFFFD\uFFFD" : "\uD83C\uD83C");
ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uD83C, this one is tricky");
ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", usesTextDecoder ? "be careful, \uFFFD\uFFFD, this one is tricky" : "be careful, \uDF08\uDF08, this one is tricky");
ToLower(cultureName, "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY", "be careful, \uD83C\uD83C, this one is tricky");
ToLower(cultureName, "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY", "be careful, \uDF08\uD83C, this one is tricky");
ToLower(cultureName, "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY", "be careful, \uDF08\uDF08, this one is tricky");
}
}

Expand Down Expand Up @@ -454,14 +452,12 @@ public void ToUpper_netcore(string name, string str, string expected)
[Fact]
public void ToUpper_InvalidSurrogates()
{
bool usesTextDecoder = PlatformDetection.IsHybridGlobalizationOnBrowser && PlatformDetection.IsBrowserDomSupportedOrNodeJS;

// Invalid UTF-16 in a string (mismatched surrogate pairs) should be unchanged.
foreach (string cultureName in new string[] { "", "en-US", "fr"})
{
ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY");
ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY");
ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", usesTextDecoder ? "BE CAREFUL, \uFFFD\uFFFD, THIS ONE IS TRICKY" : "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY");
ToUpper(cultureName, "be careful, \uD83C\uD83C, this one is tricky", "BE CAREFUL, \uD83C\uD83C, THIS ONE IS TRICKY");
ToUpper(cultureName, "be careful, \uDF08\uD83C, this one is tricky", "BE CAREFUL, \uDF08\uD83C, THIS ONE IS TRICKY");
ToUpper(cultureName, "be careful, \uDF08\uDF08, this one is tricky", "BE CAREFUL, \uDF08\uDF08, THIS ONE IS TRICKY");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Normalization.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Normalization.Icu.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Normalization.Nls.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Normalization.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\NumberFormatInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\NumberStyles.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Ordinal.cs" />
Expand Down Expand Up @@ -1254,6 +1255,9 @@
<Compile Include="$(MSBuildThisFileDirectory)System\WeakReference.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\WeakReference.T.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\ComAwareWeakReference.cs" />
<Compile Include="$(CommonPath)Interop\Browser\Interop.Normalization.cs" Condition="'$(TargetsBrowser)' == 'true'">
<Link>Common\Interop\Interop.Normalization.cs</Link>
</Compile>
<Compile Include="$(CommonPath)Interop\Browser\Interop.CompareInfo.cs" Condition="'$(TargetsBrowser)' == 'true'">
<Link>Common\Interop\Interop.CompareInfo.cs</Link>
</Compile>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,9 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan<char> source, Rea
#if TARGET_BROWSER
if (GlobalizationMode.Hybrid)
{
int result = Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, b, target.Length, a, source.Length, options, fromBeginning);
if (!string.IsNullOrEmpty(exceptionMessage))
{
throw new Exception(exceptionMessage);
}
int result = Interop.JsGlobalization.IndexOf(m_name, b, target.Length, a, source.Length, options, fromBeginning, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
return result;
}
#endif
Expand Down Expand Up @@ -288,7 +286,12 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan<char> source, ReadOnlySpan<
InteropCall:
#if TARGET_BROWSER
if (GlobalizationMode.Hybrid)
return Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, b, target.Length, a, source.Length, options, fromBeginning);
{
int result = Interop.JsGlobalization.IndexOf(m_name, b, target.Length, a, source.Length, options, fromBeginning, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
return result;
}
#endif
if (fromBeginning)
return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ private unsafe int JsCompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char
fixed (char* pString1 = &MemoryMarshal.GetReference(string1))
fixed (char* pString2 = &MemoryMarshal.GetReference(string2))
{
cmpResult = Interop.JsGlobalization.CompareString(out string exceptionMessage, cultureName, pString1, string1.Length, pString2, string2.Length, options);

if (!string.IsNullOrEmpty(exceptionMessage))
throw new Exception(exceptionMessage);
cmpResult = Interop.JsGlobalization.CompareString(cultureName, pString1, string1.Length, pString2, string2.Length, options, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
}

return cmpResult;
Expand All @@ -69,10 +68,9 @@ private unsafe bool JsStartsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> p
fixed (char* pSource = &MemoryMarshal.GetReference(source))
fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
{
result = Interop.JsGlobalization.StartsWith(out string exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);

if (!string.IsNullOrEmpty(exceptionMessage))
throw new Exception(exceptionMessage);
result = Interop.JsGlobalization.StartsWith(cultureName, pSource, source.Length, pPrefix, prefix.Length, options, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
}


Expand All @@ -90,10 +88,9 @@ private unsafe bool JsEndsWith(ReadOnlySpan<char> source, ReadOnlySpan<char> pre
fixed (char* pSource = &MemoryMarshal.GetReference(source))
fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix))
{
result = Interop.JsGlobalization.EndsWith(out string exceptionMessage, cultureName, pSource, source.Length, pPrefix, prefix.Length, options);

if (!string.IsNullOrEmpty(exceptionMessage))
throw new Exception(exceptionMessage);
result = Interop.JsGlobalization.EndsWith(cultureName, pSource, source.Length, pPrefix, prefix.Length, options, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
}

return result;
Expand All @@ -118,10 +115,9 @@ private unsafe int JsIndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> t
fixed (char* pSource = &MemoryMarshal.GetReference(source))
fixed (char* pTarget = &MemoryMarshal.GetReference(target))
{
idx = Interop.JsGlobalization.IndexOf(out string exceptionMessage, m_name, pTarget, target.Length, pSource, source.Length, options, fromBeginning);

if (!string.IsNullOrEmpty(exceptionMessage))
throw new Exception(exceptionMessage);
idx = Interop.JsGlobalization.IndexOf(m_name, pTarget, target.Length, pSource, source.Length, options, fromBeginning, out int exception, out object ex_result);
if (exception != 0)
throw new Exception((string)ex_result);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;

namespace System.Globalization
{
internal static partial class Normalization
{
/// <summary>
/// Asks the Interop.JsGlobalization.IsNormalized interop call whether <paramref name="strInput"/>
/// is in the requested normalization form.
/// </summary>
/// <param name="strInput">The string to inspect.</param>
/// <param name="normalizationForm">The normalization form to check against.</param>
/// <returns><c>true</c> when the interop call returns 1; otherwise <c>false</c>.</returns>
/// <exception cref="Exception">
/// The interop call reported a failure (non-zero exception flag); the message is the
/// string it handed back.
/// </exception>
private static unsafe bool JsIsNormalized(string strInput, NormalizationForm normalizationForm)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    // Argument checking is delegated to the helper shared by this partial class.
    ValidateArguments(strInput, normalizationForm);

    int status = Interop.JsGlobalization.IsNormalized(normalizationForm, strInput, out int errorFlag, out object errorPayload);
    if (errorFlag == 0)
    {
        return status == 1;
    }

    // On failure the out object carries the error message.
    throw new Exception((string)errorPayload);
}

/// <summary>
/// Normalizes <paramref name="strInput"/> to <paramref name="normalizationForm"/> through the
/// Interop.JsGlobalization.NormalizeString interop call.
/// </summary>
/// <param name="strInput">The string to normalize.</param>
/// <param name="normalizationForm">The target normalization form.</param>
/// <returns>
/// <paramref name="strInput"/> itself when the normalized text is identical to it;
/// otherwise a new string holding the normalized text.
/// </returns>
/// <exception cref="Exception">
/// The interop call reported a failure (non-zero exception flag); the message is the
/// string it handed back.
/// </exception>
/// <exception cref="ArgumentException">
/// The reported length still exceeded the buffer after the single retry.
/// </exception>
private static unsafe string JsNormalize(string strInput, NormalizationForm normalizationForm)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    // Argument checking is delegated to the helper shared by this partial class.
    ValidateArguments(strInput, normalizationForm);

    const int StackallocThreshold = 512;
    const int MaxAttempts = 2;

    // Non-null only while a pooled array is outstanding; the finally block hands it back.
    char[]? rented = null;
    try
    {
        // Short inputs start on the stack; longer ones start with a pooled array
        // of at least the input's length.
        Span<char> destination = strInput.Length <= StackallocThreshold
            ? stackalloc char[StackallocThreshold]
            : (rented = ArrayPool<char>.Shared.Rent(strInput.Length));

        int attempt = 0;
        while (attempt < MaxAttempts)
        {
            int requiredLength;
            fixed (char* pDestination = &MemoryMarshal.GetReference(destination))
            {
                requiredLength = Interop.JsGlobalization.NormalizeString(normalizationForm, strInput, pDestination, destination.Length, out int errorFlag, out object errorPayload);
                if (errorFlag != 0)
                {
                    throw new Exception((string)errorPayload);
                }
            }

            if (requiredLength <= destination.Length)
            {
                ReadOnlySpan<char> normalized = destination.Slice(0, requiredLength);

                // Avoid allocating when normalization left the text unchanged.
                if (normalized.SequenceEqual(strInput))
                {
                    return strInput;
                }

                return new string(normalized);
            }

            Debug.Assert(requiredLength > StackallocThreshold);

            if (attempt == 0)
            {
                if (rented != null)
                {
                    // Null the field before returning the array so the finally block
                    // can never return the same buffer a second time.
                    char[] previous = rented;
                    rented = null;
                    ArrayPool<char>.Shared.Return(previous);
                }

                // Retry once with a buffer sized to the length the call reported.
                destination = rented = ArrayPool<char>.Shared.Rent(requiredLength);
            }

            attempt++;
        }

        throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput));
    }
    finally
    {
        if (rented != null)
        {
            ArrayPool<char>.Shared.Return(rented);
        }
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati

return GlobalizationMode.UseNls ?
NlsIsNormalized(strInput, normalizationForm) :
#if TARGET_BROWSER
GlobalizationMode.Hybrid ?
JsIsNormalized(strInput, normalizationForm) :
#endif
IcuIsNormalized(strInput, normalizationForm);
}

Expand All @@ -33,6 +37,10 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio

return GlobalizationMode.UseNls ?
NlsNormalize(strInput, normalizationForm) :
#if TARGET_BROWSER
GlobalizationMode.Hybrid ?
JsNormalize(strInput, normalizationForm) :
#endif
IcuNormalize(strInput, normalizationForm);
}
}
Expand Down
Loading

0 comments on commit 7a11915

Please sign in to comment.