mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-09 09:34:49 +09:00
[OSX] HybridGlobalization Implement casing functions (#87919)
Implement GlobalizationNative_ChangeCaseNative , GlobalizationNative_ChangeCaseInvariantNative for OSX
This commit is contained in:
parent
c88b3776d0
commit
67b1ede8cb
15 changed files with 240 additions and 26 deletions
|
@ -409,3 +409,33 @@ Affected public APIs:
|
|||
- CompareInfo.GetHashCode
|
||||
|
||||
Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
|
||||
|
||||
|
||||
## Case change
|
||||
|
||||
Affected public APIs:
|
||||
- TextInfo.ToLower,
|
||||
- TextInfo.ToUpper
|
||||
|
||||
The following Apple native functions are used:
|
||||
- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring)
|
||||
- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring)
|
||||
- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc)
|
||||
- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc)
|
||||
|
||||
Behavioural changes compared to ICU
|
||||
|
||||
- Final sigma behavior correction:
|
||||
|
||||
ICU-based case change does not respect final-sigma rule, but hybrid does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".
|
||||
|
||||
- The cases below will throw an exception because the destination buffer is insufficiently sized:
|
||||
|
||||
- Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.
|
||||
|
||||
- Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ff) is uppercased to "FF"
|
||||
|
||||
- Capitalizing "\u0149" (ʼn) on Apple platforms returns combination of "\u02BC" (ʼ) and N -> (ʼN)
|
||||
|
||||
|
||||
|
||||
|
|
16
src/libraries/Common/src/Interop/Interop.Casing.OSX.cs
Normal file
16
src/libraries/Common/src/Interop/Interop.Casing.OSX.cs
Normal file
|
@ -0,0 +1,16 @@
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
internal static partial class Interop
{
    internal static partial class Globalization
    {
        // Locale-aware case change implemented by the Apple native shim.
        // localeName is marshalled as UTF-16 and its length in chars is passed separately in lNameLen;
        // src and dstBuffer are raw UTF-16 buffers whose lengths are given in chars.
        // The returned int is compared against Interop.Globalization.ResultCode by the caller.
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseNative(
            string localeName,
            int lNameLen,
            char* src,
            int srcLen,
            char* dstBuffer,
            int dstBufferCapacity,
            [MarshalAs(UnmanagedType.Bool)] bool bToUpper);

        // Locale-independent case change implemented by the Apple native shim.
        // There is no string parameter here, so the marshalling mode does not affect the call;
        // it is declared as Utf16 to match the UTF-16 char* buffers and the sibling import above.
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseInvariantNative(
            char* src,
            int srcLen,
            char* dstBuffer,
            int dstBufferCapacity,
            [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
    }
}
|
|
@ -11,7 +11,8 @@ internal static partial class Interop
|
|||
Success = 0,
|
||||
UnknownError = 1,
|
||||
InsufficientBuffer = 2,
|
||||
OutOfMemory = 3
|
||||
OutOfMemory = 3,
|
||||
InvalidCodePoint = 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,5 +38,6 @@
|
|||
<Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
|
||||
<Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
|
||||
<Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
|
||||
<Compile Include="..\System\Globalization\TextInfoTests.cs" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
|
@ -274,9 +274,9 @@ namespace System.Globalization.Tests
|
|||
// we also don't preform.
|
||||
// Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule).
|
||||
yield return new object[] { cultureName, "\u03A3", "\u03C3" };
|
||||
if (PlatformDetection.IsHybridGlobalizationOnBrowser)
|
||||
if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX)
|
||||
{
|
||||
// JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
|
||||
// JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
|
||||
yield return new object[] { cultureName, "O\u03A3", "o\u03C2" };
|
||||
}
|
||||
else
|
||||
|
@ -396,24 +396,30 @@ namespace System.Globalization.Tests
|
|||
// RAINBOW (outside the BMP and does not case)
|
||||
yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };
|
||||
|
||||
if (!PlatformDetection.IsHybridGlobalizationOnOSX)
|
||||
{
|
||||
// Unicode defines some codepoints which expand into multiple codepoints
|
||||
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
|
||||
// these sorts of expansions, since it would cause string lengths to change when cased,
|
||||
// which is non-intuitive. In addition, there are some context sensitive mappings which
|
||||
// we also don't preform.
|
||||
// es-zed does not case to SS when uppercased.
|
||||
// on OSX, capitalizing the German letter ß (sharp S) gives SS
|
||||
yield return new object[] { cultureName, "\u00DF", "\u00DF" };
|
||||
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
|
||||
if (!PlatformDetection.IsNlsGlobalization)
|
||||
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
|
||||
|
||||
// Ligatures do not expand when cased.
|
||||
// on OSX, this is uppercase to "FF"
|
||||
yield return new object[] { cultureName, "\uFB00", "\uFB00" };
|
||||
|
||||
// Precomposed character with no uppercase variant, we don't want to "decompose" this
|
||||
// as part of casing.
|
||||
// on OSX, this is uppercased to "ʼN"
|
||||
yield return new object[] { cultureName, "\u0149", "\u0149" };
|
||||
}
|
||||
}
|
||||
|
||||
// Turkish i
|
||||
foreach (string cultureName in GetTestLocales())
|
||||
|
|
|
@ -389,6 +389,7 @@
|
|||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" />
|
||||
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" />
|
||||
|
@ -1273,6 +1274,9 @@
|
|||
<Compile Include="$(CommonPath)Interop\Interop.Casing.cs">
|
||||
<Link>Common\Interop\Interop.Casing.cs</Link>
|
||||
</Compile>
|
||||
<Compile Include="$(CommonPath)Interop\Interop.Casing.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
|
||||
<Link>Common\Interop\Interop.Casing.OSX.cs</Link>
|
||||
</Compile>
|
||||
<Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
|
||||
<Link>Common\Interop\Interop.Collation.cs</Link>
|
||||
</Compile>
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace System.Globalization
{
    public partial class TextInfo
    {
        /// <summary>
        /// Changes the case of <paramref name="src"/> into <paramref name="dstBuffer"/> through the
        /// Apple native interop layer, using the invariant entry point when this instance has an
        /// empty culture name and the culture-aware entry point otherwise.
        /// </summary>
        /// <exception cref="Exception">The native call returned a non-success result code.</exception>
        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
        {
            // This path is only valid in hybrid globalization mode.
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);
            Debug.Assert(GlobalizationMode.Hybrid);

            int status = HasEmptyCultureName
                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);

            if (status == (int)Interop.Globalization.ResultCode.Success)
            {
                return;
            }

            // Translate the native result code into the message carried by the exception.
            string message = (Interop.Globalization.ResultCode)status switch
            {
                Interop.Globalization.ResultCode.InvalidCodePoint => "Invalid code point while case changing",
                Interop.Globalization.ResultCode.InsufficientBuffer => "Insufficiently sized destination buffer",
                _ => "Exception occurred while case changing",
            };

            throw new Exception(message);
        }
    }
}
|
|
@ -692,6 +692,12 @@ namespace System.Globalization
|
|||
JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
|
||||
return;
|
||||
}
|
||||
#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
|
||||
if (GlobalizationMode.Hybrid)
|
||||
{
|
||||
ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
|
||||
}
|
||||
|
|
|
@ -71,7 +71,8 @@ if(HAVE_SYS_ICU)
|
|||
set(icu_shim_sources_base
|
||||
${icu_shim_sources_base}
|
||||
pal_locale.m
|
||||
pal_collation.m)
|
||||
pal_collation.m
|
||||
pal_casing.m)
|
||||
endif()
|
||||
|
||||
addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")
|
||||
|
|
|
@ -93,7 +93,7 @@ else()
|
|||
endif()
|
||||
|
||||
if (CLR_CMAKE_TARGET_APPLE)
|
||||
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
|
||||
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m pal_casing.m)
|
||||
endif()
|
||||
|
||||
# time zone names are filtered out of icu data for the browser and associated functionality is disabled
|
||||
|
|
|
@ -59,16 +59,18 @@ static const Entry s_globalizationNative[] =
|
|||
DllImportEntry(GlobalizationNative_ToUnicode)
|
||||
DllImportEntry(GlobalizationNative_WindowsIdToIanaId)
|
||||
#ifdef __APPLE__
|
||||
DllImportEntry(GlobalizationNative_ChangeCaseInvariantNative)
|
||||
DllImportEntry(GlobalizationNative_ChangeCaseNative)
|
||||
DllImportEntry(GlobalizationNative_CompareStringNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
|
||||
DllImportEntry(GlobalizationNative_EndsWithNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleInfoIntNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleInfoStringNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleNameNative)
|
||||
DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative)
|
||||
DllImportEntry(GlobalizationNative_IndexOfNative)
|
||||
DllImportEntry(GlobalizationNative_StartsWithNative)
|
||||
DllImportEntry(GlobalizationNative_EndsWithNative)
|
||||
#endif
|
||||
};
|
||||
|
||||
|
|
|
@ -23,3 +23,19 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc,
|
|||
int32_t bToUpper);
|
||||
|
||||
PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
|
||||
|
||||
#ifdef __APPLE__
/*
 * Upper- or lower-cases the UTF-16 buffer lpSrc (cwSrcLength code units) into lpDst
 * (capacity cwDstLength code units) using the locale named by localeName
 * (lNameLength UTF-16 code units). A non-zero bToUpper selects upper-casing.
 * Returns a ResultCode value (0 on success).
 */
PALEXPORT int32_t GlobalizationNative_ChangeCaseNative(
    const uint16_t* localeName, int32_t lNameLength,
    const uint16_t* lpSrc, int32_t cwSrcLength,
    uint16_t* lpDst, int32_t cwDstLength,
    int32_t bToUpper);

/*
 * Locale-independent variant of GlobalizationNative_ChangeCaseNative:
 * same buffer parameters and return convention, without a locale name.
 */
PALEXPORT int32_t GlobalizationNative_ChangeCaseInvariantNative(
    const uint16_t* lpSrc, int32_t cwSrcLength,
    uint16_t* lpDst, int32_t cwDstLength,
    int32_t bToUpper);
#endif
|
||||
|
|
103
src/native/libs/System.Globalization.Native/pal_casing.m
Normal file
103
src/native/libs/System.Globalization.Native/pal_casing.m
Normal file
|
@ -0,0 +1,103 @@
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
|
||||
#include "pal_icushim_internal.h"
|
||||
#include "pal_casing.h"
|
||||
#include "pal_errors.h"
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#if defined(TARGET_OSX) || defined(TARGET_MACCATALYST) || defined(TARGET_IOS) || defined(TARGET_TVOS)
|
||||
|
||||
|
||||
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) only when something is written.
 * "Safe" macro, checks for a valid code point and for sufficient space.
 * Converts code points outside of Basic Multilingual Plane into
 * corresponding surrogate pairs if there is sufficient space in the string.
 * High surrogate range: 0xD800 - 0xDBFF
 * Low surrogate range: 0xDC00 - 0xDFFF
 * If the code point is not valid, isError is set to InvalidCodePoint.
 * If the code point (or the trail surrogate of a pair) does not fit,
 * isError is set to InsufficientBuffer and nothing is written.
 *
 * The macro arguments are evaluated more than once, so pass plain variables
 * or constants rather than expressions with side effects.
 *
 * @param buffer uint16_t * string buffer that is written to
 * @param offset string offset, advanced by the number of code units written
 * @param capacity size of the string buffer in code units
 * @param codePoint code point to append
 * @param isError output result code set to InsufficientBuffer or InvalidCodePoint
 *                if an error occurs, otherwise not modified
 */
#define Append(buffer, offset, capacity, codePoint, isError) do { \
    if ((offset) >= (capacity)) /* insufficiently sized destination buffer */ { \
        (isError) = InsufficientBuffer; \
    } else if ((uint32_t)(codePoint) > 0x10ffff) /* invalid code point */ { \
        (isError) = InvalidCodePoint; \
    } else if ((uint32_t)(codePoint) <= 0xffff) { \
        (buffer)[(offset)++] = (uint16_t)(codePoint); \
    } else if ((capacity) - (offset) < 2) /* no room for the trail surrogate */ { \
        (isError) = InsufficientBuffer; \
    } else { \
        (buffer)[(offset)++] = (uint16_t)(((codePoint) >> 10) + 0xd7c0); \
        (buffer)[(offset)++] = (uint16_t)(((codePoint)&0x3ff) | 0xdc00); \
    } \
} while (0)
|
||||
|
||||
/*
Function:
ChangeCaseNative

Performs upper or lower casing of a string into a new buffer, taking into account the specified locale.
When localeName is NULL or lNameLength is 0, [NSLocale systemLocale] is used.
Lengths are expressed in UTF-16 code units.
Returns 0 for success, non-zero on failure see ErrorCodes.
*/
int32_t GlobalizationNative_ChangeCaseNative(const uint16_t* localeName, int32_t lNameLength,
                                             const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // The caller's thread may not have an autorelease pool in place, so scope the
    // temporary Foundation objects created here to a local pool.
    @autoreleasepool
    {
        NSLocale *currentLocale;
        if (localeName == NULL || lNameLength == 0)
        {
            currentLocale = [NSLocale systemLocale];
        }
        else
        {
            NSString *locName = [NSString stringWithCharacters:localeName length:(NSUInteger)lNameLength];
            currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
        }

        NSString *source = [NSString stringWithCharacters:lpSrc length:(NSUInteger)cwSrcLength];
        NSString *result = bToUpper ? [source uppercaseStringWithLocale:currentLocale]
                                    : [source lowercaseStringWithLocale:currentLocale];

        // The cased string can be longer than the source (e.g. sharp S -> SS), so every
        // code unit is bounds-checked against the destination capacity by Append.
        NSUInteger resultLength = result.length;
        int32_t dstIdx = 0, isError = 0;
        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
        {
            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
            if (isError)
                return isError;
        }
        return Success;
    }
}
|
||||
|
||||
/*
Function:
ChangeCaseInvariantNative

Performs upper or lower casing of a string into a new buffer using
NSString's uppercaseString / lowercaseString (no locale is supplied).
Lengths are expressed in UTF-16 code units.
Returns 0 for success, non-zero on failure see ErrorCodes.
*/
int32_t GlobalizationNative_ChangeCaseInvariantNative(const uint16_t* lpSrc, int32_t cwSrcLength, uint16_t* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // The caller's thread may not have an autorelease pool in place, so scope the
    // temporary Foundation objects created here to a local pool.
    @autoreleasepool
    {
        NSString *source = [NSString stringWithCharacters:lpSrc length:(NSUInteger)cwSrcLength];
        NSString *result = bToUpper ? source.uppercaseString : source.lowercaseString;

        // The cased string can be longer than the source, so every code unit is
        // bounds-checked against the destination capacity by Append.
        NSUInteger resultLength = result.length;
        int32_t dstIdx = 0, isError = 0;
        for (NSUInteger srcIdx = 0; srcIdx < resultLength; srcIdx++)
        {
            uint16_t dstCodepoint = [result characterAtIndex:srcIdx];
            Append(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
            if (isError)
                return isError;
        }
        return Success;
    }
}
|
||||
|
||||
#endif
|
|
@ -12,5 +12,6 @@ typedef enum
|
|||
Success = 0,
|
||||
UnknownError = 1,
|
||||
InsufficientBuffer = 2,
|
||||
OutOfMemory = 3
|
||||
OutOfMemory = 3,
|
||||
InvalidCodePoint = 4
|
||||
} ResultCode;
|
||||
|
|
|
@ -459,7 +459,7 @@ int32_t GlobalizationNative_GetLocaleInfoIntNative(const char* localeName, Local
|
|||
}
|
||||
case LocaleNumber_ReadingLayout:
|
||||
{
|
||||
NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[[NSLocale currentLocale] objectForKey:NSLocaleLanguageCode]];
|
||||
NSLocaleLanguageDirection langDir = [NSLocale characterDirectionForLanguage:[currentLocale objectForKey:NSLocaleLanguageCode]];
|
||||
// 0 - Left to right (such as en-US)
|
||||
// 1 - Right to left (such as arabic locales)
|
||||
value = NSLocaleLanguageDirectionRightToLeft == langDir ? 1 : 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue