Skip to content

Commit

Permalink
ICU-21639 Added an internal utility class to streamline preflighting …
Browse files Browse the repository at this point in the history
…and heap-allocating a char buffer for a locale ID

and changed several internal methods in ULocale to use it, so that they work correctly on locale IDs that are longer
than ULOC_FULLNAME_CAPACITY.
  • Loading branch information
richgillam committed Aug 2, 2021
1 parent 4368f69 commit b03b8be
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 28 deletions.
39 changes: 22 additions & 17 deletions icu4c/source/common/loclikely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1181,30 +1181,28 @@ _uloc_minimizeSubtags(const char* localeID,
}
}

static UBool
static int32_t
do_canonicalize(const char* localeID,
char* buffer,
int32_t bufferCapacity,
UErrorCode* err)
{
uloc_canonicalize(
int32_t canonicalizedSize = uloc_canonicalize(
localeID,
buffer,
bufferCapacity,
err);

if (*err == U_STRING_NOT_TERMINATED_WARNING ||
*err == U_BUFFER_OVERFLOW_ERROR) {
*err = U_ILLEGAL_ARGUMENT_ERROR;

return FALSE;
return canonicalizedSize;
}
else if (U_FAILURE(*err)) {

return FALSE;
return -1;
}
else {
return TRUE;
return canonicalizedSize;
}
}

Expand Down Expand Up @@ -1241,12 +1239,17 @@ static UBool
_ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
char localeBuffer[ULOC_FULLNAME_CAPACITY];

if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
return _uloc_addLikelySubtags(localeBuffer, sink, status);
PreflightingLocaleIDBuffer localeBuffer;
do {
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
localeBuffer.getCapacity(), status);
} while (localeBuffer.needToTryAgain(status));

if (U_SUCCESS(*status)) {
return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
} else {
return FALSE;
}
return FALSE;
}

U_CAPI void U_EXPORT2
Expand Down Expand Up @@ -1289,11 +1292,13 @@ U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
char localeBuffer[ULOC_FULLNAME_CAPACITY];

if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
_uloc_minimizeSubtags(localeBuffer, sink, status);
}
PreflightingLocaleIDBuffer localeBuffer;
do {
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
localeBuffer.getCapacity(), status);
} while (localeBuffer.needToTryAgain(status));

_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
}

// Pairs of (language subtag, + or -) for finding out fast if common languages
Expand Down
29 changes: 18 additions & 11 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,15 +478,19 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 * An id is treated as such when it is non-NULL, contains no '@', and its shortest
 * subtag (as reported by getShortestSubtagLength) is exactly one character long.
 * The test is applied to the macro argument itself rather than to whatever variable
 * named localeID happens to be in scope at the call site.
 * Note: the argument is expanded more than once, so it must be free of side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
finalID=id; \
if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
} else { \
finalID=buffer; \
} \
} UPRV_BLOCK_MACRO_END
/*
 * Runs uloc_forLanguageTag() on id, writing into buffer (capacity: length).
 *
 * On a clean conversion (positive length, no failure, result terminated) finalID is
 * pointed at buffer; in every other case finalID is pointed back at the original id.
 * A U_STRING_NOT_TERMINATED_WARNING is reported to the caller as U_BUFFER_OVERFLOW_ERROR.
 *
 * Returns the length reported by uloc_forLanguageTag(), whatever the outcome.
 */
static int32_t _ConvertBCP47(
        const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) {
    const int32_t convertedLength = uloc_forLanguageTag(id, buffer, length, NULL, err);
    const bool unterminated = (*err == U_STRING_NOT_TERMINATED_WARNING);

    if (convertedLength > 0 && !U_FAILURE(*err) && !unterminated) {
        finalID = buffer;
        return convertedLength;
    }

    // Conversion did not produce a usable result: keep using the caller's id.
    finalID = id;
    if (unterminated) {
        *err = U_BUFFER_OVERFLOW_ERROR;
    }
    return convertedLength;
}
/* Gets the size of the shortest subtag in the given localeID. */
static int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
Expand Down Expand Up @@ -1474,7 +1478,7 @@ _canonicalize(const char* localeID,
uint32_t options,
UErrorCode* err) {
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
char tempBuffer[ULOC_FULLNAME_CAPACITY];
PreflightingLocaleIDBuffer tempBuffer;
const char* origLocaleID;
const char* tmpLocaleID;
const char* keywordAssign = NULL;
Expand All @@ -1485,7 +1489,10 @@ _canonicalize(const char* localeID,
}

if (_hasBCP47Extension(localeID)) {
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
do {
tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
} while (tempBuffer.needToTryAgain(err));
} else {
if (localeID==NULL) {
localeID=uloc_getDefault();
Expand Down
68 changes: 68 additions & 0 deletions icu4c/source/common/ulocimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,4 +307,72 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
// Return true if the value is already canonicalized.
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);

/**
 * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
 * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
 * and then, if it's not big enough, reallocate it on the heap and try again.
 *
 * You use it like this:
 *     UErrorCode err = U_ZERO_ERROR;
 *
 *     PreflightingLocaleIDBuffer tempBuffer;
 *     do {
 *         tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
 *     } while (tempBuffer.needToTryAgain(&err));
 *     if (U_SUCCESS(err)) {
 *         uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
 *     }
 *
 * The object owns its heap buffer (released in the destructor), so it is not copyable.
 */
class PreflightingLocaleIDBuffer {
private:
    char stackBuffer[ULOC_FULLNAME_CAPACITY];
    char* heapBuffer = nullptr;
    int32_t capacity = ULOC_FULLNAME_CAPACITY;

public:
    /**
     * Set by the caller to the length the filling function asked for; read by
     * needToTryAgain() to size the heap buffer.
     */
    int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;

    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
#endif

    // Start out as an empty string so getBuffer() is a valid C string even if the
    // filling function bails out before writing anything.
    PreflightingLocaleIDBuffer() { stackBuffer[0] = '\0'; }

    ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }

    // Copying would leave two objects freeing the same heapBuffer.
    PreflightingLocaleIDBuffer(const PreflightingLocaleIDBuffer&) = delete;
    PreflightingLocaleIDBuffer& operator=(const PreflightingLocaleIDBuffer&) = delete;

    /** Returns the buffer currently in use: the heap buffer once allocated, else the built-in one. */
    char* getBuffer() {
        return heapBuffer != nullptr ? heapBuffer : stackBuffer;
    }

    /** Returns the capacity, in chars, of the buffer returned by getBuffer(). */
    int32_t getCapacity() const {
        return capacity;
    }

    /**
     * Decides whether the caller's fill loop should run once more.
     *
     * If *err reports that the buffer was too small (U_BUFFER_OVERFLOW_ERROR or
     * U_STRING_NOT_TERMINATED_WARNING) and no heap buffer has been allocated yet,
     * allocates one of requestedCapacity + 2 chars, resets *err to U_ZERO_ERROR and
     * returns true. Returns false in every other case; *err is set to
     * U_MEMORY_ALLOCATION_ERROR when the larger buffer cannot be obtained.
     *
     * @param err in/out error code from the most recent fill attempt
     * @return true if the caller should call the filling function again
     */
    bool needToTryAgain(UErrorCode* err) {
        // Only one retry is ever attempted: the heap buffer is sized from the preflight result.
        if (heapBuffer != nullptr) {
            return false;
        }
        if (*err != U_BUFFER_OVERFLOW_ERROR && *err != U_STRING_NOT_TERMINATED_WARNING) {
            return false;
        }

        // requestedCapacity is caller-supplied; refuse values that cannot be turned into
        // a sane allocation size instead of overflowing or passing a negative size on.
        if (requestedCapacity < 0 || requestedCapacity > INT32_MAX - 2) {
            *err = U_MEMORY_ALLOCATION_ERROR;
            return false;
        }

        const int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia
        heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
        if (heapBuffer == nullptr) {
            *err = U_MEMORY_ALLOCATION_ERROR;
            return false;
        }

        heapBuffer[0] = '\0';
        *err = U_ZERO_ERROR;
        capacity = newCapacity;
        return true;
    }
};

#endif
44 changes: 44 additions & 0 deletions icu4c/source/test/cintltst/cloctst.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ static void TestBug20370(void);
static void TestBug20321UnicodeLocaleKey(void);

static void TestUsingDefaultWarning(void);
static void TestExcessivelyLongIDs(void);

void PrintDataTable();

Expand Down Expand Up @@ -281,6 +282,7 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestBug20321UnicodeLocaleKey);
TESTCASE(TestUsingDefaultWarning);
TESTCASE(TestBug21449InfiniteLoop);
TESTCASE(TestExcessivelyLongIDs);
}


Expand Down Expand Up @@ -7009,3 +7011,45 @@ static void TestBug21449InfiniteLoop() {
// so the test is considered passed if the call to the API below returns anything at all.
uloc_getDisplayLanguage(invalidLocaleId, invalidLocaleId, NULL, 0, &status);
}

// rdar://79296849 and https://unicode-org.atlassian.net/browse/ICU-21639
// Exercises uloc_minimizeSubtags() and uloc_addLikelySubtags() with a locale ID whose result
// is longer than ULOC_FULLNAME_CAPACITY. First each function is called with a
// ULOC_FULLNAME_CAPACITY-sized buffer and is expected to fail while reporting the needed
// length; then each is called again with a buffer of that length and the exact result is
// checked. The error code is reset before every call so that one failure cannot leak into
// the next check.
static void TestExcessivelyLongIDs(void) {
    const char* reallyLongID =
        "de-u-cu-eur-em-default-hc-h23-ks-level1-lb-strict-lw-normal-ms-metric"
        "-nu-latn-rg-atzzzz-sd-atat1-ss-none-tz-atvie-va-posix";
    char minimizedID[ULOC_FULLNAME_CAPACITY];
    char maximizedID[ULOC_FULLNAME_CAPACITY];
    int32_t actualMinimizedLength = 0;
    int32_t actualMaximizedLength = 0;
    char* realMinimizedID = NULL;
    char* realMaximizedID = NULL;
    UErrorCode err = U_ZERO_ERROR;

    // Preflight: the fixed-size buffers are too small, so both calls must fail and
    // return the length that is actually required.
    actualMinimizedLength = uloc_minimizeSubtags(reallyLongID, minimizedID, ULOC_FULLNAME_CAPACITY, &err);
    assertTrue("uloc_minimizeSubtags() with too-small buffer didn't fail as expected",
        U_FAILURE(err) && actualMinimizedLength > ULOC_FULLNAME_CAPACITY);

    err = U_ZERO_ERROR;
    actualMaximizedLength = uloc_addLikelySubtags(reallyLongID, maximizedID, ULOC_FULLNAME_CAPACITY, &err);
    assertTrue("uloc_addLikelySubtags() with too-small buffer didn't fail as expected",
        U_FAILURE(err) && actualMaximizedLength > ULOC_FULLNAME_CAPACITY);

    // Real call for uloc_minimizeSubtags() with a buffer of the reported size (+1 for the NUL).
    err = U_ZERO_ERROR;
    realMinimizedID = (char*)uprv_malloc(actualMinimizedLength + 1);
    assertTrue("uprv_malloc() failed for the minimized ID buffer", realMinimizedID != NULL);
    if (realMinimizedID != NULL) {
        uloc_minimizeSubtags(reallyLongID, realMinimizedID, actualMinimizedLength + 1, &err);
        if (assertSuccess("uloc_minimizeSubtags() failed", &err)) {
            assertEquals("Wrong result from uloc_minimizeSubtags()",
                "de__POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
                "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
                realMinimizedID);
        }
        uprv_free(realMinimizedID);
    }

    // Real call for uloc_addLikelySubtags(); start from a clean error code so a failure
    // above is not reported a second time here.
    err = U_ZERO_ERROR;
    realMaximizedID = (char*)uprv_malloc(actualMaximizedLength + 1);
    assertTrue("uprv_malloc() failed for the maximized ID buffer", realMaximizedID != NULL);
    if (realMaximizedID != NULL) {
        uloc_addLikelySubtags(reallyLongID, realMaximizedID, actualMaximizedLength + 1, &err);
        if (assertSuccess("uloc_addLikelySubtags() failed", &err)) {
            assertEquals("Wrong result from uloc_addLikelySubtags()",
                "de_Latn_DE_POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
                "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
                realMaximizedID);
        }
        uprv_free(realMaximizedID);
    }
}

0 comments on commit b03b8be

Please sign in to comment.