Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support for Unicode UTF-8 Identifiers #2120

Closed
8 changes: 8 additions & 0 deletions pxr/base/tf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pxr_library(tf
type
typeFunctions
typeNotice
unicodeUtils
warning
weakBase
weakPtr
Expand Down Expand Up @@ -172,6 +173,9 @@ pxr_library(tf
CPPFILES
initConfig.cpp
preprocessorUtils.cpp
unicodeCharacterClasses.cpp
unicodeDucetMapping.cpp
unicodeDucetMultiMapping.cpp
pxrDoubleConversion/double-conversion.cc
pxrDoubleConversion/bignum-dtoa.cc
pxrDoubleConversion/bignum.cc
Expand Down Expand Up @@ -423,6 +427,10 @@ pxr_install_test_dir(
SRC testenv/baseline/testTfScriptModuleLoader
DEST testTfScriptModuleLoader/baseline
)
# Register the testenv/TfStringUtils_Python directory for installation under
# the destination name TfStringUtils_Python.
# NOTE(review): presumably this is fixture data for a Python string-utils test
# registered elsewhere in this file -- confirm against the matching
# pxr_register_test() call.
pxr_install_test_dir(
SRC testenv/TfStringUtils_Python
DEST TfStringUtils_Python
)

pxr_register_test(TfAnyUniquePtr
COMMAND "${CMAKE_INSTALL_PREFIX}/tests/testTf TfAnyUniquePtr"
Expand Down
132 changes: 95 additions & 37 deletions pxr/base/tf/stringUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ using std::vector;

PXR_NAMESPACE_OPEN_SCOPE

// Boolean env setting, defaulting to false. It is read by
// UseUTF8Identifiers() in this file, which the string utilities consult to
// choose between their UTF-8 aware (TfUnicodeUtils) and byte-wise code paths.
TF_DEFINE_ENV_SETTING(TF_UTF8_IDENTIFIERS,
false,
"Allow UTF8 strings as identifiers and prim names");

string
TfVStringPrintf(const std::string& fmt, va_list ap)
{
Expand Down Expand Up @@ -241,29 +245,43 @@ TfStringContains(const string &s, const TfToken &substring)
/// Returns a lower-cased copy of \p source.
///
/// When UseUTF8Identifiers() reports true, the conversion is delegated to
/// TfUnicodeUtils::UTF8StringToLower(). Otherwise every byte of \p source is
/// passed through tolower() individually and appended to the result.
string
TfStringToLower(const string &source)
{
    if (UseUTF8Identifiers()) {
        return TfUnicodeUtils::UTF8StringToLower(source);
    }

    string lower;
    lower.reserve(source.length());
    for (const char c : source) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char holding a byte >= 0x80 may be negative,
        // so widen through unsigned char before the call.
        lower += static_cast<char>(tolower(static_cast<unsigned char>(c)));
    }
    return lower;
}

/// Returns an upper-cased copy of \p source.
///
/// When UseUTF8Identifiers() reports true, the conversion is delegated to
/// TfUnicodeUtils::UTF8StringToUpper(). Otherwise every byte of \p source is
/// passed through toupper() individually and appended to the result.
string
TfStringToUpper(const string &source)
{
    if (UseUTF8Identifiers()) {
        return TfUnicodeUtils::UTF8StringToUpper(source);
    }

    string upper;
    upper.reserve(source.length());
    for (const char c : source) {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain char holding a byte >= 0x80 may be negative,
        // so widen through unsigned char before the call.
        upper += static_cast<char>(toupper(static_cast<unsigned char>(c)));
    }
    return upper;
}

string
Expand All @@ -273,10 +291,17 @@ TfStringCapitalize(const string& source)
return source;
}

string result(source);
result[0] = toupper(result[0]);
if (UseUTF8Identifiers())
{
return TfUnicodeUtils::UTF8StringCapitalize(source);
}
else
{
string result(source);
result[0] = toupper(result[0]);

return result;
return result;
}
}

string
Expand Down Expand Up @@ -912,6 +937,18 @@ TfDictionaryLessThan::_LessImpl(const string& lstr, const string& rstr) const

}

/// Compares \p lhs against \p rhs and returns the comparator's verdict.
///
/// The comparator is chosen per call: TfDictionaryLessThan when
/// UseUTF8Identifiers() is false, TfUnicodeUtils::TfUTF8UCALessThan when it
/// is true.
bool TfCollationOrder::operator()(const string& lhs, const string& rhs) const
{
    // Legacy (non-UTF-8) ordering is handled first so the Unicode path is
    // the fall-through case.
    if (!UseUTF8Identifiers()) {
        return TfDictionaryLessThan()(lhs, rhs);
    }

    return TfUnicodeUtils::TfUTF8UCALessThan()(lhs, rhs);
}

std::string
TfStringify(bool v)
{
Expand Down Expand Up @@ -1150,37 +1187,58 @@ TfStringCatPaths( const string &prefix, const string &suffix )
return TfNormPath(prefix + "/" + suffix);
}

/// Reports whether the TF_UTF8_IDENTIFIERS env setting is enabled.
///
/// The setting is queried when the function-local static is initialized and
/// that cached value is returned by every call.
bool UseUTF8Identifiers()
{
    static const bool isEnabled =
        TfGetEnvSetting(TF_UTF8_IDENTIFIERS) == true;
    return isEnabled;
}

/// Returns a copy of \p in rewritten so that it forms a valid identifier.
///
/// When UseUTF8Identifiers() reports true, the work is delegated to
/// TfUnicodeUtils::MakeValidUTF8Identifier(). Otherwise the ASCII rules
/// below apply:
///   - an empty input yields "_";
///   - the first character is kept if it is [A-Za-z_], else replaced by '_';
///   - each following character is kept if it is [A-Za-z0-9_], else
///     replaced by '_'.
/// The scan walks the c_str() buffer and stops at the first NUL byte.
std::string
TfMakeValidIdentifier(const std::string &in)
{
    if (UseUTF8Identifiers()) {
        return TfUnicodeUtils::MakeValidUTF8Identifier(in);
    }

    std::string result;

    if (in.empty()) {
        result.push_back('_');
        return result;
    }

    result.reserve(in.size());
    char const *p = in.c_str();

    // Leading character: letters and underscore only (no digits).
    if (!(('a' <= *p && *p <= 'z') ||
          ('A' <= *p && *p <= 'Z') ||
          *p == '_')) {
        result.push_back('_');
    } else {
        result.push_back(*p);
    }

    // Remaining characters: letters, digits and underscore.
    for (++p; *p; ++p) {
        if (!(('a' <= *p && *p <= 'z') ||
              ('A' <= *p && *p <= 'Z') ||
              ('0' <= *p && *p <= '9') ||
              *p == '_')) {
            result.push_back('_');
        } else {
            result.push_back(*p);
        }
    }
    return result;
}

std::string
TfMakeValidPrimName(const std::string& in)
erslavin marked this conversation as resolved.
Show resolved Hide resolved
{
return (UseUTF8Identifiers() ? TfUnicodeUtils::MakeValidUTF8PrimName(in) : TfMakeValidIdentifier(in));
}

std::string
Expand Down
Loading