Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add strict validation mode for AR resource identifier schemes #2453

Merged
merged 1 commit into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 71 additions & 1 deletion pxr/usd/ar/resolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ TF_DEFINE_ENV_SETTING(
PXR_AR_DISABLE_PLUGIN_URI_RESOLVERS, false,
"Disables plugin URI resolver implementations.");

TF_DEFINE_ENV_SETTING(
PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION, false,
"Disables strict validation for URI/IRI schemes. In future releases, "
"strict validation will be enforced."
);

static TfStaticData<std::string> _preferredResolver;

void
Expand Down Expand Up @@ -127,6 +133,46 @@ class _ResolverInfo
bool implementsScopedCaches = false;
};

// Ensure resource identifier schemes conform during resolver
// initialization. Scheme is assumed to be already casefolded. Resource
// identifier schemes (under both the URI and IRI) specifications must start
// with an ASCII alpha character, followed by any number of ASCII alphanumeric
// or the hyphen, period, and plus characters.
std::pair<bool, std::string>
_ValidateResourceIdentifierScheme(const std::string& caseFoldedScheme) {
if (caseFoldedScheme.empty()) {
return std::make_pair(false, "Scheme cannot be empty");
}
if (caseFoldedScheme[0] > 'z' || caseFoldedScheme[0] < 'a') {
return std::make_pair(false, "Scheme must start with ASCII 'a-z'");
}
const auto it = std::find_if(caseFoldedScheme.begin() + 1,
caseFoldedScheme.end(),
[](const char c) {
return !((c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'z') ||
(c == '-') || (c== '.') || (c=='+'));
});
if (it != caseFoldedScheme.end()) {
if ((((*it) & (1<<7)) == 0)) {
// TODO: Once the UTF-8 character iterator lands, it would be
// helpful to include the invalid UTF-8 character in the error
// message output. As invalid UTF-8 characters may span multiple
// bytes, it can't be trivially identified by the character
// iterator.
return std::make_pair(
false, "Non-ASCII UTF-8 characters not allowed in scheme");
}
else {
return std::make_pair(
- false, TfStringPrintf("Character '%c' not allowed in scheme. "
"Must be ASCII 'a-z', '-', '+', or '.'",
*it));
}
}
return std::make_pair(true, "");
}

std::string
_GetTypeNames(const std::vector<_ResolverInfo>& resolvers)
{
Expand Down Expand Up @@ -1123,7 +1169,31 @@ class _DispatchingResolver final
(*existingResolver)->GetType().GetTypeName().c_str());
}
else {
uriSchemes.push_back(uriScheme);
const auto validation =
_ValidateResourceIdentifierScheme(uriScheme);
if (validation.first) {
uriSchemes.push_back(uriScheme);
}
else if (TfGetEnvSetting(
PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION)){
uriSchemes.push_back(uriScheme);
TF_WARN("'%s' for '%s' is not a valid "
"resource identifier scheme and "
"will be restricted in future releases: %s",
uriScheme.c_str(),
resolverInfo.type.GetTypeName().c_str(),
validation.second.c_str());
} else{
TF_WARN(
"'%s' for '%s' is not a valid resource identifier "
"scheme: %s. Paths with this prefix will be "
"handled by other resolvers. Set "
"PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION to "
"disable strict scheme validation.",
uriScheme.c_str(),
resolverInfo.type.GetTypeName().c_str(),
validation.second.c_str());
}
}
}

Expand Down
16 changes: 16 additions & 0 deletions pxr/usd/ar/testenv/TestArURIResolver_plugInfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,24 @@
"uriSchemes": ["test"]
},
"_TestOtherURIResolver": {
"bases": ["_TestURIResolverBase"],
"uriSchemes": ["test-other"]
},
"_TestInvalidUnderbarURIResolver": {
"bases": ["_TestURIResolverBase"],
"uriSchemes": ["test_other"]
},
"_TestInvalidColonURIResolver": {
"bases": ["_TestURIResolverBase"],
"uriSchemes": ["other:test"]
},
"_TestInvalidNonAsciiURIResolver": {
"bases": ["_TestURIResolverBase"],
"uriSchemes": ["test-π-utf8"]
},
"_TestInvalidNumericPrefixResolver": {
"bases": ["_TestURIResolverBase"],
"uriSchemes": ["113-test"]
}
}
}
Expand Down
52 changes: 51 additions & 1 deletion pxr/usd/ar/testenv/TestArURIResolver_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,63 @@ class _TestURIResolver
}
};

// Test resolver that handles asset paths of the form "test_other://...."
// Test resolver that handles asset paths of the form "test-other://...."
class _TestOtherURIResolver
: public _TestURIResolverBase
{
public:
_TestOtherURIResolver()
: _TestURIResolverBase("test-other")
{
}
};

// Underbar characters should cause a failure to register under strict mode
class _TestInvalidUnderbarURIResolver
: public _TestURIResolverBase
{
public:
_TestInvalidUnderbarURIResolver()
: _TestURIResolverBase("test_other")
{
}
};

// A colon in the scheme could cause problems when parsing an asset path.
// This should cause a failure to register under strict mode.
class _TestInvalidColonURIResolver
: public _TestURIResolverBase
{
public:
_TestInvalidColonURIResolver()
: _TestURIResolverBase("other:test")
{
}
};

// UTF-8 characters should cause a failure to register under strict mode
class _TestInvalidNonAsciiURIResolver
: public _TestURIResolverBase
{
public:
_TestInvalidNonAsciiURIResolver()
: _TestURIResolverBase("test-π-utf8")
{
}
};

// Schemes starting with numeric characters should cause a failure to
// register under strict mode
class _TestInvalidNumericPrefixResolver
: public _TestURIResolverBase
{
public:
_TestInvalidNumericPrefixResolver()
: _TestURIResolverBase("113-test")
{
}
};

// XXX: Should have a AR_DEFINE_ABSTRACT_RESOLVER macro like
// AR_DEFINE_ABSTRACT_RESOLVER(_TestURIResolverBase, ArResolver)
// to take care of this registration.
Expand All @@ -165,3 +211,7 @@ TF_REGISTRY_FUNCTION(TfType)

AR_DEFINE_RESOLVER(_TestURIResolver, _TestURIResolverBase);
AR_DEFINE_RESOLVER(_TestOtherURIResolver, _TestURIResolverBase);
AR_DEFINE_RESOLVER(_TestInvalidUnderbarURIResolver, _TestURIResolverBase);
AR_DEFINE_RESOLVER(_TestInvalidColonURIResolver, _TestURIResolverBase);
AR_DEFINE_RESOLVER(_TestInvalidNonAsciiURIResolver, _TestURIResolverBase);
AR_DEFINE_RESOLVER(_TestInvalidNumericPrefixResolver, _TestURIResolverBase);
42 changes: 32 additions & 10 deletions pxr/usd/ar/testenv/testArURIResolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,46 @@ def test_Resolve(self):
self.assertEqual(resolver.Resolve("test://foo.package[bar.file]"),
"test://foo.package[bar.file]")

self.assertEqual(resolver.Resolve("test_other://foo"),
"test_other://foo")
self.assertEqual(resolver.Resolve("test-other://foo"),
"test-other://foo")
self.assertEqual(
resolver.Resolve("test_other://foo.package[bar.file]"),
"test_other://foo.package[bar.file]")
resolver.Resolve("test-other://foo.package[bar.file]"),
"test-other://foo.package[bar.file]")

# These calls should hit the URI resolver since schemes are
# case-insensitive.
self.assertEqual(resolver.Resolve("TEST://foo"),
self.assertEqual(resolver.Resolve("TEST://foo"),
"TEST://foo")
self.assertEqual(resolver.Resolve("TEST://foo.package[bar.file]"),
self.assertEqual(resolver.Resolve("TEST://foo.package[bar.file]"),
"TEST://foo.package[bar.file]")

self.assertEqual(resolver.Resolve("TEST_OTHER://foo"),
"TEST_OTHER://foo")
self.assertEqual(resolver.Resolve("TEST-OTHER://foo"),
"TEST-OTHER://foo")
self.assertEqual(
resolver.Resolve("TEST_OTHER://foo.package[bar.file]"),
"TEST_OTHER://foo.package[bar.file]")
resolver.Resolve("TEST-OTHER://foo.package[bar.file]"),
"TEST-OTHER://foo.package[bar.file]")

def test_InvalidScheme(self):
resolver = Ar.GetResolver()
invalid_underbar_path = "test_other:/abc.xyz"
invalid_utf8_path = "test-π-utf8:/abc.xyz"
invalid_numeric_prefix_path = "113-test:/abc.xyz"
invalid_colon_path = "other:test:/abc.xyz"
if Tf.GetEnvSetting("PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION"):
self.assertEqual(resolver.Resolve(invalid_underbar_path),
invalid_underbar_path)
self.assertEqual(resolver.Resolve(invalid_utf8_path),
invalid_utf8_path)
self.assertEqual(resolver.Resolve(invalid_numeric_prefix_path),
invalid_numeric_prefix_path)
# Even when strict scheme validation mode is disabled
# schemes with colons in them may fail to resolve
self.assertFalse(resolver.Resolve(invalid_colon_path))
else:
self.assertFalse(resolver.Resolve(invalid_underbar_path))
self.assertFalse(resolver.Resolve(invalid_utf8_path))
self.assertFalse(resolver.Resolve(invalid_numeric_prefix_path))
self.assertFalse(resolver.Resolve(invalid_colon_path))

def test_ResolveForNewAsset(self):
resolver = Ar.GetResolver()
Expand Down