From 8f33a43903bc49863e64d884f11d553e03f9994d Mon Sep 17 00:00:00 2001 From: Matt Kuruc Date: Wed, 31 May 2023 14:26:48 -0700 Subject: [PATCH] Add strict validation mode for AR resource identifier schemes --- pxr/usd/ar/resolver.cpp | 72 ++++++++++++++++++- .../testenv/TestArURIResolver_plugInfo.json | 16 +++++ .../ar/testenv/TestArURIResolver_plugin.cpp | 52 +++++++++++++- pxr/usd/ar/testenv/testArURIResolver.py | 42 ++++++++--- 4 files changed, 170 insertions(+), 12 deletions(-) diff --git a/pxr/usd/ar/resolver.cpp b/pxr/usd/ar/resolver.cpp index ee2be95862..ea4d4101ea 100644 --- a/pxr/usd/ar/resolver.cpp +++ b/pxr/usd/ar/resolver.cpp @@ -87,6 +87,12 @@ TF_DEFINE_ENV_SETTING( PXR_AR_DISABLE_PLUGIN_URI_RESOLVERS, false, "Disables plugin URI resolver implementations."); +TF_DEFINE_ENV_SETTING( + PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION, false, + "Disables strict validation for URI/IRI schemes. In future releases, " + "strict validation will be enforced." +); + static TfStaticData _preferredResolver; void @@ -127,6 +133,46 @@ class _ResolverInfo bool implementsScopedCaches = false; }; +// Ensure resource identifier schemes conform during resolver +// initialization. Scheme is assumed to be already casefolded. Resource +// identifier schemes (under both the URI and IRI) specifications must start +// with an ASCII alpha character, followed by any number of ASCII alphanumeric +// or the hyphen, period, and plus characters. +std::pair +_ValidateResourceIdentifierScheme(const std::string& caseFoldedScheme) { + if (caseFoldedScheme.empty()) { + return std::make_pair(false, "Scheme cannot be empty"); + } + if (caseFoldedScheme[0] > 'z' || caseFoldedScheme[0] < 'a') { + return std::make_pair(false, "Scheme must start with ASCII 'a-z'"); + } + const auto it = std::find_if(caseFoldedScheme.begin() + 1, + caseFoldedScheme.end(), + [](const char c) { + return !((c >= '0' && c <= '9') || + (c >= 'a' && c <= 'z') || + (c == '-') || (c== '.') || (c=='+')); + }); + if (it != caseFoldedScheme.end()) { + if ((((*it) & (1<<7)) == 0)) { + // TODO: Once the UTF-8 character iterator lands, it would be + // helpful to include the invalid UTF-8 character in the error + // message output. As invalid UTF-8 characters may span multiple + // bytes, it can't be trivially identified by the character + // iterator. + return std::make_pair( + false, "Non-ASCII UTF-8 characters not allowed in scheme"); + } + else { + return std::make_pair( +- false, TfStringPrintf("Character '%c' not allowed in scheme. " + "Must be ASCII 'a-z', '-', '+', or '.'", + *it)); + } + } + return std::make_pair(true, ""); +} + std::string _GetTypeNames(const std::vector<_ResolverInfo>& resolvers) { @@ -1123,7 +1169,31 @@ class _DispatchingResolver final (*existingResolver)->GetType().GetTypeName().c_str()); } else { - uriSchemes.push_back(uriScheme); + const auto validation = + _ValidateResourceIdentifierScheme(uriScheme); + if (validation.first) { + uriSchemes.push_back(uriScheme); + } + else if (TfGetEnvSetting( + PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION)){ + uriSchemes.push_back(uriScheme); + TF_WARN("'%s' for '%s' is not a valid " + "resource identifier scheme and " + "will be restricted in future releases: %s", + uriScheme.c_str(), + resolverInfo.type.GetTypeName().c_str(), + validation.second.c_str()); + } else{ + TF_WARN( + "'%s' for '%s' is not a valid resource identifier " + "scheme: %s. Paths with this prefix will be " + "handled by other resolvers. Set " + "PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION to " + "disable strict scheme validation.", + uriScheme.c_str(), + resolverInfo.type.GetTypeName().c_str(), + validation.second.c_str()); + } } } diff --git a/pxr/usd/ar/testenv/TestArURIResolver_plugInfo.json b/pxr/usd/ar/testenv/TestArURIResolver_plugInfo.json index bc78b8d558..f073f219b9 100644 --- a/pxr/usd/ar/testenv/TestArURIResolver_plugInfo.json +++ b/pxr/usd/ar/testenv/TestArURIResolver_plugInfo.json @@ -17,8 +17,24 @@ "uriSchemes": ["test"] }, "_TestOtherURIResolver": { + "bases": ["_TestURIResolverBase"], + "uriSchemes": ["test-other"] + }, + "_TestInvalidUnderbarURIResolver": { "bases": ["_TestURIResolverBase"], "uriSchemes": ["test_other"] + }, + "_TestInvalidColonURIResolver": { + "bases": ["_TestURIResolverBase"], + "uriSchemes": ["other:test"] + }, + "_TestInvalidNonAsciiURIResolver": { + "bases": ["_TestURIResolverBase"], + "uriSchemes": ["test-π-utf8"] + }, + "_TestInvalidNumericPrefixResolver": { + "bases": ["_TestURIResolverBase"], + "uriSchemes": ["113-test"] } } } diff --git a/pxr/usd/ar/testenv/TestArURIResolver_plugin.cpp b/pxr/usd/ar/testenv/TestArURIResolver_plugin.cpp index eb1aa6bcb6..9f4d2a0ed4 100644 --- a/pxr/usd/ar/testenv/TestArURIResolver_plugin.cpp +++ b/pxr/usd/ar/testenv/TestArURIResolver_plugin.cpp @@ -144,17 +144,63 @@ class _TestURIResolver } }; -// Test resolver that handles asset paths of the form "test_other://...." +// Test resolver that handles asset paths of the form "test-other://...." class _TestOtherURIResolver : public _TestURIResolverBase { public: _TestOtherURIResolver() + : _TestURIResolverBase("test-other") + { + } +}; + +// Underbar characters should cause a failure to register under strict mode +class _TestInvalidUnderbarURIResolver + : public _TestURIResolverBase +{ +public: + _TestInvalidUnderbarURIResolver() : _TestURIResolverBase("test_other") { } }; +// A colon in the scheme could cause problems when parsing an asset path. +// This should cause a failure to register under strict mode. +class _TestInvalidColonURIResolver + : public _TestURIResolverBase +{ +public: + _TestInvalidColonURIResolver() + : _TestURIResolverBase("other:test") + { + } +}; + +// UTF-8 characters should cause a failure to register under strict mode +class _TestInvalidNonAsciiURIResolver + : public _TestURIResolverBase +{ +public: + _TestInvalidNonAsciiURIResolver() + : _TestURIResolverBase("test-π-utf8") + { + } +}; + +// Schemes starting with numeric characters should cause a failure to +// register under strict mode +class _TestInvalidNumericPrefixResolver + : public _TestURIResolverBase +{ +public: + _TestInvalidNumericPrefixResolver() + : _TestURIResolverBase("113-test") + { + } +}; + // XXX: Should have a AR_DEFINE_ABSTRACT_RESOLVER macro like // AR_DEFINE_ABSTRACT_RESOLVER(_TestURIResolverBase, ArResolver) // to take care of this registration. @@ -165,3 +211,7 @@ TF_REGISTRY_FUNCTION(TfType) AR_DEFINE_RESOLVER(_TestURIResolver, _TestURIResolverBase); AR_DEFINE_RESOLVER(_TestOtherURIResolver, _TestURIResolverBase); +AR_DEFINE_RESOLVER(_TestInvalidUnderbarURIResolver, _TestURIResolverBase); +AR_DEFINE_RESOLVER(_TestInvalidColonURIResolver, _TestURIResolverBase); +AR_DEFINE_RESOLVER(_TestInvalidNonAsciiURIResolver, _TestURIResolverBase); +AR_DEFINE_RESOLVER(_TestInvalidNumericPrefixResolver, _TestURIResolverBase); diff --git a/pxr/usd/ar/testenv/testArURIResolver.py b/pxr/usd/ar/testenv/testArURIResolver.py index d5ec2ead6e..84ec97e1d0 100644 --- a/pxr/usd/ar/testenv/testArURIResolver.py +++ b/pxr/usd/ar/testenv/testArURIResolver.py @@ -79,24 +79,46 @@ def test_Resolve(self): self.assertEqual(resolver.Resolve("test://foo.package[bar.file]"), "test://foo.package[bar.file]") - self.assertEqual(resolver.Resolve("test_other://foo"), - "test_other://foo") + self.assertEqual(resolver.Resolve("test-other://foo"), + "test-other://foo") self.assertEqual( - resolver.Resolve("test_other://foo.package[bar.file]"), - "test_other://foo.package[bar.file]") + resolver.Resolve("test-other://foo.package[bar.file]"), + "test-other://foo.package[bar.file]") # These calls should hit the URI resolver since schemes are # case-insensitive. - self.assertEqual(resolver.Resolve("TEST://foo"), + self.assertEqual(resolver.Resolve("TEST://foo"), "TEST://foo") - self.assertEqual(resolver.Resolve("TEST://foo.package[bar.file]"), + self.assertEqual(resolver.Resolve("TEST://foo.package[bar.file]"), "TEST://foo.package[bar.file]") - self.assertEqual(resolver.Resolve("TEST_OTHER://foo"), - "TEST_OTHER://foo") + self.assertEqual(resolver.Resolve("TEST-OTHER://foo"), + "TEST-OTHER://foo") self.assertEqual( - resolver.Resolve("TEST_OTHER://foo.package[bar.file]"), - "TEST_OTHER://foo.package[bar.file]") + resolver.Resolve("TEST-OTHER://foo.package[bar.file]"), + "TEST-OTHER://foo.package[bar.file]") + + def test_InvalidScheme(self): + resolver = Ar.GetResolver() + invalid_underbar_path = "test_other:/abc.xyz" + invalid_utf8_path = "test-π-utf8:/abc.xyz" + invalid_numeric_prefix_path = "113-test:/abc.xyz" + invalid_colon_path = "other:test:/abc.xyz" + if Tf.GetEnvSetting("PXR_AR_DISABLE_STRICT_SCHEME_VALIDATION"): + self.assertEqual(resolver.Resolve(invalid_underbar_path), + invalid_underbar_path) + self.assertEqual(resolver.Resolve(invalid_utf8_path), + invalid_utf8_path) + self.assertEqual(resolver.Resolve(invalid_numeric_prefix_path), + invalid_numeric_prefix_path) + # Even when strict scheme validation mode is disabled + # schemes with colons in them may fail to resolve + self.assertFalse(resolver.Resolve(invalid_colon_path)) + else: + self.assertFalse(resolver.Resolve(invalid_underbar_path)) + self.assertFalse(resolver.Resolve(invalid_utf8_path)) + self.assertFalse(resolver.Resolve(invalid_numeric_prefix_path)) + self.assertFalse(resolver.Resolve(invalid_colon_path)) def test_ResolveForNewAsset(self): resolver = Ar.GetResolver()