diff --git a/src/Assert/Assert.php b/src/Assert/Assert.php new file mode 100644 index 00000000..e39a0077 --- /dev/null +++ b/src/Assert/Assert.php @@ -0,0 +1,172 @@ + $arguments + */ + public static function __callStatic(string $name, array $arguments): void + { + // Handle Exception-parameter + $exception = AssertionFailedException::class; + + $last = end($arguments); + if (is_string($last) && class_exists($last) && is_subclass_of($last, Throwable::class)) { + $exception = $last; + array_pop($arguments); + } + + try { + if (method_exists(static::class, $name)) { + call_user_func_array([static::class, $name], $arguments); + return; + } elseif (preg_match('/^nullOr(.*)$/i', $name, $matches)) { + $method = lcfirst($matches[1]); + if (method_exists(static::class, $method)) { + call_user_func_array([static::class, 'nullOr'], [[static::class, $method], $arguments]); + } elseif (method_exists(BaseAssert::class, $method)) { + call_user_func_array([static::class, 'nullOr'], [[BaseAssert::class, $method], $arguments]); + } else { + throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $method)); + } + } elseif (preg_match('/^all(.*)$/i', $name, $matches)) { + $method = lcfirst($matches[1]); + if (method_exists(static::class, $method)) { + call_user_func_array([static::class, 'all'], [[static::class, $method], $arguments]); + } elseif (method_exists(BaseAssert::class, $method)) { + call_user_func_array([static::class, 'all'], [[BaseAssert::class, $method], $arguments]); + } else { + throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $method)); + } + } else { + throw new BadMethodCallException(sprintf("Assertion named `%s` does not exists.", $name)); + } + } catch (InvalidArgumentException $e) { + throw new $exception($e->getMessage()); + } + } + + + /** + * Handle nullOr* for either Webmozart or for our custom assertions + * + * @param callable $method + * @param array $arguments + * @return void + */ + private static function nullOr(callable $method, array $arguments): void + { + $value = reset($arguments); + ($value === null) || call_user_func_array($method, $arguments); + } + + + /** + * all* for our custom assertions + * + * @param callable $method + * @param array $arguments + * @return void + */ + private static function all(callable $method, array $arguments): void + { + $values = array_pop($arguments); + foreach ($values as $value) { + $tmp = $arguments; + array_unshift($tmp, $value); + call_user_func_array($method, $tmp); + } + } + + + /** + * @param mixed $value + * + * @return string + */ + protected static function valueToString(mixed $value): string + { + if (is_resource($value)) { + return 'resource'; + } + + if (null === $value) { + return 'null'; + } + + if (true === $value) { + return 'true'; + } + + if (false === $value) { + return 'false'; + } + + if (is_array($value)) { + return 'array'; + } + + if (is_object($value)) { + if (method_exists($value, '__toString')) { + return $value::class . ': ' . self::valueToString($value->__toString()); + } + + if ($value instanceof DateTime || $value instanceof DateTimeImmutable) { + return $value::class . ': ' . self::valueToString($value->format('c')); + } + + if (function_exists('enum_exists') && enum_exists(get_class($value))) { + return get_class($value) . '::' . $value->name; + } + + return $value::class; + } + + if (is_string($value)) { + return '"' . $value . '"'; + } + + return strval($value); + } +} diff --git a/src/Assert/CustomAssertionTrait.php b/src/Assert/CustomAssertionTrait.php new file mode 100644 index 00000000..28cf39c7 --- /dev/null +++ b/src/Assert/CustomAssertionTrait.php @@ -0,0 +1,181 @@ +-[a-z]++)*+)\s*+\(/' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/'; + + /** + * We use the same rules for matching Axis names as we do for function names. + * The only difference is that we match the '::' instead of the '(' + * so everything that was said about the regular expression for function names + * applies here as well. + * + * '/([a-z]++(?>-[a-z]++)*+)\s*+::' + * ( # Start a capturing group + * [a-z]++ # Match one or more lower-case alpha characters + * (?> # Start an atomic group (no capturing) + * - # Match a hyphen + * [a-z]++ # Match one or more lower-case alpha characters, possessively + * )*+ # Repeat the atomic group zero or more times, + * ) # End of the capturing group + * \s*+ # Match zero or more whitespace characters, possessively + * \( # Match an opening parenthesis + */ + private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/'; + + + /*********************************************************************************** + * NOTE: Custom assertions may be added below this line. * + * They SHOULD be marked as `private` to ensure the call is forced * + * through __callStatic(). * + * Assertions marked `public` are called directly and will * + * not handle any custom exception passed to it. * + ***********************************************************************************/ + + /** + * Check an XPath expression for allowed axes and functions + * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing + * a select subset of functions and axes. + * The check uses a list of allowed functions and axes, and throws an exception when an unknown function + * or axis is found in the $xpath_expression. + * + * Limitations: + * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not + * evaluate all possible valid XPath expressions correctly and cause either false positives for valid + * expressions or false negatives for invalid expressions. + * - The check may still allow expressions that are not safe, I.e. expressions that consist of only + * functions and axes that are deemed "save", but that are still slow to evaluate. The time it takes to + * evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document. + * This check, however, does not take the XML document into account, nor is it aware of the internals of the + * XPath processor that will evaluate the expression. + * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0. + * + * @param string $value + * @param array $allowed_axes + * @param array $allowed_functions + * @param string $message + */ + private static function allowedXPathFilter( + string $value, + array $allowed_axes = C::DEFAULT_ALLOWED_AXES, + array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS, + string $message = '', + ): void { + BaseAssert::allString($allowed_axes); + BaseAssert::allString($allowed_functions); + BaseAssert::maxLength( + $value, + C::XPATH_FILTER_MAX_LENGTH, + sprintf('XPath Filter exceeds the limit of 100 characters.'), + ); + + $strippedValue = preg_replace( + self::$regex_xpfilter_remove_strings, + // Replace the content with two of the quotes that were matched + "\\1\\1", + $value, + ); + + if ($strippedValue === null) { + throw new Exception("Error in preg_replace."); + } + + /** + * Check if the $xpath_expression uses an XPath function that is not in the list of allowed functions + * + * Look for the function specifier '(' and look for a function name before it. + * Ignoring whitespace before the '(' and the function name. + * All functions must match a string on a list of allowed function names + */ + $matches = []; + $res = preg_match_all(self::$regex_xpfilter_functions, $strippedValue, $matches); + if ($res === false) { + throw new Exception("Error in preg_match_all."); + } + + // Check that all the function names we found are in the list of allowed function names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowed_functions)) { + throw new InvalidArgumentException(sprintf( + $message ?: '\'%s\' is not an allowed XPath function.', + $match, + )); + } + } + + /** + * Check if the $xpath_expression uses an XPath axis that is not in the list of allowed axes + * + * Look for the axis specifier '::' and look for a function name before it. + * Ignoring whitespace before the '::' and the axis name. + * All axes must match a string on a list of allowed axis names + */ + $matches = []; + $res = preg_match_all(self::$regex_xpfilter_axes, $strippedValue, $matches); + if ($res === false) { + throw new Exception("Error in preg_match_all."); + } + + // Check that all the axes names we found are in the list of allowed axes names + foreach ($matches[1] as $match) { + if (!in_array($match, $allowed_axes)) { + throw new InvalidArgumentException(sprintf( + $message ?: '\'%s\' is not an allowed XPath axis.', + $match, + )); + } + } + } +} diff --git a/src/Constants.php b/src/Constants.php index 60712ecc..7cf52dbe 100644 --- a/src/Constants.php +++ b/src/Constants.php @@ -37,18 +37,55 @@ class Constants */ public const XPATH10_URI = 'http://www.w3.org/TR/1999/REC-xpath-19991116'; - /** - * The namespace for the XML Path Language 2.0 - */ - public const XPATH20_URI = 'http://www.w3.org/TR/2010/REC-xpath20-20101214/'; + /** @var array */ + public const DEFAULT_ALLOWED_AXES = [ + 'ancestor', + 'ancestor-or-self', + 'attribute', + 'child', + 'descendant', + 'descendant-or-self', + 'following', + 'following-sibling', + // 'namespace', // By default, we do not allow using the namespace axis + 'parent', + 'preceding', + 'preceding-sibling', + 'self', + ]; - /** - * The namespace for the XML Path Language 3.0 - */ - public const XPATH30_URI = 'https://www.w3.org/TR/2014/REC-xpath-30-20140408/'; + /** @var array */ + public const DEFAULT_ALLOWED_FUNCTIONS = [ + // 'boolean', + // 'ceiling', + // 'concat', + // 'contains', + // 'count', + // 'false', + // 'floor', + // 'id', + // 'lang', + // 'last', + // 'local-name', + // 'name', + // 'namespace-uri', + // 'normalize-space', + 'not', + // 'number', + // 'position', + // 'round', + // 'starts-with', + // 'string', + // 'string-length', + // 'substring', + // 'substring-after', + // 'substring-before', + // 'sum', + // 'text', + // 'translate', + // 'true', + ]; - /** - * The namespace for the XML Path Language 3.1 - */ - public const XPATH31_URI = 'https://www.w3.org/TR/2017/REC-xpath-31-20170321/'; + /** @var int */ + public const XPATH_FILTER_MAX_LENGTH = 100; } diff --git a/tests/Assert/XPathFilterTest.php b/tests/Assert/XPathFilterTest.php new file mode 100644 index 00000000..e12ac7c3 --- /dev/null +++ b/tests/Assert/XPathFilterTest.php @@ -0,0 +1,104 @@ + $axes + * @param array $functions + */ + #[DataProvider('provideXPathFilter')] + public function testDefaultAllowedXPathFilter( + bool $shouldPass, + string $filter, + array $axes = C::DEFAULT_ALLOWED_AXES, + array $functions = C::DEFAULT_ALLOWED_FUNCTIONS, + ): void { + try { + XMLAssert::allowedXPathFilter($filter, $axes, $functions); + $this->assertTrue($shouldPass); + } catch (AssertionFailedException $e) { + $this->assertFalse($shouldPass); + } + } + + + /** + * @return array + */ + public static function provideXPathFilter(): array + { + return [ + // Axes + 'ancestor' => [true, 'ancestor::book'], + 'ancestor-or-self' => [true, 'ancestor-or-self::book'], + 'attribute' => [true, 'attribute::book'], + 'child' => [true, 'child::book'], + 'descendant' => [true, 'descendant::book'], + 'descendant-or-self' => [true, 'descendant-or-self::book'], + 'following' => [true, 'following::book'], + 'following-sibling' => [true, 'following-sibling::book'], + 'namespace' => [false, 'namespace::book'], + 'namespace whitelist' => [true, 'namespace::book', ['namespace']], + 'parent' => [true, 'parent::book'], + 'preceding' => [true, 'preceding::book'], + 'preceding-sibling' => [true, 'preceding-sibling::book'], + 'self' => [true, 'self::book'], + + // Functions + 'boolean' => [false, 'boolean(Data/username/text())'], + 'ceiling' => [false, 'ceiling(//items/item[1]/price)'], + 'concat' => [false, "concat('A', '_', 'B')"], + 'contains' => [false, "contains(//username, 'o')"], + 'count' => [false, "count(//Sales.Order[Sales.Customer_Order/Sales.Customer/Name = 'Jansen'])"], + 'false' => [false, '//Sales.Customer[IsGoldCustomer = false()]'], + 'floor' => [false, 'floor(//items/item[1]/price)'], + 'id' => [false, 'SalesInvoiceLines[id(1)]'], + 'lang' => [false, 'lang("en-US")'], + 'last' => [false, 'last()'], + 'local-name' => [false, 'local-name(SalesInvoiceLines) '], + 'name' => [false, 'name(SalesInvoiceLines)'], + 'namespace-uri' => [false, 'namespace-uri(ReportData)'], + 'normalize-space' => [false, 'normalize-space(" Hello World ")'], + 'not' => [true, "//Sales.Customer[not(Name = 'Jansen')]"], + 'number' => [false, 'number("123")'], + 'position' => [false, 'position()'], + 'round' => [false, 'round(//items/item[1]/price)'], + 'starts-with' => [false, "//Sales.Customer[starts-with(Name, 'Jans')]"], + 'string' => [false, 'string(123)'], + 'string-length' => [false, 'string-length(//email)string-length(//email)'], + 'substring' => [false, "/bookstore/book[substring(title,1,5)='Harry']"], + 'substring-after' => [false, "/bookstore/book[substring-after(title,1,5)='Harry']"], + 'substring-before' => [false, "/bookstore/book[substring-before(title,1,5)='Harry']"], + 'sum' => [false, 'sum(//Sales.Order/TotalPrice)'], + 'text' => [false, '//lastname/text()'], + 'translate' => [false, "translate(//email, '@', '_')"], + 'true' => [false, '//Sales.Customer[IsGoldCustomer = true()]'], + + // Edge-cases + 'unknown axis' => [false, 'unknown::book'], + 'unknown function' => [false, 'unknown()'], + 'too long' => [false, str_pad('a', 120, 'a')], + ]; + } +}