From 88453e64cd86c5b60e8d2fb2c6f953bbc353ffbf Mon Sep 17 00:00:00 2001 From: Franck Ranaivo-Harisoa Date: Mon, 6 Mar 2023 17:06:03 +0100 Subject: [PATCH] [CssSelector] Add support for :scope --- CHANGELOG.md | 5 +++++ Exception/SyntaxErrorException.php | 5 +++++ Parser/Parser.php | 15 +++++++++++++-- Tests/Parser/ParserTest.php | 7 +++++++ Tests/XPath/TranslatorTest.php | 5 +++++ XPath/Extension/PseudoClassExtension.php | 6 ++++++ 6 files changed, 41 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de81fa2..c035d6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ CHANGELOG ========= +6.3 +----- + + * Add support for `:scope` + 4.4.0 ----- diff --git a/Exception/SyntaxErrorException.php b/Exception/SyntaxErrorException.php index f73860c..5a9d807 100644 --- a/Exception/SyntaxErrorException.php +++ b/Exception/SyntaxErrorException.php @@ -43,6 +43,11 @@ public static function nestedNot(): self return new self('Got nested ::not().'); } + public static function notAtTheStartOfASelector(string $pseudoElement): self + { + return new self(sprintf('Got immediate child pseudo-element ":%s" not at the start of a selector', $pseudoElement)); + } + public static function stringAsFunctionArgument(): self { return new self('String not allowed as function argument.'); diff --git a/Parser/Parser.php b/Parser/Parser.php index 101df57..5313d34 100644 --- a/Parser/Parser.php +++ b/Parser/Parser.php @@ -19,7 +19,7 @@ * CSS selector parser. * * This component is a port of the Python cssselect library, - * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect. + * which is copyright Ian Bicking, @see https://github.com/scrapy/cssselect. 
* * @author Jean-François Simon * @@ -192,7 +192,18 @@ private function parseSimpleSelector(TokenStream $stream, bool $insideNegation = if (!$stream->getPeek()->isDelimiter(['('])) { $result = new Node\PseudoNode($result, $identifier); - + if ('Pseudo[Element[*]:scope]' === $result->__toString()) { + $used = \count($stream->getUsed()); + if (!(2 === $used + || 3 === $used && $stream->getUsed()[0]->isWhiteSpace() + || $used >= 3 && $stream->getUsed()[$used - 3]->isDelimiter([',']) + || $used >= 4 + && $stream->getUsed()[$used - 3]->isWhiteSpace() + && $stream->getUsed()[$used - 4]->isDelimiter([',']) + )) { + throw SyntaxErrorException::notAtTheStartOfASelector('scope'); + } + } continue; } diff --git a/Tests/Parser/ParserTest.php b/Tests/Parser/ParserTest.php index af8cf58..a8708ce 100644 --- a/Tests/Parser/ParserTest.php +++ b/Tests/Parser/ParserTest.php @@ -146,6 +146,12 @@ public static function getParserTestData() // unicode escape: \20 == (space) ['*[aval="\'\20 \'"]', ['Attribute[Element[*][aval = \'\' \'\']]']], ["*[aval=\"'\\20\r\n '\"]", ['Attribute[Element[*][aval = \'\' \'\']]']], + [':scope > foo', ['CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]']], + [':scope > foo bar > div', ['CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]] Element[bar]] > Element[div]]']], + [':scope > #foo #bar', ['CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > Hash[Element[*]#foo]] Hash[Element[*]#bar]]']], + [':scope', ['Pseudo[Element[*]:scope]']], + ['foo bar, :scope > div', ['CombinedSelector[Element[foo] Element[bar]]', 'CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]']], + ['foo bar,:scope > div', ['CombinedSelector[Element[foo] Element[bar]]', 'CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]']], ]; } @@ -176,6 +182,7 @@ public static function getParserExceptionTestData() [':lang(fr', SyntaxErrorException::unexpectedToken('an argument', new Token(Token::TYPE_FILE_END, '', 
8))->getMessage()], [':contains("foo', SyntaxErrorException::unclosedString(10)->getMessage()], ['foo!', SyntaxErrorException::unexpectedToken('selector', new Token(Token::TYPE_DELIMITER, '!', 3))->getMessage()], + [':scope > div :scope header', SyntaxErrorException::notAtTheStartOfASelector('scope')->getMessage()], ]; } diff --git a/Tests/XPath/TranslatorTest.php b/Tests/XPath/TranslatorTest.php index d330600..894b8a0 100644 --- a/Tests/XPath/TranslatorTest.php +++ b/Tests/XPath/TranslatorTest.php @@ -219,6 +219,8 @@ public static function getCssToXPathTestData() ['e + f', "e/following-sibling::*[(name() = 'f') and (position() = 1)]"], ['e ~ f', 'e/following-sibling::f'], ['div#container p', "div[@id = 'container']/descendant-or-self::*/p"], + [':scope > div[dataimg=""]', "*[1]/div[@dataimg = '']"], + [':scope', '*[1]'], ]; } @@ -411,6 +413,9 @@ public static function getHtmlShakespearTestData() ['div[class|=dialog]', 50], // ? Seems right ['div[class!=madeup]', 243], // ? Seems right ['div[class~=dialog]', 51], // ? 
Seems right + [':scope > div', 1], + [':scope > div > div[class=dialog]', 1], + [':scope > div div', 242], ]; } } diff --git a/XPath/Extension/PseudoClassExtension.php b/XPath/Extension/PseudoClassExtension.php index 36ab582..aada832 100644 --- a/XPath/Extension/PseudoClassExtension.php +++ b/XPath/Extension/PseudoClassExtension.php @@ -30,6 +30,7 @@ public function getPseudoClassTranslators(): array { return [ 'root' => $this->translateRoot(...), + 'scope' => $this->translateScopePseudo(...), 'first-child' => $this->translateFirstChild(...), 'last-child' => $this->translateLastChild(...), 'first-of-type' => $this->translateFirstOfType(...), @@ -45,6 +46,11 @@ public function translateRoot(XPathExpr $xpath): XPathExpr return $xpath->addCondition('not(parent::*)'); } + public function translateScopePseudo(XPathExpr $xpath): XPathExpr + { + return $xpath->addCondition('1'); + } + public function translateFirstChild(XPathExpr $xpath): XPathExpr { return $xpath