From 131653ff21e580a17e1770f2edec77be5468a88f Mon Sep 17 00:00:00 2001 From: Anton Medvedev Date: Thu, 12 Dec 2024 15:15:48 +0100 Subject: [PATCH] New finder --- finder.js | 279 +++++++++++------------------------ finder.ts | 341 +++++++++++++------------------------------ tests/finder.test.js | 79 ++-------- 3 files changed, 200 insertions(+), 499 deletions(-) diff --git a/finder.js b/finder.js index 3e15f94..9c0dc3e 100644 --- a/finder.js +++ b/finder.js @@ -1,119 +1,97 @@ // License: MIT // Author: Anton Medvedev // Source: https://github.com/antonmedv/finder -let config; -let rootDocument; -let start; export function finder(input, options) { - start = new Date(); + //const startTime = new Date() if (input.nodeType !== Node.ELEMENT_NODE) { throw new Error(`Can't generate CSS selector for non-element node type.`); } - if ('html' === input.tagName.toLowerCase()) { + if (input.tagName.toLowerCase() === 'html') { return 'html'; } const defaults = { root: document.body, - idName: (name) => true, - className: (name) => true, - tagName: (name) => true, + idName: (name) => false, attr: (name, value) => false, - seedMinLength: 1, - optimizedMinLength: 2, - threshold: 1000, - maxNumberOfTries: 10000, + className: (name) => wordLike(name), + tagName: (name) => true, timeoutMs: undefined, }; - config = { ...defaults, ...options }; - rootDocument = findRootDocument(config.root, defaults); - let path = bottomUpSearch(input, 'all', () => bottomUpSearch(input, 'two', () => bottomUpSearch(input, 'one', () => bottomUpSearch(input, 'none')))); - if (path) { - const optimized = sort(optimize(path, input)); - if (optimized.length > 0) { - path = optimized[0]; - } - return selector(path); - } - else { - throw new Error(`Selector was not found.`); - } -} -function findRootDocument(rootNode, defaults) { - if (rootNode.nodeType === Node.DOCUMENT_NODE) { - return rootNode; - } - if (rootNode === defaults.root) { - return rootNode.ownerDocument; - } - return rootNode; -} -function bottomUpSearch(input, limit, fallback) { - let path = null; - let stack = []; + const config = { ...defaults, ...options }; + const rootDocument = findRootDocument(config.root, defaults); + const stack = []; let current = input; let i = 0; while (current) { - checkTimeout(); - let level = maybe(id(current)) || - maybe(...attr(current)) || - maybe(...classNames(current)) || - maybe(tagName(current)) || [any()]; - const nth = index(current); - if (limit == 'all') { - if (nth) { - level = level.concat(level.filter(dispensableNth).map((node) => nthChild(node, nth))); - } - } - else if (limit == 'two') { - level = level.slice(0, 1); - if (nth) { - level = level.concat(level.filter(dispensableNth).map((node) => nthChild(node, nth))); - } - } - else if (limit == 'one') { - const [node] = (level = level.slice(0, 1)); - if (nth && dispensableNth(node)) { - level = [nthChild(node, nth)]; - } - } - else if (limit == 'none') { - level = [any()]; - if (nth) { - level = [nthChild(level[0], nth)]; - } - } + const level = tie(current, config); for (let node of level) { node.level = i; } stack.push(level); - if (stack.length >= config.seedMinLength) { - path = findUniquePath(stack, fallback); - if (path) { - break; - } - } current = current.parentElement; i++; + const paths = sort(combinations(stack)); + for (const candidate of paths) { + console.log(selector(candidate)); + if (unique(candidate, rootDocument)) { + return selector(candidate); + } + } } - if (!path) { - path = findUniquePath(stack, fallback); - } - if (!path && fallback) { - return fallback(); - } - return path; + throw new Error(`Selector was not found.`); } -function findUniquePath(stack, fallback) { - const paths = sort(combinations(stack)); - if (paths.length > config.threshold) { - return fallback ? fallback() : null; +function tie(element, config) { + const level = []; + const elementId = element.getAttribute('id'); + if (elementId && config.idName(elementId)) { + level.push({ + name: '#' + CSS.escape(elementId), + penalty: 0, + }); + } + for (let i = 0; i < element.attributes.length; i++) { + const attr = element.attributes[i]; + if (config.attr(attr.name, attr.value)) { + level.push({ + name: `[${CSS.escape(attr.name)}="${CSS.escape(attr.value)}"]`, + penalty: 1, + }); + } + } + for (let i = 0; i < element.classList.length; i++) { + const name = element.classList[i]; + if (config.className(name)) { + level.push({ + name: '.' + CSS.escape(name), + penalty: 2, + }); + } } - for (let candidate of paths) { - if (unique(candidate)) { - return candidate; + const tagName = element.tagName.toLowerCase(); + if (config.tagName(tagName)) { + level.push({ + name: tagName, + penalty: 3, + }); + const index = indexOf(element, tagName); + if (index !== undefined) { + level.push({ + name: `${tagName}:nth-of-type(${index})`, + penalty: 4, + }); } } - return null; + const nth = indexOf(element); + if (nth !== undefined) { + level.push({ + name: `*:nth-child(${nth})`, + penalty: 5, + }); + } + return level; +} +function wordLike(name) { + return /^[a-zA-Z][a-z0-9]*(?:-[a-z0-9]+)*$/.test(name); } function selector(path) { let node = path[0]; @@ -133,69 +111,20 @@ function selector(path) { function penalty(path) { return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0); } -function unique(path) { - const css = selector(path); - switch (rootDocument.querySelectorAll(css).length) { - case 0: - throw new Error(`Can't select any node with this selector: ${css}`); - case 1: - return true; - default: - return false; - } -} -function id(input) { - const elementId = input.getAttribute('id'); - if (elementId && config.idName(elementId)) { - return { - name: '#' + CSS.escape(elementId), - penalty: 0, - }; - } - return null; -} -function attr(input) { - const attrs = Array.from(input.attributes).filter((attr) => config.attr(attr.name, attr.value)); - return attrs.map((attr) => ({ - name: `[${CSS.escape(attr.name)}="${CSS.escape(attr.value)}"]`, - penalty: 0.5, - })); -} -function classNames(input) { - const names = Array.from(input.classList).filter(config.className); - return names.map((name) => ({ - name: '.' + CSS.escape(name), - penalty: 1, - })); -} -function tagName(input) { - const name = input.tagName.toLowerCase(); - if (config.tagName(name)) { - return { - name, - penalty: 2, - }; - } - return null; -} -function any() { - return { - name: '*', - penalty: 3, - }; -} -function index(input) { +function indexOf(input, tagName) { const parent = input.parentNode; if (!parent) { - return null; + return undefined; } let child = parent.firstChild; if (!child) { - return null; + return undefined; } let i = 0; while (child) { - if (child.nodeType === Node.ELEMENT_NODE) { + if (child.nodeType === Node.ELEMENT_NODE + && (tagName === undefined + || child.tagName.toLowerCase() === tagName)) { i++; } if (child === input) { @@ -205,25 +134,6 @@ function index(input) { } return i; } -function nthChild(node, i) { - return { - name: node.name + `:nth-child(${i})`, - penalty: node.penalty + 1, - }; -} -function dispensableNth(node) { - return node.name !== 'html' && !node.name.startsWith('#'); -} -function maybe(...level) { - const list = level.filter(notEmpty); - if (list.length > 0) { - return list; - } - return null; -} -function notEmpty(value) { - return value !== null && value !== undefined; -} function* combinations(stack, path = []) { if (stack.length > 0) { for (let node of stack[0]) { @@ -237,36 +147,23 @@ function* combinations(stack, path = []) { function sort(paths) { return [...paths].sort((a, b) => penalty(a) - penalty(b)); } -function* optimize(path, input, scope = { - counter: 0, - visited: new Map(), -}) { - if (path.length > 2 && path.length > config.optimizedMinLength) { - for (let i = 1; i < path.length - 1; i++) { - if (scope.counter > config.maxNumberOfTries) { - return; // Okay At least I tried! - } - scope.counter += 1; - const newPath = [...path]; - newPath.splice(i, 1); - const newPathKey = selector(newPath); - if (scope.visited.has(newPathKey)) { - return; - } - if (unique(newPath) && same(newPath, input)) { - yield newPath; - scope.visited.set(newPathKey, true); - yield* optimize(newPath, input, scope); - } - } +function findRootDocument(rootNode, defaults) { + if (rootNode.nodeType === Node.DOCUMENT_NODE) { + return rootNode; } + if (rootNode === defaults.root) { + return rootNode.ownerDocument; + } + return rootNode; } -function same(path, input) { - return rootDocument.querySelector(selector(path)) === input; -} -function checkTimeout() { - const elapsedTime = new Date().getTime() - start.getTime(); - if (config.timeoutMs !== undefined && elapsedTime > config.timeoutMs) { - throw new Error(`Timeout: Can't find a unique selector after ${elapsedTime}ms`); +function unique(path, rootDocument) { + const css = selector(path); + switch (rootDocument.querySelectorAll(css).length) { + case 0: + throw new Error(`Can't select any node with this selector: ${css}`); + case 1: + return true; + default: + return false; } } diff --git a/finder.ts b/finder.ts index f896fb8..6442d7d 100644 --- a/finder.ts +++ b/finder.ts @@ -16,147 +16,112 @@ export type Options = { className: (name: string) => boolean tagName: (name: string) => boolean attr: (name: string, value: string) => boolean - seedMinLength: number - optimizedMinLength: number - threshold: number - maxNumberOfTries: number timeoutMs: number | undefined } -let config: Options -let rootDocument: Document | Element -let start: Date - export function finder(input: Element, options?: Partial): string { - start = new Date() + //const startTime = new Date() if (input.nodeType !== Node.ELEMENT_NODE) { throw new Error(`Can't generate CSS selector for non-element node type.`) } - if ('html' === input.tagName.toLowerCase()) { + if (input.tagName.toLowerCase() === 'html') { return 'html' } const defaults: Options = { root: document.body, - idName: (name: string) => true, - className: (name: string) => true, + idName: wordLike, + className: wordLike, tagName: (name: string) => true, attr: (name: string, value: string) => false, - seedMinLength: 1, - optimizedMinLength: 2, - threshold: 1000, - maxNumberOfTries: 10000, timeoutMs: undefined, } - config = {...defaults, ...options} - rootDocument = findRootDocument(config.root, defaults) - - let path = - bottomUpSearch(input, 'all', - () => bottomUpSearch(input, 'two', - () => bottomUpSearch(input, 'one', - () => bottomUpSearch(input, 'none')))) - - if (path) { - const optimized = sort(optimize(path, input)) - if (optimized.length > 0) { - path = optimized[0] - } - return selector(path) - } else { - throw new Error(`Selector was not found.`) - } -} + const config = {...defaults, ...options} + const rootDocument = findRootDocument(config.root, defaults) -function findRootDocument(rootNode: Element | Document, defaults: Options) { - if (rootNode.nodeType === Node.DOCUMENT_NODE) { - return rootNode - } - if (rootNode === defaults.root) { - return rootNode.ownerDocument as Document - } - return rootNode -} - -function bottomUpSearch( - input: Element, - limit: 'all' | 'two' | 'one' | 'none', - fallback?: () => Path | null, -): Path | null { - let path: Path | null = null - let stack: Knot[][] = [] + const stack: Knot[][] = [] let current: Element | null = input let i = 0 while (current) { - checkTimeout() - - let level: Knot[] = - maybe(id(current)) || - maybe(...attr(current)) || - maybe(...classNames(current)) || - maybe(tagName(current)) || [any()] - - const nth = index(current) - if (limit == 'all') { - if (nth) { - level = level.concat( - level.filter(dispensableNth).map((node) => nthChild(node, nth)), - ) - } - } else if (limit == 'two') { - level = level.slice(0, 1) - if (nth) { - level = level.concat( - level.filter(dispensableNth).map((node) => nthChild(node, nth)), - ) - } - } else if (limit == 'one') { - const [node] = (level = level.slice(0, 1)) - if (nth && dispensableNth(node)) { - level = [nthChild(node, nth)] - } - } else if (limit == 'none') { - level = [any()] - if (nth) { - level = [nthChild(level[0], nth)] - } - } + const level = tie(current, config) for (let node of level) { node.level = i } stack.push(level) - if (stack.length >= config.seedMinLength) { - path = findUniquePath(stack, fallback) - if (path) { - break - } - } current = current.parentElement i++ + + const paths = sort(combinations(stack)) + for (const candidate of paths) { + if (unique(candidate, rootDocument)) { + return selector(candidate) + } + } } - if (!path) { - path = findUniquePath(stack, fallback) + + throw new Error(`Selector was not found.`) +} + +function wordLike(name: string) { + return /^[a-z0-9\-]{3,}$/i.test(name) +} + +function tie(element: Element, config: Options): Knot[] { + const level: Knot[] = [] + + const elementId = element.getAttribute('id') + if (elementId && config.idName(elementId)) { + level.push({ + name: '#' + CSS.escape(elementId), + penalty: 0, + }) } - if (!path && fallback) { - return fallback() + + for (let i = 0; i < element.attributes.length; i++) { + const attr = element.attributes[i] + if (config.attr(attr.name, attr.value)) { + level.push({ + name: `[${CSS.escape(attr.name)}="${CSS.escape(attr.value)}"]`, + penalty: 1, + }) + } } - return path -} -function findUniquePath( - stack: Knot[][], - fallback?: () => Path | null, -): Path | null { - const paths = sort(combinations(stack)) - if (paths.length > config.threshold) { - return fallback ? fallback() : null + for (let i = 0; i < element.classList.length; i++) { + const name = element.classList[i] + if (config.className(name)) { + level.push({ + name: '.' + CSS.escape(name), + penalty: 2, + }) + } } - for (let candidate of paths) { - if (unique(candidate)) { - return candidate + + const tagName = element.tagName.toLowerCase() + if (config.tagName(tagName)) { + level.push({ + name: tagName, + penalty: 3, + }) + + const index = indexOf(element, tagName) + if (index !== undefined) { + level.push({ + name: `${tagName}:nth-of-type(${index})`, + penalty: 4, + }) } } - return null + + const nth = indexOf(element) + if (nth !== undefined) { + level.push({ + name: `*:nth-child(${nth})`, + penalty: 5, + }) + } + + return level } function selector(path: Path): string { @@ -178,83 +143,24 @@ function penalty(path: Path): number { return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0) } -function unique(path: Path) { - const css = selector(path) - switch (rootDocument.querySelectorAll(css).length) { - case 0: - throw new Error( - `Can't select any node with this selector: ${css}`, - ) - case 1: - return true - default: - return false - } -} - -function id(input: Element): Knot | null { - const elementId = input.getAttribute('id') - if (elementId && config.idName(elementId)) { - return { - name: '#' + CSS.escape(elementId), - penalty: 0, - } - } - return null -} - -function attr(input: Element): Knot[] { - const attrs = Array.from(input.attributes).filter((attr) => - config.attr(attr.name, attr.value), - ) - return attrs.map( - (attr): Knot => ({ - name: `[${CSS.escape(attr.name)}="${CSS.escape(attr.value)}"]`, - penalty: 0.5, - }), - ) -} - -function classNames(input: Element): Knot[] { - const names = Array.from(input.classList).filter(config.className) - return names.map( - (name): Knot => ({ - name: '.' + CSS.escape(name), - penalty: 1, - }), - ) -} - -function tagName(input: Element): Knot | null { - const name = input.tagName.toLowerCase() - if (config.tagName(name)) { - return { - name, - penalty: 2, - } - } - return null -} - -function any(): Knot { - return { - name: '*', - penalty: 3, - } -} - -function index(input: Element): number | null { +function indexOf(input: Element, tagName?: string): number | undefined { const parent = input.parentNode if (!parent) { - return null + return undefined } let child = parent.firstChild if (!child) { - return null + return undefined } let i = 0 while (child) { - if (child.nodeType === Node.ELEMENT_NODE) { + if ( + child.nodeType === Node.ELEMENT_NODE + && ( + tagName === undefined + || (child as Element).tagName.toLowerCase() === tagName + ) + ) { i++ } if (child === input) { @@ -265,29 +171,6 @@ function index(input: Element): number | null { return i } -function nthChild(node: Knot, i: number): Knot { - return { - name: node.name + `:nth-child(${i})`, - penalty: node.penalty + 1, - } -} - -function dispensableNth(node: Knot) { - return node.name !== 'html' && !node.name.startsWith('#') -} - -function maybe(...level: (Knot | null)[]): Knot[] | null { - const list = level.filter(notEmpty) - if (list.length > 0) { - return list - } - return null -} - -function notEmpty(value: T | null | undefined): value is T { - return value !== null && value !== undefined -} - function* combinations(stack: Knot[][], path: Knot[] = []): Generator { if (stack.length > 0) { for (let node of stack[0]) { @@ -302,47 +185,27 @@ function sort(paths: Iterable): Path[] { return [...paths].sort((a, b) => penalty(a) - penalty(b)) } -type Scope = { - counter: number - visited: Map -} - -function* optimize( - path: Path, - input: Element, - scope: Scope = { - counter: 0, - visited: new Map(), - }, -): Generator { - if (path.length > 2 && path.length > config.optimizedMinLength) { - for (let i = 1; i < path.length - 1; i++) { - if (scope.counter > config.maxNumberOfTries) { - return // Okay At least I tried! - } - scope.counter += 1 - const newPath = [...path] - newPath.splice(i, 1) - const newPathKey = selector(newPath) - if (scope.visited.has(newPathKey)) { - return - } - if (unique(newPath) && same(newPath, input)) { - yield newPath - scope.visited.set(newPathKey, true) - yield* optimize(newPath, input, scope) - } - } +function findRootDocument(rootNode: Element | Document, defaults: Options) { + if (rootNode.nodeType === Node.DOCUMENT_NODE) { + return rootNode } + if (rootNode === defaults.root) { + return rootNode.ownerDocument as Document + } + return rootNode } -function same(path: Path, input: Element) { - return rootDocument.querySelector(selector(path)) === input -} - -function checkTimeout() { - const elapsedTime = new Date().getTime() - start.getTime() - if (config.timeoutMs !== undefined && elapsedTime > config.timeoutMs) { - throw new Error(`Timeout: Can't find a unique selector after ${elapsedTime}ms`) +function unique(path: Path, rootDocument: Element | Document) { + const css = selector(path) + switch (rootDocument.querySelectorAll(css).length) { + case 0: + throw new Error( + `Can't select any node with this selector: ${css}`, + ) + case 1: + return true + default: + return false } } + diff --git a/tests/finder.test.js b/tests/finder.test.js index bf38c13..0f28d41 100644 --- a/tests/finder.test.js +++ b/tests/finder.test.js @@ -1,9 +1,9 @@ -import { test, assert, expect } from 'vitest' -import { JSDOM } from 'jsdom' -import { readFileSync } from 'node:fs' -import { fileURLToPath } from 'node:url' -import { dirname } from 'node:path' -import { finder } from '../finder.js' +import {test, assert, expect} from 'vitest' +import {JSDOM} from 'jsdom' +import {readFileSync} from 'node:fs' +import {fileURLToPath} from 'node:url' +import {dirname} from 'node:path' +import {finder} from '../finder.js' import 'css.escape' @@ -22,10 +22,8 @@ function check(html, config = {}) { } catch (err) { assert.ok(false, err.toString() + '\n Node: ' + node.outerHTML.substring(0, 100)) } - assert.equal(document.querySelectorAll(css).length, 1, - `Selector "${css}" selects more then one node.`) - assert.equal(document.querySelector(css), node, - `Selector "${css}" selects another node.`) + assert.equal(document.querySelectorAll(css).length, 1, `Selector "${css}" selects more then one node.`) + assert.equal(document.querySelector(css), node, `Selector "${css}" selects another node.`) selectors.push(css) } return selectors @@ -51,69 +49,12 @@ test('tailwindcss', () => { expect(selectors).toMatchSnapshot() }) -test('config:seed', () => { - const html = ` -
- -

-
-
- ` - check(html) - check(html, {seedMinLength: 3}) - check(html, {seedMinLength: 3, optimizedMinLength: 3}) - check(html, {threshold: 2}) -}) - -test('config:threshold', () => { - const html = ` -
-

-

-

-
- ` - check(html, {threshold: 1}) -}) - -test('config:fun', () => { - const html = ` -
-
-
- ` - check(html, {tagName: tag => tag !== 'div'}) -}) - -test('config:id', () => { - const html = ` -
-
-
- ` - check(html, {idName: id => id !== 'test'}) -}) - -test('config:attr', () => { - const html = ` -
-
-
-
- ` - check(html, { - attr: (name, value) => { - return name !== 'data-test' && name === 'data-qa' && value % 2 === 0 - }, - }) -}) - test('duplicate', () => { const html = `
` - check(html) + expect(check(html)).toMatchSnapshot() }) test('duplicate:sub-nodes', () => { @@ -121,5 +62,5 @@ test('duplicate:sub-nodes', () => {
` - check(html) + expect(check(html)).toMatchSnapshot() })