From 6faf50d9c93261aa23a6116cde5b45d7b63e66dd Mon Sep 17 00:00:00 2001 From: Devon Govett Date: Mon, 3 May 2021 22:30:49 -0700 Subject: [PATCH 1/3] feat: add optional support for source locations --- src/index.ts | 44 +++++++++++++++++++++++++++++++++-- test/test-core.spec.ts | 52 ++++++++++++++++++++++++++++++++++++++++++ types/index.d.ts | 11 +++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 416e655..256638d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ import {Parser, ParserOptions} from 'htmlparser2'; -import {Directive, Node, Options, Attributes} from '../types/index.d'; +import {Directive, Node, NodeTag, Options, Attributes} from '../types/index.d'; const defaultOptions: ParserOptions = { lowerCaseTags: false, @@ -49,6 +49,34 @@ const parser = (html: string, options: Options = {}): Node[] => { return result; } + const lastLoc = { + line: 1, + column: 1 + }; + + let lastIndex = 0; + function getLoc(index: number) { + if (index < lastIndex) { + throw new Error('Source indices must be monotonic'); + } + + while (lastIndex < index) { + if (html.charCodeAt(lastIndex) === /* \n */ 10) { + lastLoc.line++; + lastLoc.column = 1; + } else { + lastLoc.column++; + } + + lastIndex++; + } + + return { + line: lastLoc.line, + column: lastLoc.column + }; + } + function onprocessinginstruction(name: string, data: string) { const directives = defaultDirectives.concat(options.directives ?? 
[]); const last: Node = bufferArrayLast(); @@ -92,7 +120,15 @@ const parser = (html: string, options: Options = {}): Node[] => { } function onopentag(tag: string, attrs: Attributes) { - const buf: Node = {tag}; + const start = getLoc(parser.startIndex); + const buf: NodeTag = {tag}; + + if (options.sourceLocations) { + buf.loc = { + start, + end: start + }; + } if (Object.keys(attrs).length > 0) { buf.attrs = normalizeArributes(attrs); @@ -104,6 +140,10 @@ const parser = (html: string, options: Options = {}): Node[] => { function onclosetag() { const buf: Node | undefined = bufArray.pop(); + if (buf && typeof buf === 'object' && buf.loc && parser.endIndex !== null) { + buf.loc.end = getLoc(parser.endIndex); + } + if (buf) { const last = bufferArrayLast(); diff --git a/test/test-core.spec.ts b/test/test-core.spec.ts index ec8fbea..92177af 100644 --- a/test/test-core.spec.ts +++ b/test/test-core.spec.ts @@ -240,3 +240,55 @@ test('should be not converting html entity name', t => { const expected = ['‌ ©']; t.deepEqual(tree, expected); }); + +test('should parse with source locations', t => { + const html = '

<h1>Test</h1>\n<p><b>Foo</b></p>

'; + const tree = parser(html, {sourceLocations: true}); + const expected = [ + { + tag: 'h1', + content: ['Test'], + loc: { + start: { + line: 1, + column: 1 + }, + end: { + line: 1, + column: 13 + } + } + }, + '\n', + { + tag: 'p', + content: [ + { + tag: 'b', + content: ['Foo'], + loc: { + start: { + line: 2, + column: 4 + }, + end: { + line: 2, + column: 13 + } + } + } + ], + loc: { + start: { + line: 2, + column: 1 + }, + end: { + line: 2, + column: 17 + } + } + } + ]; + t.deepEqual(tree, expected); +}); diff --git a/types/index.d.ts b/types/index.d.ts index 7bb532f..0518569 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -12,6 +12,7 @@ export type Directive = { export type Options = { directives?: Directive[]; + sourceLocations?: boolean; } & ParserOptions; export type Node = NodeText | NodeTag; @@ -20,6 +21,16 @@ export type NodeTag = { tag?: string | boolean; attrs?: Attributes; content?: Node[]; + loc?: SourceLocation; }; export type Attributes = Record; +export type SourceLocation = { + start: Position; + end: Position; +}; + +export type Position = { + line: number; + column: number; +}; From 7d6f76b3db9228bfc01f553fda4884a2f8b712aa Mon Sep 17 00:00:00 2001 From: Devon Govett Date: Wed, 5 May 2021 20:12:12 -0700 Subject: [PATCH 2/3] Move location tracker to a separate file and refactor into class --- src/index.ts | 38 ++++++-------------------------------- src/location-tracker.ts | 39 +++++++++++++++++++++++++++++++++++++++ test/test-core.spec.ts | 6 +++--- types/index.d.ts | 2 +- 4 files changed, 49 insertions(+), 36 deletions(-) create mode 100644 src/location-tracker.ts diff --git a/src/index.ts b/src/index.ts index 256638d..9e60796 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ import {Parser, ParserOptions} from 'htmlparser2'; import {Directive, Node, NodeTag, Options, Attributes} from '../types/index.d'; +import {LocationTracker} from './location-tracker'; const defaultOptions: ParserOptions = { lowerCaseTags: false, @@ -16,6 
+17,7 @@ const defaultDirectives: Directive[] = [ ]; const parser = (html: string, options: Options = {}): Node[] => { + const locationTracker = new LocationTracker(html); const bufArray: Node[] = []; const results: Node[] = []; @@ -49,34 +51,6 @@ const parser = (html: string, options: Options = {}): Node[] => { return result; } - const lastLoc = { - line: 1, - column: 1 - }; - - let lastIndex = 0; - function getLoc(index: number) { - if (index < lastIndex) { - throw new Error('Source indices must be monotonic'); - } - - while (lastIndex < index) { - if (html.charCodeAt(lastIndex) === /* \n */ 10) { - lastLoc.line++; - lastLoc.column = 1; - } else { - lastLoc.column++; - } - - lastIndex++; - } - - return { - line: lastLoc.line, - column: lastLoc.column - }; - } - function onprocessinginstruction(name: string, data: string) { const directives = defaultDirectives.concat(options.directives ?? []); const last: Node = bufferArrayLast(); @@ -120,11 +94,11 @@ const parser = (html: string, options: Options = {}): Node[] => { } function onopentag(tag: string, attrs: Attributes) { - const start = getLoc(parser.startIndex); + const start = locationTracker.getPosition(parser.startIndex); const buf: NodeTag = {tag}; if (options.sourceLocations) { - buf.loc = { + buf.location = { start, end: start }; @@ -140,8 +114,8 @@ const parser = (html: string, options: Options = {}): Node[] => { function onclosetag() { const buf: Node | undefined = bufArray.pop(); - if (buf && typeof buf === 'object' && buf.loc && parser.endIndex !== null) { - buf.loc.end = getLoc(parser.endIndex); + if (buf && typeof buf === 'object' && buf.location && parser.endIndex !== null) { + buf.location.end = locationTracker.getPosition(parser.endIndex); } if (buf) { diff --git a/src/location-tracker.ts b/src/location-tracker.ts new file mode 100644 index 0000000..95d5ba2 --- /dev/null +++ b/src/location-tracker.ts @@ -0,0 +1,39 @@ +import {Position} from '../types/index.d'; + +export class LocationTracker { + 
private readonly source: string; + private lastPosition: Position; + private lastIndex: number; + + constructor(source: string) { + this.source = source; + this.lastPosition = { + line: 1, + column: 1 + }; + + this.lastIndex = 0; + } + + getPosition(index: number): Position { + if (index < this.lastIndex) { + throw new Error('Source indices must be monotonic'); + } + + while (this.lastIndex < index) { + if (this.source.charCodeAt(this.lastIndex) === /* \n */ 10) { + this.lastPosition.line++; + this.lastPosition.column = 1; + } else { + this.lastPosition.column++; + } + + this.lastIndex++; + } + + return { + line: this.lastPosition.line, + column: this.lastPosition.column + }; + } +} diff --git a/test/test-core.spec.ts b/test/test-core.spec.ts index 92177af..e1f8673 100644 --- a/test/test-core.spec.ts +++ b/test/test-core.spec.ts @@ -248,7 +248,7 @@ test('should parse with source locations', t => { { tag: 'h1', content: ['Test'], - loc: { + location: { start: { line: 1, column: 1 @@ -266,7 +266,7 @@ test('should parse with source locations', t => { { tag: 'b', content: ['Foo'], - loc: { + location: { start: { line: 2, column: 4 @@ -278,7 +278,7 @@ test('should parse with source locations', t => { } } ], - loc: { + location: { start: { line: 2, column: 1 diff --git a/types/index.d.ts b/types/index.d.ts index 0518569..b3ece55 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -21,7 +21,7 @@ export type NodeTag = { tag?: string | boolean; attrs?: Attributes; content?: Node[]; - loc?: SourceLocation; + location?: SourceLocation; }; export type Attributes = Record; From d61007bb6b535d4414124b4b93194d63b6673fa1 Mon Sep 17 00:00:00 2001 From: Devon Govett Date: Fri, 7 May 2021 22:21:19 -0700 Subject: [PATCH 3/3] Add description to docs --- readme.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/readme.md b/readme.md index 7a6775f..0a58363 100644 --- a/readme.md +++ b/readme.md @@ -115,6 +115,11 @@ Type: `Boolean` Default: `false` Description: *If set to true, 
self-closing tags will trigger the `onclosetag` event even if `xmlMode` is not set to `true`. NOTE: If `xmlMode` is set to `true` then self-closing tags will always be recognized.* +### `sourceLocations` +Type: `Boolean` +Default: `false` +Description: *If set to true, AST nodes will have a `location` property containing the `start` and `end` positions of the node, each given as a 1-based `line` and `column`.* + ## License [MIT](LICENSE)