diff --git a/readme.md b/readme.md index 7a6775f..0a58363 100644 --- a/readme.md +++ b/readme.md @@ -115,6 +115,11 @@ Type: `Boolean` Default: `false` Description: *If set to true, self-closing tags will trigger the `onclosetag` event even if `xmlMode` is not set to `true`. NOTE: If `xmlMode` is set to `true` then self-closing tags will always be recognized.* +### `sourceLocations` +Type: `Boolean` +Default: `false` +Description: *If set to true, AST nodes will have a `location` property containing the `start` and `end` line and column position of the node.* + ## License [MIT](LICENSE) diff --git a/src/index.ts b/src/index.ts index 416e655..9e60796 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ import {Parser, ParserOptions} from 'htmlparser2'; -import {Directive, Node, Options, Attributes} from '../types/index.d'; +import {Directive, Node, NodeTag, Options, Attributes} from '../types/index.d'; +import {LocationTracker} from './location-tracker'; const defaultOptions: ParserOptions = { lowerCaseTags: false, @@ -16,6 +17,7 @@ const defaultDirectives: Directive[] = [ ]; const parser = (html: string, options: Options = {}): Node[] => { + const locationTracker = new LocationTracker(html); const bufArray: Node[] = []; const results: Node[] = []; @@ -92,7 +94,15 @@ const parser = (html: string, options: Options = {}): Node[] => { } function onopentag(tag: string, attrs: Attributes) { - const buf: Node = {tag}; + const start = locationTracker.getPosition(parser.startIndex); + const buf: NodeTag = {tag}; + + if (options.sourceLocations) { + buf.location = { + start, + end: start + }; + } if (Object.keys(attrs).length > 0) { buf.attrs = normalizeArributes(attrs); @@ -104,6 +114,10 @@ const parser = (html: string, options: Options = {}): Node[] => { function onclosetag() { const buf: Node | undefined = bufArray.pop(); + if (buf && typeof buf === 'object' && buf.location && parser.endIndex !== null) { + buf.location.end = 
/**
 * One-based line/column pair. Structurally identical to the `Position` type
 * exported from `types/index.d.ts`, so values are interchangeable with it.
 */
type Position = {
  line: number;
  column: number;
};

/**
 * Translates absolute character offsets of a source string into one-based
 * line/column positions.
 *
 * The tracker scans forward incrementally and remembers where the previous
 * lookup stopped, so a sequence of lookups with non-decreasing offsets walks
 * the source at most once in total.
 */
export class LocationTracker {
  /** Char code of the line feed (`\n`), the only character treated as a line break. */
  private static readonly lineFeed = 10;

  private readonly source: string;

  /** Position of the character at `lastIndex`; mutated in place while scanning. */
  private readonly lastPosition: Position;

  /** Offset up to which the source has been scanned so far. */
  private lastIndex: number;

  /**
   * @param source The complete text that offsets passed to `getPosition` refer to.
   */
  constructor(source: string) {
    this.source = source;
    this.lastPosition = {
      line: 1,
      column: 1
    };

    this.lastIndex = 0;
  }

  /**
   * Returns the line and column of the character located at `index`.
   *
   * A `\n` ends the current line; every other UTF-16 code unit (including
   * `\r`) advances the column by one. Offsets beyond the end of the source
   * resolve to the position immediately after the last character.
   *
   * @param index Zero-based offset into the source. Must not be smaller than
   * the furthest offset already scanned by an earlier call.
   * @returns A fresh object, so callers may keep or mutate it freely.
   * @throws Error when `index` lies before the furthest offset already scanned.
   */
  getPosition(index: number): Position {
    if (index < this.lastIndex) {
      throw new Error('Source indices must be monotonic');
    }

    // Never walk past the end of the source: `charCodeAt` yields NaN there,
    // which would invent columns that do not exist and would loop forever for
    // an infinite offset.
    const target = Math.min(index, this.source.length);

    while (this.lastIndex < target) {
      if (this.source.charCodeAt(this.lastIndex) === LocationTracker.lineFeed) {
        this.lastPosition.line++;
        this.lastPosition.column = 1;
      } else {
        this.lastPosition.column++;
      }

      this.lastIndex++;
    }

    // Hand out a copy so later scanning cannot alter positions already returned.
    return {
      line: this.lastPosition.line,
      column: this.lastPosition.column
    };
  }
}

Test

\n

Foo

'; + const tree = parser(html, {sourceLocations: true}); + const expected = [ + { + tag: 'h1', + content: ['Test'], + location: { + start: { + line: 1, + column: 1 + }, + end: { + line: 1, + column: 13 + } + } + }, + '\n', + { + tag: 'p', + content: [ + { + tag: 'b', + content: ['Foo'], + location: { + start: { + line: 2, + column: 4 + }, + end: { + line: 2, + column: 13 + } + } + } + ], + location: { + start: { + line: 2, + column: 1 + }, + end: { + line: 2, + column: 17 + } + } + } + ]; + t.deepEqual(tree, expected); +}); diff --git a/types/index.d.ts b/types/index.d.ts index 04cfe21..5fd20a1 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -12,6 +12,7 @@ export type Directive = { export type Options = { directives?: Directive[]; + sourceLocations?: boolean; } & ParserOptions; export type Tag = string | boolean; @@ -23,6 +24,17 @@ export type NodeTag = { tag?: Tag; attrs?: Attributes; content?: Content; + location?: SourceLocation; }; export type Node = NodeText | NodeTag; + +export type SourceLocation = { + start: Position; + end: Position; +}; + +export type Position = { + line: number; + column: number; +};