// File: extractors.ts
// Repository generated from the JS-DevTools/template-node-typescript template.
import * as urlRegex from "url-regex";
import { NodeInfo, UrlExtractor } from "./types";
/**
 * The built-in URL extractors: JSON `<script>` extraction first,
 * followed by inline `<style>` extraction.
 */
export const defaultExtractors: UrlExtractor[] = [jsonUrls, styleUrls];
/**
 * Matches all URLs in a string.
 *
 * Built once at module load by calling `urlRegex()` with no options, and shared
 * by every call to `jsonUrls`, which drives it with a repeated `exec()` loop.
 *
 * NOTE(review): that loop only terminates if this pattern carries the global
 * (`g`) flag - presumably `url-regex` returns a global regex by default;
 * confirm against the library's documentation.
 */
const urlsPattern = urlRegex();
/**
 * Matches CSS `url(...)` references - including absolute and relative URLs.
 *
 * - Capture group 1 is the text between the parentheses, minus one optional
 *   quote character (`"` or `'`) directly after `(` and directly before `)`.
 * - The `i` flag makes the `url` keyword case-insensitive.
 * - The `g` flag lets callers walk every occurrence with repeated `exec()` calls;
 *   the regex therefore keeps its position in `lastIndex` between calls.
 * - `.` does not match line terminators, so a single match never spans lines.
 * - Whitespace between the parentheses and the value is not skipped: it ends up
 *   inside capture group 1 (together with any quote that follows/precedes it).
 */
const cssUrlPattern = /url\(["']?(.*?)["']?\)/ig;
/**
 * Finds URLs in inline JSON `<script>` tags.
 *
 * The destructured `node` qualifies only when it is a `<script>` element with no
 * `src` attribute, whose `type` attribute is a string containing "json", and
 * whose only child is a text node.
 *
 * Before matching, the JSON escape sequence `\/` is decoded to `/`. JSON allows
 * (and some serializers always emit) `\/` for a forward slash, so a URL may be
 * written as `https:\/\/example.com\/page`. That text contains no literal `//`
 * or `/`, which would hide the scheme and path from a plain-text URL matcher.
 *
 * @returns Every match of `urlsPattern` in the script's text (possibly an empty
 * array), or `undefined` when the node is not an inline JSON script.
 *
 * @example
 * <script type="application/json">
 * <script type="application/ld+json">
 */
function jsonUrls({ node }: NodeInfo): string[] | undefined {
  if (node.tagName === "script"                     // Must be a <script> tag
    && node.properties                              // Must have attributes
    && !node.properties.src                         // Ignore external scripts. We only want inline scripts.
    && typeof node.properties.type === "string"     // Must have a "type" attribute
    && node.properties.type.includes("json")        // The "type" must contain "json"
    && node.children                                // Must have contents
    && node.children.length === 1                   // Must only have a single child
    && node.children[0].type === "text"             // The child must be a text node
  ) {
    // String() applies the same coercion exec() would; then undo JSON's "\/" escaping
    const json = String(node.children[0].value).replace(/\\\//g, "/");

    const urls: string[] = [];
    let match: RegExpExecArray | null;

    // Assuming urlsPattern is a global regex, looping until exec() returns null
    // also resets its lastIndex to 0, leaving the shared pattern ready for the next call.
    while ((match = urlsPattern.exec(json)) !== null) {
      urls.push(match[0]);
    }

    return urls;
  }

  return undefined;
}
/**
 * Finds URLs in inline `<style>` tags.
 *
 * The destructured `node` qualifies only when it is a `<style>` element whose
 * only child is a text node. Every `url(...)` reference found by `cssUrlPattern`
 * is returned in document order, including duplicates.
 *
 * CSS allows whitespace between the parentheses and the value, e.g.
 * `url( "logo.png" )`. In that case `cssUrlPattern` captures the padding and the
 * quotes along with the URL, so each captured value is trimmed and stripped of
 * a leftover leading/trailing quote before it is returned.
 *
 * @returns The URLs referenced by the stylesheet (possibly an empty array), or
 * `undefined` when the node is not an inline `<style>` element.
 *
 * @example
 * <style>
 *   div {
 *     background-image: url(/img/logo.png);
 *     content: url("http://example.com/img/logo.png")
 *   }
 * </style>
 */
function styleUrls({ node }: NodeInfo): string[] | undefined {
  if (node.tagName === "style"                      // Must be a <style> tag
    && node.children                                // Must have contents
    && node.children.length === 1                   // Must only have a single child
    && node.children[0].type === "text"             // The child must be a text node
  ) {
    // String() applies the same coercion exec() would have applied to the value
    const css = String(node.children[0].value);

    const urls: string[] = [];
    let match: RegExpExecArray | null;

    // cssUrlPattern is a shared /g regex: looping until exec() returns null
    // resets its lastIndex to 0, so the next call starts from the beginning.
    while ((match = cssUrlPattern.exec(css)) !== null) {
      const url = match[1]
        .trim()                                     // Padding inside the parentheses
        .replace(/^["']|["']$/g, "")                // Quotes the pattern could not strip because of that padding
        .trim();                                    // Padding inside the quotes
      urls.push(url);
    }

    return urls;
  }

  return undefined;
}