-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathparsers.js
648 lines (576 loc) · 17.4 KB
/
parsers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
const { splitStringBy, isDomain } = require('./utils.js')
/**
 * Parse a "simple" WHOIS response (labelled `key: value` lines grouped by
 * blank lines) into a plain object.
 *
 * - Lines starting with "%" or "#" are treated as comments and collected in
 *   `text`, except "# start" / "# end" markers, which delimit results.
 * - Only the first result (everything before the first "# end") is parsed.
 * - Groups containing a label listed in `lineToGroup`, or a `role` / `person`
 *   label, are stored as nested objects; every other group is flattened into
 *   the returned object, with labels renamed through `renameLabels`.
 *
 * @param {string} whois Raw WHOIS response text
 * @returns {Object} Parsed fields plus `text` (array of comment lines); an
 *   empty object when the response reports that nothing was found
 */
const parseSimpleWhois = (whois) => {
  const data = {}
  const text = []

  // Labels come from the WHOIS server, so never trust inherited members
  // such as "constructor" when looking a label up in a plain object.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key)

  const renameLabels = {
    NetRange: 'range',
    inetnum: 'range',
    CIDR: 'route',
    origin: 'asn',
    OriginAS: 'asn',
  }
  const lineToGroup = {
    contact: 'contact',
    OrgName: 'organisation',
    organisation: 'organisation',
    OrgAbuseHandle: 'contactAbuse',
    irt: 'contactAbuse',
    RAbuseHandle: 'contactAbuse',
    OrgTechHandle: 'contactTechnical',
    RTechHandle: 'contactTechnical',
    OrgNOCHandle: 'contactNoc',
    RNOCHandle: 'contactNoc',
  }

  if (whois.includes('returned 0 objects') || whois.includes('No match found')) {
    return data
  }

  let resultNum = 0
  const groups = [{}]
  let lastLabel

  whois.split('\n').forEach((line) => {
    // catch comment lines
    if (line.startsWith('%') || line.startsWith('#')) {
      // detect if an ASN or IP has multiple WHOIS results
      if (line.includes('# start')) {
        // marker only, nothing to store
      } else if (line.includes('# end')) {
        resultNum++
      } else {
        text.push(line)
      }
      return
    }

    // for the moment, parse only first WHOIS result
    if (resultNum !== 0) {
      return
    }

    const current = groups[groups.length - 1]

    // an empty line after some content means another info group starts
    if (!line) {
      if (Object.keys(current).length) {
        groups.push({})
      }
      return
    }

    if (line.includes(':')) {
      const [label, value] = splitStringBy(line, line.indexOf(':')).map((info) => info.trim())
      lastLabel = label

      if (value.includes('---')) {
        // separator line, carries no data
      } else if (hasOwn(current, label) && current[label]) {
        // repeated label inside the same group: keep every value
        current[label] += '\n' + value
      } else {
        current[label] = value
      }
      return
    }

    // Line without a label: continuation of the previous field.
    if (lastLabel !== undefined && hasOwn(current, lastLabel)) {
      current[lastLabel] += '\n' + line.trim()
    } else if (line.trim()) {
      // Nothing to continue in this group (first line, first line after a
      // blank line, or the previous value was skipped). Keep the line as
      // free text instead of producing an "undefined\n..." value.
      text.push(line)
    }
  })

  groups
    .filter((group) => Object.keys(group).length)
    .forEach((group) => {
      const groupLabels = Object.keys(group)

      // the first label marked as a group decides where the group is stored
      const marker = groupLabels.find((groupLabel) => hasOwn(lineToGroup, groupLabel))
      let groupName = marker === undefined ? false : lineToGroup[marker]

      // check if a info group is a Contact in APNIC result
      // @Link https://www.apnic.net/manage-ip/using-whois/guide/role/
      if (!groupName && groupLabels.includes('role')) {
        groupName = 'Contact ' + group.role.split(' ')[1]
      } else if (!groupName && groupLabels.includes('person')) {
        groupName = 'Contact ' + group['nic-hdl']
      }

      if (groupName === 'contact') {
        data.contacts = data.contacts || {}
        data.contacts[group['contact']] = group
      } else if (groupName) {
        data[groupName] = group
      } else {
        // ungrouped info is flattened into the result
        for (const key of Object.keys(group)) {
          const label = hasOwn(renameLabels, key) ? renameLabels[key] : key
          data[label] = group[key]
        }
      }
    })

  // Append the WHOIS comments
  data.text = text

  return data
}
/**
 * Parse the WHOIS response of a domain name into an object keyed by label.
 *
 * Labels are normalised through `renameLabels`; "Domain Status" and
 * "Name Server" are always arrays; lines that are comments, have no label,
 * or whose label is in `ignoreLabels` / contains one of `ignoreTexts` are
 * collected in `text`. Some TLDs are pre-processed by dedicated helpers
 * before the generic `label: value` parsing.
 *
 * @param {string} domain Domain name the WHOIS data belongs to (used to pick TLD specific handling)
 * @param {string} whois Raw WHOIS response text
 * @param {boolean} [ignorePrivacy] When truthy, values that only say the data is hidden/redacted are replaced with ''
 * @returns {Object} Parsed WHOIS fields plus `text` (array of unparsed lines)
 */
const parseDomainWhois = (domain, whois, ignorePrivacy) => {
  // Labels come from the WHOIS server, so lookups in plain objects must not
  // resolve inherited members such as "constructor" or "__proto__".
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key)

  // Text saying there's no useful data in a field
  const noData = [
    '-',
    '.',
    'n/a',
    'no data',
    'redacted',
    'privado',
    'datos privados',
    'data protected',
    'not disclosed',
    'data protected, not disclosed',
    'data redacted',
    'not disclosed not disclosed',
    'not disclosed! visit www.eurid.eu for webbased whois.',
    'not available',
    'redacted for privacy',
    'redacted | eu data subject',
    'gdpr redacted',
    'non-public data',
    'gdpr masked',
    'statutory masking enabled',
    'redacted by privacy',
    'not applicable',
    'na',
    'redacted for privacy purposes',
    'redacted | eu registrar',
    'registration private',
    'none',
    'redacted.forprivacy',
    'redacted | registry policy',
    'redacted for gdpr privacy',
    'redacted for gdpr',
    'redacted redacted',
    'not available from registry',
    'hidden upon user request',
  ]

  // WHOIS labels to rename. "From" must be lowercase
  // from -> to
  const renameLabels = {
    'domain name': 'Domain Name',
    domain: 'Domain Name',
    'domain...............': 'Domain Name', // found in .ax
    'idn tag': 'IDN',
    'internationalized domain name': 'IDN',
    nameserver: 'Name Server',
    nameservers: 'Name Server',
    nserver: 'Name Server',
    'name servers': 'Name Server',
    'name server information': 'Name Server',
    dns: 'Name Server',
    'nserver..............': 'Name Server', // found in .ax
    hostname: 'Name Server',
    'domain nameservers': 'Name Server',
    'domain servers in listed order': 'Name Server', // found in .ly
    'domain servers': 'Name Server', // found in .tr
    'name servers dns': 'Name Server', // found in .mx
    flags: 'Domain Status',
    status: 'Domain Status',
    state: 'Domain Status', // found in .ru
    'registration status': 'Domain Status',
    'eppstatus': 'Domain Status', // found in .fr
    'sponsoring registrar iana id': 'Registrar IANA ID',
    organisation: 'Registrar',
    registrar: 'Registrar',
    'registrar name': 'Registrar',
    'registrar organization': 'Registrar',
    'registrar............': 'Registrar', // found in .ax
    'record maintained by': 'Registrar',
    'sponsoring registrar': 'Registrar',
    'registrar organization name': 'Registrar', // found in .tr
    url: 'Registrar URL',
    'registrar website': 'Registrar URL',
    'registrar web': 'Registrar URL', // found in .it
    'www..................': 'Registrar URL', // found in .ax
    'mnt-by': 'Registrar ID', // found in .ua
    'creation date': 'Created Date',
    'registered on': 'Created Date',
    'registration date': 'Created Date',
    'relevant dates registered on': 'Created Date',
    created: 'Created Date',
    'created on': 'Created Date', // found in .mx
    'additional info created on..............': 'Created Date', // found in .tr
    'registration time': 'Created Date',
    registered: 'Created Date',
    'created..............': 'Created Date', // found in .ax
    'domain registered': 'Created Date',
    'registered date': 'Created Date', // found in .co.jp
    'last updated': 'Updated Date',
    changed: 'Updated Date',
    modified: 'Updated Date',
    updated: 'Updated Date', // found in .ly
    'modification date': 'Updated Date',
    'last modified': 'Updated Date',
    'relevant dates last updated': 'Updated Date', // found in .uk, .co.uk
    'last updated on': 'Updated Date', // found in .mx
    'last update': 'Updated Date', // found in .co.jp
    'last-update': 'Updated Date', // found in .fr
    'registrar registration expiration date': 'Expiry Date',
    'registry expiry date': 'Expiry Date',
    'expires on': 'Expiry Date',
    expires: 'Expiry Date',
    'expiration time': 'Expiry Date',
    'expire date': 'Expiry Date',
    'expiration date': 'Expiry Date',
    'expires..............': 'Expiry Date', // found in .ax
    'additional info expires on..............': 'Expiry Date', // found in .tr
    'paid-till': 'Expiry Date',
    'expiry date': 'Expiry Date',
    expire: 'Expiry Date',
    'relevant dates expiry date': 'Expiry Date', // found in .uk, .co.uk
    'record will expire on': 'Expiry Date',
    expired: 'Expiry Date', // found in .ly
    'registry registrantid': 'Registry Registrant ID', // found in .ai
    registrant: 'Registrant Name', // found in .ai
    'registrant contact': 'Registrant Name',
    'registrant contact name': 'Registrant Name',
    registrantname: 'Registrant Name', // found in .ai
    'registrant person': 'Registrant Name', // found in .ua
    'registrant email': 'Registrant Email', // found in .ua
    'registrant e-mail': 'Registrant Email', // found in .fr
    'registrant contact email': 'Registrant Email',
    registrantemail: 'Registrant Email', // found in .ai
    registrantstreet: 'Registrant Street', // found in .ai
    registrantcity: 'Registrant City', // found in .ai
    registrantcountry: 'Registrant Country', // found in .ai
    'registrant country': 'Registrant Country', // found in .fr
    'registrant organisation': 'Registrant Organization',
    registrantphone: 'Registrant Phone',
    'trading as': 'Registrant Organization', // found in .uk, .co.uk
    org: 'Registrant Organization', // found in .ru
    'registrant state': 'Registrant State/Province',
    "registrant's address": 'Registrant Street',
    dnssec: 'DNSSEC',
  }

  // Labels (exact match, lowercase) whose lines are kept as plain text
  const ignoreLabels = [
    'note',
    'notes',
    'please note',
    'important',
    'notice',
    'terms of use',
    'web-based whois',
    'https',
    'to',
    'registration service provider',
    'you acknowledge that',
  ]

  // Fragments that mark a label as legal/marketing text rather than data
  const ignoreTexts = [
    'more information',
    'lawful purposes',
    'to contact',
    'use this data',
    'register your domain',
    'copy and paste',
    'find out more',
    'this',
    'please',
    'important',
    'prices',
    'payment',
    'you agree',
    'restrictions', // found on .co.uk domains
    'queried object', // found in abc.tech
    'service', // found in .au domains
    'terms',
  ]

  let colon = ': '
  let text = []
  const data = {
    'Domain Status': [],
    'Name Server': [],
  }

  // Replace every tab (not just the first one on a line) so that values
  // separated by several tabs are split and trimmed consistently later on.
  let lines = whois
    .trim()
    .split('\n')
    .map((line) => line.replace(/\t/g, ' '))

  const domainEndsWithAny = (tlds) => tlds.some((tld) => domain.endsWith(tld))

  // Parse WHOIS info for specific TLDs
  if (domainEndsWithAny(['.uk', '.be', '.nl', '.eu', '.ly', '.mx', '.gg', '.je', '.as'])) {
    lines = handleMultiLines(lines)
  }

  if (domainEndsWithAny(['.gg', '.je', '.as'])) {
    lines = handleMissingColons(lines)
  }

  if (domain.endsWith('.ua')) {
    lines = handleDotUa(lines)
    colon = ':'
  }

  if (domain.endsWith('.jp')) {
    lines = handleJpLines(lines)
  }

  if (domain.endsWith('.it')) {
    lines = handleDotIt(lines)
  } else if (domain.endsWith('.fr')) {
    lines = handleDotFr(lines)
  }

  if (domain.endsWith('.tr')) {
    lines = handleDotTr(lines)
  }

  lines = lines.map((l) => l.trim())

  lines.forEach((line) => {
    const isComment = line.startsWith('%') || line.startsWith(';') || line.startsWith('*')
    const hasLabel = line.includes(colon) || line.endsWith(':')

    if (isComment || !hasLabel) {
      text.push(line)
      return
    }

    let [label, value] = splitStringBy(line, line.indexOf(':')).map((info) => info.trim())

    // fix whois line with double colon, ex: "Label:: value"
    if (value.startsWith(':')) {
      value = value.slice(1)
    }
    value = value.trim()

    // rename labels to more common format
    const lowerLabel = label.toLowerCase()
    if (hasOwn(renameLabels, lowerLabel)) {
      label = renameLabels[lowerLabel]
    }

    // remove redacted data
    if (ignorePrivacy && noData.includes(value.toLowerCase())) {
      value = ''
    }

    if (hasOwn(data, label) && Array.isArray(data[label])) {
      data[label].push(value)
    } else if (
      !ignoreLabels.includes(label.toLowerCase()) &&
      !ignoreTexts.some((ignored) => label.toLowerCase().includes(ignored))
    ) {
      // WHOIS field already exists, if so append data
      if (hasOwn(data, label) && data[label] && data[label] !== value) {
        data[label] = `${data[label]} ${value}`.trim()
      } else {
        data[label] = value
      }
    } else {
      text.push(line)
    }
  })

  // remove invalid Name Servers (not valid hostname)
  data['Name Server'] = data['Name Server']
    .flatMap((nameServer) => nameServer.split(' '))
    .filter(isDomain)

  // filter out empty status lines
  data['Domain Status'] = data['Domain Status'].filter(Boolean)

  // remove multiple empty lines
  text = text.join('\n').trim()
  while (text.includes('\n\n\n')) {
    text = text.replace('\n\n\n', '\n')
  }

  data.text = text.split('\n')

  return data
}
/**
 * Normalize WHOIS lines of .tr domains into "label: value" lines.
 *
 * "** Section:" headers start a section whose name is prefixed to the
 * labelled lines that follow; "** Domain ..." headers are kept as regular
 * lines; lines without ": " are appended to the previous output line.
 *
 * @param {string[]} lines Raw WHOIS lines
 * @returns {string[]} New array of normalized lines (blank lines removed)
 */
const handleDotTr = (lines) => {
  // No clue what the other 4 fields are, all domains have them hidden
  const registrantLines = ['Name', undefined, undefined, undefined, undefined]
  const replacement = []
  let section = ''
  let sectionLine = 0

  for (const rawLine of lines) {
    // Remove blank lines
    if (rawLine.trim() === '') {
      continue
    }

    let line = rawLine.replace(/\s+/g, ' ').replace(' :', ':').trim()

    if (line.startsWith('** Domain')) {
      // Keep line for domain name/nameservers
      line = line.replace('** ', '')
    } else if (line.includes('** ')) {
      // Start new section
      section = line.replace(':', '').replace('** ', '').trim()
      sectionLine = 0
      continue
    } else if (section === 'Registrant') {
      // Add registrant info; only lines with a known field name are kept
      if (!registrantLines[sectionLine]) {
        continue
      }
      line = `Registrant ${registrantLines[sectionLine]}: ${line}`
      sectionLine++
    } else if (!line.includes(': ')) {
      // Add multi-line information to one line (nameservers and address)
      if (replacement.length > 0) {
        replacement[replacement.length - 1] += ` ${line}`
        continue
      }
      // Nothing to append to yet: keep the line on its own instead of
      // writing to index -1 of the output array.
    } else if (section) {
      // Append section name to each line
      line = `${section} ${line}`
    }

    // Remove period at end of dates
    if (section === 'Additional Info') {
      line = line.replace(/\.$/, '')
    }

    replacement.push(line)
  }

  return replacement
}
/**
 * Prefix WHOIS lines of .ua domains with the contact section they belong to.
 *
 * A comment line ("% ...") mentioning Registrar, Registrant, Admin or
 * Technical switches the current section; every following "label: value"
 * line is rewritten in place as "<section> label: value". Inside a section,
 * a line starting with "registrar" has that prefix renamed to "id", so
 * "registrar: x" becomes "<section> id: x" and keeps its value.
 *
 * @param {string[]} lines Raw WHOIS lines (modified in place)
 * @returns {string[]} The same array
 */
const handleDotUa = (lines) => {
  const types = ['Registrar', 'Registrant', 'Admin', 'Technical']
  let flag = ''

  lines.forEach((line, index) => {
    if (line.startsWith('%')) {
      if (types.some((type) => line.includes(type))) {
        // drop the leading "%" and the last character of the header line
        flag = line
          .substring(1, line.length - 1)
          .trim()
          .toLowerCase()
      }
      return
    }

    if (!line.includes(': ')) {
      return
    }

    // Rename only the leading "registrar"; the rest of the line (colon and
    // value) is preserved.
    const renamed = line.startsWith('registrar') ? `id${line.slice('registrar'.length)}` : line
    lines[index] = `${flag} ${renamed}`
  })

  return lines
}
/**
 * Normalize WHOIS lines of .it domains into "label: value" lines.
 *
 * Unindented lines are either top-level "label: value" pairs or section
 * names; lines indented by two spaces are prefixed with the current section
 * name; lines indented by four or more whitespace characters continue the
 * previous output line.
 *
 * @param {string[]} lines Raw WHOIS lines (indentation must be preserved)
 * @returns {string[]} New array of normalized lines
 */
const handleDotIt = (lines) => {
  let section = ''
  const replacement = []

  for (const line of lines) {
    // Ignore comments and empty lines
    if (line.startsWith('*') || line === '') {
      continue
    }

    // Collapse whitespace
    const collapsed = line.replace(/\s+/g, ' ').trim()

    // Check for top-level values and new section indicators
    if (/^[^\s]/.test(line)) {
      if (line.includes(':')) {
        replacement.push(collapsed)
      } else {
        // Special handling for "Nameservers" section
        section = line === 'Nameservers' ? 'Name Server:' : collapsed
      }
    }

    // Make sure sub-section lines are properly labeled
    if (/^\s{2}[^\s]/.test(line)) {
      // New sub-section
      replacement.push(`${section} ${collapsed}`)
    } else if (/^\s{4}/.test(line)) {
      if (replacement.length > 0) {
        // Continuation of previous line
        replacement[replacement.length - 1] += `, ${collapsed}`
      } else if (collapsed) {
        // No previous line to continue: keep the text on its own instead of
        // writing to index -1 of the output array.
        replacement.push(collapsed)
      }
    }
  }

  return replacement
}
/**
 * Fix the "label: \n value" format, where a label sits alone on its line
 * and its value(s) follow on the next lines.
 *
 * For every non-comment line ending with ":", up to 8 following lines are
 * inspected (stopping at the first missing or blank one) and prefixed with
 * that label. The label line itself is blanked when at least one following
 * line was prefixed. The array is modified in place.
 *
 * @param {string[]} lines WHOIS lines
 * @returns {string[]} The same array
 */
const handleMultiLines = (lines) => {
  lines.forEach((line, index) => {
    const isComment = line.startsWith('*') || line.startsWith('%')

    // only lines that are just a WHOIS label ending with ":" are of interest
    if (isComment || !line.trim().endsWith(':')) {
      return
    }

    const header = line.trim()
    let prefixedAny = false

    // Check next lines
    for (let offset = 1; offset <= 8; offset++) {
      const position = index + offset
      const next = lines[position]

      // if no line or empty line
      if (!next || !next.trim().length) {
        break
      }

      // a following label line is left alone, unless it is an indented "label: value" pair
      const indentedPair = next.startsWith(' ') && next.includes(': ')
      if (!indentedPair && next.endsWith(':')) {
        continue
      }

      // when the next line carries its own label, drop the colon of the main label
      const label = next.includes(':') && header.endsWith(':') ? header.slice(0, -1) : header

      lines[position] = label + ' ' + next.replace('\t', ' ').trim()
      prefixedAny = true
    }

    // remove this line if it was just a label for other lines
    if (prefixedAny) {
      lines[index] = ''
    }
  })

  return lines
}
/**
 * Convert the bracketed label format used by .jp WHOIS servers into
 * "label: value" lines.
 *
 * Handle formats like this:
 * [Name Server] ns1.jprs.jp
 * [Name Server] ns2.jprs.jp
 *
 * Lines starting with "[ " and lines that neither start with "[" nor with a
 * space are dropped. Lines starting with a space are appended (after a
 * newline) to the previous kept line. The input array is not modified.
 *
 * @param {string[]} lines Raw WHOIS lines
 * @returns {string[]} New array of "label: value" lines
 */
const handleJpLines = (lines) => {
  const ret = []

  for (const rawLine of lines) {
    // handle lines that start with "a. [label]"
    const line = rawLine.replace(/^[a-z]. \[/, '[')

    if (line.startsWith('[ ')) {
      // informational banner line, skip
      continue
    }

    if (line.startsWith('[')) {
      ret.push(line)
    } else if (line.startsWith(' ') && ret.length > 0) {
      // Continuation of the previous value. Without a previous entry there
      // is nothing to continue, so the line is skipped rather than turned
      // into an "undefined\n..." entry.
      ret[ret.length - 1] += '\n' + line.trim()
    }
  }

  return ret.map((line) => line.replace(/\[(.*?)\]/g, '$1:'))
}
/**
 * Normalize WHOIS data for .fr ccTld, make it look more like gTLDs
 *
 * Lines are split into groups separated by blank lines ("%" comment lines
 * are passed through, "source" lines are dropped). The domain group is kept
 * as is minus its "status" line, registrar lines are prefixed with
 * "registrar", and contact groups (starting with "nic-hdl") are prefixed
 * with the contact type found on the earlier line that references the
 * contact handle ("admin-c", "holder-c" or "tech-c").
 *
 * @param {string[]} lines Raw WHOIS lines
 * @returns {string[]} New array of normalized lines
 */
function handleDotFr(lines) {
  const groups = []
  let group = []
  const finalLines = []

  // split data in groups
  lines.forEach((line) => {
    const isBlank = !line.trim().length

    if (line.startsWith('%')) {
      finalLines.push(line)
    } else if (isBlank && group.length) {
      // start new group
      groups.push(group)
      group = []
    } else if (!isBlank && !line.startsWith('source')) {
      group.push(splitStringBy(line, line.indexOf(':')).map((str) => str.trim()))
    }
  })

  if (group.length) {
    groups.push(group)
  }

  groups.forEach((gr) => {
    const [firstLabel, firstValue] = gr[0]

    if (firstLabel === 'domain') {
      // group with domain info
      gr.forEach((line) => {
        if (line[0] !== 'status') {
          finalLines.push(line.join(': '))
        }
      })
    } else if (firstLabel === 'registrar') {
      // group with Registrar info
      gr.forEach(([label, value]) => {
        if (label === 'registrar') {
          finalLines.push(`${label}: ${value}`)
        } else {
          finalLines.push(`registrar ${label}: ${value}`)
        }
      })
    } else if (firstLabel === 'nic-hdl') {
      // Find the already emitted line that references this contact handle.
      // It may be missing (contact not referenced by any earlier line), in
      // which case the contact type stays empty instead of throwing.
      const contactTypeLine = finalLines.find((line) => line.includes(firstValue))
      let contactType = ''

      if (contactTypeLine === undefined) {
        // unknown contact type
      } else if (contactTypeLine.startsWith('admin-c')) {
        contactType = 'admin'
      } else if (contactTypeLine.startsWith('holder-c')) {
        contactType = 'registrant'
      } else if (contactTypeLine.startsWith('tech-c')) {
        contactType = 'technical'
      }

      // group with contact info
      gr.forEach(([label, value]) => {
        if (label === 'nic-hdl') {
          finalLines.push(`${contactType} registry id: ${value}`)
        } else {
          finalLines.push(`${contactType} ${label}: ${value}`)
        }
      })
    } else {
      gr.forEach((line) => {
        finalLines.push(line.join(': '))
      })
    }
  })

  return finalLines
}
/**
 * Add the missing colon to "Registrar" lines, modifying the array in place.
 *
 * Handle formats like this:
 * Registrar Gandi SAS
 *
 * @param {string[]} lines WHOIS lines
 * @returns {string[]} The same array
 */
const handleMissingColons = (lines) => {
  const prefix = 'Registrar '

  for (let i = 0; i < lines.length; i += 1) {
    const current = lines[i]

    if (current.startsWith(prefix)) {
      // only the leading occurrence is rewritten
      lines[i] = `Registrar: ${current.slice(prefix.length)}`
    }
  }

  return lines
}
// Public API of this module: the two WHOIS parsers
Object.assign(module.exports, { parseSimpleWhois, parseDomainWhois })