-
Notifications
You must be signed in to change notification settings - Fork 8
/
regex-blacklist
98 lines (77 loc) · 4.7 KB
/
regex-blacklist
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
### Hostname sanity checks -- every pattern in this group is commented out (disabled)
### No dots in hostname
#/^[^\.]+$/
### Invalid chars (anything other than ASCII letters, digits, dot and hyphen)
#/^.*[^a-zA-Z0-9\.\-].*$/
### More than 36 labels in hostname
# NOTE(review): every repetition must end in a dot, so as written this only
# matches names that carry a trailing dot -- confirm the input format before enabling.
#/^([^\.]+\.){37,}$/
### Label longer than 63 positions
# NOTE(review): there is no dot between the anchors, so this can only match a
# dot-free name of 64+ characters, not a long label inside a multi-label name;
# the nested quantifier may also backtrack heavily. Something like
# (^|\.)[^\.]{64,}(\.|$) may be what was intended -- verify before enabling.
#/^([^\.]+){64,}$/
### Hostname longer than 253 positions
#/^.{254,}$/
### Garbage TLDs
# Matches when the last label of the name is one of the listed TLDs. The
# leading (.*\.)* is optional, so the bare TLD on its own matches as well.
/^(.*\.)*(accountant|ad|an|analytic[s]*|bar|bi[dt]|biz|christmas|click|club|country|cricket|date|dclk|diet|docs|download|exit|faith|free|fun|gdn|guru|i2p|info|kim|link|loan|men|mobi|mom|name|ninja|office|on(ion|line)|ovh|party|pro|racing|realtor|reise|ren|review|rocks|science|shop|site|space|stream|study|tec(h|hnology)|to[pr]|trade|vip|web(cam|site)|work|win|xin|xyz|yokohama|zone)$/
### Garbage CC-GTLD's not ending in CCTLD
# Disabled. Would match names that contain ".<co|com|edu|gov|mil|net|org>.<two letters>."
# followed by further labels, i.e. a ccSLD-looking pair that is not at the end of the name.
#/^.*\.(co[m]*|edu|gov|mil|net|org)\.[a-z]{2,2}\..*$/
### Garbage CCTLD's
# Matches when the last label is one of the listed TLDs (the bare TLD matches too).
/^(.*\.)*(asia|cc|cf|cm|cn|ga|in|ml|gq|pw|ru|sh|tk|tokyo|ua)$/
### Common file-extension TLD's
# Names whose last label looks like a file extension rather than a real TLD.
# Office-style extensions: a three-letter stem (doc, dot, ppt, pps, pub, xls, ...)
# optionally followed by one letter from [ablmstx] (e.g. docx, xlsm).
/^(.*\.)*(do[ct]|p[op][ast]|pub|sld|vsd|x[lp][as])[ablmstx]{0,1}$/
# Data/archive/text extensions; t[e]*xt covers both "txt" and "text".
/^(.*\.)*(csv|gz|pdf|tar|t[e]*xt|zip)$/
# Executable/script extensions.
/^(.*\.)*(bat|cmd|dll|exe)$/
### Porn/Smut TLD's
# Matches when the last label is one of the listed TLDs.
/^(.*\.)*(adult|porn|sex|xxx)$/
### Gambling TLD's
# Matches when the last label is one of the listed TLDs; game[s]* covers "game" and "games".
/^(.*\.)*(auction|bet|casino|game[s]*|poker)$/
### Private
# Names whose last label is a private / internal-use style pseudo-TLD.
/^(.*\.)*(broadcasthost|company|cor(p|porate)|example|home|host|invalid|la[bn]|local|localdomain|localhost|localnet|nxdomain|office|rejected|router|test|url|workgroup|wpad)$/
# Reverse (in-addr.arpa) zones for 10/8, 127/8, 172.16/12, 169.254/16 and 192.168/16.
# 172.16/12 spans second octets 16 through 31 inclusive; 30 was missing from the
# alternation, so lookups under 30.172.in-addr.arpa slipped through.
/^(.*\.)*(10|127|(16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)\.172|254\.169|168\.192)\.in-addr\.arpa$/
# Reverse (ip6.arpa) zones whose label directly left of ip6.arpa (the most
# significant address nibble) is 0, e or f.
/^(.*\.)*[0ef]\.ip6\.arpa$/
### Common AD labels
# These patterns are anchored at the start of the name but not at the end: they
# match as soon as the described token is followed by a dot or a hyphen.
# Token "ad" (optionally ads/adx/adz) with optional leading digits, optional
# php/web/m/w/x/y prefixes and optional trailing digits, starting at the
# beginning of the name or right after a dot or hyphen.
/^(.*[\.\-])*[0-9]*(php|web|[mwxy])*ad[sxz]*[0-9]*[\.\-]/
# "ad" + keyword (adbanner, adclick, adserver, adsrv, ad-serving, ...), with
# optional s/x/z suffix and trailing digits.
/^(.*[\.\-])*ad(aly[sz]er|am|banner|bureau|click|dynamo|farm|hese|im[a]*g[e]*|info|ition|just|k2|load|log|media|ma(n|nager|x)|ne(t[a-z]+|xus)|nxs|ocean|renaline|revolver|rotat(e|or)|science|script|sense|spirit|[v\-]*s[e]*r(v|ve|ver|vi(ce|ng))|stat|stream)[sxz]*[0-9]*[\.\-]/
/^(.*[\.\-])*ad([v]*server[a-z]*|shuffle|sl|sy(s|stem)|test|(v|ve(r|rt|rtis(e|er|ing)))|trac(k|king|ker)|wise|word)[sxz]*[0-9]*[\.\-]/
# Unlike the patterns above, this one has no (.*[\.\-])* prefix, so the token
# must be at the very start of the name.
/^(banner[a-z]*|open[-]*(ad|x)|page[-]*ad|reklam|(secure[-]*)*pub[-]*ad|smart[-]*ad[s]*([-]*server)*|unityad)[sz]*[0-9]*[\.\-]/
### Common Tracking/Stats labels
# Same shape as the AD-label patterns: the token must start at the beginning of
# the name or right after a dot or hyphen, may carry an s/z suffix and trailing
# digits, and must be followed by a dot or hyphen. No end anchor.
/^(.*[\.\-])*(affiliate|([s]*anal|local)[iy](s[iy]s|ti[ck])|click|clk|cooki(e|ex)|cnt|coun(t[y]*|t[e]*[dr])|datacollect|hit|(amp|[s]*)metr[iy][ck]|open[-]*(ad|x)|partner|ping|pixel|sta(t|tisti[ck])|tag|(web|[uv]*)stat)[sz]*[0-9]*[\.\-]/
/^(.*[\.\-])*(telemetr[iy]|(evil|[s]*)trac(k|king|ker)|[s]*trk|utm|video[-]*stat)[sz]*[0-9]*[\.\-]/
### Common wellknown start labels
# Name begins with one of the listed words followed by a dot or hyphen.
# ".*servedby" is the one alternative that allows arbitrary leading text.
/^(creatives|gcirm[0-9]*|marketing|oa(s|scentral)|o(x|x-d)|prom(o|otion)|qwe|.*servedby|syndication|traffic)[\.\-]/
# Single-letter first label whose next label starts with digits, with "ad" plus
# any one character other than "d", or with "click".
/^[a-z]\.([0-9]+|ad[^d]|click)/
### Google
# Google analytics / syndication / ad-service / tag-service labels anywhere in the name.
/^(.*\.)*google[-]*(analytic|syndication|(ad[a-z0-9]*|tag)[-]*service)[s]*\./
# google-analytics, googleadmanager, googletagmanager, optionally prefixed with "www".
/^(.*\.)*(www[-]*)*google[-]*(analytic[s]*|(ad|tag)[-]*manager)\./
# doubleclick / doubleclickbygoogle as any label. The subdomain prefix is now
# optional, (.*\.)*, like every sibling pattern; previously it was mandatory, so
# the bare doubleclick.<tld> name itself was not matched.
/^(.*\.)*double[-]*clic(k|k[-]*by[-]*google)\./
# First label is googlead(s), partnerad(s) or pubad(s), optionally followed by api/apis.
/^(google|partner|pub)ad[s]*(api[s]*)*\./
### Top-N advertisers/trackers domain(parts)
# Each alternative must be a complete label (start of name or preceded by a dot,
# and followed by a dot) at any depth of the name. No end anchor, so whatever
# follows the label is not checked.
/^(.*\.)*(atdmt|beget|bango|casalemedia|clickbank|extreme[-]*dm|flurry|krxd|liveadvert|moatads|mookie[1-9]*|nuggad|omtrdc|p2l|quants[e]*rv[e]*|onestat|onthe|pop(cash|check|test|under|up)|revsci|scorecardresearch|sitestat|tacoda|tynt)\./
/^(.*\.)*(o0bc|2mdn|2o7|302br|51yes|adtech([-]*[a-z]+)*|amazon[-]*adsystem|atwola|bkrtx|bluekai|chartbeat|crwdcntrl|d(e|o)mdex|effectivemeasure|falkag|free|fwmrm|gemius)\./
/^(.*\.)*(hit(box|tail)|hotjar|imrworldwide|intellitxt|lijit|mixpanel|ms[-]*adcenter|netklix|outbrain|petrovka|pixel|syndication|rubiconproject|sitemeter|skimresources|smaato|smartadserver|(fast|stat)counter|suprnova|taboola|tradedouble[r]*|xiti|usergrid|visualrevenue|volumetrk)\./
### IDN
# Unanchored: matches any name containing "xn--" (the punycode prefix) anywhere.
# The [^\.]* parts on either side can match empty, so they do not narrow the match.
/[^\.]*xn--[^\.]*/
### Porn
# Keyword match anywhere in the name (not limited to label boundaries), either a
# bare keyword or a qualifier + keyword combination with an optional hyphen.
/^.*(adult|(adult|ana[a]*l|cam|date|free|hardcore|hot|live|mature|phone|teen)[-]*(fuck|porn[o]*|sex)|eroti(c|k|sich)|fuck|gangbang|(hard|soft)core|hentai|neuk|nsfw|porn[o]*|sex[-]*(cam|chat|dat(e|ing)|movie|shop|show|toy|tracker|video)[sz]*[0-9]*|xxx).*$/
# First label is exactly one of these words (sex requires a trailing e, o or y).
/^(adult|hentai|porn[o]*|sex(e|o|y))\./
### Gambling
# Keyword match anywhere in the name; gam(...) covers gamer, games, gaming,
# gamble and gambling.
/^.*(casino|betting|lott(ery|o)|gam(e[rs]|ing|bl(e|ing))|sweepstake|poker).*$/
### Misc / Special / Other
# Two broader keyword patterns, currently disabled.
#/^(.*[\.\-])*(advert|banner|beacon|cash|click[y]*|coin|count|megavolt|ms[-]*edge|([ens0-9]+)*(omni|over)ture|pop(cash|check|test|under|up)|tracker)/
#/^.*(advert|banner|beacon|cash|click[y]*|coin|coun(t|ter[s]*)|megavolt|ms[-]*edge|([ens0-9]+)*(omni|over)ture|pop(cash|check|test|under|up)|track(er|ing))[\.\-]/
# First label is exactly "a" or "1".
/^a\./
/^1\./
# Names ending in <bn|db|hk><digit>sch<digits>.wns.windows.com (no start anchor).
/(bn|db|hk)[0-9]sch[0-9]+\.wns\.windows\.com$/
# First label is "wpad".
/^wpad\./
# First label is "mkt" followed by digits.
/^mkt[0-9]+\./
# protection.<two letters>police.com, exact match.
/^protection\.[a-z]{2,2}police\.com$/
# Single label starting with "x" directly under alexa.com.
/^x[^\.]*\.alexa\.com$/
# All-digit label followed by "br" directly under .net.
/^(.*\.)*[0-9]+br\.net$/
# Any label that consists of 2-3 digits followed by letters only.
/^(.*\.)*[0-9]{2,3}[a-z]+\./
#/^(.*\.)*[0-9]+[a-z]\./
# All-digit label directly under a 2-3 letter TLD.
/^(.*\.)*[0-9]+\.[a-z]{2,3}$/
# First label is "myetherwallet".
/^myetherwallet\./
# NOTE(review): the dot in "cloudfront.net" is unescaped and would match any
# character -- consider cloudfront\.net if this pattern is ever enabled.
#/^d[^\.]+\.cloudfront.net$/
#/^.*\.www/
# "facebook." or "paypal." followed later in the name by a hyphen.
/^.*(facebook|paypal)\..*-/
# Name starts with "bankofamerica-" or "wellsfargo-".
/^(bankofamerica|wellsfargo)-/
# Any label equal to "g00".
/^(.*\.)*g00\./
#/^.*[67deklrsyz]\.googlevideo\.com$/
# "mackeeper" anywhere in the name.
/^.*mackeeper.*$/