-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathgrab_library
430 lines (380 loc) · 16.5 KB
/
grab_library
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
#!/usr/bin/env bash
# TAGS
# grab_library v9.0
# https://github.com/ngadmini
# AUTHOR
# TL;DR
# see README and LICENSE
# shared helpers: tool aliases, global values, colour labels and the f_* functions defined below
# turn alias expansion on for this shell, then define the shortcuts used by the functions
shopt -s expand_aliases
# _grp, _sed, _srt: grep, sed and sort forced to the C locale (sort also gets a large buffer and 3 threads)
# _ssh, _snc      : quiet, tty-less ssh with aes128-ctr and no compression; rsync running over that same ssh
alias _grp="LC_ALL=C grep"
alias _snc="rsync -rtxX -e 'ssh -q -T -c aes128-ctr -o Compression=no -x'"
alias _sed="LC_ALL=C sed"
alias _srt="LC_ALL=C sort --buffer-size=80% --parallel=3"
alias _ssh="ssh -q -T -c aes128-ctr -o Compression=no -x"
# _fkt: clock time as printed by 'date' under faketime with a +5 minute offset (12-hour clock, AM/PM, zone)
_fkt="$(faketime -f "+5m" date +%I:%M:%S\ %p\ %Z)"
# _foo: basename of $0 (the running script); _fuu: basename of BASH_SOURCE[0] (this file)
_foo="${0##*/}"
_fuu="${BASH_SOURCE[0]##*/}"
# _prl: PCRE matching a whole line that is a dotted-quad IPv4 address, each octet 0..255
_prl='^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$'
# _ver: last field of the line(s) in ${_foo} matched by the pattern below; ${_foo} is opened relative to the working directory
_ver=$(_grp -P "^#[[:space:]]+grab_" "${_foo}" | awk -F' ' '{print $NF}')
# _get: 8th '/'-separated field of the download_url line(s) of the latest release - presumably the release tag, TODO confirm
# NOTE(review): this is a network request made while the file is being loaded
_api="https://api.github.com/repos/ngadmini/Grabbing-Blacklist-for-Bind9-RPZ/releases/latest"
_get=$(curl -s "${_api}" | grep download_url | awk -F/ '{print $8}')
# _ori: raw-content base URL built from ${_get}; f_ori downloads missing files from here
_ori="https://raw.githubusercontent.com/ngadmini/Grabbing-Blacklist-for-Bind9-RPZ/${_get}"
# _lct: current local time, e.g. "Mon 2024-01-01 12:00:00 UTC", used in banners and zone-file stamps
_lct=$(date +%a\ %Y-%m-%d\ %T\ %Z)
# colorizing
# ANSI escape sequences; they are expanded by printf, so these values are always placed in a printf FORMAT string
_cyn='\e[96m'
_grn='\e[92m'
_ncl='\e[0m'
_red='\e[91m'
_ylw='\e[93m'
# ready-made coloured labels, and (upper-case names) coloured "%s" fragments that consume one printf argument each
_ccl="${_grn}(Ctrl+C)${_ncl}"
_cnt="${_cyn}CONTINUED to :${_ncl}"
_CYN="${_cyn}%s${_ncl}"
_don="${_grn}[DONE]${_ncl}"
_err="${_red}[FAIL]${_ncl}"
_GRN="${_grn}%s${_ncl}"
_hnt="${_cyn}[HINT]${_ncl}"
_inf="${_ylw}[INFO]${_ncl}"
_pre="${_ylw}PREPARING TASKs:${_ncl}"
_RED="${_red}%s${_ncl}"
_rnr="${_grn}'rndc reload'${_ncl}"
_shd="${_grn}'shutdown -c'${_ncl}"
_tks="${_red}TASKs${_ncl}"
_wn0="${_grn}WARNING:${_ncl}"
_wn1="${_red}[WARN]${_ncl}"
_YLW="${_ylw}%s${_ncl}"
f_tmp() { # purge temporary files and directories created while the scripts run
  # shared deletion stage: NUL-separated names in, 'rm -rf' out, nothing run on empty input
  local -a _purge=(xargs --no-run-if-empty --null rm -rf)
  # 1) entries of the working directory whose path contains dmn, sed, tmp/tmq/tmr
  #    or txt?adulta, or whose path ends in gz
  find . -maxdepth 1 -regex "^.*\(dmn\|sed\|tm[pqr]\|txt.adulta\).*\|.*gz$" -print0 \
    | "${_purge[@]}"
  # 2) every sub-directory directly below the working directory
  find . -maxdepth 1 -type d ! -name "." -print0 \
    | "${_purge[@]}"
  # 3) regular files /tmp/txt.* are moved into the working directory
  find /tmp -maxdepth 1 -type f -name "txt.*" -print0 \
    | xargs --no-run-if-empty --null mv -t .
}
f_set() { # forget the ar_* work arrays and switch strict mode and alias expansion off
  local _sfx
  for _sfx in CAT CNF DBC RPZ VAR cat cnf dbc dom dmn dwn exe num pac reg rpz shn shy spl tmp txt uri url zon; do
    unset -v "ar_${_sfx}"
  done
  # errtrace, errexit, nounset and pipefail are all turned off
  set +E +e +u +o pipefail
  shopt -u expand_aliases
}
f_cln() { # cleanup handler: blank line, purge temporary artefacts, reset the shell state
  printf "\n"
  f_tmp
  f_set
}
f_trp() { # install f_cln as handler: on EXIT and TERM as is, on INT followed by 'exit 130'
  local _sig
  for _sig in EXIT TERM; do
    trap f_cln "${_sig}"
  done
  trap 'f_cln; exit 130' INT
}
f_xcd() { # exit code {244..255}
  # Print the diagnostic that belongs to code $1 on stdout.
  #   $1     : code 244..255; any other value is reported as an unknown code
  #   $2, $3 : code-specific detail (host, package, file or directory)
  # Every code returns 1, except 250 which terminates the shell via 'exit 1'.
  local EC _xcd _xcl _msj _lmm _msg _lnn _ind _lin _ext _ukn _ukm
  local _re0 _re1 _unk
  # $1 stays unquoted as before; every arm returns or exits, so only its first
  # word is ever handled (an empty $1 prints nothing and returns 0)
  for EC in $1; do
    # message heads for the running script (_foo) and for this file (_fuu); names
    # and line numbers are passed as arguments so they cannot act as format text
    printf -v _xcd "${_err} %s line %s. exit %s" "${_foo}" "${BASH_LINENO[0]}" "${EC}"
    printf -v _xcl "${_err} %s line %s. exit %s" "${_fuu}" "${BASH_LINENO[0]}" "${EC}"
    case ${EC} in
      244)
        printf "\n%s: \n${_hnt} %s\n" "${_xcd}" "required passwordless ssh to remote host: '$2'"
        return 1
        ;;
      245)
        printf "%s: \n${_hnt} %s\n" "${_xcd}" "required '$2' but it's not installed"
        return 1
        ;;
      246)
        printf "\n%s: \n${_hnt} %s\n" "${_xcd}" "'$2' required '$3' but it's not installed"
        return 1
        ;;
      247)
        printf "\n%s: \n${_hnt} %s\n" "${_xcd}" "you must execute ${_foo} as non-root privileges"
        return 1
        ;;
      248) # file $2 does not hold the expected number of urls
        _msj="urls. it should consist of ${ar_num[ar_url]} urls"
        _lmm="$(basename "$2"): $(wc -l < "$2")"
        printf "\n%s\n%s\n" "${_xcd}" "${_lmm} ${_msj}"
        return 1
        ;;
      249) # file $2 does not hold the expected number of lines
        _msg="lines. it should consist of ${ar_num[ar_reg]} lines"
        _lnn="$(basename "$2"): $(wc -l < "$2")"
        printf "\n%s\n%s\n" "${_xcd}" "${_lnn} ${_msg}"
        return 1
        ;;
      250) # the only code that leaves the shell instead of returning
        _re0='[curl-code] https://curl.se/docs/manpage.html'
        _re1='[stat-code] https://en.wikipedia.org/wiki/List_of_HTTP_status_codes'
        _unk='check if there is a typo in grab_urls. if spelling is correct, please see'
        printf "${_inf} %s:\n%s\n%s\n%s\n" "${_unk}" "${_re1}" "${_re0}" "${_xcl}"
        exit 1
        ;;
      251)
        printf "\n%s\n%s\n" "${_xcd}" "download failed from origin, please check spelling"
        return 1
        ;;
      252) # show whatever in ar_num matches the character class below
        printf -v _ind "${_CYN}" "$(echo "${ar_num[*]}" | _grp '[aA-zZ\.,]')"
        printf "\n%s, invalid ar_num's values: %s" "${_xcd}" "${_ind}"
        printf "\n${_hnt} use numeric in ar_num's values\n"
        return 1
        ;;
      253) # point at the HOST line(s) of the configuration file ${CNF}
        _lin=$(grep -En "^(HOST|#HOST)" "${CNF}" | cut -d: -f1)
        printf -v _ext "\n${_err} %s at line %s. exit %s" "${CNF##*/}" "${_lin}" "${EC}"
        printf "%s\n%s : if spelling is correct, maybe isDOWN\n" "${_ext}" "${HOST}"
        return 1
        ;;
      254)
        printf "\n%s: ${_CYN} doesn't exist in %s\n" "${_xcl}" "$(basename "$2")" "$3"
        return 1
        ;;
      255)
        printf "\n${_inf} NEED files:\n${_CYN}\n" "$2"
        return 1
        ;;
      *) # unknown code: look up the offending call in ${_foo}
        _ukn="Unknown exit code [f_xcd $1], please check"
        printf -v _ukm "%s" "${_foo} at line $(grep -n "f_xcd $1" "${_foo}" | cut -d: -f1)"
        printf "\n${_err} %s %s\n" "${_ukn}" "${_ukm}"
        return 1
        ;;
    esac
  done
}
f_sm0() { # display offering options
  # Prints the four-option menu; the note under option 4 depends on RNDC_RELOAD.
  printf "${_wn1} there are still remaining duplicate entries between CATEGORIES\n"
  printf "${_hnt} consider continue to next TASKs\n"
  printf "${_CYN}\n" "you have 4 options to proceed to the next task:"
  printf "%4s) eliminating duplicate entries between CATEGORY\n" "1"
  printf "%4s) option [1] and rewriting all CATEGORIES to RPZ format [db.* files]\n" "2"
  printf "%4s) options [1,2] and incrementing serial zone-files [rpz.*]\n" "3"
  printf "%4s) options [1,2,3] and syncronizing latest [rpz.* and db.*] files to %s\n" "4" "${HOST}"
  case "${RNDC_RELOAD}" in
    *[nN][oO]*) # any value containing "no" in either case: the remote host reboots
      printf "%6s${_wn0} remote-host: %s will REBOOT due to low memory\n" "" "${HOST}"
      printf "%18s ${_shd} at remote-host: %s to abort\n" "use" "${HOST}"
      ;;
    *)
      printf "%6s${_wn0} ${_rnr} will be executed after syncronizing.\n" ""
      ;;
  esac
  printf "%4s) use ${_ccl} to quit\n" "^C"
  printf "please hit: ${_CYN} to continue OR ${_ccl} to quit\n" "(1/2/3/4)"
}
f_sm1() { # f_sm[1..5]: display messages while chosing option
  # summary for the first option: chosen number from ${opsi}, the single task, then the f_sm10 banner
  local _ord="st"
  printf "\n${_red}[%s'st] TASK options chosen${_ncl}\n" "${opsi}"
  printf "${_cnt} 1) eliminating duplicate entries between CATEGORY\n"
  f_sm10 "${_ord}"
}
f_sm2() { # summary for the second option: common head (f_sm5), then the f_sm10 banner
  local _ord="nd"
  f_sm5 "${_ord}"
  f_sm10 "${_ord}"
}
f_sm3() { # summary for the third option: common head (f_sm5), third task, then the f_sm10 banner
  f_sm5 "th"
  # f_sm5 has already printed tasks 1) and 2), so this one is task 3)
  printf "%30s serial at zone-files [rpz.*]\n" "3) incrementing"
  f_sm10 "th"
}
f_sm4() { # summary for the fourth option: common head, tasks 3) and 4), reload/reboot note, banner
  local _ord="th"
  f_sm5 "${_ord}"
  printf "%30s serial zone-files [rpz.*]\n" "3) incrementing"
  printf "%30s latest [rpz.* and db.*] files to %s\n" "4) syncronizing" "${HOST}"
  case "${RNDC_RELOAD}" in
    *[nN][oO]*) # any value containing "no" in either case: the remote host reboots
      printf "%6s${_wn0} remote-host: %s will REBOOT due to low memory\n" "" "${HOST}"
      printf "%18s ${_shd} at remote-host: %s to abort\n" "use" "${HOST}"
      ;;
    *)
      printf "%6s${_wn0} ${_rnr} will be executed too. press ${_ccl} to abort\n" ""
      ;;
  esac
  f_sm10 "${_ord}"
}
f_sm5() { # common head of the option summaries; $1 is the ordinal suffix shown after ${opsi}
  local _ord="$1"
  printf "\n${_red}[%s'%s] TASK options chosen${_ncl}\n" "${opsi}" "${_ord}"
  printf "${_cnt} 1) eliminating duplicate entries between CATEGORY\n"
  printf "%27s all CATEGORIES to RPZ format [db.* files]\n" "2) rewriting"
}
# f_sm[6..10]: display processing messages
f_sm6() { # progress label "grab_<$1>" right-aligned to 12 columns, text $2 padded to 66, no newline
  local _tag="grab_$1" _txt="${2}"
  printf "%12s: %-66s" "${_tag}" "${_txt}"
}
f_sm7() { # announce category $1 (upper-cased) together with its number of additional remote files $2
  local _cat="${1^^} CATEGORY"
  printf "\nprocessing ${_CYN} with (%d) additional remote-files\n" "${_cat}" "$2"
}
f_sm8() { # progress label for the clean-up of category $1 (upper-cased), no newline
  local _txt="bads, duplicates and false entries at ${1^^}"
  printf "%12s: %-66s" "fixing" "${_txt}"
}
f_sm9() { # closing line: tasks of option ${opsi} with ordinal suffix $1 are DONE
  local _ord="$1"
  printf "${_tks} based on %s'%s options: ${_GRN}\nbye!\n" "${opsi}" "${_ord}" "DONE"
}
f_sm10() { # banner: tasks of option ${opsi} with ordinal suffix $1 are about to run
  local _ord="$1"
  printf "${_cyn}Performing tasks based on %s'%s options ...${_ncl}\n" "${opsi}" "${_ord}"
}
f_tim() { # time elapsed in Minute:Second:millisecond
  # Reads the global T, an elapsed time in nanoseconds.
  local -i _sec=$((T / 1000000000))
  local -i _ms=$((T % 1000000000 / 1000000))
  # there is no hour field, so the minutes must not wrap at 60
  printf "${_don} completed IN ${_cyn}%'dm:%'ds:%'dms${_ncl}\n" "$((_sec / 60))" "$((_sec % 60))" "${_ms}"
}
f_up() { # status word printed after a successful availability check
  local _word="isUP"
  printf "${_GRN}\n" "${_word}"
}
f_ok() { # status word printed after a passed check
  local _word="isOK"
  printf "${_GRN}\n" "${_word}"
}
f_do() { # status word printed after a finished step
  local _word="DONE"
  printf "${_GRN}\n" "${_word}"
}
f_no() { # report the missing file(s) named in $1, no trailing status
  local _lst="$1"
  printf "\n${_err} missing file(s): ${_CYN}\n" "${_lst}"
}
# grabbing raw-domains
f_add() { # write remote file $1 to stdout; a failed download is reported through 'f_xcd 251'
  local -a _dl=(curl --retry 5 --retry-all-errors -f -s)
  if ! "${_dl[@]}" "$1"; then
    f_xcd 251 "$1"
  fi
}
f_grb() { # initialize CATEGORY, many categories are obtained but the main one is adult
  # Reads ar_url[0..5] (tarball urls) and ar_uri[0..5] (their display labels).
  # Fills the globals ar_cat (category names), ar_dmn (<cat>/domains),
  # ar_tmp (tmq.<cat>) and ar_txt (txt.<cat>).
  local A B tar_dsi ext_dsi
  printf "\n${_ylw}PERFORMING TASKs:${_ncl} initiating CATEGORY of domains\n"
  f_tmp # remove stale dir & file if any
  for A in {0..5}; do # grab dsi.ut-capitole.fr and use as initialize category
    tar_dsi=$(basename "${ar_url[A]}")
    ext_dsi="${tar_dsi/.tar.gz/}"
    # the parentheses are required: without them -name and -print0 bind to
    # '-type f' only and a stale directory is never removed
    find . -maxdepth 1 \( -type d -o -type f \) -name "${ext_dsi}" -print0 | xargs -0 -r rm -rf
    printf "%12s: %-66s" "${ext_dsi^^}" "${ar_uri[A]}"
    curl --retry 5 --retry-all-errors -f -s -O "${ar_url[A]}" || f_xcd 251 "${ar_url[A]}"
    # NOTE(review): "${ext_dsi/domains}" deletes the text "domains" from the name,
    # it does not append "/domains"; confirm whether "${ext_dsi}/domains" was meant
    tar -xzf "${tar_dsi}" "${ext_dsi/domains}"
    f_do
  done
  # some adjustment for initialize category
  mkdir ipv4
  mv phishing malware
  mv gambling trust+
  cat vpn/domains >> redirector/domains
  rm -rf vpn
  mapfile -t ar_cat < <(f_cat) # initializing category
  printf "%12s: ${_CYN}\n" "initiating" "${ar_cat[*]} (${#ar_cat[@]} CATEGORIES)"
  f_frm "txt.*" # remove stale raw-domains
  ar_dmn=()
  ar_tmp=()
  ar_txt=()
  for B in "${!ar_cat[@]}"; do
    ar_dmn+=("${ar_cat[B]}"/domains) # as raw-domains container
    ar_tmp+=(tmq."${ar_cat[B]}")     # as in-process-domains container (temporary)
    ar_txt+=(txt."${ar_cat[B]}")     # as processed-domains container
  done
}
f_fix() { # fixing false and bad entries
  # $1 category label, $2 input file, $3 and $4 sed expressions, $5 output file
  local _cat="$1" _in="$2" _ex1="$3" _ex2="$4" _out="$5"
  f_sm8 "${_cat}"
  dos2unix "${_in}" > /dev/null 2>&1
  # sort and de-duplicate first, then apply both expressions
  _srt -u "${_in}" \
    | _sed -e "${_ex1}" -e "${_ex2}" > "${_out}"
  f_do
}
f_fip() { # capture ip-address entry. save it into sub-net [ipv4 CATEGORY]
  # $1 domain list (edited in place), $2 file receiving the addresses, $3 category name for the label
  local _dom="$1" _ip4="$2" _cat="$3"
  local -i _left
  printf "%12s: %-66s" "moving" "IP-address entries into ${_cat} CATEGORY"
  # lines matching ${_prl} are appended to $2, with /32 added unless a two-digit prefix is already there
  _grp -P "${_prl}" "${_dom}" \
    | _sed "/\/[0-9]\{2\}$/ ! s/$/\/32/" >> "${_ip4}"
  # every dotted-quad line is then dropped from $1
  _sed -i -r "/^([0-9]{1,3}\.){3}[0-9]{1,3}$/d" "${_dom}"
  f_do
  _left=$(wc -l < "${_dom}")
  printf "%12s: %'d entries.\n" "acquired" "${_left}"
}
f_out() { # fixing out of filtering
  # $1 list edited in place, $2 list receiving the lines that contain "bobet"
  local _src="$1" _dst="$2"
  _grp 'bobet' "${_src}" >> "${_dst}"
  # drop those lines from $1 and fold <label>.xhvid2.com into xhvid2.com
  _sed -i '/bobet/d;s/[a0-z9]\{1,\}\.xhvid2\.com/xhvid2\.com/g' "${_src}"
}
f_uri() { # verify remote-urls [grab_urls] is available
  # $1: file with one url per line. Fills the global ar_uri with a shortened
  # "host/..?../basename" label per url and probes each url with a HEAD request.
  # If any url is reported as down, the list is printed and 'f_xcd 250' is called.
  ar_uri=()
  local -a ar_dwn=()
  local uri _hst _lbl _stc _ret i
  # '|| [[ -n ${uri} ]]' keeps a last line that has no trailing newline
  while read -r uri || [[ -n ${uri} ]]; do # slicing url && add as an element to ${ar_uri[@]}
    _hst="${uri##htt*\/\/}" # strip the scheme
    _hst="${_hst/\/*/}"     # keep the host part only
    _lbl="${_hst}/..?../${uri##*/}"
    ar_uri+=("${_lbl}")
    # index of the element just added; a plain assignment, because '((i++))'
    # returns status 1 when i is 0 and would stop a 'set -e' shell
    i=$((${#ar_uri[@]} - 1))
    printf "%12s: %-66s" "url_${i}" "${ar_uri[i]}"
    # starting check; ignore curl-code: 35
    # '|| _ret=$?' records curl's exit code without tripping errexit
    _ret=0
    _stc=$(curl --connect-timeout 15 -so /dev/null -Iw "%{http_code}" "${uri}") || _ret=$?
    if ! [[ ${_stc} =~ ^[02]{3}$ ]]; then
      printf "${_RED}\n" "stat-code: ${_stc}"
      ar_dwn+=("[stat-code: ${_stc}] ${uri}")
    elif [[ ${_ret} = 6 ]]; then
      printf "${_RED}\n" "curl-code: ${_ret}"
      ar_dwn+=("[curl-code: ${_ret}] ${uri}")
    else
      f_up
    fi
  done < "$1"
  if [[ ${#ar_dwn[@]} -eq 0 ]]; then # display result
    printf "%30s\n" " " | tr " " -
    printf "%s\n" "All URLs of remote-files isUP."
  else
    printf "%84s\n" " " | tr " " -
    printf "${_CYN}\n" "${ar_dwn[@]}"
    f_xcd 250
  fi
}
# f_dpl, f_dpm & f_dpn: used by grab_duplic.sh
f_dpl() { # headline naming the category $1 (upper-cased) that duplicates are matched against
  local _ref="${1^^}"
  printf "eliminating duplicate entries based on ${_CYN}\n" "${_ref}"
}
f_dpm() { # keep in $2 only the lines whose first field does not occur in $1; $3 is the work file
  local _ref="$1" _tgt="$2" _wrk="$3"
  # first pass remembers field 1 of every line of $1, second pass filters $2
  awk 'FILENAME == ARGV[1] && FNR==NR{a[$1];next} !($1 in a)' "${_ref}" "${_tgt}" \
    | _srt -o "${_wrk}"
  # the sorted result replaces the content of $2
  cp "${_wrk}" "${_tgt}"
  f_do
}
f_dpn() { # write to $5 the lines that occur more than once across $3 and $4
  # $6.$1 form the step number and $2 names the entries in the progress label
  local _step="STEP $6.$1" _what="$2 entries"
  printf "%11s = deduplicating %-28s" "${_step}" "${_what}"
  _srt "$3" "$4" \
    | uniq -d \
    | _srt -o "$5"
}
f_g4c() { # used by grab_cereal.sh
  # $1: zone file. Refresh its "; generate at ..." stamp: line 1 is rewritten
  # when it already is such a stamp, otherwise a new stamp is inserted above it.
  local _first=""
  # only line 1 is inspected, so other "; generate at" lines further down the
  # file cannot cause a second stamp to be inserted
  IFS= read -r _first < "$1" || true
  if [[ ${_first} == "; generate at"* ]]; then
    _sed -i "1s/^.*$/; generate at ${_lct} by ${_foo} ${_ver}/" "$1"
  else
    _sed -i "1i ; generate at ${_lct} by ${_foo} ${_ver}" "$1"
  fi
}
f_g4b() { # nested in f_rpz and f_ip4
  # put the "; generate at ..." stamp and a ";" line on top of file $1, then print its line count
  local _zone="$1" _total
  _sed -i "1i ; generate at ${_lct} by ${_foo} ${_ver}\n;" "${_zone}"
  _total=$(printf "%'d" "$(wc -l < "${_zone}")")
  printf "%10s entries\n" "${_total}"
}
f_rpz() { # used by grab_build.sh [configurable in grab_config]
  # $1 zone file to append to, $2 domain list; each domain yields two records,
  # "<domain> ${RPZ_DOM}" and "*.<domain> ${RPZ_DOM}"
  local _zone="$1" _list="$2"
  printf "%13s %-31s : " "rewriting" "${_list} to ${_zone}"
  awk -v DOM="${RPZ_DOM}" '{print $0" "DOM"\n""*."$0" "DOM}' "${_list}" >> "${_zone}"
  f_g4b "$@"
}
f_ip4() { # used by grab_build.sh [configurable in grab_config]
  # $1 zone file to append to, $2 list of a.b.c.d/len entries; each entry is
  # written reversed as "len.d.c.b.a.${RPZ_IPV}"
  local _zone="$1" _list="$2"
  printf "%13s %-31s : " "rewriting" "${_list} to ${_zone}"
  # the first "/" becomes a dot, so awk sees five dot-separated fields
  _sed "s/\//\./" "${_list}" \
    | awk -v IPV="${RPZ_IPV}" -F. '{print $5"."$4"."$3"."$2"."$1"."IPV}' >> "${_zone}"
  f_g4b "$@"
}
f_ssh() { # nested in f_cer and needed by grab_rsync
  # Pre-flight checks against ${HOST}; each failed step calls f_xcd with its code:
  #   253 no ping reply, 244 batch-mode ssh login fails,
  #   254 ${ZONE_DIR} is not a directory there, 246 rsync or pigz not found there
  local _fmt="${_inf} %-85s"
  printf "${_fmt}" "check availability remote-host: ${HOST}"
  ping -w 1 "${HOST}" > /dev/null 2>&1 || f_xcd 253
  f_up
  printf "${_fmt}" "check passwordless ssh compatibility: ${HOST}"
  _ssh -o BatchMode=yes "${HOST}" true > /dev/null 2>&1 || f_xcd 244 "${HOST}"
  f_ok
  printf "${_fmt}" "check availability: ${ZONE_DIR} in ${HOST}"
  # '[[ -d ... ]]' is passed as plain words and evaluated by the remote shell
  _ssh root@"${HOST}" [[ -d "${ZONE_DIR}" ]] || f_xcd 254 "${ZONE_DIR}" "${HOST}"
  f_ok
  printf "${_fmt}" "check required debian-packages in ${HOST}"
  for C in rsync pigz; do # required: rsync and pigz
    _ssh root@"${HOST}" "hash ${C} >> /dev/null 2>&1" || f_xcd 246 "${HOST}" "${C}"
  done
  f_ok
}
f_cer() { # used by grab_cereal.sh [grabbing zone-files if notFOUND]
  # $1: blank-separated list of zone-file names (split into words on purpose).
  # Each file is copied from ${HOST}:${ZONE_DIR} into ${_DIR}; when that fails,
  # f_ori is asked to fetch it from the origin. Finally the running script is
  # replaced by a fresh run of itself.
  local _remote
  f_ssh
  for a in $1; do
    _remote="root@${HOST}:${ZONE_DIR}/${a}"
    if ! scp "${_remote}" "${_DIR}" > /dev/null 2>&1; then
      printf "\n${_err} notFOUND in %s:\n${_CYN}\n" "${HOST}" "${a}"
      f_ori "zones-rpz/${a}" "${a}"
      continue
    fi
    printf "${_inf} fetch files from %s: ${_CYN}\n" "${HOST}" "${a}"
  done
  printf "\n${_inf} retry to forking ${_CYN}\n" "${_foo}"
  exec "$0"
}
f_ori() { # fetch ${_ori}/$1 and append it to local file $2; returns 1 unless the HEAD probe answers 200
  local _url="${_ori}/$1" _dst="$2" _val
  printf "${_inf} trying to get ${_CYN} from origin:\n%s\n" "${_dst}" "${_url}"
  _val=$(curl --connect-timeout 15 -so /dev/null -I -w "%{http_code}" "${_url}")
  # guard: anything but status 200 ends here
  if ! [[ ${_val} -eq 200 ]]; then
    printf "${_err} ${_CYN} notFOUND. download failed: status-code %d\n" "${_dst}" "${_val}"
    return 1
  fi
  curl -fs "${_url}" >> "${_dst}" || f_xcd 251
  printf "${_inf} %-73s" "get ${_dst} from origin"
}
f_mis() { # report mismatching file(s) $1, then list the needed file(s) $2 through 'f_xcd 255'
  local _bad="$1" _need="$2"
  printf "\n${_err} mismatch file(s): ${_CYN}" "${_bad}"
  f_xcd 255 "${_need}"
}
f_sed() { # stdin: blank-separated words; stdout: the words that occur exactly once, blank-separated
  _sed "s/ /\n/g" \
    | _srt \
    | uniq -u \
    | tr "\n" " " \
    | _sed "s/ $//g"
}
f_fnd() { # sorted names of the regular files in the working directory that match glob $1
  local _glob="$1"
  find . -maxdepth 1 -type f -name "${_glob}" \
    | _sed "s/\.\///" \
    | _srt
}
f_frm() { # delete the regular files in the working directory that match glob $1
  local _glob="$1"
  find . -maxdepth 1 -type f -name "${_glob}" -print0 \
    | xargs --no-run-if-empty --null rm
}
f_cat() { # sorted names of the sub-directories of the working directory (the categories)
  # find lists "." first; '1d' drops that line, the substitution strips "./"
  find . -maxdepth 1 -type d \
    | _sed "1d;s/\.\///" \
    | _srt
}
f_sta() { # make sure file $2 has octal mode $1; chmod runs only when the mode differs
  local _want="$1" _file="$2" _have
  _have=$(stat -c "%a" "${_file}")
  [[ ${_have} == "${_want}" ]] || chmod "${_want}" "${_file}"
}
f_dft() { # define default values in grab_config
  # every setting that is unset or empty receives its default; others are left as they are
  printf "${_inf} forking ${_CYN} with default configurations\n" "${_foo}"
  [[ -n ${HOST:-} ]] || HOST="rpz.warnet-ersa.net"      # or ip-address
  [[ -n ${REGEX:-} ]] || REGEX="4"                      # fixed number of lines grab_regex
  [[ -n ${RNDC_RELOAD:-} ]] || RNDC_RELOAD="no"         # yes OR no
  [[ -n ${RPZ_DOM:-} ]] || RPZ_DOM="CNAME ."            # rpz policy
  [[ -n ${RPZ_IPV:-} ]] || RPZ_IPV="rpz-nsip CNAME ."   #+
  [[ -n ${URLS:-} ]] || URLS="22"                       # fixed number of lines grab_urls
  [[ -n ${ZONE_DIR:-} ]] || ZONE_DIR="/etc/bind/zones-rpz" # target dir for rsync
}
f_cnf() { # valuation variables in grab_config
  # Load HOST, REGEX, RNDC_RELOAD, RPZ_DOM, RPZ_IPV, URLS and ZONE_DIR from
  # ${_DIR}/grab_config without sourcing it. f_dft supplies the defaults when
  # the file is missing or does not define exactly those seven keys.
  # Side effects: CNF becomes a readonly global; ar_cnf, ar_CNF, ar_VAR are filled.
  readonly CNF="${_DIR}"/grab_config
  # value clean-up, applied with 'sed -E': drop one blank or ^ after '=', remove
  # quotes, cut trailing '#' or ';' comments, squeeze whitespace runs, trim the
  # end. -E is required: in a basic regex '+' is a literal plus sign, so the
  # squeeze step never matched
  local _ptr="s/=[ ^ ]/=/g;s/['\"]//g;s/[#;].*//g;s/[[:space:]]+/ /g;s/[[:space:]]$//g"
  local ARG
  if [[ -e ${CNF} ]]; then
    f_sta 644 "${CNF}"
    ar_cnf=(HOST REGEX RNDC_RELOAD RPZ_DOM RPZ_IPV URLS ZONE_DIR)
    # keys found in the file, in sorted order
    mapfile -t ar_CNF < <(_srt "${CNF}" | _grp -E "^[A-Z]" | _sed "s/[ ^ ]=/=/g" | cut -d= -f1)
    if [[ ${ar_CNF[*]} == "${ar_cnf[*]}" ]]; then
      # values in the same sorted order as the keys
      mapfile -t ar_VAR < <(_srt "${CNF}" | _grp -E "^[A-Z]" | _sed -E "${_ptr}" | cut -d= -f2)
      for ARG in "${!ar_VAR[@]}"; do
        # get values supplied by "${CNF}", instead of "source ${CNF}"
        printf -v "${ar_CNF[ARG]}" "%s" "${ar_VAR[ARG]}"
      done
    else # misconfigurations occurs: fallback to default values
      printf "\n${_wn1} misconfigurations occurs in %s. undefine value:\n" "${CNF##*/}"
      printf "%9s%s\n" "" "$(echo "${ar_CNF[@]}" "${ar_cnf[@]}" | f_sed)"
      f_dft
    fi
  else # missing "${CNF}": fallback to default values
    printf "\n${_wn0} %s notFOUND\n" "${CNF##*/}"
    f_dft
  fi
}
f_stt() { # starting main script
  # $1: banner title. Prints the start banner, returns 1 (after pointing at the
  # download url) when ${_ver} differs from the latest release ${_get}, and
  # otherwise creates a default .shellcheckrc in the working directory if none
  # exists.
  local _self="${0##*/}"
  # script name and version are passed as arguments, not spliced into the format
  printf "\n${_RED}\nstarting %s %s at ${_CYN}\n" "$1" "${_self}" "${_ver}" "${_lct}"
  if [[ ${_get} != "${_ver}" ]]; then # confirm to the latest release
    printf "${_inf} you're running %s. an update available\n" "${_self} version ${_ver}"
    printf "${_hnt} please download %s :\n" "${_self} version ${_get} at"
    # "${_api}" is quoted so the url always reaches curl as a single word
    printf "%s\n" "$(curl -s "${_api}" | grep download_url | awk -F\" '{print $4}')"
    return 1
  fi
  # compatibility with shellcheck
  if ! [[ -e .shellcheckrc ]]; then
    cat >> .shellcheckrc <<'eof'
source-path=./
source=/dev/null
disable=SC2034
disable=SC2059
disable=SC2148
disable=SC2154
eof
  fi
}