-
Notifications
You must be signed in to change notification settings - Fork 8
/
grab_http.sh
234 lines (214 loc) · 9.77 KB
/
grab_http.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env bash
# TAGS
#    grab_http.sh v10.4 — downloads and categorizes DNS-blacklist sources
#    for a Bind9 RPZ setup (see README and LICENSE for details)
# AUTHOR
#    https://github.com/ngadmini
#    (NOTE(review): the scraped copy showed "github.com" — presumably a
#    link-rewriter artifact; verify the canonical repository URL)
T=$(date +%s%N)                            # start timestamp in ns — consumed by f_tim at the end
umask 027                                  # new files rw-r-----, new dirs rwxr-x---
set -Eeuo pipefail                         # strict mode; -E so the ERR trap (f_trp) fires inside functions
PATH=/usr/local/bin:/usr/bin:/bin:${PATH}
_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)   # absolute directory of this script
# sourcing grab_library (shared helpers: f_trp, f_ok, f_xcd, _grp, _sed, ...)
clear; cd "${_DIR}"
readonly _LIB="${_DIR}"/grab_library
if [[ -e ${_LIB} ]]; then
   # normalize permissions before sourcing, then install the ERR traps
   if [[ $(stat -c "%a" "${_LIB}") != 644 ]]; then chmod 644 "${_LIB}"; fi
   source "${_LIB}"
   f_trp
else
   # library missing: fetch it once, then re-exec so the branch above runs.
   # BUGFIX: the download must be tested with 'if !' — the original
   # 'curl ...; response=$?' pattern is dead code under 'set -e', because a
   # failing curl aborts the script before $? can ever be examined.
   # (host fixed: the scraped copy pointed at "githubusercontent.com")
   if ! curl -sO https://raw.githubusercontent.com/ngadmini/Grabbing-Blacklist-for-Bind9-RPZ/master/libs/grab_library; then
      printf "[FAIL] %s notFOUND\n" "${_LIB##*/}"
      exit 1
   fi
   exec "$0"
fi
# starting main script: sanity checks (config file, privileges, packages)
f_stt ""
printf "${_pre} %-63s" "check availability configuration file"; f_cnf
printf "${_pre} %-63s" "check ${0##*/} is executed by non-root privileges"
[[ ! ${UID} -eq 0 ]] || f_xcd 247; f_ok
printf "${_pre} %-63s" "check required debian-packages in local-host: $(hostname -I)"
# BUGFIX: the accumulator must be initialized ONCE, before the loop.
# Resetting it on every iteration discarded previously detected missing
# packages — only the LAST missing package (if any) was ever recorded, and
# earlier ones were silently skipped at install time.
ar_pkg=()
for C in curl dos2unix faketime idn libnet-netmask-perl rsync; do
   if ! dpkg -s "${C}" > /dev/null 2>&1; then
      printf "\n${_err} %s %-60s" "${C}" "not installed"
      ar_pkg+=("${C}")
   fi
done
if [[ ${#ar_pkg[@]} -eq 0 ]]; then
   f_ok
else
   # NOTE(review): arg count matches only if each ${_CYN} carries a %s
   # conversion — verify against grab_library
   printf "\n${_CYN} do you want to install ${_CYN} (Y/n)? " "[CONFIRM]" "${ar_pkg[*]}"
   read -r confirm
   case ${confirm:0:1} in
      y|Y|"") for _pkg in "${ar_pkg[@]}"; do
                 printf "${_inf} installing %-62s" "${_pkg}"
                 sudo apt install "${_pkg}" -y -qq > /dev/null 2>&1
                 f_do
              done;;
      *)      f_xcd 245 "${ar_pkg[*]}";;
   esac
fi
# check script-pack's properties: companion scripts must be executable (755),
# and the two data files (grab_regex, grab_urls) readable (644), free of
# blank lines, and holding the exact expected line counts
printf "${_pre} %-63s" "check script-pack's property in local-host: $(hostname -I)"
ar_shy=(grab_build.sh grab_cereal.sh grab_duplic.sh grab_rsync.sh)   # executable helpers
ar_shn=(grab_regex grab_urls)                                        # data files
for D in "${!ar_shy[@]}"; do f_pkg "${ar_shy[D]}" 755; done
for E in "${!ar_shn[@]}"; do
   declare -A ar_num # numeric value. index's position of grab_regex (is no.0 at ar_shn)
   # NOTE(review): declaring/recomputing ar_num inside the loop is redundant
   # (same value every pass) — harmless, but it could be hoisted above the loop
   ar_num[ar_shn]=$(echo "${ar_shn[*]}" | tr ' ' '\n' | awk '/grab_regex/ {print NR-1}')
   f_pkg "${ar_shn[E]}" 644; _sed -i "/^$/d" "${ar_shn[E]}"   # drop empty lines in-place
   if [[ ${E} -eq ${ar_num[ar_shn]} ]]; then
      # grab_regex -> ar_reg; must hold exactly ${REGEX} lines
      # (REGEX presumably defined by grab_library/f_cnf — verify)
      mapfile -t ar_reg < "${ar_shn[E]}"
      [[ ${#ar_reg[@]} -eq ${REGEX} ]] || f_xcd 249 "${ar_shn[E]}"
   else
      # grab_urls -> ar_url; must hold exactly ${URLS} lines
      mapfile -t ar_url < "${ar_shn[E]}"
      [[ ${#ar_url[@]} -eq ${URLS} ]] || f_xcd 248 "${ar_shn[E]}"
   fi
done
f_ok
# check the actual longest of TLDs against the bound stored in the regex on
# grab_regex line 1, and patch that file in-place when they disagree
printf "${_pre} %-63s" "check the actual longest of Top Level Domains"
# _act: longest TLD length from the ${_tld} feed (URL presumably set by
# grab_library) — drop '#' lines, lowercase, measure, numeric sort, take max
_act=$(curl -s "${_tld}" | _sed '/#/d;s/[A-Z]/\L&/g' | awk '{print length}' | _srt -g | tail -1)
_reg=$(echo "${ar_reg[0]}" | awk -F'|' '{print $6}')   # 6th '|'-field of regex line 1
_cur=$(echo "${_reg}" | cut -d\{ -f2 | cut -d, -f1)    # current '{N,}' lower bound
_neo=$(("$_act"+1))   # new bound = longest TLD length + 1 — rationale not visible here
if [[ "${_neo}" != "${_cur}" ]]; then
   # NOTE(review): three arguments match this format only if ${_YLW} itself
   # contains a %s conversion — verify against grab_library
   printf "${_YLW}\nchanging '%s' TO '[a0-z9\-]{%d,}' in ${ar_shn[0]} line: 1\n" "CHANGE occurs" "${_reg}" "${_neo}"
   _sed -i "s/$_cur,/$_neo,/" "${ar_shn[0]}"
else
   f_ok
fi
# initial CATEGORY. six tarballs from dsi.ut-capitole.fr are fetched and
# extracted; their directory names become the initial categories (main: adult)
printf "${_pre} check availability of sources-urls (as listed in %s)\n" "${ar_shn[1]}"
f_uri "${ar_shn[1]}"   # check url in grab_urls, is it UP-DOWN ? (presumably also fills ar_uri)
printf "\n${_ylw}PERFORMING TASKs:${_ncl} initiating CATEGORY of domains\n"
f_tmp                  # sweep temporary dir-files if any
for A in {0..5}; do
   tar_dsi=$(basename "${ar_url[A]}")   # tarball name, e.g. adult.tar.gz
   ext_dsi="${tar_dsi/.tar.gz/}"        # extracted dir, used as initial category
   # BUGFIX: the two -type tests must be grouped with \( \).  The original
   # parsed as '-type d  OR  (-type f AND -name AND -print0)', so a stale
   # directory from a previous run matched the bare '-type d' branch, was
   # never printed, and therefore never removed.
   find . -maxdepth 1 \( -type d -o -type f \) -name "${ext_dsi}" -print0 | xargs -0 -r rm -rf
   printf "%12s: %-66s" "${ext_dsi^^}" "${ar_uri[A]}"
   curl -sO "${ar_url[A]}" || f_xcd 251 "${ar_url[A]}"
   tar -xzf "${tar_dsi}"
   f_do
done
# adjustments to categorizing: phishing dir moves under malware, gambling
# under trust+, and the vpn list merges into redirector
mkdir ipv4; mv phishing malware; mv gambling trust+
cat vpn/domains >> redirector/domains; rm -rf vpn
mapfile -t ar_cat < <(f_cat)   # final category list
f_frm "txt.*"                  # sweep stale domain-lists
ar_dmn=(); ar_txt=()           # tmp files per category: raw-domains && processed-domains
for B in "${!ar_cat[@]}"; do ar_dmn+=("${ar_cat[B]}"/domains); ar_txt+=(txt."${ar_cat[B]}"); done
printf "%12s: ${_CYN}\n" "categorized" "${ar_cat[*]} (${#ar_cat[@]} CATEGORIES)"
# category: TRUST+
# contents: gambling domains and TRUST+
#+ ${ar_cat[5]} with 2 additional entries: ${ar_url[1,7]}
f_sm7 "${ar_cat[5]}" 2
trust=$(mktemp -p "${_DIR}"); porn=$(mktemp -p "${_DIR}")
untrust=$(mktemp -p "${_DIR}"); idna=$(mktemp -p "${_DIR}")
f_sm6 1 "${ar_uri[1]}"; f_do   # add gambling-domains to trust+ category
f_sm6 7 "${ar_uri[7]}"         # https://trustpositif.kominfo.go.id/assets/db/domains_isp
f_add "${ar_url[7]}" > "${trust}"   #+ frequently error-code curl: (35) & firefox: PR_CONNECT_RESET_ERROR
#cp ~/Downloads/domains_isp "${trust}" #+ download then switch line above to comment and it's to uncomment
# capture non-ASCII (IDN) names and convert them to ASCII with the idn tool.
# BUGFIX: '|| true' guards the pipeline — under 'set -eo pipefail' a grep
# that finds NO non-ASCII line exits 1 and silently killed the whole script.
LC_COLLATE=C grep -P '[^\x00-\x7F]' "${trust}" | idn --quiet > "${idna}" || true
cat "${idna}" >> "${trust}"; f_do
printf "%12s: %-66s" "reducing" "porn domains and move it's to ${ar_cat[0]^^} CATEGORY"
f_add "${ar_url[18]}" > "${porn}"        # control list used to reduce adult-domains
_srt -us "${trust}" -o "${trust}"        # sort -u the trust+ list in place
_srt -s "${trust}" "${porn}" | uniq -d > "${untrust}"   # entries present in BOTH lists
# NOTE(review): if _grp is plain grep, zero matches here (exit 1) would also
# abort under 'set -e' — verify grab_library handles the no-match case
_grp -E "${ar_reg[1]}" "${trust}" >> "${untrust}"
f_awk "${untrust}" "${trust}" "${ar_dmn[5]}"   # reduce adult entries by moving them
cat "${untrust}" >> "${ar_dmn[0]}"; f_do       #+ to the adult category "${ar_dmn[0]}"
f_fix "${ar_cat[5]}" "${ar_dmn[5]}" "${ar_txt[5]}"; f_do   # fixing false-bad entries
f_fip "${ar_txt[5]}" "${ar_dmn[1]}" "${ar_cat[1]^^}"       # route IPv4 entries to ${ar_cat[1]}
# category: ADULT
# contents: adult-porn domains
#+ ${ar_cat[0]} with 2 additional entries: ${ar_url[0,6]} and some from the trust+ category
f_sm7 "${ar_cat[0]}" 2
f_sm6 0 "${ar_uri[0]}"; f_do # in the initial category
# NOTE(review): under 'set -eo pipefail' this pipeline aborts if EVERY line of
# the feed is a '#' comment (grep exits 1) — presumably never the case; verify
f_sm6 6 "${ar_uri[6]}"; f_add "${ar_url[6]}" | _grp -v '^#' >> "${ar_dmn[0]}"; f_do
f_fix "${ar_cat[0]}" "${ar_dmn[0]}" "${ar_txt[0]}" # fixing false-bad entries
f_out "${ar_txt[0]}" "${ar_txt[5]}" # presumably drops entries already in trust+ — see grab_library
f_fip "${ar_txt[0]}" "${ar_dmn[1]}" "${ar_cat[1]^^}" # route IPv4 entries to ${ar_cat[1]}
# category: REDIRECTOR
# contents: vpn and proxy domains
#+ ${ar_cat[4]} with 2 additional entries: ${ar_url[4,5]}
f_sm7 "${ar_cat[4]}" 2
for F in {4,5}; do f_sm6 "${F}" "${ar_uri[F]}"; f_do; done # in the initial category
f_fix "${ar_cat[4]}" "${ar_dmn[4]}" "${ar_txt[4]}" # fixing false-bad entries
f_out "${ar_txt[4]}" "${ar_txt[5]}" # presumably drops entries already in trust+ — see grab_library
f_fip "${ar_txt[4]}" "${ar_dmn[1]}" "${ar_cat[1]^^}" # route IPv4 entries to ${ar_cat[1]}
# category: PUBLICITE
# contents: adv domains
#+ ${ar_cat[3]} with 5 additional entries: ${ar_url[3,8..11]}
f_sm7 "${ar_cat[3]}" 5
f_sm6 3 "${ar_uri[3]}"; f_do # in the initial category
for G in {8..11}; do
   # strip comment ('#') and blank lines from each extra feed before appending
   f_sm6 "${G}" "${ar_uri[G]}"; f_add "${ar_url[G]}" | _grp -Ev "^(#|$)" >> "${ar_dmn[3]}"; f_do
done
f_fix "${ar_cat[3]}" "${ar_dmn[3]}" "${ar_txt[3]}" # fixing false-bad entries
f_out "${ar_txt[3]}" "${ar_txt[5]}" # presumably drops entries already in trust+ — see grab_library
f_fip "${ar_txt[3]}" "${ar_dmn[1]}" "${ar_cat[1]^^}" # route IPv4 entries to ${ar_cat[1]}
# category: MALWARE
# contents: malware, phishing and ransomware domains
#+ ${ar_cat[2]} with 7 additional entries: ${ar_url[2,12..17]}
#  (NOTE(review): the original comment said 8, but only 7 urls are listed
#  and f_sm7 below is called with 7)
f_sm7 "${ar_cat[2]}" 7
f_sm6 2 "${ar_uri[2]}"; f_do # in the initial category
# keep the 2nd space-separated field of non-comment lines (hosts-file style feed, presumably)
f_sm6 12 "${ar_uri[12]}"; f_add "${ar_url[12]}" | _grp -Ev "^(#|:)" | cut -d' ' -f2 >> "${ar_dmn[2]}"; f_do
# drop an 11-line header and ';' comment lines, keep the 1st field (zone-file style feed, presumably)
f_sm6 13 "${ar_uri[13]}"; f_add "${ar_url[13]}" | _sed "1,11d;/^;/d" | cut -d' ' -f1 >> "${ar_dmn[2]}"; f_do
for H in {14..17}; do
   f_sm6 "${H}" "${ar_uri[H]}"; f_add "${ar_url[H]}" | _grp -v "^#" >> "${ar_dmn[2]}"; f_do
done
f_fix "${ar_cat[2]}" "${ar_dmn[2]}" "${ar_txt[2]}" # fixing false-bad entries
f_out "${ar_txt[2]}" "${ar_txt[5]}" # presumably drops entries already in trust+ — see grab_library
f_fip "${ar_txt[2]}" "${ar_dmn[1]}" "${ar_cat[1]^^}" # route IPv4 entries to ${ar_cat[1]}
# category: IPV4
# contents: ipv4 as a CIDR block comes from another category (collected by
# the f_fip calls above)
f_sm7 "${ar_cat[1]}" 0
f_sm8 "${ar_cat[1]}"
# de-duplicate keeping first occurrence (awk '!x[$0]++'), then sort
# numerically octet by octet (stable, '.'-separated keys)
awk '!x[$0]++' "${ar_dmn[1]}" | _srt -n -t . -k1,1 -k2,2 -k3,3 -k4,4 -s -o "${ar_txt[1]}"; f_do
printf "%12s: %'d entries.\n" "acquired" "$(wc -l < "${ar_txt[1]}")"
# summarize: per-category entry counts, total, disk usage and elapsed time,
# then prompt for a follow-up OPTION (1-4) handled by the case block below
printf "\nprocessing sources-urls (${_CYN}) in summary:\n" "${#ar_txt[@]} CATEGORIES"
for J in "${!ar_cat[@]}"; do
   printf -v _sum "%'d" "$(wc -l < "${ar_txt[J]}")" # 'trust+' gets extra (grab_library line 161)
   printf "%12s: %9s entries\n" "${ar_cat[J]}" "${_sum}" #+ from those that pass filtration
done
printf "%12s: %'d entries\n" "TOTAL" "$(awk 'END {print NR}' "${ar_txt[@]}")"
printf "%12s: %9s Megabytes\n\n" "disk-usage" "$(wc -c "${ar_txt[@]}" | tail -1 | awk -F' ' '{print ($1/1024^2)}')"
T="$(($(date +%s%N)-T))"; f_tim # elapsed nanoseconds since startup -> human readable
f_sm0; read -r opsi # completed by offering OPTIONs: continued to various tasks OR stop here
# re-prompt until a single digit 1-4 is entered
until [[ ${opsi} =~ ^[1-4]{1}$ ]]; do
   # NOTE(review): the single argument matches this format only if exactly one
   # of ${_CYN}/${_ccl} carries a %s conversion — verify against grab_library
   printf "please enter: ${_CYN} to continue OR ${_ccl} to quit\n" "[1|2|3|4]"
   read -r opsi
done
# dispatch the chosen follow-up OPTION.  Each option runs a longer chain of
# the companion scripts; every stage must succeed before the next one runs.
ar_exe=(); for L in "${!ar_shy[@]}"; do ar_exe+=("./${ar_shy[L]}"); done

# run the given commands left to right; abort with status 1 on the first
# failure.  BUGFIX: the original nested 'if' ladders only exited non-zero
# when the FIRST stage failed — a later stage failing fell through to the
# final 'exit 0', reporting success for a partial run.
run_chain() {
   local _stage
   for _stage in "$@"; do "${_stage}" || exit 1; done
}

case ${opsi:0:1} in
   1) f_sm1; run_chain "${ar_exe[2]}"; f_sm9 st;;
   2) f_sm2; run_chain "${ar_exe[2]}" "${ar_exe[0]}"; f_sm9 nd;;
   3) f_sm3; run_chain "${ar_exe[2]}" "${ar_exe[0]}" "${ar_exe[1]}"; f_sm9 th;;
   # NOTE(review): option 4 reported 'th' in the original too — confirm it
   # should not be a distinct tag
   4) f_sm4; run_chain "${ar_exe[2]}" "${ar_exe[0]}" "${ar_exe[1]}" "${ar_exe[3]}"; f_sm9 th;;
   *) f_cln; exit 1;;   # unreachable: the prompt loop above only accepts 1-4
esac
exit 0