-
Notifications
You must be signed in to change notification settings - Fork 3
/
add_data_to_graph.sh
executable file
·243 lines (208 loc) · 8.6 KB
/
add_data_to_graph.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/bin/bash
#
# ARG_HELP([Upload JSONLD and Turtle data to a Neurobagel graph])
# ARG_POSITIONAL_SINGLE([dir],[Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded.])
# ARG_POSITIONAL_SINGLE([graph-url],[Host and port at which to access the graph database to add data to (e.g., localhost:7200)])
# ARG_POSITIONAL_SINGLE([graph-database],[Name of graph database to add data to])
# ARG_POSITIONAL_SINGLE([user],[Username for graph database access])
# ARG_POSITIONAL_SINGLE([password],[Password for graph database user])
# ARG_OPTIONAL_BOOLEAN([clear-data],[],[Whether or not to first clear all existing data from the graph database],[off])
# ARG_OPTIONAL_BOOLEAN([use-graphdb-syntax],[],[Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database.],[off])
# ARG_OPTIONAL_BOOLEAN([log-output],[],[Whether or not to write the output to a log file],[off])
# ARG_OPTIONAL_SINGLE([log-file],[],[Path to the log file],[LOG.txt])
# ARGBASH_GO()
# needed because of Argbash --> m4_ignore([
### START OF CODE GENERATED BY Argbash v2.9.0 one line above ###
# Argbash is a bash code generator used to get arguments parsing right.
# Argbash is FREE SOFTWARE, see https://argbash.io for more info
# Generated online by https://argbash.io/generate
# Print an error message to stderr and terminate the script.
# Arguments: $1 - message to print
#            $2 - exit status to use (defaults to 1)
# Globals:   _PRINT_HELP (read) - when set to "yes", the help text is printed to stderr first
die() {
  local exit_status="${2:-1}"
  if [ "${_PRINT_HELP:-no}" = yes ]; then
    print_help >&2
  fi
  echo "$1" >&2
  exit "${exit_status}"
}
# Report whether a string starts with one of the known short option letters.
# Arguments: $1 - text to inspect (only its first character is considered)
# Returns:   0 if that character is a known short option letter, 1 otherwise
begins_with_short_option() {
  local first_option remaining all_short_options='h'
  first_option="${1:0:1}"
  # Quote the pattern so that characters such as '*' or '?' are compared
  # literally; unquoted they act as glob patterns and match every option letter.
  remaining=${all_short_options/"$first_option"/}
  # If removing the character changed nothing, it is not a known short option.
  if [ "$all_short_options" = "$remaining" ]; then
    return 1
  fi
  return 0
}
# Positional arguments collected during parsing (appended to by parse_commandline)
_positionals=()
# Default values of the optional arguments
_arg_clear_data=off
_arg_use_graphdb_syntax=off
_arg_log_output=off
_arg_log_file=LOG.txt
# Print the script description, the usage line and one line per argument to stdout.
print_help() {
  local -a detail_lines=(
    "<dir>: Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded."
    "<graph-url>: Host and port at which to access the graph database to add data to (e.g., localhost:7200)"
    "<graph-database>: Name of graph database to add data to"
    "<user>: Username for graph database access"
    "<password>: Password for graph database user"
    "-h, --help: Prints help"
    "--clear-data, --no-clear-data: Whether or not to first clear all existing data from the graph database (off by default)"
    "--use-graphdb-syntax, --no-use-graphdb-syntax: Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database. (off by default)"
    "--log-output, --no-log-output: Whether or not to write the output to a log file (off by default)"
    "--log-file: Path to the log file (default: 'LOG.txt')"
  )
  printf '%s\n' "Upload JSONLD and Turtle data to a Neurobagel graph"
  printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] [--(no-)log-output] [--log-file <arg>] <dir> <graph-url> <graph-database> <user> <password>\n' "$0"
  # printf reuses its format for every remaining argument: one tab-indented line per entry
  printf '\t%s\n' "${detail_lines[@]}"
}
# Walk over the command-line arguments, recording option values and positionals.
# Arguments: the script's command-line arguments
# Globals (written): _key, _positionals, _positionals_count, _last_positional,
#                    _arg_clear_data, _arg_use_graphdb_syntax, _arg_log_output, _arg_log_file
# Anything that is not a recognised option is stored as a positional argument.
parse_commandline() {
  _positionals_count=0
  while [ $# -gt 0 ]; do
    _key="$1"
    case "$_key" in
      --help | -h*)
        # -h, --help, or -h followed by further characters: show help and stop
        print_help
        exit 0
        ;;
      --clear-data)
        _arg_clear_data="on"
        ;;
      --no-clear-data)
        _arg_clear_data="off"
        ;;
      --use-graphdb-syntax)
        _arg_use_graphdb_syntax="on"
        ;;
      --no-use-graphdb-syntax)
        _arg_use_graphdb_syntax="off"
        ;;
      --log-output)
        _arg_log_output="on"
        ;;
      --no-log-output)
        _arg_log_output="off"
        ;;
      --log-file)
        # The value is the next argument; consume it with an extra shift
        if [ $# -lt 2 ]; then
          die "Missing value for the optional argument '$_key'." 1
        fi
        _arg_log_file="$2"
        shift
        ;;
      --log-file=*)
        _arg_log_file="${_key#--log-file=}"
        ;;
      *)
        _last_positional="$_key"
        _positionals+=("$_last_positional")
        _positionals_count=$(( _positionals_count + 1 ))
        ;;
    esac
    shift
  done
}
# Abort (printing the help text) unless exactly 5 positional arguments were collected.
# Globals (read): _positionals_count, _last_positional
handle_passed_args_count() {
  local expected_names="'dir', 'graph-url', 'graph-database', 'user' and 'password'"
  if ! [ "${_positionals_count}" -ge 5 ]; then
    _PRINT_HELP=yes die "FATAL ERROR: Not enough positional arguments - we require exactly 5 (namely: $expected_names), but got only ${_positionals_count}." 1
  fi
  if ! [ "${_positionals_count}" -le 5 ]; then
    _PRINT_HELP=yes die "FATAL ERROR: There were spurious positional arguments --- we expect exactly 5 (namely: $expected_names), but got ${_positionals_count} (the last one was: '${_last_positional}')." 1
  fi
}
# Store the positional values in their named _arg_* variables, in order.
# Arguments: $1     - how many leading arguments to drop before assigning
#            $2...  - the arguments; those remaining after the drop are assigned
# Globals (written): _positional_names, _arg_dir, _arg_graph_url,
#                    _arg_graph_database, _arg_user, _arg_password
assign_positional_args() {
  local skip_count=$1 target_var
  _positional_names="_arg_dir _arg_graph_url _arg_graph_database _arg_user _arg_password "
  shift "$skip_count"
  # Word-splitting of the name list is intentional: one variable name per word
  for target_var in ${_positional_names}; do
    # Stop once the supplied values run out; later variables are left untouched
    if [ $# -eq 0 ]; then
      break
    fi
    printf -v "$target_var" '%s' "$1" || die "Error during argument parsing, possibly an Argbash bug." 1
    shift
  done
}
# Parse the raw command line into _positionals and the _arg_* option variables
parse_commandline "$@"
# Stop with a usage message unless exactly 5 positional arguments were given
handle_passed_args_count
# Copy the collected positionals into their named _arg_* variables. The leading 1 is the
# shift count consumed by assign_positional_args, so assignment starts at the first positional.
assign_positional_args 1 "${_positionals[@]}"
# OTHER STUFF GENERATED BY Argbash
### END OF CODE GENERATED BY Argbash (sortof) ### ])
# [ <-- needed because of Argbash
# Reassign positional args to more readable named variables (https://argbash.readthedocs.io/en/latest/guide.html#using-parsing-results)
jsonld_dir="${_arg_dir}"
graph_url="${_arg_graph_url}"
graph_db="${_arg_graph_database}"
user="${_arg_user}"
password="${_arg_password}"
# The boolean options below hold either on or off (https://argbash.readthedocs.io/en/stable/guide.html#optional-arguments)
clear_data="${_arg_clear_data}"
use_graphdb_syntax="${_arg_use_graphdb_syntax}"
log_output="${_arg_log_output}"
log_file="${_arg_log_file}"
# SPARQL update whose WHERE clause matches the triple pattern ?s ?p ?o and deletes each match.
# main sends it to the clear-data endpoint when clear_data is "on".
DELETE_TRIPLES_QUERY="
DELETE {
?s ?p ?o .
} WHERE {
?s ?p ?o .
}"
# Depending on the graph backend used, set URLs for uploading data to and clearing data in graph database
base_url="http://${graph_url}/${graph_db}"
if [ "$use_graphdb_syntax" = "on" ]; then
upload_data_url="${base_url}/statements"
clear_data_url=$upload_data_url
else
upload_data_url=$base_url
clear_data_url="${base_url}/update"
fi
# Main logic
#######################################
# Optionally clear the graph database, then upload every .jsonld and .ttl
# file found directly inside the data directory.
# Globals (read):
#   clear_data            - "on" to delete the existing data first
#   clear_data_url        - endpoint that receives the SPARQL update
#   upload_data_url       - endpoint that receives the data files
#   jsonld_dir            - directory holding the .jsonld / .ttl files
#   graph_db              - database name (used in messages only)
#   user, password        - credentials handed to curl via -u
#   DELETE_TRIPLES_QUERY  - SPARQL update sent when clearing
# Outputs:
#   Progress messages and the raw HTTP responses (headers included) on
#   stdout; curl's own error messages go to stderr.
# Returns:
#   Exits the script with status 1 if clearing fails. Otherwise returns 0,
#   even when individual uploads failed (they are listed at the end).
# NOTE(review): the 0 status on failed uploads is kept for backward
#   compatibility; consider returning non-zero so automation can detect it.
# NOTE(review): the password is passed on curl's command line and is
#   therefore visible in the process list while curl runs.
#######################################
main() {
  local response
  # Filled by _upload_files (visible there through bash's dynamic scoping)
  local -a upload_failed=()

  # Clear existing data in graph database if requested
  if [ "$clear_data" = "on" ]; then
    printf '\nCLEARING EXISTING DATA FROM %s...\n' "$graph_db"
    response=$(_post_to_graph "$clear_data_url" "application/sparql-update" "${DELETE_TRIPLES_QUERY}")
    # The status code is the final line of the response (see _post_to_graph)
    if ! _is_http_success "$(tail -n1 <<< "$response")"; then
      printf '\nERROR: Failed to clear %s:\n' "$graph_db"
      printf '%s\n' "$(sed '$d' <<< "$response")"
      printf '\nEXITING...\n'
      exit 1
    fi
  fi

  # Add data to specified graph database
  printf '\nUPLOADING DATA FROM %s TO %s...\n\n' "$jsonld_dir" "$graph_db"
  # Only the directory part is quoted so the shell still expands the glob,
  # while directory names containing spaces or glob characters stay intact.
  _upload_files "application/ld+json" "${jsonld_dir}"/*.jsonld
  _upload_files "text/turtle" "${jsonld_dir}"/*.ttl

  printf 'FINISHED UPLOADING DATA FROM %s TO %s.\n' "$jsonld_dir" "$graph_db"
  if (( ${#upload_failed[@]} != 0 )); then
    printf '\nERROR: Upload failed for these files:\n'
    printf '%s\n' "${upload_failed[@]}"
  fi
}

# POST a payload to the graph store and print the response followed by the HTTP status code.
# Arguments: $1 - target URL
#            $2 - value for the Content-Type header
#            $3 - curl --data-binary argument (literal data, or @path to send a file)
# Outputs:   the response including headers (-i); -w appends the status code as the last line
_post_to_graph() {
  curl -u "${user}:${password}" -s -S -i -w "\n%{http_code}\n" \
    -X POST "$1" \
    -H "Content-Type: $2" \
    --data-binary "$3"
}

# Succeed only for a 2xx status code. Empty or non-numeric input (for example when
# curl produced no output at all) counts as failure instead of raising an arithmetic error.
# Arguments: $1 - HTTP status code
_is_http_success() {
  [[ "$1" =~ ^2[0-9][0-9]$ ]]
}

# Upload each given file with the given content type, printing the file name and the response.
# Arguments: $1    - Content-Type to send
#            $2... - file paths (typically the result of a glob)
# Globals:   upload_data_url (read); upload_failed (appended with every path whose upload failed)
_upload_files() {
  local content_type=$1
  shift
  local path response
  for path in "$@"; do
    # An unmatched glob is passed through as the literal pattern; skip it
    [ -e "$path" ] || continue
    printf '%s:\n' "${path##*/}"
    response=$(_post_to_graph "$upload_data_url" "$content_type" "@${path}")
    if ! _is_http_success "$(tail -n1 <<< "$response")"; then
      upload_failed+=("$path")
    fi
    # Print rest of response to stdout; printf '%s' keeps backslashes in the body literal
    printf '%s\n\n' "$(sed '$d' <<< "$response")"
  done
}
# Call the main logic function with or without output redirection
case "$log_output" in
  on)
    # Logging requested: main's stdout goes to the log file (stderr is not redirected)
    main > "$log_file"
    ;;
  *)
    main
    ;;
esac
# ] <-- needed because of Argbash