forked from WordPress/openverse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_sample_data.sh
executable file
·136 lines (119 loc) · 3.94 KB
/
load_sample_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#! /usr/bin/env bash
set -e
WEB_SERVICE_NAME="${WEB_SERVICE_NAME:-web}"
CACHE_SERVICE_NAME="${CACHE_SERVICE_NAME:-cache}"
UPSTREAM_DB_SERVICE_NAME="${UPSTREAM_DB_SERVICE_NAME:-upstream_db}"
DB_SERVICE_NAME="${DB_SERVICE_NAME:-db}"
while getopts 'c' OPTION; do
case "$OPTION" in
c)
echo "Loading upstream DB data..."
upstream_only=true
;;
?)
echo "Loading all sample data..."
;;
esac
done
###############
# Upstream DB #
###############
function exec_sql {
echo "$2" | just dc exec -T "$1" psql -AXqt
}
# Load sample data
function load_sample_data {
exec_sql "$UPSTREAM_DB_SERVICE_NAME" "
DELETE FROM $1;
\copy $1 \
from './sample_data/sample_$1.csv' \
with (FORMAT csv, HEADER true);
"
}
function verify_loaded_data {
COUNT=$(exec_sql "$UPSTREAM_DB_SERVICE_NAME" "SELECT COUNT(*) FROM $1;")
if [ "$COUNT" -ne 5000 ]; then
echo "Error: table $1 count differs from expected."
exit 1
fi
}
load_sample_data "image"
verify_loaded_data "image"
load_sample_data "audio"
verify_loaded_data "audio"
# Terminate the script if received flag for only upstream data
if [ "$upstream_only" = true ]; then
exit 0
fi
#######
# API #
#######
# Set up API database and upstream
just dc exec -T "$WEB_SERVICE_NAME" python3 manage.py migrate --noinput
# Create a superuser and a user for integration testing
echo "
from django.contrib.auth.models import User
usernames = ['continuous_integration', 'deploy']
for username in usernames:
if User.objects.filter(username=username).exists():
print(f'User {username} already exists')
continue
if username == 'deploy':
user = User.objects.create_superuser(username, f'{username}@example.com', 'deploy')
else:
user = User.objects.create_user(username, f'{username}@example.com', 'deploy')
user.save()
" | just dc exec -T "$WEB_SERVICE_NAME" python3 manage.py shell
# Load content providers
exec_sql "$DB_SERVICE_NAME" "
DELETE FROM content_provider;
INSERT INTO content_provider
(created_on, provider_identifier, provider_name, domain_name, filter_content, media_type)
VALUES
(now(), 'flickr', 'Flickr', 'https://www.flickr.com', false, 'image'),
(now(), 'stocksnap', 'StockSnap', 'https://stocksnap.io', false, 'image'),
(now(), 'freesound', 'Freesound', 'https://freesound.org/', false, 'audio'),
(now(), 'jamendo', 'Jamendo', 'https://www.jamendo.com', false, 'audio'),
(now(), 'wikimedia_audio', 'Wikimedia', 'https://commons.wikimedia.org', false, 'audio');
"
#############
# Ingestion #
#############
# Ingest and index the data
just ingestion_server/ingest-upstream "audio" "init"
just docker/es/wait-for-index "audio-init"
just docker/es/wait-for-count "audio-init"
just ingestion_server/promote "audio" "init" "audio"
just docker/es/wait-for-index "audio"
just docker/es/wait-for-count "audio"
just ingestion_server/create-and-populate-filtered-index "audio" "init"
just docker/es/wait-for-index "audio-init-filtered"
just ingestion_server/point-alias "audio" "init-filtered" "audio-filtered"
just docker/es/wait-for-index "audio-filtered" "audio-init-filtered"
# Image ingestion is flaky; but usually works on the next attempt
set +e
while true; do
just ingestion_server/ingest-upstream "image" "init"
if just docker/es/wait-for-index "image-init"; then
break
fi
((c++)) && ((c == 3)) && break
done
set -e
just docker/es/wait-for-count "image-init"
just ingestion_server/promote "image" "init" "image"
just docker/es/wait-for-index "image"
just docker/es/wait-for-count "image"
just ingestion_server/create-and-populate-filtered-index "image" "init"
just docker/es/wait-for-index "image-init-filtered"
just ingestion_server/point-alias "image" "init-filtered" "image-filtered"
just docker/es/wait-for-index "image-filtered" "image-init-filtered"
#########
# Redis #
#########
# Clear source cache since it's out of date after data has been loaded
function exec_redis {
echo "$1" | just dc exec -T "$CACHE_SERVICE_NAME" redis-cli
}
exec_redis "del :1:sources-image"
exec_redis "del :1:sources-audio"