Skip to content

Commit

Permalink
add_data.sh: New fetch_csv_xz function
Browse files Browse the repository at this point in the history
to download from xz-compressed repos for speed and cost-saving (no LFS)

See OpenDRR#91
  • Loading branch information
anthonyfok committed May 21, 2021
1 parent 427aaa3 commit fe6f38e
Showing 1 changed file with 43 additions and 5 deletions.
48 changes: 43 additions & 5 deletions python/add_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,13 @@ run_psql() {
psql -h "$POSTGRES_HOST" -U "$POSTGRES_USER" -d "$DB_NAME" -a -f "$input_file"
}

# fetch_csv downloads CSV data files from OpenDRR repos

# fetch_csv_lfs downloads CSV data files from OpenDRR repos
# with help from GitHub API with support for LFS files.
# See https://docs.github.com/en/rest/reference/repos#get-repository-content
fetch_csv() {
fetch_csv_lfs() {
if [ "$#" -ne 2 ]; then
echo "Error: fetch_csv() requires exactly two arguments, but $# was given."
echo "Error: ${FUNCNAME[0]} requires exactly two arguments, but $# was given."
exit 1
fi
local owner="OpenDRR"
Expand All @@ -92,8 +93,45 @@ fetch_csv() {
echo download_url=$download_url
echo size=$size

curl -o "$output_file" -L "$download_url" \
--retry 999 --retry-max-time 0
echo "${FUNCNAME[0]}: Download from $download_url"
curl -o "$output_file" -L "$download_url" --retry 999 --retry-max-time 0
}

# fetch_csv_xz downloads a CSV data file from an OpenDRR xz-compressed repo
# (the "<repo>-xz" counterpart of <repo>) and decompresses it into the current
# directory.
#
# Arguments:
#   $1 - repository name, without the "-xz" suffix
#   $2 - path of the CSV file inside the repository; anything from the first
#        "?" onwards is dropped from the output file name
# Globals:
#   GITHUB_TOKEN (read) - sent as the GitHub API authorization token
# Outputs:
#   github-api/<dir>.dir.json - directory listing returned by the GitHub API
#   <basename of $2>          - the decompressed file, in the current directory
# Returns:
#   Non-zero if the listing cannot be fetched or parsed, if no matching ".xz"
#   entry is listed, or if the download or decompression fails.
#   Exits 1 when not called with exactly two arguments.
fetch_csv_xz() {
  if [ "$#" -ne 2 ]; then
    echo "ERROR: ${FUNCNAME[0]} requires exactly two arguments, but $# was given." >&2
    exit 1
  fi
  local owner="OpenDRR"
  local repo="$1"
  local path="$2"
  local output_file path_dir response download_url

  # Declaration and assignment are kept separate so a failing command
  # substitution is not masked by the exit status of "local".
  output_file=$(basename -- "$path") || return
  output_file=${output_file%%\?*}   # strip any "?query" suffix
  path_dir=$(dirname -- "$path") || return
  echo "$path_dir"

  # Fetch directory listing
  mkdir -p -- "github-api/$path_dir" || return
  response="github-api/$path_dir.dir.json"
  # --fail: an HTTP error (e.g. the -xz repo does not exist) becomes a
  # non-zero exit instead of an error document saved as the listing.
  curl -s --fail -o "$response" \
    --retry 999 --retry-max-time 0 \
    -H "Authorization: token ${GITHUB_TOKEN}" \
    -H "Accept: application/vnd.github.v3+json" \
    -L "https://api.github.com/repos/$owner/$repo-xz/contents/$path_dir" \
    || return

  # Pass the file name with --arg rather than splicing it into the jq program,
  # so quotes or backslashes in the name cannot alter the filter.
  if ! download_url=$(jq -r --arg name "$output_file.xz" \
    '.[] | select(.name == $name) | .download_url' "$response"); then
    echo "${FUNCNAME[0]}: cannot parse directory listing $response" >&2
    return 1
  fi
  # No entry (or an entry without a download URL): report and let the caller
  # fall back, instead of running curl with an empty URL.
  if [[ -z "$download_url" || "$download_url" == "null" ]]; then
    echo "${FUNCNAME[0]}: $output_file.xz not found in $owner/$repo-xz/$path_dir" >&2
    return 1
  fi

  echo "${FUNCNAME[0]}: Download from $download_url"
  curl --fail -o "$output_file.xz" -L "$download_url" \
    --retry 999 --retry-max-time 0 \
    || return

  # TODO: Keep the compressed file somewhere, uncompress when needed
  unxz -- "$output_file.xz"
}

# fetch_csv fetches a CSV file, trying fetch_csv_xz first and falling back to
# fetch_csv_lfs only when the xz attempt returns non-zero. Each attempt is
# timed. All arguments are forwarded unchanged; the exit status is that of the
# last attempt that ran.
fetch_csv() {
  # TODO: Make it more intelligent.
  if time fetch_csv_xz "$@"; then
    return 0
  fi
  time fetch_csv_lfs "$@"
}


Expand Down

0 comments on commit fe6f38e

Please sign in to comment.