Skip to content

Commit

Permalink
feat(IPVC-2440/IPVC-2442): specify input and output schema names, move one time workflows (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
bsgiles73 authored May 15, 2024
1 parent c0d5a37 commit 9e2a927
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 24 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ mkdir -p $(pwd)/output/logs
Set variables:
```
export UTA_ETL_OLD_SEQREPO_VERSION=2024-02-20
export UTA_ETL_OLD_UTA_IMAGE_TAG=uta_20210129b
export UTA_ETL_OLD_UTA_VERSION=uta_20210129b
export UTA_ETL_NCBI_DIR=./ncbi-data
export UTA_ETL_SEQREPO_DIR=./seqrepo-data
Expand Down Expand Up @@ -347,14 +348,14 @@ See 2A for nuclear transcripts and 2B for mitochondrial transcripts.
docker compose run ncbi-download
docker compose run uta-extract
docker compose run seqrepo-load
docker compose run uta-load
UTA_ETL_NEW_UTA_VERSION=uta_20240512 docker compose run uta-load
```

#### 2B. Mitochondrial transcripts
```
docker compose run mito-extract
docker compose -f docker-compose.yml -f misc/mito-transcripts/docker-compose-mito-extract.yml run mito-extract
docker compose run seqrepo-load
docker compose run uta-load
UTA_ETL_NEW_UTA_VERSION=uta_20240512 docker compose run uta-load
```

#### 2C. Manual splign transcripts
Expand Down
16 changes: 4 additions & 12 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,17 @@ services:
network_mode: host
uta:
container_name: uta
image: biocommons/uta:${UTA_ETL_OLD_UTA_VERSION}
image: biocommons/uta:${UTA_ETL_OLD_UTA_IMAGE_TAG}
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
healthcheck:
test: psql -h localhost -U anonymous -d uta -c "select * from ${UTA_ETL_OLD_UTA_VERSION}.meta"
test: psql -h localhost -U anonymous -d uta -c "select * from ${UTA_ETL_OLD_UTA_IMAGE_TAG}.meta"
interval: 10s
retries: 60
retries: 80
network_mode: host
uta-load:
image: uta-update
command: sbin/uta-load ${UTA_ETL_OLD_UTA_VERSION} /ncbi-dir /uta-load/work /uta-load/logs
command: sbin/uta-load ${UTA_ETL_OLD_UTA_VERSION} ${UTA_ETL_NEW_UTA_VERSION} /ncbi-dir /uta-load/work /uta-load/logs
depends_on:
uta:
condition: service_healthy
Expand All @@ -50,14 +50,6 @@ services:
- ${UTA_ETL_WORK_DIR}:/uta-load/work
- ${UTA_ETL_LOG_DIR}:/uta-load/logs
network_mode: host
mito-extract:
image: uta-update
command: sbin/ncbi_process_mito.py NC_012920.1 --output-dir /mito-extract/work | tee /mito-extract/logs/mito.log
volumes:
- ${UTA_ETL_WORK_DIR}:/mito-extract/work
- ${UTA_ETL_LOG_DIR}:/mito-extract/logs
working_dir: /opt/repos/uta
network_mode: host
splign-manual:
image: uta-update
command: sbin/uta-splign-manual ${UTA_ETL_OLD_SEQREPO_VERSION} ${UTA_ETL_OLD_UTA_VERSION} /uta-splign-manual/input /uta-splign-manual/work /uta-splign-manual/logs
Expand Down
17 changes: 17 additions & 0 deletions misc/gene-update/docker-compose-gene-update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# docker compose file for the NCBI gene_id update and backfill procedure

version: '3'

services:
uta-gene-update:
image: uta-update
command: misc/gene-update/upgrade-uta-schema.sh ${UTA_ETL_NEW_UTA_VERSION}
depends_on:
uta:
condition: service_healthy
volumes:
- ${UTA_ETL_NCBI_DIR}:/ncbi-dir
- ${UTA_ETL_WORK_DIR}:/uta-gene-update/work
- ${UTA_ETL_LOG_DIR}:/uta-gene-update/logs
working_dir: /opt/repos/uta
network_mode: host
10 changes: 6 additions & 4 deletions misc/gene-update/upgrade-uta-schema.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@ set -euxo pipefail
source_uta_v="uta_20210129b"
working_uta_v="uta"
dest_uta_v=$1
dumps_dir="/workdir/dumps"
mkdir -p $dumps_dir
tmp_dumps_dir="/tmp/dumps"
mkdir -p $tmp_dumps_dir

## setup working uta schema
# delete schema if exists
psql -h localhost -U uta_admin -d uta -c "DROP SCHEMA IF EXISTS $working_uta_v CASCADE;"

# dump source version
pg_dump -U uta_admin -h localhost -d uta -n "$source_uta_v" | \
gzip -c > $dumps_dir/"$source_uta_v".pgd.gz
gzip -c > $tmp_dumps_dir/"$source_uta_v".pgd.gz

# create new schema
gzip -cdq $dumps_dir/"$source_uta_v".pgd.gz | \
gzip -cdq $tmp_dumps_dir/"$source_uta_v".pgd.gz | \
sbin/pg-dump-schema-rename "$source_uta_v" "$working_uta_v" | \
sbin/pg-dump-schema-rename "uta_1_1" "$working_uta_v" | \
psql -U uta_admin -h localhost -d uta -aeE
Expand All @@ -53,3 +53,5 @@ alembic -c etc/alembic.ini upgrade head
## Rename schema to destination schema name
psql -h localhost -U uta_admin -d uta -c "DROP SCHEMA IF EXISTS $dest_uta_v CASCADE;"
psql -h localhost -U uta_admin -d uta -c "ALTER SCHEMA uta RENAME TO $dest_uta_v";
pg_dump -h localhost -U uta_admin -d uta -n "$dest_uta_v" | \
gzip -c > "/uta-gene-update/work/$dest_uta_v.pgd.gz"
13 changes: 13 additions & 0 deletions misc/mito-transcripts/docker-compose-mito-extract.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# docker compose file for the mito transcript extraction for the UTA update procedure

version: '3'

services:
mito-extract:
image: uta-update
command: sbin/ncbi_process_mito.py NC_012920.1 --output-dir /mito-extract/work | tee /mito-extract/logs/mito.log
volumes:
- ${UTA_ETL_WORK_DIR}:/mito-extract/work
- ${UTA_ETL_LOG_DIR}:/mito-extract/logs
working_dir: /opt/repos/uta
network_mode: host
17 changes: 12 additions & 5 deletions sbin/uta-load
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
set -euxo pipefail

source_uta_v=$1
ncbi_dir=$2
working_dir=$3
log_dir=$4
dest_uta_v=$2
ncbi_dir=$3
working_dir=$4
log_dir=$5

if [ -z "$source_uta_v" ] || [ -z "$ncbi_dir" ] || [ -z "$working_dir" ] || [ -z "$log_dir" ]
if [ -z "$source_uta_v" ] || [ -z "$dest_uta_v" ] || [ -z "$ncbi_dir" ] || [ -z "$working_dir" ] || [ -z "$log_dir" ]
then
echo 'Usage: uta-load <source_uta_v> <ncbi_dir> <working_dir> <log_dir>'
echo 'Usage: uta-load <source_uta_v> <dest_uta_v> <ncbi_dir> <working_dir> <log_dir>'
exit 1
fi

Expand Down Expand Up @@ -66,3 +67,9 @@ uta --conf=etc/global.conf --conf=etc/uta_dev@localhost.conf align-exons 2>&1 |

### run diff
sbin/uta-diff "$source_uta_v" "$loading_uta_v"

## Rename schema to destination schema name and export to dump file
psql -h localhost -U uta_admin -d uta -c "DROP SCHEMA IF EXISTS $dest_uta_v CASCADE;"
psql -h localhost -U uta_admin -d uta -c "ALTER SCHEMA uta RENAME TO $dest_uta_v";
pg_dump -h localhost -U uta_admin -d uta -n "$dest_uta_v" | \
gzip -c > "$working_dir/$dest_uta_v.pgd.gz"

0 comments on commit 9e2a927

Please sign in to comment.