diff --git a/Dockerfile b/Dockerfile index 085692b..e58df99 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,8 +19,6 @@ COPY pyproject.toml ./ COPY etc ./etc COPY sbin ./sbin COPY src ./src -COPY loading/data/splign-manual ./splign-manual -COPY loading ./loading RUN pip install -e .[dev] diff --git a/README.md b/README.md index 2ade8c5..a05be31 100644 --- a/README.md +++ b/README.md @@ -357,6 +357,19 @@ docker compose run seqrepo-load UTA_ETL_SKIP_GENE_LOAD=true docker compose run uta-load ``` +#### 2C. Manual splign transcripts +To load splign-manual transcripts, the workflow expects an input txdata.yaml file and splign alignments. Set the directory that contains them +using the environment variable $UTA_SPLIGN_MANUAL_DIR, so that the inputs are found at: +- `$UTA_SPLIGN_MANUAL_DIR/txdata.yaml` +- `$UTA_SPLIGN_MANUAL_DIR/alignments/*.splign` + +[txdata.yaml](loading/data/splign-manual/txdata.yaml) defines the transcripts and their metadata. The [alignments dir](loading/data/splign-manual/alignments) contains the splign alignments. +To run the workflow: +``` +export UTA_SPLIGN_MANUAL_DIR=$(pwd)/loading/data/splign-manual/ +docker compose run splign-manual +``` + UTA has updated and the database has been dumped into a pgd file in `UTA_ETL_WORK_DIR`. SeqRepo has been updated in place. 
diff --git a/docker-compose.yml b/docker-compose.yml index 671227b..295f961 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -62,13 +62,14 @@ services: network_mode: host splign-manual: image: uta-update - command: sbin/uta-splign-manual ${UTA_ETL_OLD_UTA_VERSION} /uta-splign-manual/work /uta-splign-manual/logs + command: sbin/uta-splign-manual ${UTA_ETL_OLD_UTA_VERSION} /uta-splign-manual/input /uta-splign-manual/work /uta-splign-manual/logs depends_on: uta: condition: service_healthy volumes: - ${UTA_ETL_NCBI_DIR}:/ncbi-dir - ${UTA_ETL_SEQREPO_DIR}:/usr/local/share/seqrepo + - ${UTA_SPLIGN_MANUAL_DIR}:/uta-splign-manual/input - ${UTA_ETL_WORK_DIR}:/uta-splign-manual/work - ${UTA_ETL_LOG_DIR}:/uta-splign-manual/logs network_mode: host diff --git a/loading/data/splign-manual/README.md b/loading/data/splign-manual/README.md index 3a4d62b..f364281 100644 --- a/loading/data/splign-manual/README.md +++ b/loading/data/splign-manual/README.md @@ -50,7 +50,7 @@ For a given RefSeq transcript (e.g., NM_000996.3), do the following: - Click on the gene id to go to the gene page (e.g., `6165`) - N.B. Strand is inferred from the orientation of aligned exons. -1. Enter the gene and CDS info in txdata.yaml +1. Enter the gene, geneID, and CDS info in txdata.yaml 1. 
Get the chromosome and coordinates from the gene page - From the "Genomic Context" section, note the chromosomal diff --git a/loading/data/splign-manual/generate-loading-data b/sbin/generate-loading-data similarity index 100% rename from loading/data/splign-manual/generate-loading-data rename to sbin/generate-loading-data diff --git a/sbin/uta-splign-manual b/sbin/uta-splign-manual index 06a0062..4357b3f 100755 --- a/sbin/uta-splign-manual +++ b/sbin/uta-splign-manual @@ -5,12 +5,13 @@ set -euxo pipefail source_uta_v=$1 -working_dir=$2 -log_dir=$3 +input_dir=$2 +working_dir=$3 +log_dir=$4 -if [ -z "$working_dir" ] || [ -z "$log_dir" ] +if [ -z "$input_dir" ] || [ -z "$working_dir" ] || [ -z "$log_dir" ] then - echo 'Usage: sbin/uta-splign-manual ' + echo 'Usage: sbin/uta-splign-manual ' exit 1 fi @@ -22,7 +23,7 @@ mkdir -p "$log_dir" mkdir -p "$working_dir" # Generate txinfo.gz and exonset.gz files -python loading/data/splign-manual/generate-loading-data loading/data/splign-manual/alignments/*.splign 2>&1 --txdata loading/data/splign-manual/txdata.yaml --output-dir $working_dir | tee "$log_dir/generate-loading-data.log" +python sbin/generate-loading-data $input_dir/alignments/*.splign 2>&1 --txdata $input_dir/txdata.yaml --output-dir $working_dir | tee "$log_dir/generate-loading-data.log" # Generate fasta files seqrepo export $(gzip -cdq $working_dir/txinfo.gz | cut -f2 | tail +2) | gzip -c > $working_dir/seqs.fa.gz | tee "$log_dir/seqrepo-export.log"