Skip to content

Commit

Permalink
Move GEM2S as a step in pipeline (#56)
Browse files Browse the repository at this point in the history
* refactor wrapper

* Added gem2s functions to pipeline

* Adapted pipeline to new schema for gem2s

* renamed one task

* fixes for gem2s

* renamed qc-runner to pipeline-runner

* fixed tryCatch from rebase

* changed pipeline name to qc

* init upload-to-aws

* init meta_sets

* Added sample sets

* complete upload-to-aws

* add s3/dynamodb functions

* add buckets and tables

* writing input to meta.json file

* changed some pipeline references to gem2s/pipeline

* avoiding creating new containers for gem2s or qc if they already exist

* attaching both pipeline containers output (qc & gem2s) to pipeline process stdout

* just a lonely runaway letter

* added dependencies

* typo

* make docker build for me

* send dynamodb items to api

* reflect nan bugfix from data-ingest

* temporary fix for paws.common version 0.3.11

* change SNS message type

* hotfix

* fix

* fix

Co-authored-by: Oliver Gibson <[email protected]>
Co-authored-by: Pol Alvarez <[email protected]>
Co-authored-by: Anugerah Erlaut <[email protected]>
  • Loading branch information
4 people authored May 24, 2021
1 parent b48ae80 commit 4e08fb2
Show file tree
Hide file tree
Showing 40 changed files with 2,476 additions and 358 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci-develop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
project: ['qc-runner']
project: ['pipeline-runner']
outputs:
repo-name: ${{ steps.ref.outputs.repo-name }}
tag: ${{ steps.ref.outputs.tag }}
Expand Down Expand Up @@ -48,7 +48,7 @@ jobs:
BASE_REF="refs-heads-${{ github.event.pull_request.base.ref }}"
BASE_SHA=${{ github.event.pull_request.base.sha }}
# If the CI build time gets in your way while working on a PR,
# temporarily hardcode these values in was similar to this
# temporarily hardcode these values in a way similar to this
# BASE_REF="refs-pull-7-merge"
# BASE_SHA="ad9c5001ab4a3077790db4f04ac0b09cf5324683"
fi
Expand Down Expand Up @@ -174,9 +174,9 @@ jobs:
| yq w -d1 - metadata.labels.sandboxId "$SANDBOX_ID" \
| yq w -d1 - spec.chart.ref "$CHART_REF" \
| yq w -d1 - spec.releaseName "$DEPLOYMENT_NAME" \
| yq w -d1 - spec.values.qc-runner.image "$IMAGE_NAME-qc-runner" \
| yq w -d1 - spec.values.pipeline-runner.image "$IMAGE_NAME-pipeline-runner" \
| yq w -d1 - spec.values.serviceAccount.iamRole "arn:aws:iam::${{ steps.setup-aws.outputs.aws-account-id }}:role/pipeline-role-staging" \
| yq w -d1 - "metadata.annotations[filter.fluxcd.io/qc-runner]" "glob:${IMAGE_TAG/$GITHUB_SHA/*}-qc-runner" \
| yq w -d1 - "metadata.annotations[filter.fluxcd.io/pipeline-runner]" "glob:${IMAGE_TAG/$GITHUB_SHA/*}-pipeline-runner" \
> $DEPLOYMENT_NAME.yaml
cat $DEPLOYMENT_NAME.yaml
env:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
project: ['qc-runner']
project: ['pipeline-runner']
outputs:
repo-name: ${{ steps.ref.outputs.repo-name }}
tag: ${{ steps.ref.outputs.tag }}
Expand Down Expand Up @@ -184,9 +184,9 @@ jobs:
| yq w -d1 - metadata.labels.sandboxId "$SANDBOX_ID" \
| yq w -d1 - spec.chart.ref "$CHART_REF" \
| yq w -d1 - spec.releaseName "$DEPLOYMENT_NAME" \
| yq w -d1 - spec.values.qc-runner.image "$IMAGE_NAME-qc-runner" \
| yq w -d1 - spec.values.pipeline-runner.image "$IMAGE_NAME-pipeline-runner" \
| yq w -d1 - spec.values.serviceAccount.iamRole "arn:aws:iam::${{ steps.setup-aws.outputs.aws-account-id }}:role/pipeline-role-${{ matrix.environment }}" \
| yq w -d1 - "metadata.annotations[filter.fluxcd.io/qc-runner]" "glob:${IMAGE_TAG/$GITHUB_SHA/*}-qc-runner" \
| yq w -d1 - "metadata.annotations[filter.fluxcd.io/pipeline-runner]" "glob:${IMAGE_TAG/$GITHUB_SHA/*}-pipeline-runner" \
> $DEPLOYMENT_NAME.yaml
cat $DEPLOYMENT_NAME.yaml
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ make build

## Debugging locally
To save the parameters (`config`, `seurat_obj`, etc) to a task function, specify DEBUG_STEP and DEBUG_PATH.
Available tasks include all those defined in `run_step` of [init.r](qc-runner/src/init.r) as well as `DEBUG_STEP=all`
Available tasks include all those defined in `run_step` of [init.r](pipeline-runner/src/init.r) as well as `DEBUG_STEP=all`
to save the parameters to all task functions:

```bash
Expand Down
2 changes: 1 addition & 1 deletion chart-infra/templates/instance-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ metadata:
labels:
sandboxId: "{{ .Values.sandboxId }}"
data:
qc-runner: "{{ index .Values "qc-runner" "image" }}"
pipeline-runner: "{{ index .Values "pipeline-runner" "image" }}"
4 changes: 2 additions & 2 deletions local-runner/cf-local-container-launcher.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ Resources:
ZipFile: |
import subprocess
# Removes all containers that already exist and contain 'pipeline' in their name.
# Removes all containers that already exist and contain either 'qc' or 'gem2s' in their name.
def handler(event, context):
proc = subprocess.run("docker kill $(docker ps -f name=pipeline --format '{{.Names}}') || true", shell=True, check=True)
proc = subprocess.run("docker kill $(docker ps -f name='qc|gem2s' --format '{{.Names}}') || true", shell=True, check=True)
return proc.returncode
Runtime: "python3.8"
Expand Down
2 changes: 1 addition & 1 deletion local-runner/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"test:debug": "node --inspect node_modules/.bin/jest --runInBand --silent",
"coverage": "jest --coverage --silent",
"dev": "nodemon src/app.js",
"build": "docker build -t biomage-pipeline-runner ../qc-runner",
"build": "docker build -t biomage-pipeline-runner ../pipeline-runner",
"start": "node src/app.js",
"restart": "npm run build && npm start",
"lint": "eslint ./src",
Expand Down
10 changes: 6 additions & 4 deletions local-runner/src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ AWS.config.update({
s3ForcePathStyle: true,
});

/**
 * Tells whether a container name belongs to one of the pipeline containers.
 *
 * @param {string} name - Container name as reported by Docker.
 * @returns {boolean} true when the name contains 'qc' or 'gem2s'.
 */
const isPipelineContainer = (name) => ['qc', 'gem2s'].some((marker) => name.includes(marker));

const setVarsInTemplate = (template) => {
const varNames = ['DEBUG_STEP', 'DEBUG_PATH', 'HOST_IP'];
for (let ii = 0; ii < varNames.length; ii += 1) {
Expand All @@ -35,7 +37,7 @@ const initStack = async () => {

const stackName = {
StackName: 'local-container-launcher',
};
};
try {
await cf.deleteStack(stackName).promise();
await cf.waitFor('stackDeleteComplete', stackName).promise();
Expand Down Expand Up @@ -68,7 +70,7 @@ const attachToExistingContainers = (docker, nameColorMap) => {
docker.listContainers((err, containers) => {
containers.forEach((info) => {
const { Names: names, Id: id } = info;
const name = names.filter((n) => n.includes('pipeline'))[0];
const name = names.filter((n) => isPipelineContainer(n))[0];

if (!name) {
return;
Expand All @@ -91,7 +93,7 @@ const attachToNewContainers = (docker, emitter, nameColorMap) => {
emitter.on('start', (message) => {
const { id, Actor: { Attributes: { name } } } = message;

if (!name.includes('pipeline')) {
if (!isPipelineContainer(name)) {
return;
}

Expand All @@ -109,7 +111,7 @@ const attachToNewContainers = (docker, emitter, nameColorMap) => {
const stopDieCallback = (message) => {
const { Actor: { Attributes: { name } } } = message;

if (!name.includes('pipeline')) {
if (isPipelineContainer(name)) {
return;
}

Expand Down
File renamed without changes.
File renamed without changes.
12 changes: 12 additions & 0 deletions qc-runner/Dockerfile → pipeline-runner/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,22 @@ RUN R -e 'pak::pkg_install("batchelor")'

# install handy development stuff
RUN R -e 'pak::pkg_install("zeallot")'
# next version pinning can be removed once release 0.3.11 is included by default in paws +0.1.11
RUN R -e 'devtools::install_version("paws.common", version = "0.3.11")'
RUN R -e 'pak::pkg_install("paws")'
RUN R -e 'pak::pkg_install("import")'
RUN R -e 'pak::pkg_install("ids")'

#Install stuff for data-ingest
RUN R -e 'chooseCRANmirror(ind=52); install.packages("BiocManager")'
RUN R -e 'BiocManager::install("scran")'
RUN R -e 'BiocManager::install("scater")'
RUN R -e 'BiocManager::install("bluster")'
RUN R -e 'BiocManager::install("mbkmeans")'
RUN R -e 'BiocManager::install("DropletUtils")'
RUN R -e 'BiocManager::install("BiocGenerics")'
RUN R -e 'BiocManager::install("plger/scDblFinder")'

# default BLAS/LAPACK issue: Seurat::FindIntegrationAnchors finds ~30 vs ~13,000 anchors
# eventually move this up with other system installs
RUN apt-get update && apt-get -y install libatlas3-base
Expand Down
2 changes: 1 addition & 1 deletion qc-runner/README.md → pipeline-runner/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
qc-runner
pipeline-runner
=========

Docker container for executing dependency-managed tasks.
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
63 changes: 63 additions & 0 deletions pipeline-runner/src/data-ingest/0-download_gem2s.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
require("RJSONIO")
require("paws")
require("zeallot")
require("ids")

# Download the 10x input files of every sample of a project from S3 into
# /input/<sample name>/ and write /input/meta.json describing the experiment.
#
# Arguments:
#   input            list read with `$`: projectId, sampleIds, sampleNames,
#                    experimentName and organism. sampleNames[[i]] is used as
#                    the local directory name for sampleIds[[i]].
#   pipeline_config  list read with `$`: aws_config (passed to paws::s3) and
#                    originals_bucket (bucket the files are fetched from).
#
# Side effects: removes and recreates /input, creates /output, writes the
# downloaded files and /input/meta.json, and logs progress with message().
# Returns the (invisible NULL) result of the final message() call.
task <- function(input, pipeline_config) {
  project_id <- input$projectId
  sample_names <- input$sampleNames
  sample_uuids <- input$sampleIds

  # Every sample id needs a name to build its local directory; fail with a
  # clear message instead of a "subscript out of bounds" deep inside the loop.
  if (length(sample_names) < length(sample_uuids)) {
    stop(
      "input$sampleNames has fewer entries than input$sampleIds",
      call. = FALSE
    )
  }

  message("download2")
  s3 <- paws::s3(config = pipeline_config$aws_config)
  message(pipeline_config$originals_bucket)

  fnames <- c("features.tsv.gz", "barcodes.tsv.gz", "matrix.mtx.gz")

  # Start from a clean /input. The directories are created once, up front, so
  # that (a) dir.create() does not warn on every file of every sample and
  # (b) /input exists for meta.json even when there are no samples.
  unlink("/input", recursive = TRUE)
  dir.create("/input", showWarnings = FALSE, recursive = TRUE)
  dir.create("/output", showWarnings = FALSE, recursive = TRUE)

  for (idx in seq_along(sample_uuids)) {
    sample <- sample_uuids[[idx]]
    sample_name <- sample_names[[idx]]

    local_dir <- file.path("/input", sample_name)
    dir.create(local_dir, showWarnings = FALSE, recursive = TRUE)

    for (fname in fnames) {
      # Objects are stored under <project id>/<sample id>/<file name>.
      gem_key <- file.path(project_id, sample, fname)
      message(gem_key)
      local_fpath <- file.path(local_dir, fname)

      message("bucket")
      message(pipeline_config$originals_bucket)
      message("file")
      message(gem_key)

      # Read the object and take its payload by name rather than by position.
      response <- s3$get_object(
        Bucket = pipeline_config$originals_bucket,
        Key = gem_key
      )

      writeBin(response[["Body"]], con = local_fpath)
    }
  }

  # meta.json is built from `input` here; it is not downloaded from the bucket.
  config <- list(
    name = input$experimentName,
    samples = input$sampleNames,
    organism = input$organism,
    input = list(type = "10x")
  )

  exportJSON <- RJSONIO::toJSON(config)
  message("Created json config")
  message(exportJSON)
  write(exportJSON, "/input/meta.json")
  message("Written config json")
}
Loading

0 comments on commit 4e08fb2

Please sign in to comment.