Skip to content

Commit

Permalink
Upgrade to v1.0.0 (#64)
Browse files Browse the repository at this point in the history
* Remove incomplete comments

* Fix dirname errors when getting the script directory

Also, it’d be nice to load this function from a common file, but to locate that file robustly we’d need the scripts directory in the first place :)

* Reword comments

* Fix typos

* Rewrite documentation

* Replace fixture-generating scripts, starting now from cell groups

* Apply suggestions from PR

* Guarantee that at least one cell of Not available cell type is included

* Require argument of the selected K in each experiment, as some tests are precisely about this feature

* Update automatic tests service from Travis to GH Actions

* Address clarity issues in README after PR review

* Address clarity issues in fixtures README after PR review

* few README edits

* Fix typo in script

* Fix typo in comment

Co-authored-by: Pedro Madrigal <[email protected]>
  • Loading branch information
alfonsomunozpomer and pmb59 authored Dec 20, 2022
1 parent 8c55e9d commit 3d64dc2
Show file tree
Hide file tree
Showing 33 changed files with 507 additions and 291 deletions.
188 changes: 82 additions & 106 deletions README.md

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions bin/add_exps_to_collection.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#!/usr/bin/env bash

# This script receives a collection identifier, name and description
# and stores it in the
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
4 changes: 1 addition & 3 deletions bin/create_collection.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#!/usr/bin/env bash

# This script receives a collection identifier, name and description
# and stores it in the
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
4 changes: 1 addition & 3 deletions bin/delete_collection.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#!/usr/bin/env bash

# This script receives a collection identifier, name and description
# and stores it in the
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
4 changes: 1 addition & 3 deletions bin/delete_exp_from_collection.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#!/usr/bin/env bash

# This script receives a collection identifier, name and description
# and stores it in the
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
2 changes: 1 addition & 1 deletion bin/get_experiment_info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

postgres_scripts_dir=$scriptDir/../postgres_routines
Expand Down
2 changes: 1 addition & 1 deletion bin/load_db_scxa_analytics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# - Postprocess table and attach it to the main scxa-analytics table.
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

postgres_scripts_dir=$scriptDir/../postgres_routines
Expand Down
2 changes: 1 addition & 1 deletion bin/load_db_scxa_analytics_pg9.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# PG10, which loads each experiment into a different partition.
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
4 changes: 1 addition & 3 deletions bin/load_db_scxa_cell_clusters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
# scxa_cell_group_membership table of AtlasProd.
set -e

# TODO this type of function should be loaded from a common set of scripts.

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
7 changes: 2 additions & 5 deletions bin/load_db_scxa_dimred.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
# parameterisations, and loads it into the scxa_coords table of AtlasProd.
set -e

# TODO this type of function should be loaded from a common set of scripts.

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand All @@ -33,7 +31,6 @@ fi
checkDatabaseConnection $dbConnection

# Write to the new generic coordinates table

echo "Dimension reductions: Loading data for $EXP_ID (new layout)..."
rm -f $SCRATCH_DIR/dimredDataToLoad.csv

Expand All @@ -56,7 +53,7 @@ s=$?

rm $SCRATCH_DIR/dimredDataToLoad.csv

# Roll back if unsucessful
# Roll back if unsuccessful

if [ $s -ne 0 ]; then
echo "DELETE FROM scxa_dimension_reduction WHERE experiment_accession = '"$EXP_ID"' and method = '$DIMRED_TYPE' and parameterisation = '$DIMRED_PARAM_JSON'" | \
Expand Down
4 changes: 1 addition & 3 deletions bin/load_db_scxa_marker_genes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
# scxa_cell_groups_marker_genes table of AtlasProd.
set -e

# TODO this type of function should be loaded from a common set of scripts.

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
2 changes: 1 addition & 1 deletion bin/load_experiment_web_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

jar_dir=$CONDA_PREFIX/share/atlas-cli

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/common_routines.sh

echo "CONDA_PREFIX: $CONDA_PREFIX"
Expand Down
4 changes: 1 addition & 3 deletions bin/modify_collection.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#!/usr/bin/env bash

# This script receives a collection identifier, name and description
# and stores it in the
set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/db_scxa_common.sh

dbConnection=${dbConnection:-$1}
Expand Down
2 changes: 1 addition & 1 deletion bin/update_experiment_web_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

jar_dir=$CONDA_PREFIX/share/atlas-cli

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
source $scriptDir/common_routines.sh

echo "CONDA_PREFIX: $CONDA_PREFIX"
Expand Down
56 changes: 56 additions & 0 deletions fixtures/01-scxa-cell-group.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env bash

# Dumps a sample of scxa_cell_group rows of one experiment to stdout, as
# tab-delimited text (PostgreSQL COPY format).
#
# The rows kept are those of the given experiment whose `variable` is any of:
#   - the selected k passed as second argument;
#   - up to two randomly picked variables starting with 1 or 2 that have at
#     least one marker gene with marker probability below 0.05;
#   - up to one randomly picked variable matching 'inferred cell type' that has
#     at least one marker gene with marker probability below 0.05.
#
# Arguments:
#   $1  Experiment accession (required)
#   $2  Selected k
#
# Environment:
#   POSTGRES_USER, POSTGRES_DB, POSTGRES_HOST  psql connection settings
#
# NOTE: both arguments are interpolated verbatim into the SQL text below, so
# only pass trusted values.

# Without an accession the query can only ever match an empty accession, so
# stop with a usage message instead of silently producing an empty fixture.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s EXPERIMENT_ACCESSION SELECTED_K\n' "${0##*/}" >&2
  exit 2
fi

# An empty connection setting would otherwise make psql fail in a confusing
# way; abort early and name the missing variable.
: "${POSTGRES_USER:?must be set and non-empty}"
: "${POSTGRES_DB:?must be set and non-empty}"
: "${POSTGRES_HOST:?must be set and non-empty}"

psql -q -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -h "${POSTGRES_HOST}" << EOF
COPY (
  SELECT
    *
  FROM
    scxa_cell_group scg
  WHERE
    scg.experiment_accession = '${1}'
    AND
    scg.variable IN (
      (SELECT variable FROM scxa_cell_group WHERE variable='${2}')
      UNION
      (SELECT * FROM (
        SELECT
          DISTINCT cg.variable
        FROM
          scxa_cell_group cg
          INNER JOIN
            scxa_cell_group_marker_genes cgmg
          ON
            cg.id = cgmg.cell_group_id
        WHERE
          cg.experiment_accession='${1}'
          AND
          cg.variable ~ '^[12]'
          AND
          cgmg.marker_probability < 0.05
      ) low_clustering_with_marker_genes
      ORDER BY random()
      LIMIT 2)
      UNION
      (SELECT * FROM (
        SELECT
          DISTINCT cg.variable
        FROM
          scxa_cell_group cg
          INNER JOIN
            scxa_cell_group_marker_genes cgmg
          ON
            cg.id = cgmg.cell_group_id
        WHERE
          cg.experiment_accession='${1}'
          AND
          cg.variable ~ 'inferred cell type'
          AND
          cgmg.marker_probability < 0.05
      ) cell_types_with_marker_genes
      ORDER BY random()
      LIMIT 1))
) TO STDOUT DELIMITER E'\t';
EOF
18 changes: 0 additions & 18 deletions fixtures/01-scxa_analytics.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

# Dumps a random sample of marker genes, joined with their statistics, for a
# list of cell groups. Output goes to stdout as tab-delimited text (PostgreSQL
# COPY format), ordered by marker gene ID.
#
# A helper PL/pgSQL function is created, used and dropped again in the same
# psql session. For every cell group ID it returns up to the requested number
# of randomly picked rows of scxa_cell_group_marker_genes with marker
# probability below 0.05, together with the matching rows of
# scxa_cell_group_marker_gene_stats.
#
# Arguments:
#   $1  List of cell group IDs separated with commas (required)
#   $2  Number of marker genes per group (defaults to 5)
#
# Environment:
#   POSTGRES_USER, POSTGRES_DB, POSTGRES_HOST  psql connection settings
#
# NOTE: the arguments are interpolated verbatim into the SQL text below, so
# only pass trusted values.

# An empty list would expand to ARRAY[], which PostgreSQL rejects because it
# cannot determine the element type; fail with a usage message instead.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s CELL_GROUP_ID[,CELL_GROUP_ID...] [MARKER_GENES_PER_GROUP]\n' "${0##*/}" >&2
  exit 2
fi

# An empty connection setting would otherwise make psql fail in a confusing
# way; abort early and name the missing variable.
: "${POSTGRES_USER:?must be set and non-empty}"
: "${POSTGRES_DB:?must be set and non-empty}"
: "${POSTGRES_HOST:?must be set and non-empty}"

# The heredoc delimiter is unquoted so that the arguments are expanded; the
# dollar quotes of the function body are therefore escaped as \$\$.
psql -q -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -h "${POSTGRES_HOST}" << EOF
DROP FUNCTION IF EXISTS sample_cell_group_marker_genes_by_cell_group_id;
CREATE FUNCTION sample_cell_group_marker_genes_by_cell_group_id(cell_group_ids int[], lim int)
RETURNS TABLE (cgmg_id integer, cgmg_gene_id varchar, cgmg_cell_group_id integer, cgmg_marker_probability double precision,
cgmgs_gene_id varchar, cgmgs_cell_group_id integer, cgmgs_marker_id integer, cgmgs_expression_type smallint, cgmgs_mean_expression double precision, cgmgs_median_expression double precision) AS
\$\$
DECLARE
  cgi int;
BEGIN
  FOREACH cgi IN ARRAY cell_group_ids
  LOOP
    RETURN QUERY
    SELECT
      cgmg.*,
      cgmgs.*
    FROM
      scxa_cell_group_marker_genes cgmg
      RIGHT JOIN
        scxa_cell_group_marker_gene_stats cgmgs
      ON
        cgmg.id = cgmgs.marker_id
        AND
        cgmg.gene_id = cgmgs.gene_id
        AND
        cgmg.cell_group_id = cgmgs.cell_group_id
    WHERE
      cgmg.cell_group_id = cgi
      AND
      cgmg.marker_probability < 0.05
    ORDER BY random()
    LIMIT lim;
  END LOOP;
END;
\$\$ LANGUAGE plpgsql;
COPY (
  SELECT
    *
  FROM
    sample_cell_group_marker_genes_by_cell_group_id(ARRAY[${1}], ${2:-5})
  ORDER BY
    cgmg_id
) TO STDOUT DELIMITER E'\t';
DROP FUNCTION sample_cell_group_marker_genes_by_cell_group_id;
EOF
14 changes: 0 additions & 14 deletions fixtures/02-scxa_coords.sh

This file was deleted.

55 changes: 55 additions & 0 deletions fixtures/03-scxa-cell-group-membership.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env bash

# Dumps a sample of scxa_cell_group_membership rows for a list of cell groups
# to stdout, as tab-delimited text (PostgreSQL COPY format).
#
# The output is the union (duplicates removed) of the membership rows, within
# the given cell groups, of two random samples of cell IDs:
#   - up to $2 distinct cell IDs that belong to any of the given groups;
#   - up to $3 distinct cell IDs that belong to those given groups whose value
#     matches 'not available' (case-insensitive).
#
# Arguments:
#   $1  List of cell group IDs separated with commas (required)
#   $2  Number of cell IDs per group (defaults to 10)
#   $3  Number of cell IDs in not available group (defaults to 10)
#
# Environment:
#   POSTGRES_USER, POSTGRES_DB, POSTGRES_HOST  psql connection settings
#
# NOTE: the arguments are interpolated verbatim into the SQL text below, so
# only pass trusted values.

# An empty list would expand to ARRAY[], which PostgreSQL rejects because it
# cannot determine the element type; fail with a usage message instead.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s CELL_GROUP_ID[,CELL_GROUP_ID...] [CELL_IDS] [NOT_AVAILABLE_CELL_IDS]\n' "${0##*/}" >&2
  exit 2
fi

# An empty connection setting would otherwise make psql fail in a confusing
# way; abort early and name the missing variable.
: "${POSTGRES_USER:?must be set and non-empty}"
: "${POSTGRES_DB:?must be set and non-empty}"
: "${POSTGRES_HOST:?must be set and non-empty}"

psql -q -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -h "${POSTGRES_HOST}" << EOF
COPY (
  SELECT
    *
  FROM
    scxa_cell_group_membership
  WHERE
    cell_group_id = ANY(ARRAY[${1}])
    AND
    cell_id = ANY(
      SELECT * FROM (
        SELECT
          DISTINCT(cgm.cell_id)
        FROM
          scxa_cell_group_membership cgm
        WHERE
          cgm.cell_group_id = ANY(ARRAY[${1}])
      ) cell_ids
      ORDER BY RANDOM()
      LIMIT ${2:-10})
  UNION
  SELECT
    *
  FROM
    scxa_cell_group_membership
  WHERE
    cell_group_id = ANY(ARRAY[${1}])
    AND
    cell_id = ANY(
      SELECT * FROM (
        SELECT
          DISTINCT(cgm.cell_id)
        FROM
          scxa_cell_group_membership cgm
          JOIN
            scxa_cell_group cg
          ON
            cgm.cell_group_id=cg.id
        WHERE
          cg.id = ANY(ARRAY[${1}])
          AND
          cg.value ~* 'not available'
      ) cell_ids
      ORDER BY RANDOM()
      LIMIT ${3:-10})
) TO STDOUT DELIMITER E'\t';
EOF
14 changes: 0 additions & 14 deletions fixtures/03-scxa_cell_group_membership.sh

This file was deleted.

Loading

0 comments on commit 3d64dc2

Please sign in to comment.