Skip to content

Commit

Permalink
subsamples of sm_name-cell_type
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 17, 2024
1 parent 238e004 commit 00c7c93
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 138 deletions.
48 changes: 0 additions & 48 deletions scripts/_run_evaluation.sh

This file was deleted.

69 changes: 0 additions & 69 deletions scripts/_run_evaluation_all.sh

This file was deleted.

33 changes: 18 additions & 15 deletions scripts/run_grn_evaluation_tw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

RUN_ID="subsample_200_gb_reg2"
RUN_ID="scgen_pearson_gb_sub549"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
# grn_models_folder="${resources_dir}/supplementary/grn_models_noised"
grn_models_folder="${resources_dir}/grn_models"
reg_type=GB
subsample=200
subsample=-1
max_workers=20

param_file="./params/${RUN_ID}.yaml"
Expand All @@ -21,7 +23,8 @@ grn_names=(
"scglue"
)

layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")
# layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")
layers=( "scgen_pearson" )

# Start writing to the YAML file
cat > $param_file << HERE
Expand All @@ -39,7 +42,7 @@ append_entry() {
max_workers: $max_workers
consensus: ${resources_dir}/prior/consensus-num-regulators.json
${2:+tf_all: ${resources_dir}/prior/tf_all.csv}
${3:+prediction: ${resources_dir}/grn_models/$1.csv}
${3:+prediction: ${grn_models_folder}/$1.csv}
HERE
}
# Loop through grn_names and layers
Expand All @@ -49,17 +52,17 @@ for grn_name in "${grn_names[@]}"; do
done
done

# Append negative control
grn_name="negative_control"
for layer in "${layers[@]}"; do
append_entry "$grn_name" "" "true"
done
# # Append negative control
# grn_name="negative_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "" "true"
# done

# Append positive controls
grn_name="positive_control"
for layer in "${layers[@]}"; do
append_entry "$grn_name" "true"
done
# # Append positive controls
# grn_name="positive_control"
# for layer in "${layers[@]}"; do
# append_entry "$grn_name" "true"
# done

# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
Expand All @@ -81,7 +84,7 @@ HERE
--main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
--workspace 53907369739130 `
--compute-env 6TeIFgV5OY4pJCk8I0bfOh `
--params-file ./params/subsample_200_gb_reg2.yaml `
--params-file ./params/scgen_pearson_gb_sub549.yaml `
--config src/common/nextflow_helpers/labels_tw.config


2 changes: 1 addition & 1 deletion src/methods/multi_omics/celloracle/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ functionality:
- name: --links
type: file
direction: output
default: output/celloracle/links.celloracle.links
default: output/celloracle/links.celloracle.links
resources:
- type: python_script
path: script.py
Expand Down
6 changes: 2 additions & 4 deletions src/methods/multi_omics/scglue/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,12 @@ functionality:
arguments:
- name: --annotation_file
type: file
example: resources/supplements/gencode.v45.annotation.gtf.gz
default: resources/supplements/gencode.v45.annotation.gtf.gz
default: resources/supplementary/gencode.v45.annotation.gtf.gz
required: false
direction: input
- name: --motif_file
type: file
example: resources/supplements/JASPAR2022-hg38.bed.gz
default: resources/supplements/JASPAR2022-hg38.bed.gz
default: resources/supplementary/JASPAR2022-hg38.bed.gz
required: false
direction: input

Expand Down
2 changes: 1 addition & 1 deletion src/methods/multi_omics/scglue_ns/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ functionality:
platforms:
- type: nextflow
directives:
label: [ hightime, midmem, lowcpu ]
label: [ hightime, midmem, highcpu ]
1 change: 1 addition & 0 deletions src/metrics/regression_2/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ functionality:
direction: input
must_exist: true
default: 'resources/prior/consensus-num-regulators.json'
example: 'resources_test/prior/consensus-num-regulators.json'
platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
Expand Down
9 changes: 9 additions & 0 deletions src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,15 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
subsample = par['subsample']
if subsample != -1:
perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :]

if True: # one combination of cell_type, sm_name
sampled_obs = perturbation_data.obs.groupby(['sm_name', 'cell_type'], observed=False).apply(lambda x: x.sample(1)).reset_index(drop=True)
obs = perturbation_data.obs
mask = []
for _, row in obs.iterrows():
mask.append((sampled_obs==row).all(axis=1).any())
perturbation_data = perturbation_data[mask,:]

gene_names = perturbation_data.var.index.to_numpy()
n_genes = len(gene_names)
groups = LabelEncoder().fit_transform(perturbation_data.obs.plate_name)
Expand Down
30 changes: 30 additions & 0 deletions src/robustness_analysis/add_noise_grn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import pandas as pd
import numpy as np

# NOTE(review): `layer` is not referenced anywhere in this script; it is kept
# only so the module still exposes the same names. The noise added below does
# not depend on it.
layer = 'scgen_pearson'
grn_folder = 'resources/grn_models'
grn_folder_noised = 'resources/supplementary/grn_models_noised'
noise_ratio = 0.2


def add_noise_to_weights(df, noise_ratio, rng=None):
    """Return *df* with zero-mean Gaussian noise added to its 'weight' column.

    The noise standard deviation is ``noise_ratio`` times the sample standard
    deviation of the existing weights.

    Args:
        df: DataFrame holding one GRN (read from a CSV file).
        noise_ratio: Noise scale, relative to the standard deviation of
            ``df['weight']``.
        rng: Optional object exposing ``normal(loc, scale, size)``, e.g. a
            seeded ``np.random.default_rng(seed)``, for reproducible noise.
            When ``None`` the global ``np.random`` state is used.

    Returns:
        A noised copy of *df*. *df* itself is returned unchanged when it has
        no 'weight' column or when the standard deviation of the weights is
        not finite.
    """
    if 'weight' not in df.columns:
        return df

    std_dev = df['weight'].std()
    # With fewer than two weights the sample standard deviation is NaN, and
    # drawing noise with a NaN scale would overwrite every weight with NaN.
    if not np.isfinite(std_dev):
        return df

    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_ratio * std_dev, size=df['weight'].shape)

    noised = df.copy()
    noised['weight'] = noised['weight'] + noise
    return noised


def add_noise_to_grn_models(src_folder, dst_folder, noise_ratio, rng=None):
    """Write a noised copy of every ``.csv`` file in *src_folder* to *dst_folder*.

    Args:
        src_folder: Folder containing the GRN CSV files.
        dst_folder: Output folder; created if it does not exist.
        noise_ratio: Forwarded to :func:`add_noise_to_weights`.
        rng: Forwarded to :func:`add_noise_to_weights`.

    Returns:
        The list of file names written, in processing order.
    """
    # Ensure the output folder exists
    os.makedirs(dst_folder, exist_ok=True)

    written = []
    # os.listdir() order is arbitrary; sorting keeps a seeded run reproducible.
    for file_name in sorted(os.listdir(src_folder)):
        if not file_name.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(src_folder, file_name))
        if 'weight' not in df.columns:
            # Such files are still copied so the output folder mirrors the
            # input, but say so instead of implying noise was added.
            print(f"Warning: {file_name} has no 'weight' column; copied without noise.")

        noised_df = add_noise_to_weights(df, noise_ratio, rng=rng)
        noised_df.to_csv(os.path.join(dst_folder, file_name), index=False)
        written.append(file_name)

    return written


def main():
    """Noise every GRN model in ``grn_folder`` into ``grn_folder_noised``."""
    add_noise_to_grn_models(grn_folder, grn_folder_noised, noise_ratio)
    print("Noise added to all GRN models and saved successfully.")


if __name__ == "__main__":
    main()

0 comments on commit 00c7c93

Please sign in to comment.