Skip to content

Commit

Permalink
Limma-Voom differential expression can now fit mixed linear models co…
Browse files Browse the repository at this point in the history
…ntaining a random effect (e.g. nested design).
  • Loading branch information
GuyTeichman committed Nov 13, 2023
1 parent 6acb871 commit 3adf0a7
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 24 deletions.
3 changes: 1 addition & 2 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ History

Added
*******
* Limma-Voom differential expression can now fit mixed linear models containing a random effect (e.g. nested design).

Changed
********

Fixed
*******
Expand Down
14 changes: 14 additions & 0 deletions rnalysis/data_files/r_templates/limma_install.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
pgks <- list("statmod")
for (pkg in pgks) {
tryCatch(
tryCatch(
{install.packages(pkg)
require(pkg)},
warning = function(e) {
install.packages(pkg, type = "binary")},
error = function(e) {
install.packages(pkg, type = "binary")}),
error = function(e) {}
)
}
if (("limma" %in% rownames(installed.packages()) == FALSE) || (!require("limma", quietly = TRUE))) {
if (!require("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
BiocManager::install("limma",update=TRUE, ask=FALSE, force=TRUE)
}

2 changes: 1 addition & 1 deletion rnalysis/data_files/r_templates/limma_run_parametric.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ colnames(design)[1] <- "Intercept"

count_data <- read.table("$COUNT_MATRIX", header=TRUE, sep= ",", row.names = 1)
voom_object <- voom(count_data, design, plot=FALSE, save.plot=TRUE)
fit <- lmFit(voom_object, design)

$RANDOM_EFFECT_FIT
14 changes: 9 additions & 5 deletions rnalysis/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -3069,15 +3069,16 @@ def _diff_exp_assertions(self, design_mat_df: pd.DataFrame):
f"{sorted(self.columns)}"

for factor in design_mat_df.columns:
assert parsing.slugify(factor) == factor, f"Invalid factor name '{factor}': contains invalid characters." \
f" \nSuggested alternative name: '{parsing.slugify(factor)}'. "
assert generic.sanitize_variable_name(
factor) == factor, f"Invalid factor name '{factor}': contains invalid characters." \
f" \nSuggested alternative name: '{generic.sanitize_variable_name(factor)}'. "

@readable_name('Run Limma-Voom differential expression')
def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
comparisons: Iterable[Tuple[str, str, str]],
r_installation_folder: Union[str, Path, Literal['auto']] = 'auto',
output_folder: Union[str, Path, None] = None
) -> Tuple['DESeqFilter', ...]:
output_folder: Union[str, Path, None] = None,
random_effect: Union[str, None] = None) -> Tuple['DESeqFilter', ...]:
"""
Run differential expression analysis on the count matrix using the \
`Limma-Voom <https://doi.org/10.1186/gb-2014-15-2-r29>`_ pipeline. \
Expand Down Expand Up @@ -3107,6 +3108,9 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
as well as the log files and R script used to generate them, will be saved. \
if output_folder is None, the results will not be saved to a specified directory.
:type output_folder: str, Path, or None
:param random_effect: optionally, specify a single factor to model as a random effect. \
This is useful when your experimental design is nested. limma-voom can only treat one factor as a random effect.
:type random_effect: str or None
:return: a tuple of DESeqFilter objects, one for each comparison
"""
if output_folder is not None:
Expand All @@ -3128,7 +3132,7 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
io.save_table(design_mat_df, design_mat_path)

r_output_dir = differential_expression.run_limma_analysis(data_path, design_mat_path, comparisons,
r_installation_folder)
r_installation_folder, random_effect)
outputs = []
for item in r_output_dir.iterdir():
if not item.is_file():
Expand Down
27 changes: 23 additions & 4 deletions rnalysis/utils/differential_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def install_limma(r_installation_folder: Union[str, Path, Literal['auto']] = 'au


def create_limma_script(data_path: Union[str, Path], design_mat_path: Union[str, Path],
comparisons: Iterable[Tuple[str, str, str]]):
comparisons: Iterable[Tuple[str, str, str]], random_effect: Union[str, None]):
cache_dir = io.get_todays_cache_dir().joinpath(hashlib.sha1(str(time.time_ns()).encode('utf-8')).hexdigest())
if not cache_dir.exists():
cache_dir.mkdir(parents=True)
Expand All @@ -36,7 +36,11 @@ def create_limma_script(data_path: Union[str, Path], design_mat_path: Union[str,
factors_str = ''
for factor in design_mat_df.columns:
factor_name = generic.sanitize_variable_name(factor)
factor_names[factor] = factor_name

if factor == random_effect or factor_name == random_effect:
random_effect = factor_name
else:
factor_names[factor] = factor_name

values = sorted(design_mat_df[factor].unique())
values_str = ', '.join([f'"{val}"' for val in values])
Expand All @@ -45,10 +49,24 @@ def create_limma_script(data_path: Union[str, Path], design_mat_path: Union[str,

formula = "~ " + " + ".join(factor_names.values())

if random_effect is None:
random_effect_fit_code = "fit <- lmFit(voom_object, design)"
else:
random_effect_fit_code = (f"cor <- duplicateCorrelation(voom_object, design, block={random_effect})\n"
"if (cor$consensus.correlation > 0) { "
"#only include random effect if the correlation is positive\n"
f" fit <- lmFit(voom_object, design, block={random_effect}, "
f"correlation=cor$consensus.correlation)\n"
"}"
"else {"
" fit <- lmFit(voom_object, design)"
"}")

run_template = run_template.replace("$COUNT_MATRIX", Path(data_path).as_posix())
run_template = run_template.replace("$DESIGN_MATRIX", (Path(design_mat_path).as_posix()))
run_template = run_template.replace("$DEFINE_FACTORS", factors_str)
run_template = run_template.replace("$FORMULA", formula)
run_template = run_template.replace("$RANDOM_EFFECT_FIT", random_effect_fit_code)

outfile.write(run_template)

Expand All @@ -68,9 +86,10 @@ def create_limma_script(data_path: Union[str, Path], design_mat_path: Union[str,

def run_limma_analysis(data_path: Union[str, Path], design_mat_path: Union[str, Path],
comparisons: Iterable[Tuple[str, str, str]],
r_installation_folder: Union[str, Path, Literal['auto']] = 'auto'):
r_installation_folder: Union[str, Path, Literal['auto']] = 'auto',
random_effect: Union[str, None] = None):
install_limma(r_installation_folder)
script_path = create_limma_script(data_path, design_mat_path, comparisons)
script_path = create_limma_script(data_path, design_mat_path, comparisons, random_effect)
io.run_r_script(script_path, r_installation_folder)
return script_path.parent

Expand Down
17 changes: 10 additions & 7 deletions tests/test_differential_expression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import platform
import sys

import numpy as np
Expand All @@ -11,18 +10,21 @@ def test_install_limma():
install_limma()


@pytest.mark.parametrize("data,design_matrix,comparisons,expected_path", [
@pytest.mark.parametrize("data,design_matrix,comparisons,random_effect,expected_path", [
('tests/test_files/big_counted.csv', 'tests/test_files/test_design_matrix.csv', [('condition', 'cond2', 'cond1')],
'tests/test_files/limma_tests/case1/expected_limma_script_1.R'),
None, 'tests/test_files/limma_tests/case1/expected_limma_script_1.R'),
('counted.csv', 'tests/test_files/test_design_matrix.csv',
[('condition', 'cond3', 'cond2'), ('replicate', 'rep2', 'rep1'), ('condition', 'cond1', 'cond2')],
'tests/test_files/limma_tests/case2/expected_limma_script_2.R')
None, 'tests/test_files/limma_tests/case2/expected_limma_script_2.R'),
('tests/test_files/big_counted.csv', 'tests/test_files/test_design_matrix.csv', [('condition', 'cond2', 'cond1')],
'replicate', 'tests/test_files/limma_tests/case3/expected_limma_script_3.R'),
])
def test_create_limma_script(data, design_matrix, comparisons, expected_path):
def test_create_limma_script(data, design_matrix, comparisons, random_effect, expected_path):
with open(expected_path) as f:
expected = f.read()

out_path = create_limma_script(data, design_matrix, comparisons)
out_path = create_limma_script(data, design_matrix, comparisons, random_effect)
assert Path(out_path).exists()
with open(out_path) as f:
out = f.read()
Expand All @@ -33,7 +35,8 @@ def test_create_limma_script(data, design_matrix, comparisons, expected_path):

@pytest.mark.parametrize('comparisons,expected_paths', [
(
[('replicate', 'rep2', 'rep3')], ['tests/test_files/limma_tests/case1/LimmaVoom_replicate_rep2_vs_rep3_truth.csv']),
[('replicate', 'rep2', 'rep3')],
['tests/test_files/limma_tests/case1/LimmaVoom_replicate_rep2_vs_rep3_truth.csv']),
([('condition', 'cond2', 'cond1'), ('condition', 'cond3', 'cond2')],
['tests/test_files/limma_tests/case2/LimmaVoom_condition_cond2_vs_cond1_truth.csv',
'tests/test_files/limma_tests/case2/LimmaVoom_condition_cond3_vs_cond2_truth.csv'])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ colnames(design)[1] <- "Intercept"

count_data <- read.table("tests/test_files/big_counted.csv", header=TRUE, sep= ",", row.names = 1)
voom_object <- voom(count_data, design, plot=FALSE, save.plot=TRUE)
fit <- lmFit(voom_object, design)

fit <- lmFit(voom_object, design)

contrast <- makeContrasts("conditioncond2", levels = design)
contrast_fit <- contrasts.fit(fit, contrast)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ colnames(design)[1] <- "Intercept"

count_data <- read.table("counted.csv", header=TRUE, sep= ",", row.names = 1)
voom_object <- voom(count_data, design, plot=FALSE, save.plot=TRUE)
fit <- lmFit(voom_object, design)

fit <- lmFit(voom_object, design)

contrast <- makeContrasts("conditioncond3 - conditioncond2", levels = design)
contrast_fit <- contrasts.fit(fit, contrast)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_files/limma_tests/case3/expected_limma_script_3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
require("limma")

design_matrix <- read.table("tests/test_files/test_design_matrix.csv", header=TRUE, sep= ",")
condition <- factor(design_matrix$condition, levels=c("cond1", "cond2", "cond3"))
replicate <- factor(design_matrix$replicate, levels=c("rep1", "rep2", "rep3"))

design <- model.matrix(~ condition)
colnames(design)[1] <- "Intercept"

count_data <- read.table("tests/test_files/big_counted.csv", header=TRUE, sep= ",", row.names = 1)
voom_object <- voom(count_data, design, plot=FALSE, save.plot=TRUE)

cor <- duplicateCorrelation(voom_object, design, block=replicate)
if (cor$consensus.correlation > 0) { #only include random effect if the correlation is positive
fit <- lmFit(voom_object, design, block=replicate, correlation=cor$consensus.correlation)
}else { fit <- lmFit(voom_object, design)}

contrast <- makeContrasts("conditioncond2", levels = design)
contrast_fit <- contrasts.fit(fit, contrast)
contrast_bayes <- eBayes(contrast_fit)
res <- topTable(contrast_bayes, n=Inf)
res_ordered <- res[order(res$adj.P.Val),]
write.csv(as.data.frame(res_ordered),file="*PLACEHOLDERPATH*/LimmaVoom_condition_cond2_vs_cond1.csv")
9 changes: 6 additions & 3 deletions tests/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -2066,6 +2066,7 @@ def mock_run_analysis(data_path, design_mat_path, comps, r_installation_folder):
unlink_tree(outdir)


@pytest.mark.parametrize('random_effect', [None, 'replicate'])
@pytest.mark.parametrize('comparisons,expected_paths,script_path', [
([('replicate', 'rep2', 'rep3')], ['tests/test_files/limma_tests/case1/LimmaVoom_replicate_rep2_vs_rep3_truth.csv'],
'tests/test_files/limma_tests/case1/expected_limma_script_1.R'),
Expand All @@ -2074,24 +2075,26 @@ def mock_run_analysis(data_path, design_mat_path, comps, r_installation_folder):
'tests/test_files/limma_tests/case2/LimmaVoom_condition_cond3_vs_cond2_truth.csv'],
'tests/test_files/limma_tests/case2/expected_limma_script_2.R')
])
def test_differential_expression_limma(monkeypatch, comparisons, expected_paths, script_path):
def test_differential_expression_limma(monkeypatch, comparisons, expected_paths, script_path, random_effect):
outdir = 'tests/test_files/limma_tests/outdir'
sample_table_path = 'tests/test_files/test_design_matrix.csv'
truth = parsing.data_to_tuple(
[DESeqFilter(file, log2fc_col='logFC', padj_col='adj.P.Val') for file in expected_paths])
c = CountFilter('tests/test_files/big_counted.csv')

def mock_run_analysis(data_path, design_mat_path, comps, r_installation_folder):
def mock_run_analysis(data_path, design_mat_path, comps, r_installation_folder, rand_eff):
assert r_installation_folder == 'auto'
assert comps == comparisons
assert rand_eff == random_effect
assert CountFilter(data_path) == c
assert io.load_table(design_mat_path, 0).equals(io.load_table(sample_table_path, 0))

return Path(script_path).parent

monkeypatch.setattr(differential_expression, 'run_limma_analysis', mock_run_analysis)
try:
res = c.differential_expression_limma_voom(sample_table_path, comparisons, output_folder=outdir)
res = c.differential_expression_limma_voom(sample_table_path, comparisons, output_folder=outdir,
random_effect=random_effect)
assert sorted(res, key=lambda filter_obj: filter_obj.fname.name) == sorted(truth, key=lambda
filter_obj: filter_obj.fname.name)
with open(Path(outdir).joinpath(Path(script_path).name)) as outfile, open(script_path) as truthfile:
Expand Down

0 comments on commit 3adf0a7

Please sign in to comment.