Skip to content

Commit

Permalink
Bugfix: Properly check pvalues/scores when loading from a csv
Browse files Browse the repository at this point in the history
When loading p-values from a CSV file, properly determine whether the values are p-values or scores by checking whether any value is > 1. This is more reliable than checking the column header.
  • Loading branch information
timeu committed Jun 13, 2017
1 parent 9768f01 commit 6310404
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 6 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
1.7.1
--
* Bugfix: loading a gwas result from csv will properly check if it contains pvalues or scores

1.7.0
--
* Enhancement: Add support to transform phenotypes from the CLI
Expand Down
4 changes: 2 additions & 2 deletions pygwas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = '1.7.0'
__updated__ = "03.05.2017"
__version__ = '1.7.1'
__updated__ = "13.06.2017"
__date__ = "20.8.2014"
9 changes: 5 additions & 4 deletions pygwas/core/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,27 +71,28 @@ def load_from_csv(filename):
is_pval = False
with open(filename,'r') as f:
header = f.readline().rstrip()
if header[2] == 'pvalue':
is_pval = True
add_header = header.split(",")[5:]
for key in add_header:
key = key.replace('"','')
additional_columns[key] = []
for row in f:
fields = row.rstrip().split(",")
if chr != fields[0]:
chr = fields[0]
chrs.append(chr)
chromosomes.append(chr)
positions.append(int(fields[1]))
positions.append(int(float(fields[1])))
pvals.append(float(fields[2]))
mafs.append(float(fields[3]))
macs.append(int(fields[4]))
macs.append(int(float(fields[4])))
if len(add_header) > 0:
for i,key in enumerate(add_header):
key = key.replace('"','')
addit_value = None
if fields[(5+i)] != '':
addit_value = float(fields[(5+i)])
additional_columns[key].append(addit_value)
is_pval = max(pvals) <= 1.0
if is_pval is False:
pvals = map(lambda x:math.pow(10,-1*x),pvals)
return GWASResult(chrs,chromosomes,positions,pvals,{'mafs':mafs,'macs':macs},additional_columns = additional_columns)
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ def statisticsArgs():
def ld_filename():
return '%s/ld.hdf5' % resource_path

@pytest.fixture
def csv_scores():
    """Return the path of the ``csv_scores.csv`` resource below ``resource_path``."""
    scores_path = '%s/csv_scores.csv' % resource_path
    return scores_path

@pytest.fixture
def csv_pvalues():
    """Return the path of the ``csv_pvalues.csv`` resource below ``resource_path``."""
    pvalues_path = '%s/csv_pvalues.csv' % resource_path
    return pvalues_path

@pytest.fixture
def genes():
Expand Down
11 changes: 11 additions & 0 deletions tests/res/csv_pvalues.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Chr,Pos,Pval,MAF,MAC,variance_explained
1,100003,2.2106989795385197e-08,0.0028328611898017,1,8.84846534409123e-05
1,1000033,0.791800358928746,0.0084985835694051,3,0.000199356154049757
2,1000084,0.731829069116948,0.0028328611898017,1,0.00033596270942915
2,1000091,0.557893156575617,0.0028328611898017,1,0.000981963456590074
3,1000114,0.490002833429499,0.0056657223796034,2,0.00136250295302121
3,100013,0.590525250208836,0.141643059490085,50,0.000828053007342899
4,1000267,1.0,0.0028328611898017,1,9.22732593666709e-08
4,100027,0.590525250208836,0.141643059490085,50,0.000828053007342899
5,1000383,0.233147318752054,0.113314447592068,40,0.00405913486553411
5,1000386,0.504440789460337,0.0028328611898017,1,0.00127409711179371
11 changes: 11 additions & 0 deletions tests/res/csv_scores.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Chr,Pos,Pval,MAF,MAC,variance_explained
1,100003,7.655470389228389,0.0028328611898017,1,8.84846534409123e-05
1,1000033,0.1013843057149856,0.0084985835694051,3,0.000199356154049757
2,1000084,0.13559034381586413,0.0028328611898017,1,0.00033596270942915
2,1000091,0.2534489658519991,0.0028328611898017,1,0.000981963456590074
3,1000114,0.309801408666918,0.0056657223796034,2,0.00136250295302121
3,100013,0.22876152770006264,0.141643059490085,50,0.000828053007342899
4,1000267,-0.0, 0.0028328611898017,1,9.22732593666709e-08
4,100027,0.22876152770006264,0.141643059490085,50,0.000828053007342899
5,1000383,0.6323695746674916,0.113314447592068,40,0.00405913486553411
5,1000386,0.2971898033017406,0.0028328611898017,1,0.00127409711179371
30 changes: 30 additions & 0 deletions tests/test_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pytest
from pygwas.core import result
import numpy as np

class TestResult:
    """Tests for ``result.load_from_csv``.

    Both test cases run the loaded result through the same set of
    expectations, so the p-value file and the score file must produce
    equivalent results.
    """

    def test_load_csv_pvalues(self, csv_pvalues):
        """Load the ``csv_pvalues`` fixture file and verify the result."""
        res = result.load_from_csv(csv_pvalues)
        self._assert_result(res)

    def test_load_csv_scores(self, csv_scores):
        """Load the ``csv_scores`` fixture file and verify the result."""
        res = result.load_from_csv(csv_scores)
        self._assert_result(res)

    def _assert_result(self, res):
        """Compare a loaded result against the expected reference values.

        Values copied verbatim from the input file (chromosomes, positions,
        MAFs, MACs) are compared exactly. Computed floating point values are
        compared with a relative tolerance, because they may differ in the
        last digits depending on how the p-values were obtained.
        """
        pvals_to_check = [2.2106989795385197e-08, 0.791800358928746, 0.731829069116948, 0.557893156575617, 0.490002833429499, 0.590525250208836, 1.0, 0.590525250208836, 0.233147318752054, 0.504440789460337]
        # Same tolerance for every floating point comparison in this check.
        float_tol = dict(rtol=1e-5, atol=0)

        assert res.chromosomes == ['1', '1', '2', '2', '3', '3', '4', '4', '5', '5']
        assert res.chrs == ['1', '2', '3', '4', '5']

        np.testing.assert_allclose(res.min_pval, 2.2106989795385197e-08, **float_tol)
        np.testing.assert_allclose(res.bonferroni_threshold, 2.3010299956639813, **float_tol)
        np.testing.assert_allclose(res.stats['med_pval'], 0.59052525020883595, **float_tol)
        np.testing.assert_allclose(res.stats['bh_thres_d']['thes_pval'], 0.010000000000000002, **float_tol)
        np.testing.assert_allclose(res.stats['ks_stats']['p_val'], 0.30742245590503603, **float_tol)
        np.testing.assert_allclose(res.stats['ks_stats']['D'], 0.29000283342949901, **float_tol)

        assert res.maf_dict['mafs'] == [0.0028328611898017, 0.0084985835694051, 0.0028328611898017, 0.0028328611898017, 0.0056657223796034, 0.141643059490085, 0.0028328611898017, 0.141643059490085, 0.113314447592068, 0.0028328611898017]
        assert res.maf_dict['macs'] == [1, 3, 1, 1, 2, 50, 1, 50, 40, 1]
        assert res.positions == [100003, 1000033, 1000084, 1000091, 1000114, 100013, 1000267, 100027, 1000383, 1000386]

        # Compare the whole sequence at once so that a result with too few
        # (or too many) p-values fails instead of being silently accepted.
        np.testing.assert_allclose(list(res.pvals), pvals_to_check, **float_tol)

0 comments on commit 6310404

Please sign in to comment.