Skip to content

Commit

Permalink
a couple new features, improvements and debug
Browse files Browse the repository at this point in the history
  • Loading branch information
jkobject committed Aug 16, 2022
1 parent 04c92ed commit 1dbb001
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 23 deletions.
2 changes: 1 addition & 1 deletion genepy/google/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def lsFiles(files, add="", group=50):
for val in sfiles:
a += val + " "
data = subprocess.run(
"gsutil -m ls " + add + " '" + a"'", capture_output=True, shell=True
"gsutil -m ls " + add + " '" + a + "'", capture_output=True, shell=True
)
if data.returncode != 0:
if "One or more URLs matched no objects" not in str(data.stderr):
Expand Down
2 changes: 2 additions & 0 deletions genepy/mutations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def read_comments(f):
annot = annot.replace("FUNCOTATION=", "")[1:-1]
res.update({name: [] for name in funco_fields})
for site in annot.split("],["):
if "]#[" in site:
site = site.split("]#[")[0]
site = (
site.replace("_%7C_", " ")
.replace("_%20_", " ")
Expand Down
37 changes: 15 additions & 22 deletions genepy/utils/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,21 +55,21 @@
}


def fileToList(filename, strconv=lambda x: x):
    """Load a text file holding one entry per line into a list.

    Args:
        filename (str): path of the file to read.
        strconv (callable, optional): conversion applied to each line once its
            trailing newline has been removed. Defaults to the identity.

    Returns:
        list: the converted lines, in file order.
    """
    with open(filename) as f:
        # Drop the newline only when it is actually there: an unconditional
        # [:-1] would eat the last character of a final unterminated line.
        return [
            strconv(line[:-1] if line.endswith("\n") else line) for line in f
        ]


def listToFile(l, filename, strconv=str):
    """Write the items of a list to a file, one item per line.

    Args:
        l (iterable): the items to write.
        filename (str): path of the file to create or overwrite.
        strconv (callable, optional): conversion applied to each item before it
            is written. Defaults to ``str``.
    """
    with open(filename, "w") as f:
        # Format with an f-string rather than "%s\n" % value: the %-operator
        # treats a tuple value as its argument list and would fail (or print
        # only the inner element) for tuple items.
        f.writelines(f"{strconv(item)}\n" for item in l)


def dictToFile(d, filename):
Expand Down Expand Up @@ -1006,39 +1006,32 @@ def cutLoops(li):
def removeCoVar(mat, maxcorr=0.95):
"""removeCoVar list columns to remove as they covar with other columns
just regular linear correlation.
It displays a list of genes that have been dropped because their correlation
to another gene is above a certain value.
It shows a python dictionary {gene_to_be_dropped: gene_it_correlates_to}.
Args:
mat (array like): the matrix of obs x var
maxcorr (float, optional): the max correlation above which to drop an observation. Defaults to 0.95.
Returns:
list(tuples): lists of observations to drop and their covarying observation to keep [(todrop,tokeep),...]
"""
mat = mat.T
loc = np.argwhere(np.corrcoef(mat) >= maxcorr)
nloc = cutLoops(loc)

drop = []
sameness = []
for a, b in nloc:
if b in drop:
if a not in drop:
continue
else:
drop.append(a)
for same in sameness:
if same[0] == b:
sameness.append((a, same[0]))
break
else:
drop.append(b)
if a in drop:
for same in sameness:
if same[0] == a:
sameness.append((b, same[0]))
break
else:
if a not in drop:
# we already said to drop b: do nothing here
if b not in drop:
drop.append(b)
sameness.append((b, a))
if type(mat) is pd.DataFrame:
col = mat.columns.tolist()
col = mat.index.tolist()
# replace sameness values with the col values
res = []
for (i, j) in sameness:
Expand Down

0 comments on commit 1dbb001

Please sign in to comment.