Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bugs #132

Merged
merged 17 commits into from
Dec 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .github/workflows/python-package-conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
strategy:
max-parallel: 5
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.7]

steps:
- uses: actions/checkout@v2
Expand All @@ -22,10 +22,14 @@ jobs:
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda config --add channels bioconda
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
# conda config --set channel_priority flexible
# conda env update --file environment.yml --name base
conda install stream
conda create -n test-environment stream r-stringi python=${{ matrix.python-version }}
source activate test-environment
# conda install stream
python -m pip install --upgrade pip
pip install -e .
# - name: Lint with flake8
Expand All @@ -37,4 +41,5 @@ jobs:
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test
run: |
source activate test-environment
stream_run_test
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ install:
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
- source activate test-environment
- conda config --add channels defaults
- conda config --add channels bioconda
- conda config --add channels bioconda
- conda config --add channels conda-forge
- conda install tzlocal stream=1.0 -y
- conda install tzlocal stream -y
- Rscript -e 'install.packages("stringi",repos="https://cloud.r-project.org/")'
- installationpath=$PWD
- echo $installationpath
Expand Down
78 changes: 56 additions & 22 deletions stream/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import pandas as pd
from pandas.api.types import is_string_dtype,is_numeric_dtype
from pandas.api.types import (
is_string_dtype,
is_numeric_dtype
)
import anndata as ad
import networkx as nx
import re
Expand All @@ -13,23 +16,48 @@
import multiprocessing
import os
from sklearn.decomposition import PCA as sklearnPCA
from sklearn import preprocessing
from sklearn.manifold import LocallyLinearEmbedding,TSNE, SpectralEmbedding
from sklearn.cluster import SpectralClustering,AffinityPropagation,KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin_min,pairwise_distances,euclidean_distances
# from sklearn import preprocessing
from sklearn.manifold import (
LocallyLinearEmbedding,
TSNE,
SpectralEmbedding
)
from sklearn.cluster import (
SpectralClustering,
AffinityPropagation,
KMeans
)
from sklearn.metrics.pairwise import (
# pairwise_distances_argmin_min,
pairwise_distances,
euclidean_distances
)
import matplotlib as mpl
import matplotlib.patches as Patches
from matplotlib.patches import Polygon
from mpl_toolkits.mplot3d import Axes3D
import umap
from copy import deepcopy
import itertools
from scipy.spatial import distance,cKDTree,KDTree
from scipy.spatial import (
# distance,
cKDTree,
# KDTree
)
import math
# mpl.use('Agg')
from scipy import stats
from scipy.stats import spearmanr,mannwhitneyu,gaussian_kde,kruskal
from scipy.sparse import issparse,lil_matrix,csr_matrix
from scipy.stats import (
spearmanr,
mannwhitneyu,
# gaussian_kde,
# kruskal
)
# from scipy.sparse import (
# issparse,
# lil_matrix,
# csr_matrix
# )
from slugify import slugify
from decimal import *
import matplotlib.gridspec as gridspec
Expand All @@ -44,11 +72,13 @@
from rpy2.robjects import pandas2ri

from .extra import *
#scikit_posthocs is currently not available in conda system. We will update it once it can be installed via conda.
#import scikit_posthocs as sp
# scikit_posthocs is currently not available in conda system.
# We will update it once it can be installed via conda.
# import scikit_posthocs as sp
from .scikit_posthocs import posthoc_conover

os.environ['KMP_DUPLICATE_LIB_OK']='True'
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'


def set_figure_params(context='notebook',style='white',palette='deep',font='sans-serif',font_scale=1.1,color_codes=True,
dpi=80,dpi_save=150,figsize=[5.4, 4.8],rc=None):
Expand Down Expand Up @@ -179,8 +209,8 @@ def read(file_name,file_path=None,file_format=None,delimiter='\t',workdir=None,
adata = pickle.load(f)
f.close()

if(not issparse(adata.X)):
adata.X = csr_matrix(adata.X)
# if(not issparse(adata.X)):
# adata.X = csr_matrix(adata.X)

if('workdir' not in adata.uns_keys()):
set_workdir(adata,workdir=workdir)
Expand Down Expand Up @@ -403,18 +433,18 @@ def cal_qc(adata,expr_cutoff=1,assay='rna'):
assay = assay.lower()
assert assay in ['rna','atac'], "`assay` must be chosen from ['rna','atac']"

if(not issparse(adata.X)):
adata.X = csr_matrix(adata.X)
# if(not issparse(adata.X)):
# adata.X = csr_matrix(adata.X)

n_counts = adata.X.sum(axis=0).A1
n_counts = adata.X.sum(axis=0)
adata.var['n_counts'] = n_counts
n_cells = (adata.X>=expr_cutoff).sum(axis=0).A1
n_cells = (adata.X>=expr_cutoff).sum(axis=0)
adata.var['n_cells'] = n_cells
adata.var['pct_cells'] = n_cells/adata.shape[0]

n_counts = adata.X.sum(axis=1).A1
n_counts = adata.X.sum(axis=1)
adata.obs['n_counts'] = n_counts
n_features = (adata.X>=expr_cutoff).sum(axis=1).A1
n_features = (adata.X>=expr_cutoff).sum(axis=1)
if(assay=='atac'):
adata.obs['n_peaks'] = n_features
adata.obs['pct_peaks'] = n_features/adata.shape[1]
Expand All @@ -424,7 +454,7 @@ def cal_qc(adata,expr_cutoff=1,assay='rna'):
r = re.compile("^MT-",flags=re.IGNORECASE)
mt_genes = list(filter(r.match, adata.var_names))
if(len(mt_genes)>0):
n_counts_mt = adata[:,mt_genes].X.sum(axis=1).A1
n_counts_mt = adata[:,mt_genes].X.sum(axis=1)
adata.obs['pct_mt'] = n_counts_mt/n_counts
else:
adata.obs['pct_mt'] = 0
Expand Down Expand Up @@ -860,6 +890,9 @@ def select_variable_genes(adata,loess_frac=0.01,percentile=95,n_genes = None,n_j
The selected variable gene names.
"""

# if(not issparse(adata.X)):
# adata.X = csr_matrix(adata.X)

if(fig_path is None):
fig_path = adata.uns['workdir']
fig_size = mpl.rcParams['figure.figsize'] if fig_size is None else fig_size
Expand Down Expand Up @@ -3032,6 +3065,7 @@ def plot_stream(adata,root='S0',color = None,preference=None,dist_scale=0.9,
factor_num_win=10,factor_min_win=2.0,factor_width=2.5,factor_nrow=200,factor_ncol=400,
log_scale = False,factor_zoomin=100.0,
fig_size=(7,4.5),fig_legend_order=None,fig_legend_ncol=1,
fig_colorbar_aspect=30,
vmin=None,vmax=None,
pad=1.08,w_pad=None,h_pad=None,
save_fig=False,fig_path=None,fig_format='pdf'):
Expand Down Expand Up @@ -3107,7 +3141,7 @@ def plot_stream(adata,root='S0',color = None,preference=None,dist_scale=0.9,
dict_ann[ann] = adata.obs_vector(ann)
else:
raise ValueError("could not find '%s' in `adata.obs.columns` and `adata.var_names`" % (ann))

flat_tree = adata.uns['flat_tree']
ft_node_label = nx.get_node_attributes(flat_tree,'label')
label_to_node = {value: key for key,value in nx.get_node_attributes(flat_tree,'label').items()}
Expand Down Expand Up @@ -3206,7 +3240,7 @@ def plot_stream(adata,root='S0',color = None,preference=None,dist_scale=0.9,
clip_path = Polygon(verts_cell, facecolor='none', edgecolor='none', closed=True)
ax.add_patch(clip_path)
im.set_clip_path(clip_path)
cbar = plt.colorbar(im, ax=ax, pad=0.04, fraction=0.02, aspect='auto')
cbar = plt.colorbar(im, ax=ax, pad=0.04, fraction=0.02, aspect=fig_colorbar_aspect)
cbar.ax.locator_params(nbins=5)
ax.set_xlim(xmin,xmax)
ax.set_ylim(ymin,ymax)
Expand Down
Loading