Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite authored and mergify[bot] committed Nov 11, 2021
1 parent b42b0b8 commit d798e95
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
hooks:
- id: isort
- repo: https://github.com/python/black
rev: 20.8b1
rev: 21.10b0
hooks:
- id: black
language_version: python3
Expand Down
4 changes: 2 additions & 2 deletions sgkit/io/bgen/bgen_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def _read_metafile_partition(path: Path, partition: int) -> pd.DataFrame:


def read_metafile(path: PathType) -> dd.DataFrame:
""" Read cbgen metafile containing partitioned variant info """
"""Read cbgen metafile containing partitioned variant info"""
with bgen_metafile(path) as mf:
divisions = [mf.partition_size * i for i in range(mf.npartitions)] + [
mf.nvariants - 1
Expand All @@ -189,7 +189,7 @@ def read_metafile(path: PathType) -> dd.DataFrame:


def read_samples(path: PathType) -> pd.DataFrame:
""" Read BGEN .sample file """
"""Read BGEN .sample file"""
df = pd.read_csv(path, sep=" ", skiprows=[1], usecols=[0])
df.columns = ["sample_id"]
return df
Expand Down
2 changes: 1 addition & 1 deletion sgkit/io/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
def dataframe_to_dict(
df: dd.DataFrame, dtype: Optional[Mapping[str, DType]] = None
) -> Mapping[str, ArrayLike]:
""" Convert dask dataframe to dictionary of arrays """
"""Convert dask dataframe to dictionary of arrays"""
arrs = {}
for c in df:
a = df[c].to_dask_array(lengths=True)
Expand Down
8 changes: 4 additions & 4 deletions sgkit/stats/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def pca_est(
random_state: RandomStateType = 0,
variable: str = "call_alternate_allele_count",
) -> BaseEstimator:
""" Create PCA estimator """
"""Create PCA estimator"""
if ploidy is None:
if "ploidy" not in ds.dims:
raise ValueError(
Expand Down Expand Up @@ -79,7 +79,7 @@ def pca_fit(
variable: str = "call_alternate_allele_count",
check_missing: bool = True,
) -> BaseEstimator:
""" Fit PCA estimator """
"""Fit PCA estimator"""
AC = _allele_counts(ds, variable, check_missing=check_missing)
return est.fit(da.asarray(AC).T)

Expand All @@ -92,7 +92,7 @@ def pca_transform(
check_missing: bool = True,
merge: bool = True,
) -> Dataset:
""" Apply PCA estimator to new data """
"""Apply PCA estimator to new data"""
AC = _allele_counts(ds, variable, check_missing=check_missing)
projection = est.transform(da.asarray(AC).T)
new_ds = Dataset(
Expand All @@ -111,7 +111,7 @@ def _get(est: BaseEstimator, attr: str, fn: Any = lambda v: v) -> Optional[Array


def pca_stats(ds: Dataset, est: BaseEstimator, *, merge: bool = True) -> Dataset:
""" Extract attributes from PCA estimator """
"""Extract attributes from PCA estimator"""
new_ds = {
variables.sample_pca_component: (
("variants", "components"),
Expand Down
6 changes: 3 additions & 3 deletions sgkit/tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def simulate_cohort_genotypes(
n_variant: int, n_sample: int, n_cohort: int, seed: int = 0
) -> NDArray:
""" Sample genotypes from distinct ancestral populations """
"""Sample genotypes from distinct ancestral populations"""
rs = np.random.RandomState(seed)
# Determine size of each cohort (which will be roughly equal)
cohort_sizes = list(map(len, np.array_split(np.arange(n_sample), n_cohort)))
Expand All @@ -38,7 +38,7 @@ def simulate_dataset(
n_cohort: Optional[int] = None,
chunks: Any = (None, None),
) -> Dataset:
""" Simulate dataset with optional population structure """
"""Simulate dataset with optional population structure"""
ds = simulate_genotype_call_dataset(n_variant, n_sample, seed=0)
if n_cohort:
ac = simulate_cohort_genotypes(
Expand Down Expand Up @@ -284,7 +284,7 @@ def validate_allel_comparison(ds_sg: Dataset, ds_sk: Dataset) -> None:


def _align_vectors(x: ArrayLike, axis: int) -> ArrayLike:
""" Align vectors to common, arbitrary half-space """
"""Align vectors to common, arbitrary half-space"""
assert x.ndim == 2
v = np.random.RandomState(1).rand(x.shape[axis])
signs = np.dot(x, v)[:, np.newaxis] if axis == 1 else np.dot(v[np.newaxis], x)
Expand Down

0 comments on commit d798e95

Please sign in to comment.