Skip to content

Commit

Permalink
Even more linting and adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
dobraczka committed Dec 22, 2023
1 parent 91c14b7 commit a4dc4c3
Show file tree
Hide file tree
Showing 19 changed files with 80 additions and 107 deletions.
35 changes: 11 additions & 24 deletions kiez/analysis/estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
"k_occurrence",
]

_SPACE_LIMIT = 10000


def _calc_skewness_truncnorm(k_occurrence: np.ndarray) -> float:
"""Hubness measure; corrected for non-negativity of k-occurrence.
Expand All @@ -53,8 +55,7 @@ def _calc_skewness_truncnorm(k_occurrence: np.ndarray) -> float:
k_occurrence_std = k_occurrence.std(ddof=1)
a = (clip_left - k_occurrence_mean) / k_occurrence_std
b = (clip_right - k_occurrence_mean) / k_occurrence_std
skew_truncnorm = stats.truncnorm(a, b).moment(3)
return skew_truncnorm
return stats.truncnorm(a, b).moment(3)


def _calc_gini_index(
Expand Down Expand Up @@ -199,8 +200,6 @@ def hubness_score(
*,
k: Optional[int] = None,
hub_size: float = 2.0,
shuffle_equal: bool = True,
random_state=None,
verbose: int = 0,
return_value: str = "all_but_gini",
store_k_occurrence: bool = False,
Expand All @@ -219,16 +218,6 @@ def hubness_score(
number of k for k-nearest neighbor
hub_size : float
Hubs are defined as objects with k-occurrence > hub_size * k.
shuffle_equal : bool
If true shuffle neighbors with identical distances
to avoid artifact hubness.
NOTE: This is especially useful for secondary distance measures
with a finite number of possible values
random_state: int, RandomState instance or None, optional
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by `np.random`.
verbose : int
Level of output messages
return_value : str
Expand Down Expand Up @@ -285,12 +274,11 @@ def hubness_score(
k_neighbors = nn_ind.copy()
if k is None:
k = nn_ind.shape[1]
else:
if k < k_neighbors.shape[1]:
k_neighbors = k_neighbors[:, :k]
elif k > k_neighbors.shape[1]:
k = nn_ind.shape[1]
warnings.warn(f"k > nn_ind.shape[1], k will be set to {k}", stacklevel=2)
elif k < k_neighbors.shape[1]:
k_neighbors = k_neighbors[:, :k]
elif k > k_neighbors.shape[1]:
k = nn_ind.shape[1]
warnings.warn(f"k > nn_ind.shape[1], k will be set to {k}", stacklevel=2)
assert k is not None

# Negative indices can occur, when ANN does not find enough neighbors,
Expand All @@ -314,7 +302,7 @@ def hubness_score(

# Gini index
if return_value in ["gini", "all"]:
limiting = "space" if k_occurrence.shape[0] > 10_000 else "time"
limiting = "space" if k_occurrence.shape[0] > _SPACE_LIMIT else "time"
gini_index = _calc_gini_index(k_occurrence, limiting, verbose=verbose)
else:
gini_index = np.nan
Expand Down Expand Up @@ -357,8 +345,7 @@ def hubness_score(
hubness_measures["k_occurrence"] = k_occurrence
if return_value == "all":
return hubness_measures
elif return_value == "all_but_gini":
if return_value == "all_but_gini":
del hubness_measures["gini"]
return hubness_measures
else:
return hubness_measures[return_value]
return hubness_measures[return_value]
20 changes: 11 additions & 9 deletions kiez/hubness_reduction/dis_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

from .base import HubnessReduction

_DESIRED_P_VALUE = 2
_MINIMUM_DIST = 0.0


class DisSimLocal(HubnessReduction):
"""Hubness reduction with DisSimLocal.
Expand All @@ -27,7 +30,7 @@ class DisSimLocal(HubnessReduction):
----------
.. [1] Hara K, Suzuki I, Kobayashi K, Fukumizu K, Radovanović M (2016)
Flattening the density gradient for eliminating spatial centrality to reduce hubness.
In: Proceedings of the 30th AAAI conference on artificial intelligence, pp 1659–1665.
In: Proceedings of the 30th AAAI conference on artificial intelligence, pp 1659-1665.
https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12055
"""

Expand All @@ -36,13 +39,12 @@ def __init__(self, squared: bool = True, **kwargs):
self.squared = squared
if self.nn_algo.metric in ["euclidean", "minkowski"]:
self.squared = False
if hasattr(self.nn_algo, "p"):
if self.nn_algo.p != 2:
raise ValueError(
"DisSimLocal only supports squared Euclidean distances. If"
" the provided NNAlgorithm has a `p` parameter it must be"
f" set to p=2. Now it is p={self.nn_algo.p}"
)
if hasattr(self.nn_algo, "p") and self.nn_algo.p != _DESIRED_P_VALUE:
raise ValueError(
"DisSimLocal only supports squared Euclidean distances. If"
" the provided NNAlgorithm has a `p` parameter it must be"
f" set to p=2. Now it is p={self.nn_algo.p}"
)
elif self.nn_algo.metric in ["sqeuclidean"]:
self.squared = True
else:
Expand Down Expand Up @@ -152,7 +154,7 @@ def transform(
# certain scikit-learn routines (e.g. in metric='precomputed' usages).
# We, therefore, shift dissimilarities to non-negative values, if necessary.
min_dist = hub_reduced_dist.min()
if min_dist < 0.0:
if min_dist < _MINIMUM_DIST:
hub_reduced_dist += -min_dist

# Return Euclidean or squared Euclidean distances?
Expand Down
2 changes: 1 addition & 1 deletion kiez/hubness_reduction/local_scaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class LocalScaling(HubnessReduction):
----------
.. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
Local and global scaling reduce hubs in space. The Journal of Machine
Learning Research, 13(1), 2871–2902.
Learning Research, 13(1), 2871-2902.
"""

def __init__(self, method: str = "standard", **kwargs):
Expand Down
5 changes: 3 additions & 2 deletions kiez/hubness_reduction/mutual_proximity.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MutualProximity(HubnessReduction):
----------
.. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
Local and global scaling reduce hubs in space. The Journal of Machine
Learning Research, 13(1), 2871–2902.
Learning Research, 13(1), 2871-2902.
"""

def __init__(self, method: str = "normal", **kwargs):
Expand All @@ -39,7 +39,8 @@ def __init__(self, method: str = "normal", **kwargs):
f'Mutual proximity method "{method}" not recognized. Try "normal"'
' or "empiric".'
)
elif method in ["exact", "empiric"]:

if method in ["exact", "empiric"]:
self.method = "empiric"
elif method in ["normal", "gaussi"]:
self.method = "normal"
Expand Down
4 changes: 2 additions & 2 deletions kiez/io/temp_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def create_tempfile_preferably_in_dir(
"""
temp_file = mkstemp if persistent else NamedTemporaryFile
try:
handle = temp_file(suffix=suffix, prefix=prefix, dir=directory) # type: ignore
handle = temp_file(suffix=suffix, prefix=prefix, dir=directory) # type: ignore[operator]
warn = False
except FileNotFoundError:
handle = temp_file(suffix=suffix, prefix=prefix, dir=None) # type: ignore
handle = temp_file(suffix=suffix, prefix=prefix, dir=None) # type: ignore[operator]
warn = True

# Extract the path (as string)
Expand Down
3 changes: 2 additions & 1 deletion kiez/kiez.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def __init__(
f"n_neighbors does not take {type(n_candidates)} value, enter"
" integer value"
)
elif n_candidates <= 0:

if n_candidates <= 0:
raise ValueError(f"Expected n_candidates > 0. Got {n_candidates}")
if algorithm_kwargs is None:
algorithm_kwargs = {"n_candidates": n_candidates}
Expand Down
7 changes: 3 additions & 4 deletions kiez/neighbors/approximate/faiss.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class Faiss(NNAlgorithm):
For details about configuring faiss consult their wiki: https://github.com/facebookresearch/faiss/wiki
"""

valid_metrics = ["l2", "euclidean"]
valid_spaces = ["l2"]
valid_metrics = ("l2", "euclidean")
valid_spaces = "l2"

def __init__(
self,
Expand Down Expand Up @@ -117,5 +117,4 @@ def _kneighbors(self, k, query, index, return_distance, is_self_querying):
if self.metric == "euclidean":
dist = np.sqrt(dist)
return dist, ind
else:
return ind
return ind
15 changes: 8 additions & 7 deletions kiez/neighbors/approximate/nmslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
except ImportError: # pragma: no cover
nmslib = None

_VERBOSE_THRESH = 2


class NMSLIB(NNAlgorithm):
"""Wrapper for hierarchical navigable small world graphs based approximate nearest neighbor search implementation from NMSLIB.
Expand Down Expand Up @@ -44,22 +46,22 @@ class NMSLIB(NNAlgorithm):
See the nmslib documentation for more details: https://github.com/nmslib/nmslib/blob/master/manual/methods.md
"""

valid_metrics = [
valid_metrics = (
"euclidean",
"l2",
"minkowski",
"squared_euclidean",
"sqeuclidean",
"cosine",
"cosinesimil",
]
)

def __init__(
self,
n_candidates: int = 5,
metric: str = "euclidean",
method: str = "hnsw",
M: int = 16, # noqa: N803
M: int = 16,
post_processing: int = 2,
ef_construction: int = 200,
n_jobs: int = 1,
Expand Down Expand Up @@ -97,7 +99,7 @@ def __init__(
super().__init__(n_candidates=n_candidates, metric=metric, n_jobs=n_jobs)
self.verbose = verbose
self.method = method
self.M = M # noqa: N803
self.M = M
self.post_processing = post_processing
self.ef_construction = ef_construction

Expand Down Expand Up @@ -128,7 +130,7 @@ def _fit(self, data, is_source: bool):
"post": post_processing,
"indexThreadQty": self.n_jobs,
},
print_progress=(self.verbose >= 2),
print_progress=(self.verbose >= _VERBOSE_THRESH),
)
return hnsw_index

Expand Down Expand Up @@ -158,5 +160,4 @@ def _kneighbors(self, k, query, index, return_distance, is_self_querying):

if return_distance:
return neigh_dist, neigh_ind
else:
return neigh_ind
return neigh_ind
27 changes: 13 additions & 14 deletions kiez/neighbors/approximate/nng.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import annotations

import logging
from types import MappingProxyType

import numpy as np
from tqdm.auto import tqdm
Expand Down Expand Up @@ -62,7 +63,7 @@ class NNG(NNAlgorithmWithJoblib):
when required.
"""

valid_metrics = [
valid_metrics = (
"manhattan",
"L1",
"euclidean",
Expand All @@ -75,13 +76,15 @@ class NNG(NNAlgorithmWithJoblib):
"Normalized Cosine",
"Hamming",
"Jaccard",
]
_internal_distance_type = {
"manhattan": "L1",
"euclidean": "L2",
"minkowski": "L2",
"sqeuclidean": "L2",
}
)
_internal_distance_type = MappingProxyType(
{
"manhattan": "L1",
"euclidean": "L2",
"minkowski": "L2",
"sqeuclidean": "L2",
}
)

def __init__(
self,
Expand Down Expand Up @@ -147,10 +150,7 @@ def _index_dir_plausibility_check(self):
)

def _fit(self, data, is_source: bool):
if is_source:
prefix = "kiez_source"
else:
prefix = "kiez_target"
prefix = "kiez_source" if is_source else "kiez_target"

index_path = None
# Set up a directory to save the index to
Expand Down Expand Up @@ -264,5 +264,4 @@ def _kneighbors_part(self, k, query, index, return_distance, is_self_querying):

if return_distance:
return neigh_dist, neigh_ind
else:
return neigh_ind
return neigh_ind
7 changes: 3 additions & 4 deletions kiez/neighbors/approximate/random_projection_trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ class Annoy(NNAlgorithmWithJoblib):
See more details in the annoy documentation: https://github.com/spotify/annoy#full-python-api
"""

valid_metrics = [
valid_metrics = (
"angular",
"euclidean",
"manhattan",
"hamming",
"dot",
"minkowski",
]
)

def __init__(
self,
Expand Down Expand Up @@ -239,5 +239,4 @@ def _kneighbors_part(self, k, query, index, return_distance, is_self_querying):

if return_distance:
return neigh_dist, neigh_ind
else:
return neigh_ind
return neigh_ind
2 changes: 1 addition & 1 deletion kiez/neighbors/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ def available_nn_algorithms() -> List[Type[NNAlgorithm]]:
try:
nn_algorithm_resolver.make(ann)
available.append(nn_algorithm_resolver.lookup(ann))
except ImportError:
except ImportError: # noqa: PERF203
pass
return available
22 changes: 9 additions & 13 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def lint(session: Session) -> None:
"pre-commit",
"run",
"--all-files",
"--show-diff-on-failure",
"--hook-stage=manual",
*session.posargs,
)
Expand All @@ -118,18 +117,15 @@ def lint(session: Session) -> None:
@session()
def style_checking(session: Session) -> None:
args = session.posargs or locations
session.install(
"pyproject-flake8",
"flake8-eradicate",
"flake8-isort",
"flake8-debugger",
"flake8-comprehensions",
"flake8-print",
"flake8-black",
"flake8-bugbear",
"pydocstyle",
)
session.run("pflake8", *args)
session.install("ruff")
session.run("ruff", "check", *args)


@session()
def pedantic_checking(session: Session) -> None:
    """Run ``ruff check`` with the ARG, TID, PLR0913 and PLR0912 rules added.

    Checks the paths given as positional session arguments, falling back to
    ``locations`` when none are supplied. ``ruff`` is installed into the
    session before it is run.
    """
    args = session.posargs or locations
    session.install("ruff")
    # The command is executed from an argument list, not through a shell, so
    # quote characters inside an argument would reach ruff literally and break
    # the rule selector. Pass the comma-separated selector list unquoted.
    session.run("ruff", "check", "--extend-select=ARG,TID,PLR0913,PLR0912", *args)


@session()
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ indent-width = 4
target-version = "py38"

[tool.ruff.lint]
ignore = ["E111", "E114", "E117", "E501", "D1", "D203", "D213", "D206", "D300", "Q000", "Q001", "Q002", "Q003", "COM812", "COM819", "ISC001","ISC002", "B905", "W191"]
select = ["B", "C", "E", "F", "W", "B", "I", "D", "UP", "A", "C4", "T10", "ICN", "PIE", "PYI", "PT"]
ignore = ["E111", "E114", "E117", "E501", "D1", "D203", "D213", "D206", "D300", "Q000", "Q001", "Q002", "Q003", "COM812", "COM819", "ISC001","ISC002", "B905", "W191", "PLR0913", "PLR0912"]
select = ["B", "C", "E", "F", "W", "B", "I", "D", "UP", "A", "C4", "T10", "ICN", "PIE", "PYI", "PT", "RET", "SIM", "ERA", "PD", "PGH", "PL", "NPY", "PERF", "RUF"]

[tool.ruff.lint.mccabe]
max-complexity = 18
Expand Down
Loading

0 comments on commit a4dc4c3

Please sign in to comment.