Skip to content

Commit

Permalink
Dependency upgrades; disabling snorkel-related tests till the functionality is renamed and reworked, since snorkel is inactive
Browse files Browse the repository at this point in the history
  • Loading branch information
phurwicz committed Dec 26, 2024
1 parent 453fd72 commit 9f87a29
Show file tree
Hide file tree
Showing 15 changed files with 342 additions and 318 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cross-os-source-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.10', '3.11', '3.12']
os: [ubuntu-latest, macos-latest, windows-latest]

steps:
Expand Down
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
rev: v5.0.0
hooks:
- id: check-yaml
exclude: |
Expand All @@ -10,11 +10,11 @@ repos:
)$
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.1.0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
# - repo: https://github.com/psf/black
# rev: 24.10.0
# hooks:
# - id: black
# - repo: https://github.com/PyCQA/flake8
# rev: 7.1.1
# hooks:
# - id: flake8
10 changes: 7 additions & 3 deletions hover/core/explorer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,7 @@ def _subroutine_link_selection_options(self, other):
| :------ | :------ | :----------------------------- |
| `other` | `BokehBaseExplorer` | the other explorer |
"""

# link selection option values
def option_lr(attr, old, new):
other.selection_option_box.active = self.selection_option_box.active
Expand Down Expand Up @@ -882,15 +883,18 @@ def find_embedding_fields(self):
), f"Expected at least two embedding columns, found {embedding_cols}"
return embedding_cols

def auto_color_mapping(self):
def auto_color_mapping(self, additional_label_columns=tuple()):
"""
???+ note "Find all labels and an appropriate color for each."
"""
from hover.utils.bokeh_helper import auto_label_color

labels = set()
for _key in self.dfs.keys():
labels = labels.union(set(DataFrame.series_values(self.dfs[_key]["label"])))
for _col in ["label", *additional_label_columns]:
for _key in self.dfs.keys():
labels = labels.union(
set(DataFrame.series_values(self.dfs[_key][_col]))
)

return auto_label_color(labels)

Expand Down
39 changes: 21 additions & 18 deletions hover/core/explorer/functionality.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def plot(self):
"""
xy_axes = self.find_embedding_fields()[:2]
for _key, _source in self.sources.items():
self.figure.circle(
self.figure.scatter(
*xy_axes, name=_key, source=_source, **self.glyph_kwargs[_key]
)
self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")
Expand Down Expand Up @@ -213,7 +213,7 @@ def plot(self):
"""
xy_axes = self.find_embedding_fields()[:2]
for _key, _source in self.sources.items():
self.figure.circle(
self.figure.scatter(
*xy_axes,
name=_key,
color=SOURCE_COLOR_FIELD,
Expand Down Expand Up @@ -300,7 +300,7 @@ def _postprocess_sources(self):
???+ note "Infer glyph colors from the label dynamically."
"""
# infer glyph color from labels
color_dict = self.auto_color_mapping()
color_dict = self.auto_color_mapping(additional_label_columns=[self.label_col])

# infer glyph alpha from pseudo-percentile of soft label scores
scores = np.concatenate(
Expand Down Expand Up @@ -407,7 +407,7 @@ def plot(self, **kwargs):
eff_kwargs.update(preset_kwargs)
eff_kwargs.update(kwargs)

self.figure.circle(*xy_axes, name=_key, source=_source, **eff_kwargs)
self.figure.scatter(*xy_axes, name=_key, source=_source, **eff_kwargs)
self._good(f"Plotted subset {_key} with {self.dfs[_key].shape[0]} points")


Expand Down Expand Up @@ -490,26 +490,26 @@ def plot(self, label, **kwargs):
col_b_pos = np.where(mask_b)[0].tolist()
col_b_neg = np.where(np.logical_not(mask_b))[0].tolist()
agreement_view = CDSView(
source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_pos)]
filter=(IndexFilter(col_a_pos) & IndexFilter(col_b_pos))
)
increment_view = CDSView(
source=_source, filters=[IndexFilter(col_a_neg), IndexFilter(col_b_pos)]
filter=(IndexFilter(col_a_neg) & IndexFilter(col_b_pos))
)
decrement_view = CDSView(
source=_source, filters=[IndexFilter(col_a_pos), IndexFilter(col_b_neg)]
filter=(IndexFilter(col_a_pos) & IndexFilter(col_b_neg))
)

to_plot = [
{"view": agreement_view, "marker": self.figure.square},
{"view": increment_view, "marker": self.figure.x},
{"view": decrement_view, "marker": self.figure.cross},
{"view": agreement_view, "marker": "square"},
{"view": increment_view, "marker": "x"},
{"view": decrement_view, "marker": "cross"},
]

# plot created subsets
for _dict in to_plot:
_view = _dict["view"]
_marker = _dict["marker"]
_marker(*xy_axes, name=_key, source=_source, view=_view, **eff_kwargs)
self.figure.scatter(
*xy_axes, name=_key, source=_source, **_dict, **eff_kwargs
)


class BokehSnorkelExplorer(BokehBaseExplorer):
Expand Down Expand Up @@ -726,7 +726,7 @@ def plot(self, *args, **kwargs):
???+ note "Plot the raw subset in the background."
"""
xy_axes = self.find_embedding_fields()[:2]
self.figure.circle(
self.figure.scatter(
*xy_axes, name="raw", source=self.sources["raw"], **self.glyph_kwargs["raw"]
)
self._good(f"Plotted subset raw with {self.dfs['raw'].shape[0]} points")
Expand Down Expand Up @@ -876,7 +876,8 @@ def plot_new_lf(
# add correct/incorrect/missed/hit glyphs
if "C" in include:
view = self._view_correct(L_labeled)
data_dict["glyphs"]["C"] = self.figure.square(
data_dict["glyphs"]["C"] = self.figure.scatter(
marker="square",
*xy_axes,
source=self.sources["labeled"],
view=view,
Expand All @@ -886,7 +887,8 @@ def plot_new_lf(
)
if "I" in include:
view = self._view_incorrect(L_labeled)
data_dict["glyphs"]["I"] = self.figure.x(
data_dict["glyphs"]["I"] = self.figure.scatter(
marker="x",
*xy_axes,
source=self.sources["labeled"],
view=view,
Expand All @@ -896,7 +898,8 @@ def plot_new_lf(
)
if "M" in include:
view = self._view_missed(L_labeled, lf.targets)
data_dict["glyphs"]["M"] = self.figure.cross(
data_dict["glyphs"]["M"] = self.figure.scatter(
marker="cross",
*xy_axes,
source=self.sources["labeled"],
view=view,
Expand All @@ -906,7 +909,7 @@ def plot_new_lf(
)
if "H" in include:
view = self._view_hit(L_raw)
data_dict["glyphs"]["H"] = self.figure.circle(
data_dict["glyphs"]["H"] = self.figure.scatter(
*xy_axes,
source=self.sources["raw"],
view=view,
Expand Down
9 changes: 6 additions & 3 deletions hover/utils/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def column_map(self, column, mapping, indices=None, as_column=None, output="nump
example_value = list(mapping.values())[0]
dtype = self._get_return_type(example_value)
if self.shape[0] > 0:
series = subject[column].map_dict(mapping, return_dtype=dtype)
series = subject[column].replace_strict(mapping, return_dtype=dtype)
else:
series = pl.Series([], dtype=dtype)
return self._post_apply(series, as_column, output)
Expand All @@ -543,7 +543,8 @@ def column_apply(
if self.shape[0] > 0:
example_value = function(self.get_cell_by_row_column(0, column))
dtype = self._get_return_type(example_value)
series = subject[column].apply(function, return_dtype=dtype)
series = subject[column].map_elements(function, return_dtype=dtype)
# series = subject[column].apply(function, return_dtype=dtype)
else:
series = pl.Series([])
return self._post_apply(series, as_column, output)
Expand All @@ -568,7 +569,9 @@ def row_apply(self, function, indices=None, as_column=None, output="numpy"):

# create the function to be applied
to_apply = (
pl.struct(self._df.columns).apply(function, return_dtype=dtype).alias(col)
pl.struct(self._df.columns)
.map_elements(function, return_dtype=dtype)
.alias(col)
)
# apply the function
if as_column is None:
Expand Down
117 changes: 59 additions & 58 deletions hover/utils/snorkel_helper.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,59 @@
import uuid


def labeling_function(targets, label_encoder=None, **kwargs):
    """
    ???+ note "Hover's flavor of the Snorkel labeling_function decorator."
        However, due to the dynamic label encoding nature of hover,
        the decorated function should return the original string label, not its encoding integer.

        - assigns a UUID for easy identification
        - keeps track of LF targets
        - when `label_encoder` is given, also links a snorkel-style
          (integer-returning) labeling function under the same name

        | Param           | Type   | Description                          |
        | :-------------- | :----- | :----------------------------------- |
        | `targets`       | `list` of `str` | labels that the labeling function is intended to create |
        | `label_encoder` | `dict` | {decoded_label -> encoded_label} mapping, if you also want an original snorkel-style labeling function linked as a `.snorkel` attribute |
        | `**kwargs`      |        | forwarded to `snorkel`'s `labeling_function()` |

        Returns a decorator; applying it to a function yields a Snorkel
        `LabelingFunction` carrying the extra attributes `uuid`, `targets`,
        `label_encoder` and `snorkel` (the latter two are `None` when no
        `label_encoder` is supplied).
    """
    # lazy import so that the package does not require snorkel
    # Feb 3, 2022: snorkel's dependency handling is too strict
    # for other dependencies like NumPy, SciPy, SpaCy, etc.
    # Let's cite Snorkel and lazy import or copy functions.
    # DO NOT explicitly depend on Snorkel without confirming
    # that all builds/tests pass by Anaconda standards, else
    # we risk having to drop conda support.
    from snorkel.labeling import (
        labeling_function as snorkel_lf,
        LabelingFunction as SnorkelLF,
    )

    def wrapper(func):
        # set up kwargs for Snorkel's LF
        # a default name that can be overridden
        snorkel_kwargs = {"name": func.__name__}
        snorkel_kwargs.update(kwargs)

        # return value of hover's decorator
        lf = SnorkelLF(f=func, **snorkel_kwargs)

        # additional attributes
        lf.uuid = uuid.uuid1()
        # shallow copy so later mutation of the caller's list does not leak in
        lf.targets = targets[:]

        # link a snorkel-style labeling function if applicable
        if label_encoder:
            lf.label_encoder = label_encoder

            def snorkel_style_func(x):
                # translate the string label into its encoded counterpart
                return lf.label_encoder[func(x)]

            # use snorkel_kwargs (not the raw kwargs) so that the default
            # name is func.__name__; otherwise every linked function would
            # fall back to the closure's name "snorkel_style_func"
            lf.snorkel = snorkel_lf(**snorkel_kwargs)(snorkel_style_func)
        else:
            lf.label_encoder = None
            lf.snorkel = None

        return lf

    return wrapper
# import uuid
#
#
# def labeling_function(targets, label_encoder=None, **kwargs):
# """
# ???+ note "Hover's flavor of the Snorkel labeling_function decorator."
# However, due to the dynamic label encoding nature of hover,
# the decorated function should return the original string label, not its encoding integer.
#
# - assigns a UUID for easy identification
# - keeps track of LF targets
#
# | Param | Type | Description |
# | :-------------- | :----- | :----------------------------------- |
# | `targets` | `list` of `str` | labels that the labeling function is intended to create |
# | `label_encoder` | `dict` | {decoded_label -> encoded_label} mapping, if you also want an original snorkel-style labeling function linked as a `.snorkel` attribute |
# | `**kwargs` | | forwarded to `snorkel`'s `labeling_function()` |
# """
# # lazy import so that the package does not require snorkel
# # Feb 3, 2022: snorkel's dependency handling is too strict
# # for other dependencies like NumPy, SciPy, SpaCy, etc.
# # Let's cite Snorkel and lazy import or copy functions.
# # DO NOT explicitly depend on Snorkel without confirming
# # that all builds/tests pass by Anaconda standards, else
# # we risk having to drop conda support.
# from snorkel.labeling import (
# labeling_function as snorkel_lf,
# LabelingFunction as SnorkelLF,
# )
#
# def wrapper(func):
# # set up kwargs for Snorkel's LF
# # a default name that can be overridden
# snorkel_kwargs = {"name": func.__name__}
# snorkel_kwargs.update(kwargs)
#
# # return value of hover's decorator
# lf = SnorkelLF(f=func, **snorkel_kwargs)
#
# # additional attributes
# lf.uuid = uuid.uuid1()
# lf.targets = targets[:]
#
# # link a snorkel-style labeling function if applicable
# if label_encoder:
# lf.label_encoder = label_encoder
#
# def snorkel_style_func(x):
# return lf.label_encoder[func(x)]
#
# lf.snorkel = snorkel_lf(**kwargs)(snorkel_style_func)
# else:
# lf.label_encoder = None
# lf.snorkel = None
#
# return lf
#
# return wrapper
#
23 changes: 12 additions & 11 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def get_description():

setuptools.setup(
name="hover",
version="0.9.0",
version="0.10.0",
description="Label data at scale. Fun and precision included.",
long_description=get_description(),
long_description_content_type="text/markdown",
Expand All @@ -24,32 +24,33 @@ def get_description():
install_requires=[
# python-version-specific example: "numpy>=1.14,<=1.21.5;python_version<'3.8.0'",
# interactive/static visualization
"bokeh>=3.0.3",
"bokeh>=3.4.3",
# preprocessors
"scikit-learn>=0.20.0",
"scikit-learn>=1.4.0",
# neural stuff
"torch>=1.10.0",
"torch>=2.0.0",
# data handling
"pandas>=1.3.0",
"polars>=0.17.0",
"pyarrow>=11.0.0",
"numpy>=1.22",
"pandas>=2.0.0",
"polars>=1.10.0",
"pyarrow>=18.0.0",
# "numpy>=2.0.0",
"numpy>=1.25.0,<2.0.0",
# computations
"scipy>=1.3.2",
"scipy>=1.10.0",
# utilities
"tqdm>=4.0",
"rich>=11.0.0",
"deprecated>=1.1.0",
# dimensionality reduction: UMAP is included
"umap-learn>=0.3.10",
"umap-learn>=0.5.0",
# module config customization
"flexmod>=0.1.2",
# optional: more dimensionality reduction methods
# "ivis[cpu]>=1.7",
# optional: distant supervision
# "snorkel>=0.9.8",
],
python_requires=">=3.8",
python_requires=">=3.10",
classifiers=[
"Programming Language :: Python :: 3",
"Development Status :: 4 - Beta",
Expand Down
Loading

0 comments on commit 9f87a29

Please sign in to comment.