Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental] Introduce ImplicitBPRWrapper model #232

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Added
- `ImplicitBPRWrapperModel` model ([#232](https://github.com/MobileTeleSystems/RecTools/pull/232))

## [0.9.0] - 11.12.2024

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ line-length = 120
target-version = ["py39", "py310", "py311", "py312"]


[tool.isort]
profile = "black"
chezou marked this conversation as resolved.
Show resolved Hide resolved

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
6 changes: 5 additions & 1 deletion rectools/metrics/auc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@

from rectools import Columns
from rectools.metrics.base import outer_merge_reco
from rectools.metrics.debias import DebiasableMetrikAtK, calc_debiased_fit_task, debias_interactions
from rectools.metrics.debias import (
DebiasableMetrikAtK,
calc_debiased_fit_task,
debias_interactions,
)


class InsufficientHandling(str, Enum):
Expand Down
3 changes: 2 additions & 1 deletion rectools/metrics/intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, Hashable, Optional, Union
from collections.abc import Hashable
from typing import Dict, Optional, Union

import attr
import numpy as np
Expand Down
7 changes: 6 additions & 1 deletion rectools/metrics/ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@
from rectools.metrics.base import merge_reco
from rectools.utils import log_at_base, select_by_type

from .debias import DebiasableMetrikAtK, calc_debiased_fit_task, debias_for_metric_configs, debias_interactions
from .debias import (
DebiasableMetrikAtK,
calc_debiased_fit_task,
debias_for_metric_configs,
debias_interactions,
)


@attr.s
Expand Down
6 changes: 5 additions & 1 deletion rectools/metrics/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@

from .auc import AucMetric, calc_auc_metrics
from .base import Catalog, MetricAtK, merge_reco
from .classification import ClassificationMetric, SimpleClassificationMetric, calc_classification_metrics
from .classification import (
ClassificationMetric,
SimpleClassificationMetric,
calc_classification_metrics,
)
from .diversity import DiversityMetric, calc_diversity_metrics
from .dq import CrossDQMetric, RecoDQMetric, calc_cross_dq_metrics, calc_reco_dq_metrics
from .intersection import IntersectionMetric, calc_intersection_metrics
Expand Down
2 changes: 2 additions & 0 deletions rectools/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

from .ease import EASEModel
from .implicit_als import ImplicitALSWrapperModel
from .implicit_bpr import ImplicitBPRWrapperModel
from .implicit_knn import ImplicitItemKNNWrapperModel
from .popular import PopularModel
from .popular_in_category import PopularInCategoryModel
Expand All @@ -59,6 +60,7 @@
__all__ = (
"EASEModel",
"ImplicitALSWrapperModel",
"ImplicitBPRWrapperModel",
"ImplicitItemKNNWrapperModel",
"LightFMWrapperModel",
"PopularModel",
Expand Down
6 changes: 5 additions & 1 deletion rectools/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
from rectools.exceptions import NotFittedError
from rectools.types import ExternalIdsArray, InternalIdsArray
from rectools.utils.config import BaseConfig
from rectools.utils.misc import get_class_or_function_full_path, import_object, make_dict_flat
from rectools.utils.misc import (
get_class_or_function_full_path,
import_object,
make_dict_flat,
)
from rectools.utils.serialization import PICKLE_PROTOCOL, FileLike, read_bytes

T = tp.TypeVar("T", bound="ModelBase")
Expand Down
8 changes: 2 additions & 6 deletions rectools/models/implicit_als.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@
from implicit.cpu.als import AlternatingLeastSquares as CPUAlternatingLeastSquares
from implicit.gpu.als import AlternatingLeastSquares as GPUAlternatingLeastSquares
from implicit.utils import check_random_state
from pydantic import BeforeValidator, ConfigDict, PlainSerializer, SerializationInfo, WrapSerializer
from pydantic import BeforeValidator, ConfigDict, SerializationInfo, WrapSerializer
from scipy import sparse
from tqdm.auto import tqdm

from rectools.dataset import Dataset, Features
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.utils.misc import get_class_or_function_full_path, import_object
from rectools.utils.serialization import RandomState
from rectools.utils.serialization import DType, RandomState

from .rank import Distance
from .vector import Factors, VectorModel
Expand Down Expand Up @@ -68,10 +68,6 @@ def _serialize_alternating_least_squares_class(
),
]

DType = tpe.Annotated[
np.dtype, BeforeValidator(func=np.dtype), PlainSerializer(func=lambda dtp: dtp.name, when_used="json")
]


class AlternatingLeastSquaresConfig(tpe.TypedDict):
"""Config for implicit `AlternatingLeastSquares` model."""
Expand Down
228 changes: 228 additions & 0 deletions rectools/models/implicit_bpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import typing as tp
from copy import deepcopy

import numpy as np
import typing_extensions as tpe
from implicit.bpr import BayesianPersonalizedRanking

# pylint: disable=no-name-in-module
from implicit.cpu.bpr import (
BayesianPersonalizedRanking as CPUBayesianPersonalizedRanking,
)
from implicit.gpu.bpr import (
BayesianPersonalizedRanking as GPUBayesianPersonalizedRanking,
)

# pylint: enable=no-name-in-module
from pydantic import BeforeValidator, ConfigDict, SerializationInfo, WrapSerializer

from rectools.dataset.dataset import Dataset
from rectools.exceptions import NotFittedError
from rectools.models.base import ModelConfig
from rectools.models.rank import Distance
from rectools.models.vector import Factors, VectorModel
from rectools.utils.misc import get_class_or_function_full_path, import_object
from rectools.utils.serialization import DType, RandomState

# String alias used in configs to denote the default implicit BPR implementation
# (it is what `_serialize_bpr_class` emits for the stock CPU/GPU classes).
BPR_STRING = "BayesianPersonalizedRanking"

# Either of the two concrete implicit BPR implementations imported above.
AnyBayesianPersonalizedRanking = tp.Union[CPUBayesianPersonalizedRanking, GPUBayesianPersonalizedRanking]
# Accepted values for the `cls` config key: a BPR class object or the literal string alias.
BayesianPersonalizedRankingType = tp.Union[
    tp.Type[AnyBayesianPersonalizedRanking], tp.Literal["BayesianPersonalizedRanking"]
]


def _get_bpr_class(spec: tp.Any) -> tp.Any:
    """
    Normalize a raw ``cls`` config value before validation.

    The short alias and the full import path of the default
    `BayesianPersonalizedRanking` both collapse to the short alias string.
    Any other string is treated as an import path and resolved to an object.
    Non-string values are passed through unchanged.
    """
    default_aliases = (BPR_STRING, get_class_or_function_full_path(BayesianPersonalizedRanking))
    if spec in default_aliases:
        return "BayesianPersonalizedRanking"
    return import_object(spec) if isinstance(spec, str) else spec


def _serialize_bpr_class(
    cls: BayesianPersonalizedRankingType, handler: tp.Callable, info: SerializationInfo
) -> tp.Union[None, str, AnyBayesianPersonalizedRanking]:
    """
    Serialize the ``cls`` config value.

    The stock CPU/GPU classes and the string alias are always written as the
    short alias. Any other class is written as its full import path in JSON
    mode and returned as-is otherwise. ``handler`` is not used.
    """
    is_default = cls == "BayesianPersonalizedRanking" or cls in (
        CPUBayesianPersonalizedRanking,
        GPUBayesianPersonalizedRanking,
    )
    if is_default:
        return BPR_STRING
    return get_class_or_function_full_path(cls) if info.mode == "json" else cls


# Type of the `cls` config key: `_get_bpr_class` pre-processes the raw value before validation,
# and `_serialize_bpr_class` wraps serialization in every mode (`when_used="always"`).
BayesianPersonalizedRankingClass = tpe.Annotated[
    BayesianPersonalizedRankingType,
    BeforeValidator(_get_bpr_class),
    WrapSerializer(
        func=_serialize_bpr_class,
        when_used="always",
    ),
]


class BayesianPersonalizedRankingConfig(tpe.TypedDict):
    """
    Config for implicit `BayesianPersonalizedRanking` model.

    Every key is declared ``NotRequired``, so any subset of them may be given.
    Apart from ``cls``, the keys are forwarded as keyword arguments to the model
    constructor in ``ImplicitBPRWrapperModel._from_config``.
    """

    # Model class to instantiate, or the "BayesianPersonalizedRanking" string alias.
    cls: tpe.NotRequired[BayesianPersonalizedRankingClass]
    factors: tpe.NotRequired[int]
    learning_rate: tpe.NotRequired[float]
    regularization: tpe.NotRequired[float]
    # Validated/serialized through the shared `DType` annotation.
    dtype: tpe.NotRequired[DType]
    num_threads: tpe.NotRequired[int]
    iterations: tpe.NotRequired[int]
    verify_negative_samples: tpe.NotRequired[bool]
    # Validated/serialized through the shared `RandomState` annotation.
    random_state: tpe.NotRequired[RandomState]
    # NOTE(review): presumably selects the GPU implementation in implicit's factory function — confirm.
    use_gpu: tpe.NotRequired[bool]


class ImplicitBPRWrapperModelConfig(ModelConfig):
    """Config for `ImplicitBPRWrapperModel`"""

    # Let pydantic accept field types it has no built-in schema for
    # (presumably needed for the class objects carried in `model["cls"]` — confirm).
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Parameters of the wrapped implicit BPR model.
    model: BayesianPersonalizedRankingConfig


class ImplicitBPRWrapperModel(VectorModel[ImplicitBPRWrapperModelConfig]):
    """
    Wrapper for `implicit.bpr.BayesianPersonalizedRanking` model.

    See https://implicit.readthedocs.io/en/latest/bpr.html for details of the base model.

    Parameters
    ----------
    model : BayesianPersonalizedRanking
        Base model to wrap.
    verbose : int, default ``0``
        Degree of verbose output. If ``0``, no output will be provided.
    """

    recommends_for_warm = False
    recommends_for_cold = False

    u2i_dist = Distance.DOT
    i2i_dist = Distance.COSINE

    config_class = ImplicitBPRWrapperModelConfig

    def __init__(self, model: AnyBayesianPersonalizedRanking, verbose: int = 0):
        # The config is built from the untouched model before anything else happens to it.
        self._config = self._make_config(model, verbose)
        super().__init__(verbose=verbose)
        self.model: AnyBayesianPersonalizedRanking
        self._model = model  # for refit

        self.use_gpu = isinstance(model, GPUBayesianPersonalizedRanking)
        if not self.use_gpu:
            # `num_threads` is only read from CPU models.
            self.n_threads = model.num_threads

    @classmethod
    def _make_config(cls, model: AnyBayesianPersonalizedRanking, verbose: int) -> ImplicitBPRWrapperModelConfig:
        """Build the wrapper config from the hyper-parameters of an existing BPR model."""
        # Stock implicit classes are stored under the short alias; custom classes are stored as-is.
        if model.__class__ in (CPUBayesianPersonalizedRanking, GPUBayesianPersonalizedRanking):
            model_cls: tp.Any = BPR_STRING
        else:
            model_cls = model.__class__

        inner_model_config = {
            "cls": model_cls,
            "factors": model.factors,
            "learning_rate": model.learning_rate,
            "dtype": None,
            "regularization": model.regularization,
            "iterations": model.iterations,
            "verify_negative_samples": model.verify_negative_samples,
            "random_state": model.random_state,
        }
        if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
            # `dtype` is not read from GPU models. Leaving it as `None` would make the `DType`
            # validator (`np.dtype(None)`) silently report float64, so the precision the GPU
            # implementation works in (float32) is stated explicitly instead.
            inner_model_config.update(
                {
                    "use_gpu": True,
                    "dtype": np.float32,
                }
            )
        else:
            inner_model_config.update(
                {
                    "use_gpu": False,
                    "dtype": model.dtype,
                    "num_threads": model.num_threads,
                }
            )

        return ImplicitBPRWrapperModelConfig(
            cls=cls,
            model=tp.cast(BayesianPersonalizedRankingConfig, inner_model_config),
            verbose=verbose,
        )

    def _get_config(self) -> ImplicitBPRWrapperModelConfig:
        """Return the config captured at construction time."""
        return self._config

    @classmethod
    def _from_config(cls, config: ImplicitBPRWrapperModelConfig) -> tpe.Self:
        """Instantiate the inner BPR model from `config.model` and wrap it."""
        # Work on a copy so popping `cls` does not mutate the config object.
        inner_model_params = deepcopy(config.model)
        inner_model_cls = inner_model_params.pop("cls", BayesianPersonalizedRanking)
        inner_model_cls = tp.cast(tp.Callable, inner_model_cls)
        if inner_model_cls == BPR_STRING:
            # The alias stands for implicit's `BayesianPersonalizedRanking` entry point.
            inner_model_cls = BayesianPersonalizedRanking
        model = inner_model_cls(**inner_model_params)
        return cls(model=model, verbose=config.verbose)

    def _fit(self, dataset: Dataset) -> None:
        """Fit a fresh copy of the wrapped model on the dataset's user-item matrix."""
        # Always start from a copy of the original model so that refitting does not
        # continue from a previously fitted state.
        self.model = deepcopy(self._model)

        ui_csr = dataset.get_user_item_matrix(include_weights=True).astype(np.float32)
        self.model.fit(ui_csr, show_progress=self.verbose > 0)

    def _get_users_factors(self, dataset: Dataset) -> Factors:
        """Return user embeddings of the fitted model wrapped in `Factors`."""
        return Factors(get_users_vectors(self.model))

    def _get_items_factors(self, dataset: Dataset) -> Factors:
        """Return item embeddings of the fitted model wrapped in `Factors`."""
        return Factors(get_items_vectors(self.model))

    def get_vectors(self) -> tp.Tuple[np.ndarray, np.ndarray]:
        """
        Return user and item vector representation from fitted model.

        Returns
        -------
        (np.ndarray, np.ndarray)
            User and item vectors.
            Shapes are (n_users, n_factors) and (n_items, n_factors).
            NOTE(review): implicit's BPR is believed to store a bias term as one extra
            column, in which case the second dimension is ``n_factors + 1`` — confirm
            against the installed `implicit` version.

        Raises
        ------
        NotFittedError
            If the model has not been fitted yet.
        """
        if not self.is_fitted:
            raise NotFittedError(self.__class__.__name__)
        return get_users_vectors(self.model), get_items_vectors(self.model)


def get_users_vectors(model: AnyBayesianPersonalizedRanking) -> np.ndarray:
    """
    Extract the user embedding matrix of a BPR model as a numpy array.

    Parameters
    ----------
    model : BayesianPersonalizedRanking
        Fitted BPR model, either the CPU or the GPU implementation.

    Returns
    -------
    np.ndarray
        User vectors.
    """
    user_factors = model.user_factors
    if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
        # GPU models expose a matrix object that has to be converted explicitly.
        user_factors = user_factors.to_numpy()
    return user_factors


def get_items_vectors(model: AnyBayesianPersonalizedRanking) -> np.ndarray:
    """
    Extract the item embedding matrix of a BPR model as a numpy array.

    Parameters
    ----------
    model : BayesianPersonalizedRanking
        Fitted BPR model, either the CPU or the GPU implementation.

    Returns
    -------
    np.ndarray
        Item vectors.
    """
    item_factors = model.item_factors
    if isinstance(model, GPUBayesianPersonalizedRanking):  # pragma: no cover
        # GPU models expose a matrix object that has to be converted explicitly.
        item_factors = item_factors.to_numpy()
    return item_factors
7 changes: 6 additions & 1 deletion rectools/models/implicit_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
import implicit.nearest_neighbours
import numpy as np
import typing_extensions as tpe
from implicit.nearest_neighbours import BM25Recommender, CosineRecommender, ItemItemRecommender, TFIDFRecommender
from implicit.nearest_neighbours import (
BM25Recommender,
CosineRecommender,
ItemItemRecommender,
TFIDFRecommender,
)
from implicit.utils import ParameterWarning
from pydantic import BeforeValidator, ConfigDict, PlainSerializer
from scipy import sparse
Expand Down
8 changes: 7 additions & 1 deletion rectools/models/popular_in_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@
from rectools.types import InternalIdsArray

from .base import ModelBase, Scores
from .popular import FixedColdRecoModelMixin, PopularModel, PopularModelConfig, PopularModelMixin, PopularityOptions
from .popular import (
FixedColdRecoModelMixin,
PopularityOptions,
PopularModel,
PopularModelConfig,
PopularModelMixin,
)


class MixingStrategy(Enum):
Expand Down
4 changes: 3 additions & 1 deletion rectools/models/rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import implicit.cpu
import implicit.gpu
import numpy as np
from implicit.cpu.matrix_factorization_base import _filter_items_from_sparse_matrix as filter_items_from_sparse_matrix
from implicit.cpu.matrix_factorization_base import (
_filter_items_from_sparse_matrix as filter_items_from_sparse_matrix,
)
from implicit.gpu import HAS_CUDA
from scipy import sparse

Expand Down
Loading