Skip to content

Commit

Permalink
refactor(datasets): deprecate "DataSet" type names (svmlight)
Browse files Browse the repository at this point in the history
Signed-off-by: Deepyaman Datta <[email protected]>
  • Loading branch information
deepyaman committed Sep 18, 2023
1 parent 6eac94c commit fc2dff7
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 86 deletions.
1 change: 1 addition & 0 deletions kedro-datasets/docs/source/kedro_datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ kedro_datasets
kedro_datasets.spark.SparkStreamingDataSet
kedro_datasets.spark.SparkStreamingDataset
kedro_datasets.svmlight.SVMLightDataSet
kedro_datasets.svmlight.SVMLightDataset
kedro_datasets.tensorflow.TensorFlowModelDataSet
kedro_datasets.text.TextDataSet
kedro_datasets.tracking.JSONDataSet
Expand Down
11 changes: 7 additions & 4 deletions kedro-datasets/kedro_datasets/svmlight/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""``AbstractDataSet`` implementation to load/save data from/to a svmlight/
libsvm sparse data file."""
"""``AbstractDataset`` implementation to load/save data from/to a
svmlight/libsvm sparse data file."""
from __future__ import annotations

from typing import Any

import lazy_loader as lazy

# https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
SVMLightDataSet: Any
SVMLightDataSet: type[SVMLightDataset]
SVMLightDataset: Any

__getattr__, __dir__, __all__ = lazy.attach(
__name__, submod_attrs={"svmlight_dataset": ["SVMLightDataSet"]}
__name__, submod_attrs={"svmlight_dataset": ["SVMLightDataSet", "SVMLightDataset"]}
)
44 changes: 31 additions & 13 deletions kedro-datasets/kedro_datasets/svmlight/svmlight_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""``SVMLightDataSet`` loads/saves data from/to a svmlight/libsvm file using an
"""``SVMLightDataset`` loads/saves data from/to a svmlight/libsvm file using an
underlying filesystem (e.g.: local, S3, GCS). It uses sklearn functions
``dump_svmlight_file`` to save and ``load_svmlight_file`` to load a file.
"""
import warnings
from copy import deepcopy
from pathlib import PurePosixPath
from typing import Any, Dict, Optional, Tuple, Union
Expand All @@ -12,8 +13,7 @@
from scipy.sparse.csr import csr_matrix
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

from .._io import AbstractVersionedDataset as AbstractVersionedDataSet
from .._io import DatasetError as DataSetError
from kedro_datasets._io import AbstractVersionedDataset, DatasetError

# NOTE: kedro.extras.datasets will be removed in Kedro 0.19.0.
# Any contribution to datasets should be made in kedro-datasets
Expand All @@ -25,8 +25,8 @@
_DO = Tuple[csr_matrix, ndarray]


class SVMLightDataSet(AbstractVersionedDataSet[_DI, _DO]):
"""``SVMLightDataSet`` loads/saves data from/to a svmlight/libsvm file using an
class SVMLightDataset(AbstractVersionedDataset[_DI, _DO]):
"""``SVMLightDataset`` loads/saves data from/to a svmlight/libsvm file using an
underlying filesystem (e.g.: local, S3, GCS). It uses sklearn functions
``dump_svmlight_file`` to save and ``load_svmlight_file`` to load a file.
Expand All @@ -46,15 +46,15 @@ class SVMLightDataSet(AbstractVersionedDataSet[_DI, _DO]):
.. code-block:: yaml
svm_dataset:
type: svmlight.SVMLightDataSet
type: svmlight.SVMLightDataset
filepath: data/01_raw/location.svm
load_args:
zero_based: False
save_args:
zero_based: False
cars:
type: svmlight.SVMLightDataSet
type: svmlight.SVMLightDataset
filepath: gcs://your_bucket/cars.svm
fs_args:
project: my-project
Expand All @@ -69,15 +69,15 @@ class SVMLightDataSet(AbstractVersionedDataSet[_DI, _DO]):
advanced_data_catalog_usage.html>`_:
::
>>> from kedro_datasets.svmlight import SVMLightDataSet
>>> from kedro_datasets.svmlight import SVMLightDataset
>>> import numpy as np
>>>
>>> # Features and labels.
>>> data = (np.array([[0, 1], [2, 3.14159]]), np.array([7, 3]))
>>>
>>> data_set = SVMLightDataSet(filepath="test.svm")
>>> data_set.save(data)
>>> reloaded_features, reloaded_labels = data_set.load()
>>> dataset = SVMLightDataset(filepath="test.svm")
>>> dataset.save(data)
>>> reloaded_features, reloaded_labels = dataset.load()
>>> assert (data[0] == reloaded_features).all()
>>> assert (data[1] == reloaded_labels).all()
Expand All @@ -97,7 +97,7 @@ def __init__(
fs_args: Dict[str, Any] = None,
metadata: Dict[str, Any] = None,
) -> None:
"""Creates a new instance of SVMLightDataSet to load/save data from a svmlight/libsvm file.
"""Creates a new instance of SVMLightDataset to load/save data from a svmlight/libsvm file.
Args:
filepath: Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.
Expand Down Expand Up @@ -177,7 +177,7 @@ def _save(self, data: _DI) -> None:
def _exists(self) -> bool:
try:
load_path = get_filepath_str(self._get_load_path(), self._protocol)
except DataSetError:
except DatasetError:
return False

return self._fs.exists(load_path)
Expand All @@ -190,3 +190,21 @@ def _invalidate_cache(self) -> None:
"""Invalidate underlying filesystem caches."""
filepath = get_filepath_str(self._filepath, self._protocol)
self._fs.invalidate_cache(filepath)


# Lookup table from each deprecated class name to the class that replaces it;
# the module-level ``__getattr__`` below reads it to resolve old names.
_DEPRECATED_CLASSES = {"SVMLightDataSet": SVMLightDataset}


def __getattr__(name):
    """Resolve a deprecated class name requested as a module attribute.

    Args:
        name: The attribute name being looked up on this module.

    Returns:
        The class registered under ``name`` in ``_DEPRECATED_CLASSES``.

    Raises:
        AttributeError: If ``name`` is not a key of ``_DEPRECATED_CLASSES``.
    """
    # Guard clause: anything that is not a known deprecated alias is a
    # genuinely missing attribute.
    if name not in _DEPRECATED_CLASSES:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    replacement = _DEPRECATED_CLASSES[name]
    message = (
        f"{name!r} has been renamed to {replacement.__name__!r}, "
        "and the alias will be removed in Kedro-Datasets 2.0.0"
    )
    # stacklevel=2 attributes the warning to the code that accessed the old
    # name rather than to this function.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return replacement
File renamed without changes.
Loading

0 comments on commit fc2dff7

Please sign in to comment.