diff --git a/kedro-datasets/docs/source/kedro_datasets.rst b/kedro-datasets/docs/source/kedro_datasets.rst
index d1e06429c..4b90a316b 100644
--- a/kedro-datasets/docs/source/kedro_datasets.rst
+++ b/kedro-datasets/docs/source/kedro_datasets.rst
@@ -12,6 +12,7 @@ kedro_datasets
    :template: autosummary/class.rst
 
    kedro_datasets.api.APIDataSet
+   kedro_datasets.api.APIDataset
    kedro_datasets.biosequence.BioSequenceDataSet
    kedro_datasets.dask.ParquetDataSet
    kedro_datasets.databricks.ManagedTableDataSet
diff --git a/kedro-datasets/kedro_datasets/api/__init__.py b/kedro-datasets/kedro_datasets/api/__init__.py
index 5910d7916..d59fe67e0 100644
--- a/kedro-datasets/kedro_datasets/api/__init__.py
+++ b/kedro-datasets/kedro_datasets/api/__init__.py
@@ -1,14 +1,17 @@
-"""``APIDataSet`` loads the data from HTTP(S) APIs
+"""``APIDataset`` loads the data from HTTP(S) APIs
 and returns them into either as string or json Dict.
 It uses the python requests library: https://requests.readthedocs.io/en/latest/
 """
+from __future__ import annotations
+
 from typing import Any
 
 import lazy_loader as lazy
 
 # https://github.com/pylint-dev/pylint/issues/4300#issuecomment-1043601901
-APIDataSet: Any
+APIDataSet: type[APIDataset]
+APIDataset: Any
 
 __getattr__, __dir__, __all__ = lazy.attach(
-    __name__, submod_attrs={"api_dataset": ["APIDataSet"]}
+    __name__, submod_attrs={"api_dataset": ["APIDataSet", "APIDataset"]}
 )
diff --git a/kedro-datasets/kedro_datasets/api/api_dataset.py b/kedro-datasets/kedro_datasets/api/api_dataset.py
index def66a7f4..2c5a098ab 100644
--- a/kedro-datasets/kedro_datasets/api/api_dataset.py
+++ b/kedro-datasets/kedro_datasets/api/api_dataset.py
@@ -1,7 +1,8 @@
-"""``APIDataSet`` loads the data from HTTP(S) APIs.
+"""``APIDataset`` loads the data from HTTP(S) APIs.
 It uses the python requests library: https://requests.readthedocs.io/en/latest/
 """
 import json as json_  # make pylint happy
+import warnings
 from copy import deepcopy
 from typing import Any, Dict, List, Tuple, Union
@@ -13,8 +14,8 @@
 from .._io import DatasetError as DataSetError
 
 
-class APIDataSet(AbstractDataSet[None, requests.Response]):
-    """``APIDataSet`` loads/saves data from/to HTTP(S) APIs.
+class APIDataset(AbstractDataSet[None, requests.Response]):
+    """``APIDataset`` loads/saves data from/to HTTP(S) APIs.
     It uses the python requests library: https://requests.readthedocs.io/en/latest/
 
     Example usage for the `YAML API `_:
     ::
 
-        >>> from kedro_datasets.api import APIDataSet
+        >>> from kedro_datasets.api import APIDataset
         >>>
         >>>
-        >>> data_set = APIDataSet(
+        >>> data_set = APIDataset(
         >>>     url="https://quickstats.nass.usda.gov",
         >>>     load_args={
         >>>         "params": {
@@ -55,12 +56,12 @@ class APIDataSet(AbstractDataSet[None, requests.Response]):
         >>> )
         >>> data = data_set.load()
 
-    ``APIDataSet`` can also be used to save output on a remote server using HTTP(S)
+    ``APIDataset`` can also be used to save output on a remote server using HTTP(S)
     methods.
     ::
 
        >>> example_table = '{"col1":["val1", "val2"], "col2":["val3", "val4"]}'
 
-        >>> data_set = APIDataSet(
+        >>> data_set = APIDataset(
                 method = "POST",
                 url = "url_of_remote_server",
                 save_args = {"chunk_size":1}
@@ -74,7 +75,7 @@ class APIDataSet(AbstractDataSet[None, requests.Response]):
     used if the input of save method is a list. It will divide the request into chunks
     of size `chunk_size`. For example, here we will send two requests each containing
     one row of our example DataFrame.
 
-    If the data passed to the save method is not a list, ``APIDataSet`` will check if it
+    If the data passed to the save method is not a list, ``APIDataset`` will check if it
     can be loaded as JSON. If true, it will send the data unchanged in a single request.
     Otherwise, the ``_save`` method will try to dump the data in JSON format and execute
     the request.
@@ -99,7 +100,7 @@ def __init__(
         credentials: Union[Tuple[str, str], List[str], AuthBase] = None,
         metadata: Dict[str, Any] = None,
     ) -> None:
-        """Creates a new instance of ``APIDataSet`` to fetch data from an API endpoint.
+        """Creates a new instance of ``APIDataset`` to fetch data from an API endpoint.
 
         Args:
             url: The API URL endpoint.
@@ -233,3 +234,21 @@ def _exists(self) -> bool:
         with sessions.Session() as session:
             response = self._execute_request(session)
         return response.ok
+
+
+_DEPRECATED_CLASSES = {
+    "APIDataSet": APIDataset,
+}
+
+
+def __getattr__(name):
+    if name in _DEPRECATED_CLASSES:
+        alias = _DEPRECATED_CLASSES[name]
+        warnings.warn(
+            f"{repr(name)} has been renamed to {repr(alias.__name__)}, "
+            f"and the alias will be removed in Kedro-Datasets 2.0.0",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return alias
+    raise AttributeError(f"module {repr(__name__)} has no attribute {repr(name)}")
diff --git a/kedro-datasets/tests/api/test_api_dataset.py b/kedro-datasets/tests/api/test_api_dataset.py
index e87d1cd02..fe475be79 100644
--- a/kedro-datasets/tests/api/test_api_dataset.py
+++ b/kedro-datasets/tests/api/test_api_dataset.py
@@ -1,5 +1,6 @@
 # pylint: disable=no-member
 import base64
+import importlib
 import json
 import socket
 from typing import Any
@@ -9,7 +10,8 @@
 from kedro.io.core import DataSetError
 from requests.auth import HTTPBasicAuth
 
-from kedro_datasets.api import APIDataSet
+from kedro_datasets.api import APIDataset
+from kedro_datasets.api.api_dataset import _DEPRECATED_CLASSES
 
 POSSIBLE_METHODS = ["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]
 SAVE_METHODS = ["POST", "PUT"]
@@ -27,7 +29,16 @@
 TEST_SAVE_DATA = [{"key1": "info1", "key2": "info2"}]
 
 
-class TestAPIDataSet:
+@pytest.mark.parametrize(
+    "module_name", ["kedro_datasets.api", "kedro_datasets.api.api_dataset"]
+)
+@pytest.mark.parametrize("class_name", _DEPRECATED_CLASSES)
+def test_deprecation(module_name, class_name):
+    with pytest.warns(DeprecationWarning, match=f"{repr(class_name)} has been renamed"):
+        getattr(importlib.import_module(module_name), class_name)
+
+
+class TestAPIDataset:
     @pytest.mark.parametrize("method", POSSIBLE_METHODS)
     def test_request_method(self, requests_mock, method):
         if method in ["OPTIONS", "HEAD", "PATCH", "DELETE"]:
@@ -35,10 +46,10 @@ def test_request_method(self, requests_mock, method):
                 ValueError,
                 match="Only GET, POST and PUT methods are supported",
             ):
-                APIDataSet(url=TEST_URL, method=method)
+                APIDataset(url=TEST_URL, method=method)
 
         else:
-            api_data_set = APIDataSet(url=TEST_URL, method=method)
+            api_data_set = APIDataset(url=TEST_URL, method=method)
 
             requests_mock.register_uri(method, TEST_URL, text=TEST_TEXT_RESPONSE_DATA)
@@ -59,7 +70,7 @@ def test_request_method(self, requests_mock, method):
         ],
     )
     def test_params_in_request(self, requests_mock, parameters_in, url_postfix):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
            url=TEST_URL, method=TEST_METHOD, load_args={"params": parameters_in}
         )
         requests_mock.register_uri(
@@ -71,7 +82,7 @@ def test_params_in_request(self, requests_mock, parameters_in, url_postfix):
         response = api_data_set.load()
         assert response.text == TEST_TEXT_RESPONSE_DATA
 
     def test_json_in_request(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=TEST_METHOD,
             load_args={"json": TEST_JSON_REQUEST_DATA},
@@ -82,7 +93,7 @@ def test_json_in_request(self, requests_mock):
         assert response.request.json() == TEST_JSON_REQUEST_DATA
 
     def test_headers_in_request(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL, method=TEST_METHOD, load_args={"headers": TEST_HEADERS}
         )
         requests_mock.register_uri(TEST_METHOD, TEST_URL, headers={"pan": "cake"})
@@ -93,7 +104,7 @@ def test_headers_in_request(self, requests_mock):
         assert response.headers["pan"] == "cake"
 
     def test_api_cookies(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL, method=TEST_METHOD, load_args={"cookies": {"pan": "cake"}}
         )
         requests_mock.register_uri(TEST_METHOD, TEST_URL, text="text")
@@ -107,7 +118,7 @@ def test_credentials_auth_error(self):
         the constructor should raise a ValueError.
         """
         with pytest.raises(ValueError, match="both auth and credentials"):
-            APIDataSet(
+            APIDataset(
                 url=TEST_URL, method=TEST_METHOD, load_args={"auth": []}, credentials={}
             )
@@ -128,7 +139,7 @@ def _basic_auth(username, password):
         ],
     )
     def test_auth_sequence(self, requests_mock, auth_kwarg):
-        api_data_set = APIDataSet(url=TEST_URL, method=TEST_METHOD, **auth_kwarg)
+        api_data_set = APIDataset(url=TEST_URL, method=TEST_METHOD, **auth_kwarg)
         requests_mock.register_uri(
             TEST_METHOD,
             TEST_URL,
@@ -137,7 +148,7 @@ def test_auth_sequence(self, requests_mock, auth_kwarg):
         response = api_data_set.load()
         assert isinstance(response, requests.Response)
-        assert response.request.headers["Authorization"] == TestAPIDataSet._basic_auth(
+        assert response.request.headers["Authorization"] == TestAPIDataset._basic_auth(
             "john", "doe"
         )
         assert response.text == TEST_TEXT_RESPONSE_DATA
@@ -151,7 +162,7 @@ def test_auth_sequence(self, requests_mock, auth_kwarg):
         ],
     )
     def test_api_timeout(self, requests_mock, timeout_in, timeout_out):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL, method=TEST_METHOD, load_args={"timeout": timeout_in}
         )
         requests_mock.register_uri(TEST_METHOD, TEST_URL)
@@ -161,7 +172,7 @@ def test_api_timeout(self, requests_mock, timeout_in, timeout_out):
     def test_stream(self, requests_mock):
         text = "I am being streamed."
 
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL, method=TEST_METHOD, load_args={"stream": True}
         )
@@ -175,7 +186,7 @@ def test_stream(self, requests_mock):
         assert chunks == ["I ", "am", " b", "ei", "ng", " s", "tr", "ea", "me", "d."]
 
     def test_proxy(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url="ftp://example.com/api/test",
             method=TEST_METHOD,
             load_args={"proxies": {"ftp": "ftp://127.0.0.1:3000"}},
@@ -198,7 +209,7 @@ def test_proxy(self, requests_mock):
         ],
     )
     def test_certs(self, requests_mock, cert_in, cert_out):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL, method=TEST_METHOD, load_args={"cert": cert_in}
         )
         requests_mock.register_uri(TEST_METHOD, TEST_URL)
@@ -210,7 +221,7 @@ def test_exists_http_error(self, requests_mock):
         In case of an unexpected HTTP error,
         ``exists()`` should not silently catch it.
         """
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=TEST_METHOD,
             load_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -230,7 +241,7 @@ def test_exists_ok(self, requests_mock):
         If the file actually exists and server responds 200,
         ``exists()`` should return True
         """
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=TEST_METHOD,
             load_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -245,7 +256,7 @@ def test_exists_ok(self, requests_mock):
         assert api_data_set.exists()
 
     def test_http_error(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=TEST_METHOD,
             load_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -262,7 +273,7 @@ def test_http_error(self, requests_mock):
             api_data_set.load()
 
     def test_socket_error(self, requests_mock):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=TEST_METHOD,
             load_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -281,7 +292,7 @@ def test_socket_error(self, requests_mock):
     def test_successful_save(self, requests_mock, method, data):
         """
         When we want to save some data on a server
-        Given an APIDataSet class
+        Given an APIDataset class
         Then check that the response is OK and the sent data is in the correct form.
         """
@@ -292,7 +303,7 @@ def json_callback(
             return request.json()
 
         if method in ["PUT", "POST"]:
-            api_data_set = APIDataSet(
+            api_data_set = APIDataset(
                 url=TEST_URL,
                 method=method,
                 save_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -309,7 +320,7 @@ def json_callback(
             assert response.json() == TEST_SAVE_DATA
 
         elif method == "GET":
-            api_data_set = APIDataSet(
+            api_data_set = APIDataset(
                 url=TEST_URL,
                 method=method,
                 save_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},
@@ -321,13 +332,13 @@ def json_callback(
                 ValueError,
                 match="Only GET, POST and PUT methods are supported",
             ):
-                APIDataSet(url=TEST_URL, method=method)
+                APIDataset(url=TEST_URL, method=method)
 
     @pytest.mark.parametrize("save_methods", SAVE_METHODS)
     def test_successful_save_with_json(self, requests_mock, save_methods):
         """
         When we want to save with json parameters
-        Given an APIDataSet class
+        Given an APIDataset class
         Then check we get a response
         """
@@ -337,7 +348,7 @@ def json_callback(
             """Callback that sends back the json."""
             return request.json()
 
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=save_methods,
             save_args={"json": TEST_JSON_RESPONSE_DATA, "headers": TEST_HEADERS},
@@ -363,7 +374,7 @@ def json_callback(
     @pytest.mark.parametrize("save_methods", SAVE_METHODS)
     def test_save_http_error(self, requests_mock, save_methods):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=save_methods,
             save_args={"params": TEST_PARAMS, "headers": TEST_HEADERS, "chunk_size": 2},
@@ -384,7 +395,7 @@ def test_save_http_error(self, requests_mock, save_methods):
     @pytest.mark.parametrize("save_methods", SAVE_METHODS)
     def test_save_socket_error(self, requests_mock, save_methods):
-        api_data_set = APIDataSet(
+        api_data_set = APIDataset(
             url=TEST_URL,
             method=save_methods,
             save_args={"params": TEST_PARAMS, "headers": TEST_HEADERS},