diff --git a/databricks/koalas/__init__.py b/databricks/koalas/__init__.py index a513b1b911..3053ee3701 100644 --- a/databricks/koalas/__init__.py +++ b/databricks/koalas/__init__.py @@ -39,12 +39,12 @@ def assert_pyspark_version(): from databricks.koalas.indexes import Index, MultiIndex from databricks.koalas.series import Series from databricks.koalas.typedef import pandas_wraps -from databricks.koalas.config import get_option, set_option, reset_option +from databricks.koalas.config import get_option, set_option, reset_option, options __all__ = ['read_csv', 'read_parquet', 'to_datetime', 'from_pandas', 'get_dummies', 'DataFrame', 'Series', 'Index', 'MultiIndex', 'pandas_wraps', 'sql', 'range', 'concat', 'melt', 'get_option', 'set_option', 'reset_option', - 'read_sql_table', 'read_sql_query', 'read_sql'] + 'read_sql_table', 'read_sql_query', 'read_sql', 'options'] def _auto_patch(): diff --git a/databricks/koalas/config.py b/databricks/koalas/config.py index 0903dab8bd..9133507f7b 100644 --- a/databricks/koalas/config.py +++ b/databricks/koalas/config.py @@ -18,14 +18,14 @@ Infrastructure of options for Koalas. """ import json -from typing import Union, Any, Tuple, Callable, List +from typing import Union, Any, Tuple, Callable, List, Dict from pyspark._globals import _NoValue, _NoValueType from databricks.koalas.utils import default_session -__all__ = ['get_option', 'set_option', 'reset_option'] +__all__ = ['get_option', 'set_option', 'reset_option', 'options'] class Option: @@ -194,7 +194,7 @@ def validate(self, v: Any) -> None: "'plotting.sample_ratio' should be 1 >= value >= 0.")), ] # type: List[Option] -_options_dict = dict(zip((option.key for option in _options), _options)) +_options_dict = dict(zip((option.key for option in _options), _options)) # type: Dict[str, Option] _key_format = 'koalas.{}'.format @@ -298,3 +298,61 @@ def _check_option(key: str) -> None: raise OptionError( "No such option: '{}'. 
class DictWrapper:
    """Provide attribute-style access to a flat dict keyed by dotted names.

    The wrapped dict ``d`` maps full dotted option names (for example
    ``"compute.max_rows"``) to arbitrary values; only the keys are inspected
    here.  ``prefix`` is the dotted namespace this wrapper represents, with
    the empty string standing for the root.

    Attribute reads resolve as follows:

    * if ``<prefix>.<name>`` is exactly a key of ``d``, the result of
      ``get_option`` for that key is returned;
    * otherwise, if at least one key starts with ``<prefix>.<name>.``, a new
      wrapper for that namespace is returned;
    * otherwise ``OptionError`` is raised.

    Attribute writes are only accepted for exact keys and are forwarded to
    ``set_option``.
    """

    def __init__(self, d: Dict[str, Any], prefix: str = "") -> None:
        # Go through ``object`` directly: our own ``__setattr__`` only knows
        # how to set options and would reject these internal attributes.
        object.__setattr__(self, "d", d)
        object.__setattr__(self, "prefix", prefix)

    def _canonical_key(self, key: str) -> str:
        """Return ``key`` qualified with this wrapper's namespace prefix."""
        prefix = object.__getattribute__(self, "prefix")
        return "{}.{}".format(prefix, key) if prefix else key

    def _no_such_option(self, canonical_key: str) -> Exception:
        """Build the error raised for an unknown option or namespace."""
        d = object.__getattribute__(self, "d")
        return OptionError(
            "No such option: '{}'. Available options are [{}]".format(
                canonical_key, ", ".join(d)))

    def __setattr__(self, key: str, val: Any) -> None:
        """Set the option ``<prefix>.<key>`` through ``set_option``.

        Raises ``OptionError`` when the dotted name is not an exact key of the
        wrapped dict (namespaces themselves cannot be assigned to).
        """
        canonical_key = self._canonical_key(key)
        if canonical_key in object.__getattribute__(self, "d"):
            return set_option(canonical_key, val)
        raise self._no_such_option(canonical_key)

    def __getattr__(self, key: str) -> Any:
        """Return an option value or a wrapper for a nested namespace.

        Raises ``OptionError`` when ``<prefix>.<key>`` is neither an exact key
        nor the leading dotted part of any key.
        """
        canonical_key = self._canonical_key(key)
        d = object.__getattribute__(self, "d")
        if canonical_key in d:
            return get_option(canonical_key)

        # A namespace must be a true dotted prefix of some key.  Comparing
        # unordered segment membership instead would accept names such as
        # ``max_rows`` at the root just because they appear somewhere in a key.
        namespace = canonical_key + "."
        if any(k.startswith(namespace) for k in d):
            return DictWrapper(d, canonical_key)
        raise self._no_such_option(canonical_key)

    def __dir__(self) -> List[str]:
        """List the names reachable from this wrapper.

        At the root the full dotted keys are returned unchanged; inside a
        namespace only keys under that namespace are listed, with the
        namespace prefix (e.g. ``"compute."``) trimmed.
        """
        prefix = object.__getattribute__(self, "prefix")
        d = object.__getattribute__(self, "d")
        if not prefix:
            return list(d)

        namespace = prefix + "."
        offset = len(namespace)
        return [k[offset:] for k in d if k.startswith(namespace)]
dir(ks.options.compute)) + self.assertTrue("sample_ratio" not in dir(ks.options.compute)) + + self.assertTrue("default_index_type" not in dir(ks.options.plotting)) + self.assertTrue("sample_ratio" in dir(ks.options.plotting)) diff --git a/docs/source/user_guide/options.rst b/docs/source/user_guide/options.rst index 2ed96cde15..78fce20e81 100644 --- a/docs/source/user_guide/options.rst +++ b/docs/source/user_guide/options.rst @@ -9,6 +9,17 @@ Koalas has an options system that lets you customize some aspects of its behavio display-related options being those the user is most likely to adjust. Options have a full "dotted-style", case-insensitive name (e.g. ``display.max_rows``). +You can get/set options directly as attributes of the top-level ``options`` attribute: + + +.. code-block:: python + + >>> import databricks.koalas as ks + >>> ks.options.display.max_rows + 1000 + >>> ks.options.display.max_rows = 10 + >>> ks.options.display.max_rows + 10 The API is composed of 3 relevant functions, available directly from the ``koalas`` namespace: