From 9e1109b118a1b82275305e72526c9e8ddfefae4d Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 9 Sep 2023 08:15:50 -0400 Subject: [PATCH] style(docstrings): format docstrings according to `black` style --- ibis/backends/base/__init__.py | 12 ++- ibis/backends/base/df/timecontext.py | 24 ++--- ibis/backends/base/sql/__init__.py | 1 + ibis/backends/bigquery/client.py | 10 +- ibis/backends/bigquery/udf/__init__.py | 23 +++-- ibis/backends/bigquery/udf/find.py | 2 +- ibis/backends/dask/execution/util.py | 4 +- ibis/backends/duckdb/__init__.py | 6 +- ibis/backends/impala/__init__.py | 32 ++++--- ibis/backends/mysql/__init__.py | 18 ++-- ibis/backends/pandas/aggcontext.py | 109 +++++++++++++--------- ibis/backends/pandas/execution/strings.py | 12 +-- ibis/backends/pandas/execution/util.py | 4 +- ibis/backends/postgres/__init__.py | 18 ++-- ibis/backends/pyspark/__init__.py | 18 ++-- ibis/backends/pyspark/timecontext.py | 6 +- ibis/common/annotations.py | 2 + ibis/common/patterns.py | 21 +++-- ibis/common/typing.py | 22 +++-- ibis/expr/analysis.py | 10 +- ibis/expr/api.py | 21 +++-- ibis/expr/operations/analytic.py | 4 +- ibis/expr/operations/udf.py | 4 - ibis/expr/types/arrays.py | 20 ++-- ibis/expr/types/core.py | 6 +- ibis/expr/types/generic.py | 32 ++++--- ibis/expr/types/groupby.py | 9 +- ibis/expr/types/maps.py | 72 +++++++++----- ibis/expr/types/relations.py | 70 +++++++++++--- ibis/expr/types/strings.py | 22 +++-- ibis/expr/types/structs.py | 28 +++--- ibis/legacy/udf/vectorized.py | 40 +++++--- ibis/selectors.py | 19 ++-- ibis/util.py | 14 +-- 34 files changed, 431 insertions(+), 284 deletions(-) diff --git a/ibis/backends/base/__init__.py b/ibis/backends/base/__init__.py index 459d78d64c4f..be83b4b2bb4d 100644 --- a/ibis/backends/base/__init__.py +++ b/ibis/backends/base/__init__.py @@ -175,7 +175,7 @@ class TablesAccessor(collections.abc.Mapping): Examples -------- >>> con = ibis.sqlite.connect("example.db") - >>> people = con.tables['people'] # access via index + >>> people = con.tables["people"] # access via index >>> people = con.tables.people # access via attribute """ @@ -848,7 +848,7 @@ def tables(self): Examples -------- >>> con = ibis.sqlite.connect("example.db") - >>> people = con.tables['people'] # access via index + >>> people = con.tables["people"] # access via index >>> people = con.tables.people # access via attribute """ return TablesAccessor(self) @@ -1202,11 +1202,15 @@ def connect(resource: Path | str, **kwargs: Any) -> BaseBackend: Connect to a PostgreSQL server: - >>> con = ibis.connect("postgres://user:password@hostname:5432") # quartodoc: +SKIP # doctest: +SKIP + >>> con = ibis.connect( + ... "postgres://user:password@hostname:5432" + ... ) # quartodoc: +SKIP # doctest: +SKIP Connect to BigQuery: - >>> con = ibis.connect("bigquery://my-project/my-dataset") # quartodoc: +SKIP # doctest: +SKIP + >>> con = ibis.connect( + ... "bigquery://my-project/my-dataset" + ... ) # quartodoc: +SKIP # doctest: +SKIP """ url = resource = str(resource) diff --git a/ibis/backends/base/df/timecontext.py b/ibis/backends/base/df/timecontext.py index 82fa8cbe570b..e63a2e83c4e7 100644 --- a/ibis/backends/base/df/timecontext.py +++ b/ibis/backends/base/df/timecontext.py @@ -162,16 +162,14 @@ def construct_time_context_aware_series( Examples -------- >>> import pandas as pd - >>> from ibis.backends.base.df.timecontext import construct_time_context_aware_series + >>> from ibis.backends.base.df.timecontext import ( + ... construct_time_context_aware_series, + ... ) >>> df = pd.DataFrame( ... { - ... 'time': pd.Series( - ... pd.date_range( - ... start='2017-01-02', periods=3 - ... ).values - ... ), - ... 'id': [1,2,3], - ... 'value': [1.1, 2.2, 3.3], + ... "time": pd.Series(pd.date_range(start="2017-01-02", periods=3).values), + ... "id": [1, 2, 3], + ... "value": [1.1, 2.2, 3.3], ... } ... ) >>> df @@ -179,13 +177,15 @@ def construct_time_context_aware_series( 0 2017-01-02 1 1.1 1 2017-01-03 2 2.2 2 2017-01-04 3 3.3 - >>> series = df['value'] + >>> series = df["value"] >>> series 0 1.1 1 2.2 2 3.3 Name: value, dtype: float64 - >>> construct_time_context_aware_series(series, df) # quartodoc: +SKIP # doctest: +SKIP + >>> construct_time_context_aware_series( + ... series, df + ... ) # quartodoc: +SKIP # doctest: +SKIP time 0 2017-01-02 1.1 1 2017-01-03 2.2 @@ -203,7 +203,9 @@ def construct_time_context_aware_series( 2 2017-01-04 3.3 Name: value, dtype: float64 - >>> construct_time_context_aware_series(timed_series, df) # quartodoc: +SKIP # doctest: +SKIP + >>> construct_time_context_aware_series( + ... timed_series, df + ... ) # quartodoc: +SKIP # doctest: +SKIP time 0 2017-01-02 1.1 1 2017-01-03 2.2 diff --git a/ibis/backends/base/sql/__init__.py b/ibis/backends/base/sql/__init__.py index 72043640a90a..f294de2b39fc 100644 --- a/ibis/backends/base/sql/__init__.py +++ b/ibis/backends/base/sql/__init__.py @@ -171,6 +171,7 @@ def raw_sql(self, query: str): >>> con = ibis.connect("duckdb://") >>> with con.raw_sql("SELECT 1") as cursor: ... result = cursor.fetchall() + ... >>> result [(1,)] >>> cursor.closed diff --git a/ibis/backends/bigquery/client.py b/ibis/backends/bigquery/client.py index fa33502a5954..a371cc4c2421 100644 --- a/ibis/backends/bigquery/client.py +++ b/ibis/backends/bigquery/client.py @@ -178,8 +178,7 @@ def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str Examples -------- >>> data_project, billing_project, dataset = parse_project_and_dataset( - ... 'ibis-gbq', - ... 'foo-bar.my_dataset' + ... "ibis-gbq", "foo-bar.my_dataset" ... ) >>> data_project 'foo-bar' @@ -188,8 +187,7 @@ def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str >>> dataset 'my_dataset' >>> data_project, billing_project, dataset = parse_project_and_dataset( - ... 'ibis-gbq', - ... 'my_dataset' + ... "ibis-gbq", "my_dataset" ... ) >>> data_project 'ibis-gbq' @@ -197,9 +195,7 @@ def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str 'ibis-gbq' >>> dataset 'my_dataset' - >>> data_project, billing_project, _ = parse_project_and_dataset( - ... 'ibis-gbq' - ... ) + >>> data_project, billing_project, _ = parse_project_and_dataset("ibis-gbq") >>> data_project 'ibis-gbq' """ diff --git a/ibis/backends/bigquery/udf/__init__.py b/ibis/backends/bigquery/udf/__init__.py index 5249d419d42b..85f7ef01beb6 100644 --- a/ibis/backends/bigquery/udf/__init__.py +++ b/ibis/backends/bigquery/udf/__init__.py @@ -87,6 +87,7 @@ def python( >>> @udf.python(input_type=[dt.double], output_type=dt.double) ... def add_one(x): ... return x + 1 + ... >>> print(add_one.sql) CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) RETURNS FLOAT64 @@ -97,14 +98,16 @@ def python( } return add_one(x); """; - >>> @udf.python(input_type=[dt.double, dt.double], - ... output_type=dt.Array(dt.double)) + >>> @udf.python( + ... input_type=[dt.double, dt.double], output_type=dt.Array(dt.double) + ... ) ... def my_range(start, stop): ... def gen(start, stop): ... curr = start ... while curr < stop: ... yield curr ... curr += 1 + ... ... result = [] ... for value in gen(start, stop): ... result.append(value) @@ -132,9 +135,9 @@ def python( """; >>> @udf.python( ... input_type=[dt.double, dt.double], - ... output_type=dt.Struct.from_tuples([ - ... ('width', 'double'), ('height', 'double') - ... ]) + ... output_type=dt.Struct.from_tuples( + ... [("width", "double"), ("height", "double")] + ... ), ... ) ... def my_rectangle(width, height): ... class Rectangle: @@ -247,7 +250,7 @@ def js( ... name="add_one", ... params={"a": dt.double}, ... output_type=dt.double, - ... body="return x + 1" + ... body="return x + 1", ... ) >>> print(add_one.sql) CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) @@ -356,10 +359,10 @@ def sql( >>> from ibis.backends.bigquery import udf >>> import ibis.expr.datatypes as dt >>> add_one = udf.sql( - ... name="add_one", - ... params={'x': dt.double}, - ... output_type=dt.double, - ... sql_expression="x + 1" + ... name="add_one", + ... params={"x": dt.double}, + ... output_type=dt.double, + ... sql_expression="x + 1", ... ) >>> print(add_one.sql) CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64) diff --git a/ibis/backends/bigquery/udf/find.py b/ibis/backends/bigquery/udf/find.py index 40f7de568726..2ab1b6f42162 100644 --- a/ibis/backends/bigquery/udf/find.py +++ b/ibis/backends/bigquery/udf/find.py @@ -44,7 +44,7 @@ def find_names(node: ast.AST) -> list[ast.Name]: Examples -------- >>> import ast - >>> node = ast.parse('a + b') + >>> node = ast.parse("a + b") >>> names = find_names(node) >>> names [<....Name object at 0x...>, <....Name object at 0x...>] diff --git a/ibis/backends/dask/execution/util.py b/ibis/backends/dask/execution/util.py index f044d2fdb1c4..b47d198c07a2 100644 --- a/ibis/backends/dask/execution/util.py +++ b/ibis/backends/dask/execution/util.py @@ -115,12 +115,12 @@ def coerce_to_output( >>> coerce_to_output(1, expr) # quartodoc: +SKIP # doctest: +SKIP 0 1 Name: result, dtype: int64 - >>> coerce_to_output(1, expr, [1,2,3]) # quartodoc: +SKIP # doctest: +SKIP + >>> coerce_to_output(1, expr, [1, 2, 3]) # quartodoc: +SKIP # doctest: +SKIP 1 1 2 1 3 1 Name: result, dtype: int64 - >>> coerce_to_output([1,2,3], expr) # quartodoc: +SKIP # doctest: +SKIP + >>> coerce_to_output([1, 2, 3], expr) # quartodoc: +SKIP # doctest: +SKIP 0 [1, 2, 3] Name: result, dtype: object """ diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 13f1a378f482..d1e2b34fdf69 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -759,6 +759,7 @@ def read_sqlite(self, path: str | Path, table_name: str | None = None) -> ir.Tab ... _ = con.execute("DROP TABLE IF EXISTS t") ... _ = con.execute("CREATE TABLE t (a INT, b TEXT)") ... _ = con.execute("INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')") + ... >>> con = ibis.connect("duckdb://") >>> t = con.read_sqlite("/tmp/sqlite.db", table_name="t") >>> t @@ -809,6 +810,7 @@ def attach_sqlite( ... _ = con.execute("DROP TABLE IF EXISTS t") ... _ = con.execute("CREATE TABLE t (a INT, b TEXT)") ... _ = con.execute("INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')") + ... >>> con = ibis.connect("duckdb://") >>> con.list_tables() [] @@ -995,7 +997,9 @@ def to_parquet( Partition on multiple columns. - >>> con.to_parquet(penguins, tempfile.mkdtemp(), partition_by=("year", "island")) + >>> con.to_parquet( + ... penguins, tempfile.mkdtemp(), partition_by=("year", "island") + ... ) """ self._run_pre_execute_hooks(expr) query = self._to_sql(expr, params=params) diff --git a/ibis/backends/impala/__init__.py b/ibis/backends/impala/__init__.py index c19d09436a78..e92f06dc44bd 100644 --- a/ibis/backends/impala/__init__.py +++ b/ibis/backends/impala/__init__.py @@ -272,10 +272,10 @@ def do_connect( -------- >>> import os >>> import ibis - >>> hdfs_host = os.environ.get('IBIS_TEST_NN_HOST', 'localhost') - >>> hdfs_port = int(os.environ.get('IBIS_TEST_NN_PORT', 50070)) - >>> impala_host = os.environ.get('IBIS_TEST_IMPALA_HOST', 'localhost') - >>> impala_port = int(os.environ.get('IBIS_TEST_IMPALA_PORT', 21050)) + >>> hdfs_host = os.environ.get("IBIS_TEST_NN_HOST", "localhost") + >>> hdfs_port = int(os.environ.get("IBIS_TEST_NN_PORT", 50070)) + >>> impala_host = os.environ.get("IBIS_TEST_IMPALA_HOST", "localhost") + >>> impala_port = int(os.environ.get("IBIS_TEST_IMPALA_PORT", 21050)) >>> hdfs = ibis.impala.hdfs_connect(host=hdfs_host, port=hdfs_port) >>> client = ibis.impala.connect( ... host=impala_host, @@ -927,11 +927,13 @@ def insert( Examples -------- - >>> table = 'my_table' + >>> table = "my_table" >>> con.insert(table, table_expr) # quartodoc: +SKIP # doctest: +SKIP Completely overwrite contents - >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP + >>> con.insert( + ... table, table_expr, overwrite=True + ... ) # quartodoc: +SKIP # doctest: +SKIP """ table = self.table(table_name, database=database) return table.insert( @@ -958,9 +960,11 @@ def drop_table( Examples -------- - >>> table = 'my_table' - >>> db = 'operations' - >>> con.drop_table(table, database=db, force=True) # quartodoc: +SKIP # doctest: +SKIP + >>> table = "my_table" + >>> db = "operations" + >>> con.drop_table( + ... table, database=db, force=True + ... ) # quartodoc: +SKIP # doctest: +SKIP """ statement = DropTable(name, database=database, must_exist=not force) self._safe_exec_sql(statement) @@ -1015,10 +1019,12 @@ def cache_table(self, table_name, *, database=None, pool="default"): Examples -------- - >>> table = 'my_table' - >>> db = 'operations' - >>> pool = 'op_4GB_pool' - >>> con.cache_table('my_table', database=db, pool=pool) # quartodoc: +SKIP # doctest: +SKIP + >>> table = "my_table" + >>> db = "operations" + >>> pool = "op_4GB_pool" + >>> con.cache_table( + ... "my_table", database=db, pool=pool + ... ) # quartodoc: +SKIP # doctest: +SKIP """ statement = ddl.CacheTable(table_name, database=database, pool=pool) self._safe_exec_sql(statement) diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 1ed548315f14..e14bea134a60 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -65,20 +65,14 @@ def do_connect( -------- >>> import os >>> import getpass - >>> host = os.environ.get('IBIS_TEST_MYSQL_HOST', 'localhost') - >>> user = os.environ.get('IBIS_TEST_MYSQL_USER', getpass.getuser()) - >>> password = os.environ.get('IBIS_TEST_MYSQL_PASSWORD') - >>> database = os.environ.get('IBIS_TEST_MYSQL_DATABASE', - ... 'ibis_testing') - >>> con = connect( - ... database=database, - ... host=host, - ... user=user, - ... password=password - ... ) + >>> host = os.environ.get("IBIS_TEST_MYSQL_HOST", "localhost") + >>> user = os.environ.get("IBIS_TEST_MYSQL_USER", getpass.getuser()) + >>> password = os.environ.get("IBIS_TEST_MYSQL_PASSWORD") + >>> database = os.environ.get("IBIS_TEST_MYSQL_DATABASE", "ibis_testing") + >>> con = connect(database=database, host=host, user=user, password=password) >>> con.list_tables() # doctest: +ELLIPSIS [...] - >>> t = con.table('functional_alltypes') + >>> t = con.table("functional_alltypes") >>> t MySQLTable[table] name: functional_alltypes diff --git a/ibis/backends/pandas/aggcontext.py b/ibis/backends/pandas/aggcontext.py index 13ef83b0b233..bfa66b08dca7 100644 --- a/ibis/backends/pandas/aggcontext.py +++ b/ibis/backends/pandas/aggcontext.py @@ -25,12 +25,14 @@ :: >>> import pandas as pd >>> import numpy as np - >>> df = pd.DataFrame({ - ... 'key': list('aabc'), - ... 'value': np.random.randn(4), - ... 'time': pd.date_range(periods=4, start='now') - ... }) - >>> s = pd.Series(df.value.sum(), index=df.index, name='sum_value') + >>> df = pd.DataFrame( + ... { + ... "key": list("aabc"), + ... "value": np.random.randn(4), + ... "time": pd.date_range(periods=4, start="now"), + ... } + ... ) + >>> s = pd.Series(df.value.sum(), index=df.index, name="sum_value") >>> s # quartodoc: +SKIP # doctest: +SKIP Ibis @@ -38,9 +40,11 @@ :: >>> import ibis - >>> schema = dict(time='timestamp', key='string', value='double') - >>> t = ibis.table(schema, name='t') - >>> t[t, t.value.sum().name('sum_value')].sum_value # quartodoc: +SKIP # doctest: +SKIP + >>> schema = dict(time="timestamp", key="string", value="double") + >>> t = ibis.table(schema, name="t") + >>> t[ + ... t, t.value.sum().name("sum_value") + ... ].sum_value # quartodoc: +SKIP # doctest: +SKIP ``group_by``, no ``order_by``: ``context.Transform()`` @@ -62,21 +66,25 @@ >>> import pandas as pd >>> import numpy as np - >>> df = pd.DataFrame({ - ... 'key': list('aabc'), - ... 'value': np.random.randn(4), - ... 'time': pd.date_range(periods=4, start='now') - ... }) - >>> df.groupby('key').value.transform('sum') # quartodoc: +SKIP # doctest: +SKIP + >>> df = pd.DataFrame( + ... { + ... "key": list("aabc"), + ... "value": np.random.randn(4), + ... "time": pd.date_range(periods=4, start="now"), + ... } + ... ) + >>> df.groupby("key").value.transform("sum") # quartodoc: +SKIP # doctest: +SKIP Ibis :: >>> import ibis - >>> schema = dict(time='timestamp', key='string', value='double') - >>> t = ibis.table(schema, name='t') - >>> t.value.sum().over(ibis.window(group_by=t.key)) # quartodoc: +SKIP # doctest: +SKIP + >>> schema = dict(time="timestamp", key="string", value="double") + >>> t = ibis.table(schema, name="t") + >>> t.value.sum().over( + ... ibis.window(group_by=t.key) + ... ) # quartodoc: +SKIP # doctest: +SKIP ``order_by``, no ``group_by``: ``context.Cumulative()``/``context.Rolling()`` ----------------------------------------------------------------------------- @@ -104,20 +112,22 @@ >>> import pandas as pd >>> import numpy as np - >>> df = pd.DataFrame({ - ... 'key': list('aabc'), - ... 'value': np.random.randn(4), - ... 'time': pd.date_range(periods=4, start='now') - ... }) - >>> df.sort_values('time').value.cumsum() # quartodoc: +SKIP # doctest: +SKIP + >>> df = pd.DataFrame( + ... { + ... "key": list("aabc"), + ... "value": np.random.randn(4), + ... "time": pd.date_range(periods=4, start="now"), + ... } + ... ) + >>> df.sort_values("time").value.cumsum() # quartodoc: +SKIP # doctest: +SKIP Ibis :: >>> import ibis - >>> schema = dict(time='timestamp', key='string', value='double') - >>> t = ibis.table(schema, name='t') + >>> schema = dict(time="timestamp", key="string", value="double") + >>> t = ibis.table(schema, name="t") >>> window = ibis.cumulative_window(order_by=t.time) >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP @@ -142,20 +152,24 @@ >>> import pandas as pd >>> import numpy as np - >>> df = pd.DataFrame({ - ... 'key': list('aabc'), - ... 'value': np.random.randn(4), - ... 'time': pd.date_range(periods=4, start='now') - ... }) - >>> df.sort_values('time').value.rolling(3).sum() # quartodoc: +SKIP # doctest: +SKIP + >>> df = pd.DataFrame( + ... { + ... "key": list("aabc"), + ... "value": np.random.randn(4), + ... "time": pd.date_range(periods=4, start="now"), + ... } + ... ) + >>> df.sort_values("time").value.rolling( + ... 3 + ... ).sum() # quartodoc: +SKIP # doctest: +SKIP Ibis :: >>> import ibis - >>> schema = dict(time='timestamp', key='string', value='double') - >>> t = ibis.table(schema, name='t') + >>> schema = dict(time="timestamp", key="string", value="double") + >>> t = ibis.table(schema, name="t") >>> window = ibis.trailing_window(3, order_by=t.time) >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP @@ -181,15 +195,20 @@ >>> import pandas as pd >>> import numpy as np - >>> df = pd.DataFrame({ - ... 'key': list('aabc'), - ... 'value': np.random.randn(4), - ... 'time': pd.date_range(periods=4, start='now') - ... }) - >>> sorter = lambda df: df.sort_values('time') - >>> gb = df.groupby('key', group_keys=False).apply(sorter).reset_index( - ... drop=True - ... ).groupby('key') + >>> df = pd.DataFrame( + ... { + ... "key": list("aabc"), + ... "value": np.random.randn(4), + ... "time": pd.date_range(periods=4, start="now"), + ... } + ... ) + >>> sorter = lambda df: df.sort_values("time") + >>> gb = ( + ... df.groupby("key", group_keys=False) + ... .apply(sorter) + ... .reset_index(drop=True) + ... .groupby("key") + ... ) >>> rolling = gb.value.rolling(2) >>> rolling.sum() # quartodoc: +SKIP # doctest: +SKIP @@ -198,8 +217,8 @@ :: >>> import ibis - >>> schema = dict(time='timestamp', key='string', value='double') - >>> t = ibis.table(schema, name='t') + >>> schema = dict(time="timestamp", key="string", value="double") + >>> t = ibis.table(schema, name="t") >>> window = ibis.trailing_window(2, order_by=t.time, group_by=t.key) >>> t.value.sum().over(window) # quartodoc: +SKIP # doctest: +SKIP """ diff --git a/ibis/backends/pandas/execution/strings.py b/ibis/backends/pandas/execution/strings.py index ab5f58e136fe..66e325b6d367 100644 --- a/ibis/backends/pandas/execution/strings.py +++ b/ibis/backends/pandas/execution/strings.py @@ -185,17 +185,17 @@ def sql_like_to_regex(pattern: str, escape: str | None = None) -> str: Examples -------- - >>> sql_like_to_regex('6%') # default is to not escape anything + >>> sql_like_to_regex("6%") # default is to not escape anything '^6.*$' - >>> sql_like_to_regex('6^%', escape='^') + >>> sql_like_to_regex("6^%", escape="^") '^6%$' - >>> sql_like_to_regex('6_') + >>> sql_like_to_regex("6_") '^6.$' - >>> sql_like_to_regex('6/_', escape='/') + >>> sql_like_to_regex("6/_", escape="/") '^6_$' - >>> sql_like_to_regex('%abc') # any string ending with "abc" + >>> sql_like_to_regex("%abc") # any string ending with "abc" '^.*abc$' - >>> sql_like_to_regex('abc%') # any string starting with "abc" + >>> sql_like_to_regex("abc%") # any string starting with "abc" '^abc.*$' """ return f"^{''.join(_sql_like_to_regex(pattern, escape))}$" diff --git a/ibis/backends/pandas/execution/util.py b/ibis/backends/pandas/execution/util.py index 48a207aa413f..9e310c968c80 100644 --- a/ibis/backends/pandas/execution/util.py +++ b/ibis/backends/pandas/execution/util.py @@ -120,12 +120,12 @@ def coerce_to_output( >>> coerce_to_output(1, node) # quartodoc: +SKIP # doctest: +SKIP 0 1 Name: result, dtype: int64 - >>> coerce_to_output(1, node, [1,2,3]) # quartodoc: +SKIP # doctest: +SKIP + >>> coerce_to_output(1, node, [1, 2, 3]) # quartodoc: +SKIP # doctest: +SKIP 1 1 2 1 3 1 Name: result, dtype: int64 - >>> coerce_to_output([1,2,3], node) # quartodoc: +SKIP # doctest: +SKIP + >>> coerce_to_output([1, 2, 3], node) # quartodoc: +SKIP # doctest: +SKIP 0 [1, 2, 3] Name: result, dtype: object """ diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index cfce7a7ba025..f3ff1ab375f9 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -73,20 +73,14 @@ def do_connect( >>> import os >>> import getpass >>> import ibis - >>> host = os.environ.get('IBIS_TEST_POSTGRES_HOST', 'localhost') - >>> user = os.environ.get('IBIS_TEST_POSTGRES_USER', getpass.getuser()) - >>> password = os.environ.get('IBIS_TEST_POSTGRES_PASSWORD') - >>> database = os.environ.get('IBIS_TEST_POSTGRES_DATABASE', - ... 'ibis_testing') - >>> con = connect( - ... database=database, - ... host=host, - ... user=user, - ... password=password - ... ) + >>> host = os.environ.get("IBIS_TEST_POSTGRES_HOST", "localhost") + >>> user = os.environ.get("IBIS_TEST_POSTGRES_USER", getpass.getuser()) + >>> password = os.environ.get("IBIS_TEST_POSTGRES_PASSWORD") + >>> database = os.environ.get("IBIS_TEST_POSTGRES_DATABASE", "ibis_testing") + >>> con = connect(database=database, host=host, user=user, password=password) >>> con.list_tables() # doctest: +ELLIPSIS [...] - >>> t = con.table('functional_alltypes') + >>> t = con.table("functional_alltypes") >>> t PostgreSQLTable[table] name: functional_alltypes diff --git a/ibis/backends/pyspark/__init__.py b/ibis/backends/pyspark/__init__.py index a073ac29a7dd..9599fde7a73b 100644 --- a/ibis/backends/pyspark/__init__.py +++ b/ibis/backends/pyspark/__init__.py @@ -379,7 +379,9 @@ def create_table( Examples -------- - >>> con.create_table('new_table_name', table_expr) # quartodoc: +SKIP # doctest: +SKIP + >>> con.create_table( + ... "new_table_name", table_expr + ... ) # quartodoc: +SKIP # doctest: +SKIP """ import pandas as pd import pyarrow as pa @@ -500,9 +502,11 @@ def drop_table_or_view( Examples -------- - >>> table = 'my_table' - >>> db = 'operations' - >>> con.drop_table_or_view(table, db, force=True) # quartodoc: +SKIP # doctest: +SKIP + >>> table = "my_table" + >>> db = "operations" + >>> con.drop_table_or_view( + ... table, db, force=True + ... ) # quartodoc: +SKIP # doctest: +SKIP """ statement = DropTable(name, database=database, must_exist=not force) self.raw_sql(statement.compile()) @@ -546,11 +550,13 @@ def insert( Examples -------- - >>> table = 'my_table' + >>> table = "my_table" >>> con.insert(table, table_expr) # quartodoc: +SKIP # doctest: +SKIP # Completely overwrite contents - >>> con.insert(table, table_expr, overwrite=True) # quartodoc: +SKIP # doctest: +SKIP + >>> con.insert( + ... table, table_expr, overwrite=True + ... ) # quartodoc: +SKIP # doctest: +SKIP """ table = self.table(table_name, database=database) return table.insert( diff --git a/ibis/backends/pyspark/timecontext.py b/ibis/backends/pyspark/timecontext.py index bbbbba2afe1d..e2d5c2a4d4d5 100644 --- a/ibis/backends/pyspark/timecontext.py +++ b/ibis/backends/pyspark/timecontext.py @@ -59,9 +59,9 @@ def combine_time_context( -------- >>> import pandas as pd >>> timecontexts = [ - ... (pd.Timestamp('20200102'), pd.Timestamp('20200103')), - ... (pd.Timestamp('20200101'), pd.Timestamp('20200106')), - ... (pd.Timestamp('20200109'), pd.Timestamp('20200110')), + ... (pd.Timestamp("20200102"), pd.Timestamp("20200103")), + ... (pd.Timestamp("20200101"), pd.Timestamp("20200106")), + ... (pd.Timestamp("20200109"), pd.Timestamp("20200110")), ... ] >>> combine_time_context(timecontexts) (Timestamp(...), Timestamp(...)) diff --git a/ibis/common/annotations.py b/ibis/common/annotations.py index 1dbec9f47070..d7262d5c1960 100644 --- a/ibis/common/annotations.py +++ b/ibis/common/annotations.py @@ -573,6 +573,7 @@ def annotated(_1=None, _2=None, _3=None, **kwargs): >>> @annotated(x=instance_of(int), y=instance_of(str)) ... def foo(x, y): ... return float(x) + float(y) + ... 3. With mixing type annotations and patterns where the latter takes precedence @@ -585,6 +586,7 @@ def annotated(_1=None, _2=None, _3=None, **kwargs): >>> @annotated([instance_of(int), instance_of(str)], instance_of(float)) ... def foo(x, y): ... return float(x) + float(y) + ... Parameters ---------- diff --git a/ibis/common/patterns.py b/ibis/common/patterns.py index 227890aa67fc..60fb6578764b 100644 --- a/ibis/common/patterns.py +++ b/ibis/common/patterns.py @@ -495,8 +495,8 @@ def namespace(cls, module) -> Namespace: >>> from ibis.common.patterns import Call >>> from ibis.expr.operations import Negate >>> - >>> c = Call.namespace('ibis.expr.operations') - >>> x = Variable('x') + >>> c = Call.namespace("ibis.expr.operations") + >>> x = Variable("x") >>> pattern = c.Negate(x) >>> pattern Call(func=, args=(Variable(name='x'),), kwargs=FrozenDict({})) @@ -847,13 +847,13 @@ class GenericInstanceOf(Slotted, Pattern): Examples -------- >>> class MyNumber(Generic[T_co]): - ... value: T_co + ... value: T_co ... - ... def __init__(self, value: T_co): - ... self.value = value + ... def __init__(self, value: T_co): + ... self.value = value ... - ... def __eq__(self, other): - ... return type(self) is type(other) and self.value == other.value + ... def __eq__(self, other): + ... return type(self) is type(other) and self.value == other.value ... >>> p = GenericInstanceOf(MyNumber[int]) >>> assert p.match(MyNumber(1), {}) == MyNumber(1) @@ -1770,6 +1770,7 @@ def pattern(obj: AnyType) -> Pattern: >>> @pattern ... def as_int(x, context): ... return int(x) + ... >>> >>> assert as_int.match(1, {}) == 1 @@ -1826,7 +1827,11 @@ def match( >>> assert match(1, 1, context={"x": 1}) == 1 >>> assert match(1, 2, context={"x": 1}) is NoMatch >>> assert match([1, int], [1, 2]) == [1, 2] - >>> assert match([1, int, "a" @ InstanceOf(str)], [1, 2, "three"]) == [1, 2, "three"] + >>> assert match([1, int, "a" @ InstanceOf(str)], [1, 2, "three"]) == [ + ... 1, + ... 2, + ... "three", + ... ] """ if context is None: context = {} diff --git a/ibis/common/typing.py b/ibis/common/typing.py index df70e7500a90..feac3b2d853e 100644 --- a/ibis/common/typing.py +++ b/ibis/common/typing.py @@ -94,12 +94,16 @@ def get_type_params(obj: Any) -> dict[str, type]: -------- >>> from typing import Dict, List >>> - >>> class MyList(List[T]): ... + >>> class MyList(List[T]): + ... ... + ... >>> >>> get_type_params(MyList[int]) {'T': } >>> - >>> class MyDict(Dict[T, U]): ... + >>> class MyDict(Dict[T, U]): + ... ... + ... >>> >>> get_type_params(MyDict[int, str]) {'T': , 'U': } @@ -135,18 +139,18 @@ def get_bound_typevars(obj: Any) -> dict[TypeVar, tuple[str, type]]: Examples -------- >>> class MyStruct(Generic[T, U]): - ... a: T - ... b: U + ... a: T + ... b: U ... >>> get_bound_typevars(MyStruct[int, str]) {~T: ('a', ), ~U: ('b', )} >>> >>> class MyStruct(Generic[T, U]): - ... a: T + ... a: T ... - ... @property - ... def myprop(self) -> U: - ... ... + ... @property + ... def myprop(self) -> U: + ... ... ... >>> get_bound_typevars(MyStruct[float, bytes]) {~T: ('a', ), ~U: ('myprop', )} @@ -183,7 +187,7 @@ def evaluate_annotations( Examples -------- - >>> annots = {'a': 'dict[str, float]', 'b': 'int'} + >>> annots = {"a": "dict[str, float]", "b": "int"} >>> evaluate_annotations(annots, __name__) {'a': dict[str, float], 'b': } """ diff --git a/ibis/expr/analysis.py b/ibis/expr/analysis.py index fdbaf6bc380f..2a97a99c5f4c 100644 --- a/ibis/expr/analysis.py +++ b/ibis/expr/analysis.py @@ -86,12 +86,12 @@ def find_immediate_parent_tables(input_node, keep_input=True): Examples -------- >>> import ibis, toolz - >>> t = ibis.table([('a', 'int64')], name='t') + >>> t = ibis.table([("a", "int64")], name="t") >>> expr = t.mutate(foo=t.a + 1) - >>> result, = find_immediate_parent_tables(expr.op()) + >>> (result,) = find_immediate_parent_tables(expr.op()) >>> result.equals(expr.op()) True - >>> result, = find_immediate_parent_tables(expr.op(), keep_input=False) + >>> (result,) = find_immediate_parent_tables(expr.op(), keep_input=False) >>> result.equals(t.op()) True """ @@ -590,8 +590,8 @@ def flatten_predicate(node): Examples -------- >>> import ibis - >>> t = ibis.table([('a', 'int64'), ('b', 'string')], name='t') - >>> filt = (t.a == 1) & (t.b == 'foo') + >>> t = ibis.table([("a", "int64"), ("b", "string")], name="t") + >>> filt = (t.a == 1) & (t.b == "foo") >>> predicates = flatten_predicate(filt.op()) >>> len(predicates) 2 diff --git a/ibis/expr/api.py b/ibis/expr/api.py index ef356aefc34e..c097fac74ab3 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -213,10 +213,10 @@ def param(type: dt.DataType) -> ir.Scalar: Examples -------- >>> import ibis - >>> start = ibis.param('date') - >>> end = ibis.param('date') - >>> schema = dict(timestamp_col='timestamp', value='double') - >>> t = ibis.table(schema, name='t') + >>> start = ibis.param("date") + >>> end = ibis.param("date") + >>> schema = dict(timestamp_col="timestamp", value="double") + >>> t = ibis.table(schema, name="t") >>> predicates = [t.timestamp_col >= start, t.timestamp_col <= end] >>> t.filter(predicates).value.sum() r0 := UnboundTable: t @@ -256,11 +256,8 @@ def schema( Examples -------- >>> from ibis import schema, Schema - >>> sc = schema([('foo', 'string'), - ... ('bar', 'int64'), - ... ('baz', 'boolean')]) - >>> sc = schema(names=['foo', 'bar', 'baz'], - ... types=['string', 'int64', 'boolean']) + >>> sc = schema([("foo", "string"), ("bar", "int64"), ("baz", "boolean")]) + >>> sc = schema(names=["foo", "bar", "baz"], types=["string", "int64", "boolean"]) >>> sc = schema(dict(foo="string")) >>> sc = schema(Schema(dict(foo="string"))) # no-op """ @@ -873,6 +870,7 @@ def read_csv( ... ''' >>> with open("/tmp/lines.csv", mode="w") as f: ... _ = f.write(lines) + ... >>> t = ibis.read_csv("/tmp/lines.csv") >>> t ┏━━━━━━━┳━━━━━━━━┓ @@ -928,6 +926,7 @@ def read_json( ... ''' >>> with open("/tmp/lines.json", mode="w") as f: ... _ = f.write(lines) + ... >>> t = ibis.read_json("/tmp/lines.json") >>> t ┏━━━━━━━┳━━━━━━━━┓ @@ -1070,7 +1069,9 @@ def set_backend(backend: str | BaseBackend) -> None: Or as a URI - >>> ibis.set_backend("postgres://user:password@hostname:5432") # quartodoc: +SKIP # doctest: +SKIP + >>> ibis.set_backend( + ... "postgres://user:password@hostname:5432" + ... ) # quartodoc: +SKIP # doctest: +SKIP Or as an existing backend instance diff --git a/ibis/expr/operations/analytic.py b/ibis/expr/operations/analytic.py index d56d5812ca9f..b2206943b513 100644 --- a/ibis/expr/operations/analytic.py +++ b/ibis/expr/operations/analytic.py @@ -63,10 +63,10 @@ class RowNumber(RankBase): Examples -------- >>> import ibis - >>> t = ibis.table([('values', dt.int64)]) + >>> t = ibis.table([("values", dt.int64)]) >>> w = ibis.window(order_by=t.values) >>> row_num = ibis.row_number().over(w) - >>> result = t[t.values, row_num.name('row_num')] + >>> result = t[t.values, row_num.name("row_num")] Returns ------- diff --git a/ibis/expr/operations/udf.py b/ibis/expr/operations/udf.py index ad3a90676169..6cf7c33f9594 100644 --- a/ibis/expr/operations/udf.py +++ b/ibis/expr/operations/udf.py @@ -106,7 +106,6 @@ def python( >>> @ibis.udf.scalar.python ... def add_one(x: int) -> int: ... return x + 1 - ... >>> expr = add_one(2) >>> con = ibis.connect("duckdb://") >>> con.execute(expr) @@ -158,7 +157,6 @@ def pandas( >>> @ibis.udf.scalar.pandas ... def add_one(x: int) -> int: ... return x + 1 - ... >>> expr = add_one(2) >>> con = ibis.connect(os.environ["SNOWFLAKE_URL"]) # doctest: +SKIP >>> con.execute(expr) # doctest: +SKIP @@ -211,7 +209,6 @@ def pyarrow( >>> @ibis.udf.scalar.pyarrow ... def add_one(x: int) -> int: ... return pc.add(x, 1) - ... >>> expr = add_one(2) >>> con = ibis.connect("duckdb://") >>> con.execute(expr) @@ -262,7 +259,6 @@ def builtin( >>> @ibis.udf.scalar.builtin ... def hamming(a: str, b: str) -> int: ... '''Compute the Hamming distance between two strings.''' - ... >>> expr = hamming("duck", "luck") >>> con = ibis.connect("duckdb://") >>> con.execute(expr) diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index 65c99c5ca8d8..835030aebec2 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -144,7 +144,7 @@ def concat(self, other: ArrayValue, *args: ArrayValue) -> ArrayValue: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"a": [[7], [3] , None]}) + >>> t = ibis.memtable({"a": [[7], [3], None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ a ┃ @@ -224,7 +224,7 @@ def repeat(self, n: int | ir.IntegerValue) -> ArrayValue: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"a": [[7], [3] , None]}) + >>> t = ibis.memtable({"a": [[7], [3], None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ a ┃ @@ -274,7 +274,7 @@ def unnest(self) -> ir.Value: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"a": [[7, 42], [3, 3] , None]}) + >>> t = ibis.memtable({"a": [[7, 42], [3, 3], None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ a ┃ @@ -765,7 +765,9 @@ def union(self, other: ir.ArrayValue) -> ir.ArrayValue: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"arr1": [[3, 2], [], None], "arr2": [[1, 3], [None], [5]]}) + >>> t = ibis.memtable( + ... {"arr1": [[3, 2], [], None], "arr2": [[1, 3], [None], [5]]} + ... ) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ ┃ arr1 ┃ arr2 ┃ @@ -816,7 +818,9 @@ def intersect(self, other: ArrayValue) -> ArrayValue: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"arr1": [[3, 2], [], None], "arr2": [[1, 3], [None], [5]]}) + >>> t = ibis.memtable( + ... {"arr1": [[3, 2], [], None], "arr2": [[1, 3], [None], [5]]} + ... ) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ ┃ arr1 ┃ arr2 ┃ @@ -860,7 +864,9 @@ def zip(self, other: ArrayValue, *others: ArrayValue) -> ArrayValue: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({"numbers": [[3, 2], [], None], "strings": [["a", "c"], None, ["e"]]}) + >>> t = ibis.memtable( + ... {"numbers": [[3, 2], [], None], "strings": [["a", "c"], None, ["e"]]} + ... ) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ ┃ numbers ┃ strings ┃ @@ -947,7 +953,7 @@ def array(values: Iterable[V], type: str | dt.DataType | None = None) -> ArrayVa >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'a': [1, 2, 3], 'b': [4, 5, 6]}) + >>> t = ibis.memtable({"a": [1, 2, 3], "b": [4, 5, 6]}) >>> ibis.array([t.a, t.b]) ┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ArrayColumn() ┃ diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index 64359c1c6837..6f3612cbfd8f 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -205,9 +205,9 @@ def pipe(self, f, *args: Any, **kwargs: Any) -> Expr: Examples -------- >>> import ibis - >>> t = ibis.table([('a', 'int64'), ('b', 'string')], name='t') - >>> f = lambda a: (a + 1).name('a') - >>> g = lambda a: (a * 2).name('a') + >>> t = ibis.table([("a", "int64"), ("b", "string")], name="t") + >>> f = lambda a: (a + 1).name("a") + >>> g = lambda a: (a * 2).name("a") >>> result1 = t.a.pipe(f).pipe(g) >>> result1 r0 := UnboundTable: t diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 2f4de0896421..d5312cbea576 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -136,7 +136,9 @@ def try_cast(self, target_type: dt.DataType) -> Value: >>> import ibis >>> from ibis import _ >>> ibis.options.interactive = True - >>> t = ibis.memtable({"numbers": [1, 2, 3, 4], "strings": ["1.0", "2", "hello", "world"]}) + >>> t = ibis.memtable( + ... {"numbers": [1, 2, 3, 4], "strings": ["1.0", "2", "hello", "world"]} + ... ) >>> t ┏━━━━━━━━━┳━━━━━━━━━┓ ┃ numbers ┃ strings ┃ @@ -353,15 +355,15 @@ def isin(self, values: Value | Sequence[Value]) -> ir.BooleanValue: Check whether a column's values are contained in a sequence >>> import ibis - >>> table = ibis.table(dict(string_col='string'), name="t") - >>> table.string_col.isin(['foo', 'bar', 'baz']) + >>> table = ibis.table(dict(string_col="string"), name="t") + >>> table.string_col.isin(["foo", "bar", "baz"]) r0 := UnboundTable: t string_col string InValues(string_col): InValues(...) Check whether a column's values are contained in another table's column - >>> table2 = ibis.table(dict(other_string_col='string'), name="t2") + >>> table2 = ibis.table(dict(other_string_col="string"), name="t2") >>> table.string_col.isin(table2.other_string_col) r0 := UnboundTable: t string_col string @@ -627,13 +629,15 @@ def case(self) -> bl.SimpleCaseBuilder: Examples -------- >>> import ibis - >>> t = ibis.table([('string_col', 'string')], name='t') + >>> t = ibis.table([("string_col", "string")], name="t") >>> expr = t.string_col - >>> case_expr = (expr.case() - ... .when('a', 'an a') - ... .when('b', 'a b') - ... .else_('null or (not a and not b)') - ... .end()) + >>> case_expr = ( + ... expr.case() + ... .when("a", "an a") + ... .when("b", "a b") + ... .else_("null or (not a and not b)") + ... .end() + ... ) >>> case_expr r0 := UnboundTable: t string_col string @@ -1409,7 +1413,7 @@ def first(self, where: ir.BooleanValue | None = None) -> Value: └────────┘ >>> t.chars.first() 'a' - >>> t.chars.first(where=t.chars != 'a') + >>> t.chars.first(where=t.chars != "a") 'b' """ return ops.First(self, where=where).to_expr() @@ -1435,7 +1439,7 @@ def last(self, where: ir.BooleanValue | None = None) -> Value: └────────┘ >>> t.chars.last() 'd' - >>> t.chars.last(where=t.chars != 'd') + >>> t.chars.last(where=t.chars != "d") 'c' """ return ops.Last(self, where=where).to_expr() @@ -1661,13 +1665,13 @@ def literal(value: Any, type: dt.DataType | str | None = None) -> Scalar: Construct a `float64` literal from an `int` - >>> y = ibis.literal(42, type='double') + >>> y = ibis.literal(42, type="double") >>> y.type() Float64(nullable=True) Ibis checks for invalid types - >>> ibis.literal('foobar', type='int64') # quartodoc: +EXPECTED_FAILURE + >>> ibis.literal("foobar", type="int64") # quartodoc: +EXPECTED_FAILURE Traceback (most recent call last): ... TypeError: Value 'foobar' cannot be safely coerced to int64 diff --git a/ibis/expr/types/groupby.py b/ibis/expr/types/groupby.py index a5514f0d6056..199d6f789ed7 100644 --- a/ibis/expr/types/groupby.py +++ b/ibis/expr/types/groupby.py @@ -199,9 +199,12 @@ def mutate( │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ >>> ( - ... t.select("species", "bill_length_mm") - ... .group_by("species") - ... .mutate(centered_bill_len=ibis._.bill_length_mm - ibis._.bill_length_mm.mean()) + ... t.select("species", "bill_length_mm") + ... .group_by("species") + ... .mutate( + ... centered_bill_len=ibis._.bill_length_mm + ... - ibis._.bill_length_mm.mean() + ... ) ... ) ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ species ┃ bill_length_mm ┃ centered_bill_len ┃ diff --git a/ibis/expr/types/maps.py b/ibis/expr/types/maps.py index d4efd1c7933e..990ca5626ab0 100644 --- a/ibis/expr/types/maps.py +++ b/ibis/expr/types/maps.py @@ -23,9 +23,14 @@ class MapValue(Value): >>> import ibis >>> ibis.options.interactive = True >>> import pyarrow as pa - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -39,7 +44,7 @@ class MapValue(Value): └──────────────────────┘ Can use `[]` to access values: - >>> t.m['a'] + >>> t.m["a"] ┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ MapGet(m, 'a', None) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ @@ -51,7 +56,7 @@ class MapValue(Value): └──────────────────────┘ To provide default values, use `get`: - >>> t.m.get('b', 0) + >>> t.m.get("b", 0) ┏━━━━━━━━━━━━━━━━━━━┓ ┃ MapGet(m, 'b', 0) ┃ ┡━━━━━━━━━━━━━━━━━━━┩ @@ -89,9 +94,14 @@ def get( >>> import ibis >>> import pyarrow as pa >>> ibis.options.interactive = True - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -150,9 +160,14 @@ def length(self) -> ir.IntegerValue: >>> import ibis >>> import pyarrow as pa >>> ibis.options.interactive = True - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -202,9 +217,14 @@ def __getitem__(self, key: ir.Value) -> ir.Value: >>> import ibis >>> import pyarrow as pa >>> ibis.options.interactive = True - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -249,9 +269,14 @@ def contains( >>> import ibis >>> import pyarrow as pa >>> ibis.options.interactive = True - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -289,9 +314,14 @@ def keys(self) -> ir.ArrayValue: >>> import ibis >>> import pyarrow as pa >>> ibis.options.interactive = True - >>> tab = pa.table({ - ... "m": pa.array([[("a", 1), ("b", 2)], [("a", 1)], None], - ... type=pa.map_(pa.utf8(), pa.int64()))}) + >>> tab = pa.table( + ... { + ... "m": pa.array( + ... [[("a", 1), ("b", 2)], [("a", 1)], None], + ... type=pa.map_(pa.utf8(), pa.int64()), + ... ) + ... } + ... ) >>> t = ibis.memtable(tab) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┓ @@ -431,7 +461,7 @@ def map( >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'keys': [['a', 'b'], ['b']], 'values': [[1, 2], [3]]}) + >>> t = ibis.memtable({"keys": [["a", "b"], ["b"]], "values": [[1, 2], [3]]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ ┃ keys ┃ values ┃ diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index bd4212c97fce..9348da5b71bc 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -745,7 +745,12 @@ def group_by( >>> import ibis >>> from ibis import _ >>> ibis.options.interactive = True - >>> t = ibis.memtable({"fruit": ["apple", "apple", "banana", "orange"], "price": [0.5, 0.5, 0.25, 0.33]}) + >>> t = ibis.memtable( + ... { + ... "fruit": ["apple", "apple", "banana", "orange"], + ... "price": [0.5, 0.5, 0.25, 0.33], + ... } + ... ) >>> t ┏━━━━━━━━┳━━━━━━━━━┓ ┃ fruit ┃ price ┃ @@ -907,7 +912,12 @@ def aggregate( >>> import ibis >>> from ibis import _ >>> ibis.options.interactive = True - >>> t = ibis.memtable({"fruit": ["apple", "apple", "banana", "orange"], "price": [0.5, 0.5, 0.25, 0.33]}) + >>> t = ibis.memtable( + ... { + ... "fruit": ["apple", "apple", "banana", "orange"], + ... "price": [0.5, 0.5, 0.25, 0.33], + ... } + ... ) >>> t ┏━━━━━━━━┳━━━━━━━━━┓ ┃ fruit ┃ price ┃ @@ -919,7 +929,12 @@ def aggregate( │ banana │ 0.25 │ │ orange │ 0.33 │ └────────┴─────────┘ - >>> t.aggregate(by=["fruit"], total_cost=_.price.sum(), avg_cost=_.price.mean(), having=_.price.sum() < 0.5) + >>> t.aggregate( + ... by=["fruit"], + ... total_cost=_.price.sum(), + ... avg_cost=_.price.mean(), + ... having=_.price.sum() < 0.5, + ... ) ┏━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓ ┃ fruit ┃ total_cost ┃ avg_cost ┃ ┡━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━┩ @@ -1058,7 +1073,9 @@ def distinct( Drop all duplicated rows - >>> expr = t.distinct(on=["species", "island", "year", "bill_length_mm"], keep=None) + >>> expr = t.distinct( + ... on=["species", "island", "year", "bill_length_mm"], keep=None + ... ) >>> expr.count() 273 >>> t.count() @@ -1481,7 +1498,9 @@ def mutate( >>> import ibis.selectors as s >>> from ibis import _ >>> ibis.options.interactive = True - >>> t = ibis.examples.penguins.fetch().select("species", "year", "bill_length_mm") + >>> t = ibis.examples.penguins.fetch().select( + ... "species", "year", "bill_length_mm" + ... ) >>> t ┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ species ┃ year ┃ bill_length_mm ┃ @@ -1518,7 +1537,9 @@ def mutate( Add a new column based on an aggregation. Note the automatic broadcasting. - >>> t.select("species", bill_demean=_.bill_length_mm - _.bill_length_mm.mean()).head() + >>> t.select( + ... "species", bill_demean=_.bill_length_mm - _.bill_length_mm.mean() + ... ).head() ┏━━━━━━━━━┳━━━━━━━━━━━━━┓ ┃ species ┃ bill_demean ┃ ┡━━━━━━━━━╇━━━━━━━━━━━━━┩ @@ -2084,7 +2105,9 @@ def filter( │ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ … │ │ … │ … │ … │ … │ … │ … │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘ - >>> t.filter([t.species == "Adelie", t.body_mass_g > 3500]).sex.value_counts().dropna("sex") + >>> t.filter( + ... [t.species == "Adelie", t.body_mass_g > 3500] + ... ).sex.value_counts().dropna("sex") ┏━━━━━━━━┳━━━━━━━━━━━┓ ┃ sex ┃ sex_count ┃ ┡━━━━━━━━╇━━━━━━━━━━━┩ @@ -2357,6 +2380,7 @@ def unpack(self, *columns: str) -> Table: ... ''' >>> with open("/tmp/lines.json", "w") as f: ... _ = f.write(lines) + ... >>> t = ibis.read_json("/tmp/lines.json") >>> t ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ @@ -2934,6 +2958,7 @@ def cache(self) -> Table: >>> with t.mutate(computation="Heavy Computation").cache() as cached_penguins: ... cached_penguins + ... ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩ @@ -3024,7 +3049,9 @@ def pivot_longer( Here we convert column names not matching the selector for the `religion` column and convert those names into values - >>> relig_income.pivot_longer(~s.c("religion"), names_to="income", values_to="count") + >>> relig_income.pivot_longer( + ... ~s.c("religion"), names_to="income", values_to="count" + ... ) ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓ ┃ religion ┃ income ┃ count ┃ ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩ @@ -3181,7 +3208,12 @@ def pivot_longer( ... names_pattern="new_?(.*)_(.)(.*)", ... names_transform=dict( ... gender={"m": 1, "f": 2}.get, - ... age=dict(zip(["014", "1524", "2534", "3544", "4554", "5564", "65"], range(7))).get, + ... age=dict( + ... zip( + ... ["014", "1524", "2534", "3544", "4554", "5564", "65"], + ... range(7), + ... ) + ... ).get, ... ), ... values_to="count", ... ) @@ -3216,7 +3248,9 @@ def pivot_longer( `names_transform` must be a mapping or callable - >>> who.pivot_longer(s.r["new_sp_m014":"newrel_f65"], names_transform="upper") # quartodoc: +EXPECTED_FAILURE + >>> who.pivot_longer( + ... s.r["new_sp_m014":"newrel_f65"], names_transform="upper" + ... ) # quartodoc: +EXPECTED_FAILURE Traceback (most recent call last): ... ibis.common.exceptions.IbisTypeError: ... Got @@ -3382,7 +3416,9 @@ def pivot_wider( Fill missing pivoted values using `values_fill` - >>> fish_encounters.pivot_wider(names_from="station", values_from="seen", values_fill=0) + >>> fish_encounters.pivot_wider( + ... names_from="station", values_from="seen", values_fill=0 + ... ) ┏━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━┓ ┃ fish ┃ Release ┃ I80_1 ┃ Lisbon ┃ Rstr ┃ Base_TD ┃ BCE ┃ BCW ┃ … ┃ ┡━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━┩ @@ -3422,7 +3458,9 @@ def pivot_wider( │ 06 │ California │ rent │ 1358 │ 3 │ │ … │ … │ … │ … │ … │ └────────┴────────────┴──────────┴──────────┴───────┘ - >>> us_rent_income.pivot_wider(names_from="variable", values_from=["estimate", "moe"]) + >>> us_rent_income.pivot_wider( + ... names_from="variable", values_from=["estimate", "moe"] + ... ) ┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━┓ ┃ geoid ┃ name ┃ estimate_income ┃ moe_income ┃ … ┃ ┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━┩ @@ -3468,7 +3506,9 @@ def pivot_wider( Supply an alternative function to summarize values - >>> warpbreaks = ibis.examples.warpbreaks.fetch().select("wool", "tension", "breaks") + >>> warpbreaks = ibis.examples.warpbreaks.fetch().select( + ... "wool", "tension", "breaks" + ... ) >>> warpbreaks ┏━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ ┃ wool ┃ tension ┃ breaks ┃ @@ -3487,7 +3527,9 @@ def pivot_wider( │ A │ M │ 18 │ │ … │ … │ … │ └────────┴─────────┴────────┘ - >>> warpbreaks.pivot_wider(names_from="wool", values_from="breaks", values_agg="mean") + >>> warpbreaks.pivot_wider( + ... names_from="wool", values_from="breaks", values_agg="mean" + ... ) ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓ ┃ tension ┃ A ┃ B ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩ diff --git a/ibis/expr/types/strings.py b/ibis/expr/types/strings.py index 6086d109c423..53178661ae24 100644 --- a/ibis/expr/types/strings.py +++ b/ibis/expr/types/strings.py @@ -619,8 +619,8 @@ def translate(self, from_str: StringValue, to_str: StringValue) -> StringValue: Examples -------- >>> import ibis - >>> table = ibis.table(dict(string_col='string')) - >>> result = table.string_col.translate('a', 'b') + >>> table = ibis.table(dict(string_col="string")) + >>> result = table.string_col.translate("a", "b") """ return ops.Translate(self, from_str, to_str).to_expr() @@ -772,8 +772,8 @@ def find_in_set(self, str_list: Sequence[str]) -> ir.IntegerValue: Examples -------- >>> import ibis - >>> table = ibis.table(dict(string_col='string')) - >>> result = table.string_col.find_in_set(['a', 'b']) + >>> table = ibis.table(dict(string_col="string")) + >>> result = table.string_col.find_in_set(["a", "b"]) """ return ops.FindInSet(self, str_list).to_expr() @@ -1254,7 +1254,9 @@ def file(self): Examples -------- >>> import ibis - >>> url = ibis.literal("https://example.com:80/docs/books/tutorial/index.html?name=networking") + >>> url = ibis.literal( + ... "https://example.com:80/docs/books/tutorial/index.html?name=networking" + ... ) >>> result = url.file() # docs/books/tutorial/index.html?name=networking Returns @@ -1270,7 +1272,9 @@ def path(self): Examples -------- >>> import ibis - >>> url = ibis.literal("https://example.com:80/docs/books/tutorial/index.html?name=networking") + >>> url = ibis.literal( + ... "https://example.com:80/docs/books/tutorial/index.html?name=networking" + ... ) >>> result = url.path() # docs/books/tutorial/index.html Returns @@ -1294,9 +1298,11 @@ def query(self, key: str | StringValue | None = None): Examples -------- >>> import ibis - >>> url = ibis.literal("https://example.com:80/docs/books/tutorial/index.html?name=networking") + >>> url = ibis.literal( + ... "https://example.com:80/docs/books/tutorial/index.html?name=networking" + ... ) >>> result = url.query() # name=networking - >>> query_name = url.query('name') # networking + >>> query_name = url.query("name") # networking Returns ------- diff --git a/ibis/expr/types/structs.py b/ibis/expr/types/structs.py index 9f3487dd25b6..5cd25acbcd66 100644 --- a/ibis/expr/types/structs.py +++ b/ibis/expr/types/structs.py @@ -48,18 +48,18 @@ def struct( -------- Create a struct literal from a [](`dict`) with the type inferred >>> import ibis - >>> t = ibis.struct(dict(a=1, b='foo')) + >>> t = ibis.struct(dict(a=1, b="foo")) Create a struct literal from a [](`dict`) with a specified type - >>> t = ibis.struct(dict(a=1, b='foo'), type='struct') + >>> t = ibis.struct(dict(a=1, b="foo"), type="struct") Specify a specific type for the struct literal - >>> t = ibis.struct(dict(a=1, b=40), type='struct') + >>> t = ibis.struct(dict(a=1, b=40), type="struct") Create a struct array from multiple arrays >>> ibis.options.interactive = True - >>> t = ibis.memtable({'a': [1, 2, 3], 'b': ['foo', 'bar', 'baz']}) - >>> ibis.struct([('a', t.a), ('b', t.b)]) + >>> t = ibis.memtable({"a": [1, 2, 3], "b": ["foo", "bar", "baz"]}) + >>> ibis.struct([("a", t.a), ("b", t.b)]) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ StructColumn() ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -71,7 +71,7 @@ def struct( └─────────────────────────────┘ Create a struct array from columns and literals - >>> ibis.struct([('a', t.a), ('b', 'foo')]) + >>> ibis.struct([("a", t.a), ("b", "foo")]) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ StructColumn() ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -103,7 +103,7 @@ class StructValue(Value): -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'s': [{'a': 1, 'b': 'foo'}, {'a': 3, 'b': None}, None]}) + >>> t = ibis.memtable({"s": [{"a": 1, "b": "foo"}, {"a": 3, "b": None}, None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ s ┃ @@ -127,7 +127,7 @@ class StructValue(Value): │ 3 │ │ NULL │ └───────┘ - >>> t.s['a'] + >>> t.s["a"] ┏━━━━━━━┓ ┃ a ┃ ┡━━━━━━━┩ @@ -166,7 +166,7 @@ def __getitem__(self, name: str) -> ir.Value: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'s': [{'a': 1, 'b': 'foo'}, {'a': 3, 'b': None}, None]}) + >>> t = ibis.memtable({"s": [{"a": 1, "b": "foo"}, {"a": 3, "b": None}, None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ s ┃ @@ -177,7 +177,7 @@ def __getitem__(self, name: str) -> ir.Value: │ {'a': 3, 'b': None} │ │ NULL │ └─────────────────────────────┘ - >>> t.s['a'] + >>> t.s["a"] ┏━━━━━━━┓ ┃ a ┃ ┡━━━━━━━┩ @@ -187,7 +187,7 @@ def __getitem__(self, name: str) -> ir.Value: │ 3 │ │ NULL │ └───────┘ - >>> t.s['b'] + >>> t.s["b"] ┏━━━━━━━━┓ ┃ b ┃ ┡━━━━━━━━┩ @@ -197,7 +197,7 @@ def __getitem__(self, name: str) -> ir.Value: │ NULL │ │ NULL │ └────────┘ - >>> t.s['foo_bar'] + >>> t.s["foo_bar"] Traceback (most recent call last): ... KeyError: 'foo_bar' @@ -226,7 +226,7 @@ def __getattr__(self, name: str) -> ir.Value: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'s': [{'a': 1, 'b': 'foo'}, {'a': 3, 'b': None}, None]}) + >>> t = ibis.memtable({"s": [{"a": 1, "b": "foo"}, {"a": 3, "b": None}, None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ s ┃ @@ -353,7 +353,7 @@ def destructure(self) -> list[ir.Value]: -------- >>> import ibis >>> ibis.options.interactive = True - >>> t = ibis.memtable({'s': [{'a': 1, 'b': 'foo'}, {'a': 3, 'b': None}, None]}) + >>> t = ibis.memtable({"s": [{"a": 1, "b": "foo"}, {"a": 3, "b": None}, None]}) >>> t ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ s ┃ diff --git a/ibis/legacy/udf/vectorized.py b/ibis/legacy/udf/vectorized.py index 0bfca24c60b8..6164e4377784 100644 --- a/ibis/legacy/udf/vectorized.py +++ b/ibis/legacy/udf/vectorized.py @@ -134,22 +134,32 @@ def _coerce_to_dataframe( Examples -------- >>> import pandas as pd - >>> _coerce_to_dataframe(pd.DataFrame({'a': [1, 2, 3]}), dt.Struct(dict(b="int32"))) # noqa: E501 + >>> _coerce_to_dataframe( + ... pd.DataFrame({"a": [1, 2, 3]}), dt.Struct(dict(b="int32")) + ... ) # noqa: E501 b 0 1 1 2 2 3 - >>> _coerce_to_dataframe(pd.Series([[1, 2, 3]]), dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501 + >>> _coerce_to_dataframe( + ... pd.Series([[1, 2, 3]]), dt.Struct(dict.fromkeys("abc", "int32")) + ... ) # noqa: E501 a b c 0 1 2 3 - >>> _coerce_to_dataframe(pd.Series([range(3), range(3)]), dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501 + >>> _coerce_to_dataframe( + ... pd.Series([range(3), range(3)]), dt.Struct(dict.fromkeys("abc", "int32")) + ... ) # noqa: E501 a b c 0 0 1 2 1 0 1 2 - >>> _coerce_to_dataframe([pd.Series(x) for x in [1, 2, 3]], dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501 + >>> _coerce_to_dataframe( + ... [pd.Series(x) for x in [1, 2, 3]], dt.Struct(dict.fromkeys("abc", "int32")) + ... ) # noqa: E501 a b c 0 1 2 3 - >>> _coerce_to_dataframe([1, 2, 3], dt.Struct(dict.fromkeys('abc', 'int32'))) # noqa: E501 + >>> _coerce_to_dataframe( + ... [1, 2, 3], dt.Struct(dict.fromkeys("abc", "int32")) + ... ) # noqa: E501 a b c 0 1 2 3 """ @@ -277,6 +287,7 @@ def analytic(input_type, output_type): >>> @analytic(input_type=[dt.double], output_type=dt.double) ... def zscore(series): # note the use of aggregate functions ... return (series - series.mean()) / series.std() + ... Define and use an UDF with multiple return columns: @@ -289,10 +300,10 @@ def analytic(input_type, output_type): ... std = v.std() ... return v - mean, (v - mean) / std >>> - >>> win = ibis.window(preceding=None, following=None, group_by='key') + >>> win = ibis.window(preceding=None, following=None, group_by="key") >>> # add two columns "demean" and "zscore" >>> table = table.mutate( # quartodoc: +SKIP # doctest: +SKIP - ... demean_and_zscore(table['v']).over(win).destructure() + ... demean_and_zscore(table["v"]).over(win).destructure() ... ) """ return _udf_decorator(AnalyticVectorizedUDF, input_type, output_type) @@ -318,24 +329,28 @@ def elementwise(input_type, output_type): >>> @elementwise(input_type=[dt.string], output_type=dt.int64) ... def my_string_length(series): ... return series.str.len() * 2 + ... Define an UDF with non-column parameters: >>> @elementwise(input_type=[dt.string], output_type=dt.int64) ... def my_string_length(series, *, times): ... return series.str.len() * times + ... Define and use an UDF with multiple return columns: >>> @elementwise( ... input_type=[dt.string], - ... output_type=dt.Struct(dict(year=dt.string, monthday=dt.string)) + ... output_type=dt.Struct(dict(year=dt.string, monthday=dt.string)), ... ) ... def year_monthday(date): ... return date.str.slice(0, 4), date.str.slice(4, 8) >>> >>> # add two columns "year" and "monthday" - >>> table = table.mutate(year_monthday(table['date']).destructure()) # quartodoc: +SKIP # doctest: +SKIP + >>> table = table.mutate( + ... year_monthday(table["date"]).destructure() + ... ) # quartodoc: +SKIP # doctest: +SKIP """ return _udf_decorator(ElementWiseVectorizedUDF, input_type, output_type) @@ -360,19 +375,20 @@ def reduction(input_type, output_type): >>> @reduction(input_type=[dt.string], output_type=dt.int64) ... def my_string_length_agg(series, **kwargs): ... return (series.str.len() * 2).sum() + ... Define and use an UDF with multiple return columns: >>> @reduction( ... input_type=[dt.double], - ... output_type=dt.Struct(dict(mean="double", std="double")) + ... output_type=dt.Struct(dict(mean="double", std="double")), ... ) ... def mean_and_std(v): ... return v.mean(), v.std() >>> >>> # create aggregation columns "mean" and "std" - >>> table = table.group_by('key').aggregate( # quartodoc: +SKIP # doctest: +SKIP - ... mean_and_std(table['v']).destructure() + >>> table = table.group_by("key").aggregate( # quartodoc: +SKIP # doctest: +SKIP + ... mean_and_std(table["v"]).destructure() ... ) """ return _udf_decorator(ReductionVectorizedUDF, input_type, output_type) diff --git a/ibis/selectors.py b/ibis/selectors.py index 721438671bfb..170fa22aac79 100644 --- a/ibis/selectors.py +++ b/ibis/selectors.py @@ -32,7 +32,8 @@ >>> expr = t.select( ... [ -... t[c] for c in t.columns +... t[c] +... for c in t.columns ... if t[c].type().is_numeric() or t[c].type().is_string() ... if ("a" in c or "b" in c or "cd" in c) ... ] @@ -213,7 +214,9 @@ def of_type(dtype: dt.DataType | str | type[dt.DataType]) -> Predicate: >>> import ibis >>> import ibis.expr.datatypes as dt >>> import ibis.selectors as s - >>> t = ibis.table(dict(name="string", siblings="array", parents="array")) + >>> t = ibis.table( + ... dict(name="string", siblings="array", parents="array") + ... ) >>> expr = t.select(s.of_type(dt.Array(dt.string))) >>> expr.columns ['siblings'] @@ -327,7 +330,11 @@ def contains( >>> import ibis >>> import ibis.selectors as s - >>> t = ibis.table(dict(a="int64", b="string", c="float", d="array", ab="struct")) + >>> t = ibis.table( + ... dict( + ... a="int64", b="string", c="float", d="array", ab="struct" + ... ) + ... ) >>> expr = t.select(s.contains(("a", "b"))) >>> expr.columns ['a', 'b', 'ab'] @@ -467,11 +474,7 @@ def across( >>> from ibis import _, selectors as s >>> t = ibis.examples.penguins.fetch() >>> t.select(s.startswith("bill")).mutate( - ... s.across( - ... s.numeric(), - ... dict(centered =_ - _.mean()), - ... names = "{fn}_{col}" - ... ) + ... s.across(s.numeric(), dict(centered=_ - _.mean()), names="{fn}_{col}") ... ) ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━┓ ┃ bill_length_mm ┃ bill_depth_mm ┃ centered_bill_length_mm ┃ … ┃ diff --git a/ibis/util.py b/ibis/util.py index 315799f45006..f623eb397d45 100644 --- a/ibis/util.py +++ b/ibis/util.py @@ -205,11 +205,11 @@ def is_iterable(o: Any) -> bool: Examples -------- - >>> is_iterable('1') + >>> is_iterable("1") False - >>> is_iterable(b'1') + >>> is_iterable(b"1") False - >>> is_iterable(iter('1')) + >>> is_iterable(iter("1")) True >>> is_iterable(i for i in range(1)) True @@ -254,17 +254,17 @@ def convert_unit(value, unit, to, floor: bool = True): Examples -------- >>> one_second = 1000 - >>> x = convert_unit(one_second, 'ms', 's') + >>> x = convert_unit(one_second, "ms", "s") >>> x 1 >>> one_second = 1 - >>> x = convert_unit(one_second, 's', 'ms') + >>> x = convert_unit(one_second, "s", "ms") >>> x 1000 - >>> x = convert_unit(one_second, 's', 's') + >>> x = convert_unit(one_second, "s", "s") >>> x 1 - >>> x = convert_unit(one_second, 's', 'M') + >>> x = convert_unit(one_second, "s", "M") Traceback (most recent call last): ... ValueError: Cannot convert to or from unit ... to unit ...