feat(python): Allow empty select/with_columns/groupby (#9205)

pola-rs · Jun 4, 2023
commit f800164 (1 parent: ac728ce)

Showing 7 changed files with 164 additions and 155 deletions.
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
@@ -2031,9 +2031,6 @@ def select(
         └───────────┘
 
         """
-        if exprs is None and not named_exprs:
-            raise ValueError("Expected at least one of 'exprs' or '**named_exprs'")
-
         structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0)))
 
         exprs = parse_as_list_of_expressions(
@@ -3088,9 +3085,6 @@ def with_columns(
         └─────┴──────┴─────────────┘
 
         """
-        if exprs is None and not named_exprs:
-            raise ValueError("Expected at least one of 'exprs' or '**named_exprs'")
-
         structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0)))
 
         exprs = parse_as_list_of_expressions(

diff --git a/py-polars/polars/lazyframe/groupby.py b/py-polars/polars/lazyframe/groupby.py
@@ -120,8 +120,6 @@ def agg(
             raise ValueError(
                 f"'aggs' argument should be one or multiple expressions, got: '{aggs}'."
             )
-        if aggs is None and not named_aggs:
-            raise ValueError("Expected at least one of 'aggs' or '**named_aggs'")
 
         exprs = parse_as_list_of_expressions(aggs, *more_aggs, **named_aggs)
 

diff --git a/py-polars/tests/unit/operations/test_groupby.py b/py-polars/tests/unit/operations/test_groupby.py
@@ -132,6 +132,13 @@ def test_groupby_args() -> None:
     assert df.groupby("a").agg(q="b", r="c").columns == ["a", "q", "r"]
 
 
+def test_groupby_empty() -> None:
+    df = pl.DataFrame({"a": [1, 1, 2]})
+    result = df.groupby("a").agg()
+    expected = pl.DataFrame({"a": [1, 2]})
+    assert_frame_equal(result, expected, check_row_order=False)
+
+
 def test_groupby_iteration() -> None:
     df = pl.DataFrame(
         {

diff --git a/py-polars/tests/unit/operations/test_select.py b/py-polars/tests/unit/operations/test_select.py
@@ -37,6 +37,12 @@ def test_select_args_kwargs() -> None:
     assert_frame_equal(result, expected)
 
 
+def test_select_empty() -> None:
+    result = pl.select()
+    expected = pl.DataFrame()
+    assert_frame_equal(result, expected)
+
+
 def test_select_empty_list() -> None:
     result = pl.select([])
     expected = pl.DataFrame()

diff --git a/py-polars/tests/unit/operations/test_with_columns.py b/py-polars/tests/unit/operations/test_with_columns.py
@@ -0,0 +1,151 @@
+import polars as pl
+from polars.testing import assert_frame_equal
+
+
+def test_with_columns() -> None:
+    import datetime
+
+    df = pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4],
+            "b": [0.5, 4, 10, 13],
+            "c": [True, True, False, True],
+        }
+    )
+    srs_named = pl.Series("f", [3, 2, 1, 0])
+    srs_unnamed = pl.Series(values=[3, 2, 1, 0])
+
+    expected = pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4],
+            "b": [0.5, 4, 10, 13],
+            "c": [True, True, False, True],
+            "d": [0.5, 8.0, 30.0, 52.0],
+            "e": [False, False, True, False],
+            "f": [3, 2, 1, 0],
+            "g": True,
+            "h": pl.Series(values=[1, 1, 1, 1], dtype=pl.Int32),
+            "i": 3.2,
+            "j": [1, 2, 3, 4],
+            "k": pl.Series(values=[None, None, None, None], dtype=pl.Null),
+            "l": datetime.datetime(2001, 1, 1, 0, 0),
+        }
+    )
+
+    # as exprs list
+    dx = df.with_columns(
+        [
+            (pl.col("a") * pl.col("b")).alias("d"),
+            ~pl.col("c").alias("e"),
+            srs_named,
+            pl.lit(True).alias("g"),
+            pl.lit(1).alias("h"),
+            pl.lit(3.2).alias("i"),
+            pl.col("a").alias("j"),
+            pl.lit(None).alias("k"),
+            pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
+        ]
+    )
+    assert_frame_equal(dx, expected)
+
+    # as positional arguments
+    dx = df.with_columns(
+        (pl.col("a") * pl.col("b")).alias("d"),
+        ~pl.col("c").alias("e"),
+        srs_named,
+        pl.lit(True).alias("g"),
+        pl.lit(1).alias("h"),
+        pl.lit(3.2).alias("i"),
+        pl.col("a").alias("j"),
+        pl.lit(None).alias("k"),
+        pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
+    )
+    assert_frame_equal(dx, expected)
+
+    # as keyword arguments
+    dx = df.with_columns(
+        d=pl.col("a") * pl.col("b"),
+        e=~pl.col("c"),
+        f=srs_unnamed,
+        g=True,
+        h=1,
+        i=3.2,
+        j="a",  # Note: string interpreted as column name, resolves to `pl.col("a")`
+        k=None,
+        l=datetime.datetime(2001, 1, 1, 0, 0),
+    )
+    assert_frame_equal(dx, expected)
+
+    # mixed
+    dx = df.with_columns(
+        [(pl.col("a") * pl.col("b")).alias("d")],
+        ~pl.col("c").alias("e"),
+        f=srs_unnamed,
+        g=True,
+        h=1,
+        i=3.2,
+        j="a",  # Note: string interpreted as column name, resolves to `pl.col("a")`
+        k=None,
+        l=datetime.datetime(2001, 1, 1, 0, 0),
+    )
+    assert_frame_equal(dx, expected)
+
+    # automatically upconvert multi-output expressions to struct
+    with pl.Config() as cfg:
+        cfg.set_auto_structify(True)
+
+        ldf = (
+            pl.DataFrame({"x1": [1, 2, 6], "x2": [1, 2, 3]})
+            .lazy()
+            .with_columns(
+                pl.col(["x1", "x2"]).pct_change().alias("pct_change"),
+                maxes=pl.all().max().suffix("_max"),
+                xcols=pl.col("^x.*$"),
+            )
+        )
+        # ┌─────┬─────┬─────────────┬───────────┬───────────┐
+        # │ x1  ┆ x2  ┆ pct_change  ┆ maxes     ┆ xcols     │
+        # │ --- ┆ --- ┆ ---         ┆ ---       ┆ ---       │
+        # │ i64 ┆ i64 ┆ struct[2]   ┆ struct[2] ┆ struct[2] │
+        # ╞═════╪═════╪═════════════╪═══════════╪═══════════╡
+        # │ 1   ┆ 1   ┆ {null,null} ┆ {6,3}     ┆ {1,1}     │
+        # │ 2   ┆ 2   ┆ {1.0,1.0}   ┆ {6,3}     ┆ {2,2}     │
+        # │ 6   ┆ 3   ┆ {2.0,0.5}   ┆ {6,3}     ┆ {6,3}     │
+        # └─────┴─────┴─────────────┴───────────┴───────────┘
+        assert ldf.collect().to_dicts() == [
+            {
+                "x1": 1,
+                "x2": 1,
+                "pct_change": {"x1": None, "x2": None},
+                "maxes": {"x1_max": 6, "x2_max": 3},
+                "xcols": {"x1": 1, "x2": 1},
+            },
+            {
+                "x1": 2,
+                "x2": 2,
+                "pct_change": {"x1": 1.0, "x2": 1.0},
+                "maxes": {"x1_max": 6, "x2_max": 3},
+                "xcols": {"x1": 2, "x2": 2},
+            },
+            {
+                "x1": 6,
+                "x2": 3,
+                "pct_change": {"x1": 2.0, "x2": 0.5},
+                "maxes": {"x1_max": 6, "x2_max": 3},
+                "xcols": {"x1": 6, "x2": 3},
+            },
+        ]
+
+
+def test_with_columns_empty() -> None:
+    df = pl.DataFrame({"a": [1, 2]})
+    result = df.with_columns()
+    assert_frame_equal(result, df)
+
+
+def test_with_columns_single_series() -> None:
+    ldf = pl.LazyFrame({"a": [1, 2]})
+    result = ldf.with_columns(pl.Series("b", [3, 4]))
+
+    expected = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
+    assert_frame_equal(result.collect(), expected)
diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py
@@ -3111,145 +3111,6 @@ def test_unique_on_sorted() -> None:
     ).to_dict(False) == {"a": [1, 3], "b": [2, 3]}
 
 
-def test_with_columns() -> None:
-    import datetime
-
-    df = pl.DataFrame(
-        {
-            "a": [1, 2, 3, 4],
-            "b": [0.5, 4, 10, 13],
-            "c": [True, True, False, True],
-        }
-    )
-    srs_named = pl.Series("f", [3, 2, 1, 0])
-    srs_unnamed = pl.Series(values=[3, 2, 1, 0])
-
-    expected = pl.DataFrame(
-        {
-            "a": [1, 2, 3, 4],
-            "b": [0.5, 4, 10, 13],
-            "c": [True, True, False, True],
-            "d": [0.5, 8.0, 30.0, 52.0],
-            "e": [False, False, True, False],
-            "f": [3, 2, 1, 0],
-            "g": True,
-            "h": pl.Series(values=[1, 1, 1, 1], dtype=pl.Int32),
-            "i": 3.2,
-            "j": [1, 2, 3, 4],
-            "k": pl.Series(values=[None, None, None, None], dtype=pl.Null),
-            "l": datetime.datetime(2001, 1, 1, 0, 0),
-        }
-    )
-
-    # as exprs list
-    dx = df.with_columns(
-        [
-            (pl.col("a") * pl.col("b")).alias("d"),
-            ~pl.col("c").alias("e"),
-            srs_named,
-            pl.lit(True).alias("g"),
-            pl.lit(1).alias("h"),
-            pl.lit(3.2).alias("i"),
-            pl.col("a").alias("j"),
-            pl.lit(None).alias("k"),
-            pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
-        ]
-    )
-    assert_frame_equal(dx, expected)
-
-    # as positional arguments
-    dx = df.with_columns(
-        (pl.col("a") * pl.col("b")).alias("d"),
-        ~pl.col("c").alias("e"),
-        srs_named,
-        pl.lit(True).alias("g"),
-        pl.lit(1).alias("h"),
-        pl.lit(3.2).alias("i"),
-        pl.col("a").alias("j"),
-        pl.lit(None).alias("k"),
-        pl.lit(datetime.datetime(2001, 1, 1, 0, 0)).alias("l"),
-    )
-    assert_frame_equal(dx, expected)
-
-    # as keyword arguments
-    dx = df.with_columns(
-        d=pl.col("a") * pl.col("b"),
-        e=~pl.col("c"),
-        f=srs_unnamed,
-        g=True,
-        h=1,
-        i=3.2,
-        j="a",  # Note: string interpreted as column name, resolves to `pl.col("a")`
-        k=None,
-        l=datetime.datetime(2001, 1, 1, 0, 0),
-    )
-    assert_frame_equal(dx, expected)
-
-    # mixed
-    dx = df.with_columns(
-        [(pl.col("a") * pl.col("b")).alias("d")],
-        ~pl.col("c").alias("e"),
-        f=srs_unnamed,
-        g=True,
-        h=1,
-        i=3.2,
-        j="a",  # Note: string interpreted as column name, resolves to `pl.col("a")`
-        k=None,
-        l=datetime.datetime(2001, 1, 1, 0, 0),
-    )
-    assert_frame_equal(dx, expected)
-
-    # automatically upconvert multi-output expressions to struct
-    with pl.Config() as cfg:
-        cfg.set_auto_structify(True)
-
-        ldf = (
-            pl.DataFrame({"x1": [1, 2, 6], "x2": [1, 2, 3]})
-            .lazy()
-            .with_columns(
-                pl.col(["x1", "x2"]).pct_change().alias("pct_change"),
-                maxes=pl.all().max().suffix("_max"),
-                xcols=pl.col("^x.*$"),
-            )
-        )
-        # ┌─────┬─────┬─────────────┬───────────┬───────────┐
-        # │ x1  ┆ x2  ┆ pct_change  ┆ maxes     ┆ xcols     │
-        # │ --- ┆ --- ┆ ---         ┆ ---       ┆ ---       │
-        # │ i64 ┆ i64 ┆ struct[2]   ┆ struct[2] ┆ struct[2] │
-        # ╞═════╪═════╪═════════════╪═══════════╪═══════════╡
-        # │ 1   ┆ 1   ┆ {null,null} ┆ {6,3}     ┆ {1,1}     │
-        # │ 2   ┆ 2   ┆ {1.0,1.0}   ┆ {6,3}     ┆ {2,2}     │
-        # │ 6   ┆ 3   ┆ {2.0,0.5}   ┆ {6,3}     ┆ {6,3}     │
-        # └─────┴─────┴─────────────┴───────────┴───────────┘
-        assert ldf.collect().to_dicts() == [
-            {
-                "x1": 1,
-                "x2": 1,
-                "pct_change": {"x1": None, "x2": None},
-                "maxes": {"x1_max": 6, "x2_max": 3},
-                "xcols": {"x1": 1, "x2": 1},
-            },
-            {
-                "x1": 2,
-                "x2": 2,
-                "pct_change": {"x1": 1.0, "x2": 1.0},
-                "maxes": {"x1_max": 6, "x2_max": 3},
-                "xcols": {"x1": 2, "x2": 2},
-            },
-            {
-                "x1": 6,
-                "x2": 3,
-                "pct_change": {"x1": 2.0, "x2": 0.5},
-                "maxes": {"x1_max": 6, "x2_max": 3},
-                "xcols": {"x1": 6, "x2": 3},
-            },
-        ]
-
-    # require at least one of exprs / **named_exprs
-    with pytest.raises(ValueError):
-        _ = ldf.with_columns()
-
-
 def test_len_compute(df: pl.DataFrame) -> None:
     df = df.with_columns(pl.struct(["list_bool", "cat"]).alias("struct"))
     filtered = df.filter(pl.col("bools"))

diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py
@@ -892,14 +892,6 @@ def test_with_column_renamed(fruits_cars: pl.DataFrame) -> None:
     assert res.columns[0] == "C"
 
 
-def test_with_columns_single_series() -> None:
-    ldf = pl.LazyFrame({"a": [1, 2]})
-    result = ldf.with_columns(pl.Series("b", [3, 4]))
-
-    expected = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
-    assert_frame_equal(result.collect(), expected)
-
-
 def test_reverse() -> None:
     out = pl.LazyFrame({"a": [1, 2], "b": [3, 4]}).reverse()
     expected = pl.DataFrame({"a": [2, 1], "b": [4, 3]})