Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test P2P Shuffle in integration tests #597

Merged
merged 30 commits into from
Dec 23, 2022
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
02207c9
Parametrize shuffle type for shuffle tests
hendrikmakait Nov 30, 2022
6d8dbfb
Skip if P2P not available
hendrikmakait Nov 30, 2022
85a1d2f
P2P in join
hendrikmakait Dec 7, 2022
d172a53
Minor
hendrikmakait Dec 7, 2022
6dba930
Parametrize h2o
hendrikmakait Dec 7, 2022
afcf657
h2o
hendrikmakait Dec 8, 2022
2d0c3a7
Use client fixture
hendrikmakait Dec 8, 2022
ef91c70
Merge branch 'main' into shuffle-fixture
hendrikmakait Dec 8, 2022
d62c422
minor
hendrikmakait Dec 9, 2022
f449000
Pin packaging (to be removed)
hendrikmakait Dec 9, 2022
53403c6
Merge branch 'main' into shuffle-fixture
hendrikmakait Dec 13, 2022
b6c8ef5
Merge branch 'main' into shuffle-fixture
hendrikmakait Dec 20, 2022
f27c37b
Adjust fixture setup
hendrikmakait Dec 20, 2022
7cc5518
Remove packaging restriction again
hendrikmakait Dec 20, 2022
d5b6d02
Run larger join tests
hendrikmakait Dec 20, 2022
d616029
Merge branch 'main' into shuffle-fixture
hendrikmakait Dec 22, 2022
63ace5b
Remove large joins
hendrikmakait Dec 22, 2022
c44dff0
line break
hendrikmakait Dec 22, 2022
074a347
Adjust p2p min version
hendrikmakait Dec 23, 2022
9a5840d
Rename tests in DB
hendrikmakait Dec 23, 2022
ddcc796
Parametrize df.shuffle test
hendrikmakait Dec 23, 2022
3acc67d
Adjust migration
hendrikmakait Dec 23, 2022
6c9588c
Typo
hendrikmakait Dec 23, 2022
482388e
Avoid transformation issue
hendrikmakait Dec 23, 2022
c13cb6a
Update tests/benchmarks/test_join.py
hendrikmakait Dec 23, 2022
393da08
Properly skip large data
hendrikmakait Dec 23, 2022
c4bedd9
Proper P2P_AVAILABLE
hendrikmakait Dec 23, 2022
61b9001
Fix params
hendrikmakait Dec 23, 2022
ee884da
ordering
hendrikmakait Dec 23, 2022
b9880e7
Minor
hendrikmakait Dec 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Default parameter for shuffling tests

Revision ID: c38b9d85915e
Revises: fa79471ffa8c
Create Date: 2022-12-23 09:05:57.440944

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'c38b9d85915e'
down_revision = 'fa79471ffa8c'
branch_labels = None
depends_on = None


def h2o_update_query(test: str, ddf: str) -> str:
    """Build the SQL statement renaming one stored h2o test run.

    Appends the default ``-tasks`` shuffle parameter inside the parameter
    brackets, e.g. ``test_q1[0.5 GB (csv)]`` -> ``test_q1[0.5 GB (csv)-tasks]``.
    """
    old_name = f"{test}[{ddf}]"
    new_name = f"{test}[{ddf}-tasks]"
    return f"""
    update test_run
    set name = '{new_name}'
    where name == '{old_name}';
    """

def rename_h2o_tests() -> None:
    """Rename stored h2o benchmark runs to carry the default shuffle param.

    Each of ``test_q1`` .. ``test_q9`` is stored under three dataset
    parametrizations; every matching run name gains a ``-tasks`` suffix
    inside its parameter brackets.
    """
    for i in range(1, 10):  # test_q1 .. test_q9
        test = f"test_q{i}"
        for ddf_param in ("0.5 GB (csv)", "0.5 GB (parquet)", "5 GB (parquet)"):
            # Reuse the shared query builder instead of duplicating the
            # same SQL inline (h2o_update_query was previously unused).
            op.execute(h2o_update_query(test, ddf_param))

def rename_join_tests() -> None:
    """Rename stored join benchmark runs to carry the default shuffle param."""
    join_tests = ("test_join_big", "test_join_big_small")
    for join_test in join_tests:
        statement = f"""
    update test_run
    set name = '{join_test}[0.1-tasks]'
    where name == '{join_test}[0.1]';
    """
        op.execute(statement)

def rename_shuffle_tests() -> None:
    """Rename stored shuffle benchmark runs to carry the default shuffle param."""
    shuffle_tests = ("test_shuffle_parquet", "test_shuffle_simple")
    for shuffle_test in shuffle_tests:
        statement = f"""
    update test_run
    set name = '{shuffle_test}[tasks]'
    where name == '{shuffle_test}';
    """
        op.execute(statement)

def upgrade() -> None:
    """Apply the migration: append the default shuffle parameter
    ('-tasks' / '[tasks]') to all previously stored benchmark run names."""
    rename_h2o_tests()
    rename_join_tests()
    rename_shuffle_tests()


def downgrade() -> None:
    """No-op downgrade.

    NOTE(review): the renames performed by upgrade() are intentionally not
    reversed here — previously renamed rows keep their new suffixed names.
    Confirm this is acceptable before relying on a downgrade.
    """
    pass
26 changes: 14 additions & 12 deletions tests/benchmarks/h2o/test_h2o_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ def ddf(request):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q1(ddf, small_client):
def test_q1(ddf, small_client, configure_shuffling):
ddf = ddf[["id1", "v1"]]
ddf.groupby("id1", dropna=False, observed=True).agg({"v1": "sum"}).compute()


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q2(ddf, small_client):
def test_q2(ddf, small_client, configure_shuffling):
ddf = ddf[["id1", "id2", "v1"]]
(
ddf.groupby(["id1", "id2"], dropna=False, observed=True)
Expand All @@ -71,7 +71,7 @@ def test_q2(ddf, small_client):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q3(ddf, small_client):
def test_q3(ddf, small_client, configure_shuffling):
ddf = ddf[["id3", "v1", "v3"]]
(
ddf.groupby("id3", dropna=False, observed=True)
Expand All @@ -81,7 +81,7 @@ def test_q3(ddf, small_client):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q4(ddf, small_client):
def test_q4(ddf, small_client, configure_shuffling):
ddf = ddf[["id4", "v1", "v2", "v3"]]
(
ddf.groupby("id4", dropna=False, observed=True)
Expand All @@ -91,11 +91,13 @@ def test_q4(ddf, small_client):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q5(ddf, small_client):
def test_q5(ddf, small_client, configure_shuffling):
ddf = ddf[["id6", "v1", "v2", "v3"]]
(
ddf.groupby("id6", dropna=False, observed=True)
.agg({"v1": "sum", "v2": "sum", "v3": "sum"})
.agg(
{"v1": "sum", "v2": "sum", "v3": "sum"},
)
.compute()
)

Expand All @@ -105,17 +107,17 @@ def test_q5(ddf, small_client):
Version(dask.__version__) < Version("2022.10.0"),
reason="No support for median in dask < 2022.10.0",
)
def test_q6(ddf, small_client):
def test_q6(ddf, small_client, shuffle):
ddf = ddf[["id4", "id5", "v3"]]
(
ddf.groupby(["id4", "id5"], dropna=False, observed=True)
.agg({"v3": ["median", "std"]}, shuffle="tasks")
.compute() # requires shuffle="tasks"
.agg({"v3": ["median", "std"]}, shuffle=shuffle)
.compute() # requires shuffle arg to be set explicitly
)


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q7(ddf, small_client):
def test_q7(ddf, small_client, configure_shuffling):
ddf = ddf[["id3", "v1", "v2"]]
(
ddf.groupby("id3", dropna=False, observed=True)
Expand All @@ -126,7 +128,7 @@ def test_q7(ddf, small_client):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q8(ddf, small_client):
def test_q8(ddf, small_client, configure_shuffling):
ddf = ddf[["id6", "v1", "v2", "v3"]]
(
ddf[~ddf["v3"].isna()][["id6", "v3"]]
Expand All @@ -140,7 +142,7 @@ def test_q8(ddf, small_client):


@run_up_to_nthreads("small_cluster", 100, reason="fixed size data")
def test_q9(ddf, small_client):
def test_q9(ddf, small_client, configure_shuffling):
ddf = ddf[["id2", "id4", "v1", "v2"]]
(
ddf[["id2", "id4", "v1", "v2"]]
Expand Down
63 changes: 31 additions & 32 deletions tests/benchmarks/test_join.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,51 @@
import dask
import dask.dataframe as dd
import pytest

from ..utils_test import cluster_memory, run_up_to_nthreads, timeseries_of_size

mem_mult = [
0.1,
pytest.param(
1,
marks=pytest.mark.skip(reason="Does not finish"),
),
pytest.param(
10,
marks=pytest.mark.skip(reason="Does not finish"),
),
params = [
hendrikmakait marked this conversation as resolved.
Show resolved Hide resolved
(0.1, "tasks"),
# shuffling takes a long time with 1 or higher
(0.1, "p2p"),
# (1, "p2p"),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#633 should allow us to enable this in a follow-up

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we mark them then as marks=pytest.mark.skip(reason="Does not finish, fix when #633 is merged")

If not, we should open an issue noting that these cases need to be uncommented once #633 is merged.

# (10, "p2p"),
]


@run_up_to_nthreads("small_cluster", 40, reason="Does not finish")
@pytest.mark.parametrize("mem_mult", mem_mult) # [0.1, 1, 10]
def test_join_big(small_client, mem_mult):
memory = cluster_memory(small_client) # 76.66 GiB
@pytest.mark.parametrize("mem_mult, shuffle", params)
def test_join_big(small_client, mem_mult, shuffle):
with dask.config.set(shuffle=shuffle):
memory = cluster_memory(small_client) # 76.66 GiB

df1_big = timeseries_of_size(memory * mem_mult)
df1_big["x2"] = df1_big["x"] * 1e9
df1_big = df1_big.astype({"x2": "int"})
df1_big = timeseries_of_size(memory * mem_mult)
df1_big["x2"] = df1_big["x"] * 1e9
df1_big = df1_big.astype({"x2": "int"})

df2_big = timeseries_of_size(memory * mem_mult)
df2_big = timeseries_of_size(memory * mem_mult)

# Control cardinality on column to join - this produces cardinality ~ to len(df)
df2_big["x2"] = df2_big["x"] * 1e9
df2_big = df2_big.astype({"x2": "int"})
# Control cardinality on column to join - this produces cardinality ~ to len(df)
df2_big["x2"] = df2_big["x"] * 1e9
df2_big = df2_big.astype({"x2": "int"})

dd.merge(df1_big, df2_big, on="x2", how="inner").compute()
dd.merge(df1_big, df2_big, on="x2", how="inner").compute()


@pytest.mark.parametrize("mem_mult", mem_mult) # [0.1, 1, 10]
def test_join_big_small(small_client, mem_mult):
memory = cluster_memory(small_client) # 76.66 GiB
@pytest.mark.parametrize("mem_mult, shuffle", params)
def test_join_big_small(small_client, mem_mult, shuffle):
with dask.config.set(shuffle=shuffle):
memory = cluster_memory(small_client) # 76.66 GiB

df_big = timeseries_of_size(memory * mem_mult)
df_big = timeseries_of_size(memory * mem_mult)

# Control cardinality on column to join - this produces cardinality ~ to len(df)
df_big["x2"] = df_big["x"] * 1e9
df_big = df_big.astype({"x2": "int"})
# Control cardinality on column to join - this produces cardinality ~ to len(df)
df_big["x2"] = df_big["x"] * 1e9
df_big = df_big.astype({"x2": "int"})

df_small = timeseries_of_size("50 MB") # make it obviously small
df_small = timeseries_of_size("50 MB") # make it obviously small

df_small["x2"] = df_small["x"] * 1e9
df_small_pd = df_small.astype({"x2": "int"}).compute()
df_small["x2"] = df_small["x"] * 1e9
df_small_pd = df_small.astype({"x2": "int"}).compute()

dd.merge(df_big, df_small_pd, on="x2", how="inner").compute()
dd.merge(df_big, df_small_pd, on="x2", how="inner").compute()
26 changes: 26 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from distributed import Client
from distributed.diagnostics.memory_sampler import MemorySampler
from distributed.scheduler import logger as scheduler_logger
from packaging.version import Version
from sqlalchemy.orm import Session

from benchmark_schema import TestRun
Expand Down Expand Up @@ -605,3 +606,28 @@ def _upload_cluster_dump(client):
client.dump_cluster_state(dump_path, **s3_storage_options)

yield _upload_cluster_dump


# Include https://github.com/dask/distributed/pull/7410 for categorical support
P2P_AVAILABLE = Version(dask.__version__) > Version("2022.12.1")


@pytest.fixture(
    params=[
        "tasks",
        pytest.param(
            "p2p",
            marks=pytest.mark.skipif(
                not P2P_AVAILABLE, reason="p2p shuffle not available"
            ),
        ),
    ]
)
def shuffle(request):
    # Parametrized shuffle method: each test using this fixture runs once with
    # "tasks" and once with "p2p"; the "p2p" case is skipped when the installed
    # dask version predates P2P support (see P2P_AVAILABLE).
    return request.param


@pytest.fixture
def configure_shuffling(shuffle):
    # Apply the parametrized shuffle method to the dask config for the
    # duration of the test; dask.config.set restores the prior value on exit.
    with dask.config.set(shuffle=shuffle):
        yield
4 changes: 2 additions & 2 deletions tests/stability/test_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


@pytest.mark.stability
def test_shuffle_simple(small_client):
def test_shuffle_simple(small_client, configure_shuffling):
df = dask.datasets.timeseries(
start="2000-01-01", end="2000-12-31", freq="1s", partition_freq="1D"
)
Expand All @@ -16,7 +16,7 @@ def test_shuffle_simple(small_client):


@pytest.mark.stability
def test_shuffle_parquet(small_client, s3_url, s3_storage_options):
def test_shuffle_parquet(small_client, s3_url, s3_storage_options, configure_shuffling):
# Write synthetic dataset to S3
# Notes on how `freq` impacts total dataset size:
# - 100ms ~12GB
Expand Down