From 5682ef94c859139a80f567d6424edb80a7f80104 Mon Sep 17 00:00:00 2001
From: Jian Xiao <99709935+jianoaix@users.noreply.github.com>
Date: Tue, 24 Jan 2023 11:25:03 -0800
Subject: [PATCH] Fixes for the new bulk execution backend (#31884)

---
 .../operators/actor_pool_submitter.py         |  2 -
 .../execution/operators/map_operator.py       |  4 +-
 python/ray/data/tests/test_dataset.py         |  5 ++-
 .../ray/data/tests/test_dataset_pipeline.py   | 16 --------
 .../tests/test_pipeline_incremental_take.py   | 25 +++++++++++++
 python/ray/data/tests/test_stats.py           | 37 ++++++++++---------
 6 files changed, 50 insertions(+), 39 deletions(-)
 create mode 100644 python/ray/data/tests/test_pipeline_incremental_take.py

diff --git a/python/ray/data/_internal/execution/operators/actor_pool_submitter.py b/python/ray/data/_internal/execution/operators/actor_pool_submitter.py
index a98a2cf078d6..23727e907031 100644
--- a/python/ray/data/_internal/execution/operators/actor_pool_submitter.py
+++ b/python/ray/data/_internal/execution/operators/actor_pool_submitter.py
@@ -28,8 +28,6 @@ def __init__(
             ray_remote_args: Remote arguments for the Ray actors to be created.
             pool_size: The size of the actor pool.
         """
-        if "num_cpus" not in ray_remote_args:
-            raise ValueError("Remote args should have explicit CPU spec.")
         self._transform_fn_ref = transform_fn_ref
         self._ray_remote_args = ray_remote_args
         self._pool_size = pool_size
diff --git a/python/ray/data/_internal/execution/operators/map_operator.py b/python/ray/data/_internal/execution/operators/map_operator.py
index 7791e9cf56fa..a0062bfa25c2 100644
--- a/python/ray/data/_internal/execution/operators/map_operator.py
+++ b/python/ray/data/_internal/execution/operators/map_operator.py
@@ -130,12 +130,12 @@ def _canonicalize_ray_remote_args(ray_remote_args: Dict[str, Any]) -> Dict[str,
     ray_remote_args = ray_remote_args.copy()
     if "num_cpus" not in ray_remote_args and "num_gpus" not in ray_remote_args:
         ray_remote_args["num_cpus"] = 1
-    if "num_gpus" in ray_remote_args:
+    if ray_remote_args.get("num_gpus", 0) > 0:
         if ray_remote_args.get("num_cpus", 0) != 0:
             raise ValueError(
                 "It is not allowed to specify both num_cpus and num_gpus for map tasks."
             )
-    elif "num_cpus" in ray_remote_args:
+    elif ray_remote_args.get("num_cpus", 0) > 0:
         if ray_remote_args.get("num_gpus", 0) != 0:
             raise ValueError(
                 "It is not allowed to specify both num_cpus and num_gpus for map tasks."
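
The map_operator.py change above matters because the old membership tests treated an explicit num_cpus=0 or num_gpus=0 the same as a positive request, so args like {"num_cpus": 1, "num_gpus": 0} were rejected; with the .get(..., 0) > 0 comparisons, only positive requests for both resources trip the mutual-exclusion error. A minimal standalone sketch of the resulting behavior (the function name here is hypothetical and the logic simplified; only the error message is taken from the patch):

    from typing import Any, Dict

    def canonicalize_remote_args(ray_remote_args: Dict[str, Any]) -> Dict[str, Any]:
        # Simplified sketch, not the actual Ray implementation.
        args = ray_remote_args.copy()
        # Default to 1 CPU when neither resource is specified.
        if "num_cpus" not in args and "num_gpus" not in args:
            args["num_cpus"] = 1
        # Only a *positive* request for both resources is an error;
        # an explicit zero no longer counts as a request.
        if args.get("num_cpus", 0) > 0 and args.get("num_gpus", 0) > 0:
            raise ValueError(
                "It is not allowed to specify both num_cpus and num_gpus for map tasks."
            )
        return args

    assert canonicalize_remote_args({}) == {"num_cpus": 1}
    # Rejected by the old membership check, accepted after this patch:
    assert canonicalize_remote_args({"num_cpus": 1, "num_gpus": 0}) == {
        "num_cpus": 1,
        "num_gpus": 0,
    }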
diff --git a/python/ray/data/tests/test_dataset.py b/python/ray/data/tests/test_dataset.py
index 132f4aeb957e..e593c5ca60ce 100644
--- a/python/ray/data/tests/test_dataset.py
+++ b/python/ray/data/tests/test_dataset.py
@@ -5455,7 +5455,10 @@ def f(x):
         .fully_executed()
     )
 
-    assert f"{max_size}/{max_size} blocks" in ds.stats()
+    # TODO(https://github.com/ray-project/ray/issues/31723): implement the feature
+    # of capping bundle size by actor pool size, and then re-enable this test.
+    if not DatasetContext.get_current().new_execution_backend:
+        assert f"{max_size}/{max_size} blocks" in ds.stats()
 
     # Check batch size is still respected.
     ds = (
diff --git a/python/ray/data/tests/test_dataset_pipeline.py b/python/ray/data/tests/test_dataset_pipeline.py
index b6c3fa0dfb7b..c3e8a398e4e0 100644
--- a/python/ray/data/tests/test_dataset_pipeline.py
+++ b/python/ray/data/tests/test_dataset_pipeline.py
@@ -814,22 +814,6 @@ def consume(pipe, owned_by_consumer):
     ray.get([consume.remote(splits[0], True), consume.remote(splits[1], True)])
 
 
-# Run at end of file to avoid segfault https://github.com/ray-project/ray/issues/31145
-def test_incremental_take(shutdown_only):
-    ray.shutdown()
-    ray.init(num_cpus=2)
-
-    # Can read incrementally even if future results are delayed.
-    def block_on_ones(x: int) -> int:
-        if x == 1:
-            time.sleep(999999)
-        return x
-
-    pipe = ray.data.range(2).window(blocks_per_window=1)
-    pipe = pipe.map(block_on_ones)
-    assert pipe.take(1) == [0]
-
-
 if __name__ == "__main__":
     import sys
 
diff --git a/python/ray/data/tests/test_pipeline_incremental_take.py b/python/ray/data/tests/test_pipeline_incremental_take.py
new file mode 100644
index 000000000000..4025bbeab462
--- /dev/null
+++ b/python/ray/data/tests/test_pipeline_incremental_take.py
@@ -0,0 +1,25 @@
+import time
+import pytest
+import ray
+
+from ray.tests.conftest import *  # noqa
+
+
+def test_incremental_take(shutdown_only):
+    ray.init(num_cpus=2)
+
+    # Can read incrementally even if future results are delayed.
+    def block_on_ones(x: int) -> int:
+        if x == 1:
+            time.sleep(999999)
+        return x
+
+    pipe = ray.data.range(2).window(blocks_per_window=1)
+    pipe = pipe.map(block_on_ones)
+    assert pipe.take(1) == [0]
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(pytest.main(["-v", __file__]))
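
The new module above re-adds test_incremental_take exactly as it was removed from test_dataset_pipeline.py, minus the explicit ray.shutdown() call and the end-of-file placement that worked around the segfault in https://github.com/ray-project/ray/issues/31145; in its own file, the shutdown_only fixture leaves it with a clean Ray session to initialize. To run just this test in isolation, an invocation along these lines should work:

    pytest -v python/ray/data/tests/test_pipeline_incremental_take.py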
diff --git a/python/ray/data/tests/test_stats.py b/python/ray/data/tests/test_stats.py
index d33b62425904..c5c85c27ddda 100644
--- a/python/ray/data/tests/test_stats.py
+++ b/python/ray/data/tests/test_stats.py
@@ -54,7 +54,7 @@ def test_dataset_stats_basic(ray_start_regular_shared, enable_auto_log_stats):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -85,7 +85,7 @@ def test_dataset_stats_basic(ray_start_regular_shared, enable_auto_log_stats):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -115,7 +115,7 @@ def test_dataset_stats_basic(ray_start_regular_shared, enable_auto_log_stats):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 Stage N map: N/N blocks executed in T
@@ -133,7 +133,7 @@ def test_dataset_stats_basic(ray_start_regular_shared, enable_auto_log_stats):
 * In format_batch(): T
 * In user code: T
 * Total time: T
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -266,7 +266,7 @@ def test_dataset_stats_read_parquet(ray_start_regular_shared, tmp_path):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -302,7 +302,7 @@ def test_dataset_split_stats(ray_start_regular_shared, tmp_path):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 Stage N split: N/N blocks executed in T
@@ -320,7 +320,7 @@ def test_dataset_split_stats(ray_start_regular_shared, tmp_path):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -384,7 +384,7 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -416,7 +416,7 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -451,7 +451,7 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 """
     )
@@ -480,7 +480,7 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 Stage N map: N/N blocks executed in T
@@ -490,12 +490,12 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 == Pipeline Window N ==
 Stage N read->map_batches: [execution cached]
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 Stage N map: N/N blocks executed in T
@@ -505,12 +505,12 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 == Pipeline Window N ==
 Stage N read->map_batches: [execution cached]
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 Stage N map: N/N blocks executed in T
@@ -520,7 +520,7 @@ def test_dataset_pipeline_stats_basic(ray_start_regular_shared, enable_auto_log_
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 ##### Overall Pipeline Time Breakdown #####
@@ -629,6 +629,7 @@ def consume(split):
     s0, s1 = pipe.split(2)
     stats = ray.get([consume.remote(s0), consume.remote(s1)])
     if context.new_execution_backend:
+        print("XXX stats:", canonicalize(stats[0]))
         assert (
             canonicalize(stats[0])
             == """== Pipeline Window Z ==
@@ -639,7 +640,7 @@ def consume(split):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 == Pipeline Window N ==
@@ -650,7 +651,7 @@ def consume(split):
 * Output num rows: N min, N max, N mean, N total
 * Output size bytes: N min, N max, N mean, N total
 * Tasks per node: N min, N max, N mean; N nodes used
-* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': Z, \
+* Extra metrics: {'obj_store_mem_alloc': N, 'obj_store_mem_freed': N, \
 'obj_store_mem_peak': N}
 
 ##### Overall Pipeline Time Breakdown #####
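
The repeated Z -> N edits in the expected stats strings above track a behavior change, not a cosmetic one: these tests compare stats output after a canonicalize helper (defined in test_stats.py) replaces volatile values with placeholders, and under the new execution backend obj_store_mem_freed is now a nonzero byte count rather than zero. A rough illustration of that normalization (the regex rules here are assumptions for this sketch, not the actual helper's implementation):

    import re

    def canonicalize(stats: str) -> str:
        # Illustrative rules only: standalone zeros -> Z, any other number -> N.
        s = re.sub(r"\b0(\.0+)?\b", "Z", stats)
        return re.sub(r"[0-9]+(\.[0-9]+)?", "N", s)

    # Old backend: nothing freed, so the field canonicalized to Z.
    assert canonicalize("'obj_store_mem_freed': 0,") == "'obj_store_mem_freed': Z,"
    # New backend: a nonzero amount freed, so the same field becomes N.
    assert canonicalize("'obj_store_mem_freed': 1024,") == "'obj_store_mem_freed': N,"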