Move GPU allocator config to public XLA API

PiperOrigin-RevId: 693715069
openxla · Nov 8, 2024 · dc37ba2 · dc37ba2
1 parent 4339802
commit dc37ba2
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 39 deletions.
diff --git a/xla/pjrt/gpu/BUILD b/xla/pjrt/gpu/BUILD
@@ -27,6 +27,7 @@ cc_library(
         "//xla:util",
         "//xla/client:client_library",
         "//xla/client:local_client",
+        "//xla/pjrt/plugin/xla_gpu:xla_gpu_allocator_config",
         "//xla/service:platform_util",
         "//xla/stream_executor:platform",
         "//xla/stream_executor:stream_executor_h",

diff --git a/xla/pjrt/gpu/gpu_helpers.h b/xla/pjrt/gpu/gpu_helpers.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include "absl/status/statusor.h"
 #include "absl/types/span.h"
 #include "xla/client/local_client.h"
+#include "xla/pjrt/plugin/xla_gpu/xla_gpu_allocator_config.h"
 #include "xla/stream_executor/stream_executor.h"
 #include "xla/tsl/framework/bfc_allocator.h"
 #include "xla/types.h"
@@ -39,43 +40,6 @@ absl::StatusOr<LocalClient*> GetGpuXlaClient(
 // Enables peer access between all pairs of GPUs where possible.
 void EnablePeerAccess(absl::Span<se::StreamExecutor* const> executors);
 
-struct GpuAllocatorConfig {
-  enum class Kind {
-    kDefault,   // Client picks the best option for the platform.
-    kPlatform,  // The platform's default.
-    kBFC,  // Allocator using a "Best-Fit with Coalescing" algorithm. Currently
-           // only available for GPU.
-    kCudaAsync,  // Use the CUDA async allocator.
-  };
-  Kind kind = Kind::kDefault;
-
-  // Only used if kind == kBFC. The maximum fraction of available memory to
-  // allocate. This is the default value of XLA_CLIENT_MEM_FRACTION.
-  //
-  // If `gpu_system_memory_size` is set, it determines memory allocation.
-  // `memory_fraction` won't be used in this case.
-  double memory_fraction = 0.75;
-
-  // Only used if kind == kBFC. The absolute size of reserved memory space for
-  // GPU system in bytes.
-  //
-  // If null, the default value `memory_fraction` will be used.
-  std::optional<int64_t> gpu_system_memory_size = std::nullopt;
-
-  // Only used if kind == kBFC. If true, the allocator will immediately allocate
-  // the maximum amount allowed by `memory_fraction`. This reduces
-  // fragmentation, allowing more of the total memory to be used. If false, the
-  // allocator will allocate more memory as allocations are requested.
-  bool preallocate = true;
-
-  // Amount of collective memory (ncclMemAlloc) to preallocate. If this value is
-  // 0, collective memory space will be grown as needed to fit the application's
-  // usage, with the drawback of potentially higher fragmentation. If set,
-  // should be set to a multiple of 512MB to avoid wasting memory due to
-  // granularity requirements.
-  size_t collective_memory_size = 0;
-};
-
 std::unique_ptr<tsl::BFCAllocator> GetGpuHostAllocator(
     se::StreamExecutor* executor);
 

diff --git a/xla/pjrt/plugin/xla_gpu/BUILD b/xla/pjrt/plugin/xla_gpu/BUILD
@@ -26,8 +26,16 @@ cc_library(
     srcs = [],
     hdrs = ["xla_gpu_client_options.h"],
     deps = [
+        ":xla_gpu_allocator_config",
         "//xla/pjrt/distributed:key_value_store_interface",
-        "//xla/pjrt/gpu:gpu_helpers",
+    ],
+)
+
+cc_library(
+    name = "xla_gpu_allocator_config",
+    srcs = [],
+    hdrs = ["xla_gpu_allocator_config.h"],
+    deps = [
     ],
 )
 

diff --git a/xla/pjrt/plugin/xla_gpu/xla_gpu_allocator_config.h b/xla/pjrt/plugin/xla_gpu/xla_gpu_allocator_config.h
@@ -0,0 +1,64 @@
+/* Copyright 2024 The OpenXLA Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef XLA_PJRT_PLUGIN_XLA_GPU_XLA_GPU_ALLOCATOR_CONFIG_H_
+#define XLA_PJRT_PLUGIN_XLA_GPU_XLA_GPU_ALLOCATOR_CONFIG_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+namespace xla {
+
+struct GpuAllocatorConfig {
+  enum class Kind {
+    kDefault,   // Client picks the best option for the platform.
+    kPlatform,  // The platform's default.
+    kBFC,  // Allocator using a "Best-Fit with Coalescing" algorithm. Currently
+           // only available for GPU.
+    kCudaAsync,  // Use the CUDA async allocator.
+  };
+  Kind kind = Kind::kDefault;
+
+  // Only used if kind == kBFC. The maximum fraction of available memory to
+  // allocate. This is the default value of XLA_CLIENT_MEM_FRACTION.
+  //
+  // If `gpu_system_memory_size` is set, it determines memory allocation.
+  // `memory_fraction` won't be used in this case.
+  double memory_fraction = 0.75;
+
+  // Only used if kind == kBFC. The absolute size of reserved memory space for
+  // GPU system in bytes.
+  //
+  // If unset (std::nullopt), `memory_fraction` is used instead.
+  std::optional<int64_t> gpu_system_memory_size = std::nullopt;
+
+  // Only used if kind == kBFC. If true, the allocator will immediately allocate
+  // the maximum amount allowed by `memory_fraction`. This reduces
+  // fragmentation, allowing more of the total memory to be used. If false, the
+  // allocator will allocate more memory as allocations are requested.
+  bool preallocate = true;
+
+  // Amount of collective memory (ncclMemAlloc) to preallocate. If this value is
+  // 0, collective memory space will be grown as needed to fit the application's
+  // usage, with the drawback of potentially higher fragmentation. If set,
+  // should be set to a multiple of 512MB to avoid wasting memory due to
+  // granularity requirements.
+  size_t collective_memory_size = 0;
+};
+
+}  // namespace xla
+
+#endif  // XLA_PJRT_PLUGIN_XLA_GPU_XLA_GPU_ALLOCATOR_CONFIG_H_
diff --git a/xla/pjrt/plugin/xla_gpu/xla_gpu_client_options.h b/xla/pjrt/plugin/xla_gpu/xla_gpu_client_options.h
@@ -22,7 +22,7 @@ limitations under the License.
 #include <string>
 
 #include "xla/pjrt/distributed/key_value_store_interface.h"
-#include "xla/pjrt/gpu/gpu_helpers.h"
+#include "xla/pjrt/plugin/xla_gpu/xla_gpu_allocator_config.h"
 
 namespace xla {