"""Utilities for working with the physical system (e.g., GPUs).
This module mostly provides convenience functions for changing the state of the runtime
environment by wrapping `tf.config` module functions.
"""

from typing import List, Optional, Text

import tensorflow as tf


def get_all_gpus() -> List[tf.config.PhysicalDevice]:
    """Return a list of GPUs including unavailable devices."""
    return tf.config.list_physical_devices("GPU")


def is_gpu_system() -> bool:
    """Return True if the system has discoverable GPUs."""
    return len(get_all_gpus()) > 0


def get_available_gpus() -> List[tf.config.PhysicalDevice]:
    """Return a list of available GPUs."""
    return tf.config.get_visible_devices("GPU")


def get_current_gpu() -> Optional[tf.config.PhysicalDevice]:
    """Return the current (single) GPU device.

    Returns:
        The `tf.config.PhysicalDevice` for the single available GPU, or None if no
        GPUs are available.

    Raises:
        ValueError: If multiple GPUs are available.
    """
    available_gpus = get_available_gpus()
    if len(available_gpus) == 0:
        return None
    elif len(available_gpus) == 1:
        return available_gpus[0]
    else:
        raise ValueError("Multiple GPUs are available.")
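

# A minimal usage sketch (hypothetical; only names defined above are used):
#
#     gpu = get_current_gpu()  # None on CPU-only systems; raises if several GPUs.
#     if gpu is not None:
#         print(f"Running on: {gpu.name}")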


def use_cpu_only():
    """Hide GPUs from TensorFlow to ensure only the CPU is available."""
    tf.config.set_visible_devices([], "GPU")


def use_gpu(device_ind: int):
    """Make a single GPU available to TensorFlow.

    Args:
        device_ind: Index of the GPU within the list of system GPUs.
    """
    gpus = get_all_gpus()
    tf.config.set_visible_devices(gpus[device_ind], "GPU")


def use_first_gpu():
    """Make only the first GPU available to TensorFlow."""
    use_gpu(0)


def use_last_gpu():
    """Make only the last GPU available to TensorFlow."""
    use_gpu(-1)
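

# A hypothetical device-selection sketch. This must run before any tensors are
# created, since device visibility cannot be changed once a GPU is initialized:
#
#     if is_gpu_system():
#         use_last_gpu()  # Equivalent to use_gpu(-1).
#     else:
#         use_cpu_only()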


def is_initialized(gpu: Optional[tf.config.PhysicalDevice] = None) -> bool:
    """Check if a physical GPU has been initialized without triggering initialization.

    Args:
        gpu: The GPU to check for initialization. If None, defaults to the current GPU.

    Returns:
        True if the GPU is initialized.

    Notes:
        Once initialized, a GPU cannot be hidden and its memory policy cannot be
        changed. Initialization happens when a `tf.config.LogicalDevice` is created
        on the physical device, typically when the first tensor op runs.

        Checking whether the GPU is initialized by querying the logical devices
        would itself trigger initialization, so this function provides a way of
        checking without modifying the system state.
    """
    if gpu is None:
        gpu = get_current_gpu()

    is_initialized = False
    try:
        # Get the current memory growth policy.
        current_policy = tf.config.experimental.get_memory_growth(gpu)

        # Try flipping it; this raises a RuntimeError if the device is initialized.
        tf.config.experimental.set_memory_growth(gpu, not current_policy)

        # Set it back if we were successful.
        tf.config.experimental.set_memory_growth(gpu, current_policy)
    except RuntimeError as ex:
        is_initialized = (
            len(ex.args) > 0
            and ex.args[0]
            == "Physical devices cannot be modified after being initialized"
        )
    return is_initialized
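

# A hypothetical guard built on `is_initialized`: only adjust the memory policy
# while the device can still be modified (all names are from this module):
#
#     gpu = get_current_gpu()
#     if gpu is not None and not is_initialized(gpu):
#         tf.config.experimental.set_memory_growth(gpu, True)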


def disable_preallocation():
    """Disable preallocation of full GPU memory on all available GPUs.

    This enables the memory growth policy so that TensorFlow will not pre-allocate
    all available GPU memory.

    Preallocation can be more efficient, but can lead to CUDA startup errors when
    the memory is not available (e.g., on shared, multi-session and some *nix
    systems).

    See also: `enable_preallocation`
    """
    for gpu in get_available_gpus():
        tf.config.experimental.set_memory_growth(gpu, True)


def enable_preallocation():
    """Enable preallocation of full GPU memory on all available GPUs.

    This disables the memory growth policy so that TensorFlow will pre-allocate all
    available GPU memory.

    Preallocation can be more efficient, but can lead to CUDA startup errors when
    the memory is not available (e.g., on shared, multi-session and some *nix
    systems).

    See also: `disable_preallocation`
    """
    for gpu in get_available_gpus():
        tf.config.experimental.set_memory_growth(gpu, False)
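

# A minimal setup sketch (hypothetical): opt out of preallocation at program
# startup, before any tensors are created:
#
#     if is_gpu_system():
#         disable_preallocation()  # GPU memory will be allocated on demand.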


def initialize_devices():
    """Initialize available physical devices as logical devices.

    If preallocation was enabled on the GPUs, this will trigger memory allocation.
    """
    tf.config.list_logical_devices()
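

# Hypothetical usage: force initialization once configuration is final. After
# this call, device visibility and memory policy can no longer be changed:
#
#     initialize_devices()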


def summary():
    """Print a summary of the state of the system."""
    gpus = get_available_gpus()
    all_gpus = get_all_gpus()
    if len(all_gpus) > 0:
        print(f"GPUs: {len(gpus)}/{len(all_gpus)} available")
        for gpu in all_gpus:
            print(f"  Device: {gpu.name}")
            print(f"    Available: {gpu in gpus}")
            print(f"    Initialized: {is_initialized(gpu)}")
            print(
                f"    Memory growth: {tf.config.experimental.get_memory_growth(gpu)}"
            )
    else:
        print("GPUs: None detected.")


def best_logical_device_name() -> Text:
    """Return the name of the best logical device for performance.

    This is particularly useful with `tf.device()` for explicit tensor placement.

    Returns:
        The name of the first logical GPU device if available, or otherwise the CPU.

    Notes:
        This will initialize the logical devices if they were not already!
    """
    gpus = tf.config.list_logical_devices("GPU")
    if len(gpus) > 0:
        device_name = gpus[0].name
    else:
        cpus = tf.config.list_logical_devices("CPU")
        device_name = cpus[0].name
    return device_name
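

# A hypothetical placement sketch using the name returned above. `tf.device` is
# the standard TensorFlow context manager; the computation is illustrative only:
#
#     with tf.device(best_logical_device_name()):
#         x = tf.ones((1024, 1024))  # Placed on the GPU if one is available.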