Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add full process info. #65

Merged
merged 5 commits into from
Aug 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Options:
* `--no-color` : Suppress colored output
* `-u`, `--show-user` : Display username of the process owner
* `-c`, `--show-cmd` : Display the process name
* `-f`, `--show-full-cmd` : Display the full command and CPU stats of the running process
* `-p`, `--show-pid` : Display PID of the process
* `-F`, `--show-fan` : Display GPU fan speed
* `-P`, `--show-power` : Display GPU power usage and/or limit (`draw` or `draw,limit`)
Expand Down
4 changes: 4 additions & 0 deletions gpustat/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def main(*argv):

parser.add_argument('-c', '--show-cmd', action='store_true',
help='Display cmd name of running process')
parser.add_argument(
'-f', '--show-full-cmd', action='store_true',
help='Display full command and cpu stats of running process'
)
parser.add_argument('-u', '--show-user', action='store_true',
help='Display username of running process')
parser.add_argument('-p', '--show-pid', action='store_true',
Expand Down
66 changes: 59 additions & 7 deletions gpustat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os.path
import platform
import sys
import time
from datetime import datetime

from six.moves import cStringIO as StringIO
Expand Down Expand Up @@ -155,6 +156,7 @@ def processes(self):
def print_to(self, fp,
with_colors=True, # deprecated arg
show_cmd=False,
show_full_cmd=False,
show_user=False,
show_pid=False,
show_power=None,
Expand Down Expand Up @@ -182,9 +184,11 @@ def _conditional(cond_fn, true_value, false_value,
colors['CMemU'] = term.bold_yellow
colors['CMemT'] = term.yellow
colors['CMemP'] = term.yellow
colors['CCPUMemU'] = term.yellow
colors['CUser'] = term.bold_black # gray
colors['CUtil'] = _conditional(lambda: self.utilization < 30,
term.green, term.bold_green)
colors['CCPUUtil'] = term.green
colors['CPowU'] = _conditional(
lambda: float(self.power_draw) / self.power_limit < 0.4,
term.magenta, term.bold_magenta
Expand Down Expand Up @@ -245,14 +249,40 @@ def process_repr(p):
)
return r

def bytes2human(in_bytes):
    """Render a byte count as a short human-readable string.

    The value is repeatedly divided by 1024 (floor) until it is at most
    9999 or the largest unit is reached, then formatted as an integer
    immediately followed by its unit, e.g. ``"257MB"``.

    Args:
        in_bytes: number of bytes; an int, or a float holding a whole
            number (Python 2's ``round()`` returns a float).

    Returns:
        str such as ``"0B"``, ``"9KB"`` or ``"1025MB"``.
    """
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    suffix = 0
    # Coerce to int so the shift below also works for float input.
    result = int(in_bytes)
    # Stop at the LAST unit: bounding by len(suffixes) would let the index
    # run one past the end and raise IndexError for very large values.
    while result > 9999 and suffix < len(suffixes) - 1:
        result = result >> 10
        suffix += 1
    return "%d%s" % (result, suffixes[suffix])

def full_process_info(p):
    """Build the detail line for one process entry.

    The line is assembled from three formatted pieces: the PID, the
    CPU percentage together with the host memory usage (rendered via
    ``bytes2human``), and the full command line. Colour codes come from
    the enclosing ``colors`` mapping.

    Args:
        p: process dict providing ``pid``, ``cpu_percent``,
            ``cpu_memory_usage`` and ``full_command``.

    Returns:
        str: the concatenated, colour-annotated line.
    """
    # NOTE(review): `_repr` is defined outside this view; it is called with
    # a value and a fallback -- presumably the fallback replaces a missing
    # value. Confirm that '--' is compatible with the `:4.0f` format below.
    pid_part = "{C0} ├─ {:>6} ".format(_repr(p['pid'], '--'), **colors)

    cpu_percent = _repr(p['cpu_percent'], '--')
    cpu_memory = bytes2human(_repr(p['cpu_memory_usage'], 0))
    usage_part = "{C0}({CCPUUtil}{:4.0f}%{C0}, {CCPUMemU}{:>6}{C0})".format(
        cpu_percent, cpu_memory, **colors
    )

    command_part = "{C0}: {C1}{}{C0}".format(
        _repr(p['full_command'], '?'), **colors
    )
    return pid_part + usage_part + command_part

processes = self.entry['processes']
full_processes = ''
if processes is None:
# None (not available)
reps += ' ({})'.format(NOT_SUPPORTED)
else:
for p in processes:
reps += ' ' + process_repr(p)

if show_full_cmd:
full_processes += '\n' + full_process_info(p)
if show_full_cmd:
reps += full_processes[::-1].replace('├', '└', 1)[::-1]
fp.write(reps)
return fp

Expand All @@ -268,6 +298,8 @@ def jsonify(self):

class GPUStatCollection(object):

global_processes = {}

def __init__(self, gpu_list, driver_version=None):
self.gpus = gpu_list

Expand All @@ -276,6 +308,12 @@ def __init__(self, gpu_list, driver_version=None):
self.query_time = datetime.now()
self.driver_version = driver_version

@staticmethod
def clean_processes():
    """Drop cached entries whose PID no longer exists.

    Walks ``GPUStatCollection.global_processes`` and removes every key
    for which ``psutil.pid_exists`` reports False.
    """
    cache = GPUStatCollection.global_processes
    # Iterate over a snapshot of the keys so entries can be deleted
    # from the dict while scanning.
    for pid in tuple(cache):
        if psutil.pid_exists(pid):
            continue
        del cache[pid]

@staticmethod
def new_query():
"""Query the information of all the GPUs on local machine"""
Expand All @@ -293,18 +331,27 @@ def get_gpu_info(handle):
def get_process_info(nv_process):
    """Get the process information of specific pid.

    Args:
        nv_process: NVML process record; only its ``pid`` and
            ``usedGpuMemory`` attributes are read here.

    Returns:
        dict with the keys ``username``, ``command``, ``full_command``,
        ``gpu_memory_usage``, ``cpu_percent``, ``cpu_memory_usage``
        and ``pid``.

    Raises:
        psutil.NoSuchProcess: propagated from psutil when the PID
            cannot be found.
    """
    process = {}
    pid = nv_process.pid
    # Keep one psutil.Process object per PID in the class-level cache and
    # reuse it; building a fresh object on every call as well would be
    # redundant, since the cached one is what gets used below.
    cache = GPUStatCollection.global_processes
    if pid not in cache:
        cache[pid] = psutil.Process(pid=pid)
    ps_process = cache[pid]
    process['username'] = ps_process.username()
    # cmdline returns full path;
    # as in `ps -o comm`, get short cmdnames.
    _cmdline = ps_process.cmdline()
    if not _cmdline:
        # sometimes, zombie or unknown (e.g. [kworker/8:2H])
        process['command'] = '?'
        process['full_command'] = '?'
    else:
        process['command'] = os.path.basename(_cmdline[0])
        process['full_command'] = " ".join(_cmdline)
    # Bytes to MBytes
    process['gpu_memory_usage'] = nv_process.usedGpuMemory // MB
    process['cpu_percent'] = ps_process.cpu_percent()
    # Host memory use: the process's share (percent) of total system memory.
    process['cpu_memory_usage'] = \
        round((ps_process.memory_percent() / 100.0) *
              psutil.virtual_memory().total)
    process['pid'] = pid
    return process

Expand Down Expand Up @@ -361,15 +408,18 @@ def get_process_info(nv_process):
nv_comp_processes = nv_comp_processes or []
nv_graphics_processes = nv_graphics_processes or []
for nv_process in nv_comp_processes + nv_graphics_processes:
# TODO: could be more information such as system memory
# usage, CPU percentage, create time etc.
try:
process = get_process_info(nv_process)
processes.append(process)
except psutil.NoSuchProcess:
# TODO: add some reminder for NVML broken context
# e.g. nvidia-smi reset or reboot the system
pass
time.sleep(0.1)
Stonesjtu marked this conversation as resolved.
Show resolved Hide resolved
for process in processes:
pid = process['pid']
cache_process = GPUStatCollection.global_processes[pid]
process['cpu_percent'] = cache_process.cpu_percent()

index = N.nvmlDeviceGetIndex(handle)
gpu_info = {
Expand All @@ -387,6 +437,7 @@ def get_process_info(nv_process):
'memory.total': memory.total // MB if memory else None,
'processes': processes,
}
GPUStatCollection.clean_processes()
return gpu_info

# 1. get the list of gpu and status
Expand Down Expand Up @@ -426,9 +477,9 @@ def __repr__(self):
# --- Printing Functions ---

def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
show_cmd=False, show_user=False, show_pid=False,
show_power=None, show_fan_speed=None, gpuname_width=16,
show_header=True,
show_cmd=False, show_full_cmd=False, show_user=False,
show_pid=False, show_power=None, show_fan_speed=None,
gpuname_width=16, show_header=True,
eol_char=os.linesep,
):
# ANSI color configuration
Expand Down Expand Up @@ -473,6 +524,7 @@ def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
for g in self:
g.print_to(fp,
show_cmd=show_cmd,
show_full_cmd=show_full_cmd,
show_user=show_user,
show_pid=show_pid,
show_power=show_power,
Expand Down
63 changes: 40 additions & 23 deletions gpustat/test_gpustat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-

"""
Unit or integration tests for gpustat
"""
Expand All @@ -26,7 +24,7 @@
MagicMock = mock.MagicMock


def _configure_mock(N, Process,
def _configure_mock(N, Process, virtual_memory,
scenario_nonexistent_pid=False):
"""
Define mock behaviour for N: the pynvml module, and psutil.Process,
Expand Down Expand Up @@ -136,24 +134,30 @@ def _decorated(*args, **kwargs):
}.get(handle, RuntimeError))

mock_pid_map = { # mock information for psutil...
48448: ('user1', 'python'),
154213: ('user1', 'caffe'),
38310: ('user3', 'python'),
153223: ('user2', 'python'),
194826: ('user3', 'caffe'),
192453: ('user1', 'torch'),
48448: ('user1', 'python', 85.25, 3.1415),
154213: ('user1', 'caffe', 16.89, 100.00),
38310: ('user3', 'python', 26.23, 99.9653),
153223: ('user2', 'python', 15.25, 0.0000),
194826: ('user3', 'caffe', 0.0, 12.5236),
192453: ('user1', 'torch', 123.2, 0.7312),
}

def _MockedProcess(pid):
    """Return a MagicMock standing in for ``psutil.Process(pid)``.

    Raises:
        psutil.NoSuchProcess: if ``pid`` is not a key of ``mock_pid_map``.
    """
    if pid not in mock_pid_map:
        raise psutil.NoSuchProcess(pid=pid)
    # Entries are 4-tuples (username, command, cpu percent, memory
    # percent); unpacking only two values would raise ValueError.
    username, cmdline, cpuutil, memutil = mock_pid_map[pid]
    p = MagicMock()  # mocked process
    p.username.return_value = username
    p.cmdline.return_value = [cmdline]
    p.cpu_percent.return_value = cpuutil
    p.memory_percent.return_value = memutil
    return p
Process.side_effect = _MockedProcess

def _MockedMem():
    """Stand-in for ``psutil.virtual_memory()``: 8 GiB total, 0 used."""
    fake_memory = mock_memory_t(total=8589934592, used=0)
    return fake_memory
virtual_memory.side_effect = _MockedMem


MOCK_EXPECTED_OUTPUT_DEFAULT = """\
[0] GeForce GTX TITAN 0 | 80°C, 76 % | 8000 / 12287 MB | user1(4000M) user2(4000M)
Expand All @@ -163,7 +167,11 @@ def _MockedProcess(pid):

MOCK_EXPECTED_OUTPUT_FULL = """\
[0] GeForce GTX TITAN 0 | 80°C, 16 %, 76 %, 125 / 250 W | 8000 / 12287 MB | user1:python/48448(4000M) user2:python/153223(4000M)
├─ 48448 ( 85%, 257MB): python
└─ 153223 ( 15%, 0B): python
[1] GeForce GTX TITAN 1 | 36°C, 53 %, 0 %, ?? / 250 W | 9000 / 12189 MB | user1:torch/192453(3000M) user3:caffe/194826(6000M)
├─ 192453 ( 123%, 59MB): torch
└─ 194826 ( 0%, 1025MB): caffe
[2] GeForce GTX TITAN 2 | 71°C, 100 %, ?? %, 250 / ?? W | 0 / 12189 MB | (Not Supported)
""" # noqa: E501

Expand All @@ -180,30 +188,33 @@ def remove_ansi_codes(s):

class TestGPUStat(unittest.TestCase):

@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_main(self, N, Process, virtual_memory):
    """
    Test whether gpustat.main() works well. The behavior is mocked
    exactly as in test_new_query_mocked().
    """
    _configure_mock(N, Process, virtual_memory)
    # Patch sys.argv only for the duration of the call so the override
    # does not leak into other tests.
    with mock.patch.object(sys, 'argv', ['gpustat']):
        gpustat.main()

@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_new_query_mocked(self, N, Process):
def test_new_query_mocked(self, N, Process, virtual_memory):
"""
A basic functionality test, in a case where everything is just normal.
"""
_configure_mock(N, Process)
_configure_mock(N, Process, virtual_memory)

gpustats = gpustat.new_query()
fp = StringIO()
gpustats.print_formatted(
fp=fp, no_color=False, show_user=True,
show_cmd=True, show_pid=True, show_power=True, show_fan_speed=True
show_cmd=True, show_pid=True, show_power=True, show_fan_speed=True,
show_full_cmd=True
)

result = fp.getvalue()
Expand All @@ -216,24 +227,28 @@ def test_new_query_mocked(self, N, Process):
self.maxDiff = 4096
self.assertEqual(unescaped, MOCK_EXPECTED_OUTPUT_FULL)

@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_new_query_mocked_nonexistent_pid(self, N, Process,
                                          virtual_memory):
    """
    Test a case where nvidia query returns non-existent pids (see #16, #18)
    """
    # virtual_memory is a required positional argument of _configure_mock,
    # so it must be passed along with the scenario flag.
    _configure_mock(N, Process, virtual_memory,
                    scenario_nonexistent_pid=True)

    gpustats = gpustat.new_query()
    gpustats.print_formatted(fp=sys.stdout)

@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_attributes_and_items(self, N, Process):
def test_attributes_and_items(self, N, Process, virtual_memory):
"""
Test whether each property of `GPUStat` instance is well-defined.
"""
_configure_mock(N, Process)
_configure_mock(N, Process, virtual_memory)

g = gpustat.new_query()[1] # includes N/A
print("(keys) : %s" % str(g.keys()))
Expand All @@ -253,13 +268,14 @@ def test_attributes_and_items(self, N, Process):
print("utilization : %s" % (g.utilization))

@unittest.skipIf(sys.version_info < (3, 4), "Only in Python 3.4+")
@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_args_endtoend(self, N, Process):
def test_args_endtoend(self, N, Process, virtual_memory):
"""
End-to-end testing given command line args.
"""
_configure_mock(N, Process)
_configure_mock(N, Process, virtual_memory)

def capture_output(*args):
f = StringIO()
Expand All @@ -284,10 +300,11 @@ def capture_output(*args):
s = capture_output('gpustat', '--no-header')
self.assertIn("[0]", s.split('\n')[0])

@mock.patch('psutil.virtual_memory')
@mock.patch('psutil.Process')
@mock.patch('gpustat.core.N')
def test_json_mocked(self, N, Process):
_configure_mock(N, Process)
def test_json_mocked(self, N, Process, virtual_memory):
_configure_mock(N, Process, virtual_memory)
gpustats = gpustat.new_query()

fp = StringIO()
Expand Down