forked from CFSworks/nvml_fix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nvml_v3.h
1998 lines (1857 loc) · 89.6 KB
/
nvml_v3.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws. Users and possessors of this source code
* are hereby granted a nonexclusive, royalty-free license to use this code
* in individual and commercial software.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
/*!
\mainpage NVML API Reference
The NVIDIA Management Library (NVML) is a C-based programmatic interface for monitoring and
managing various states within NVIDIA Tesla &tm; GPUs.<br>It is intended to be a platform for building
3rd party applications, and is also the underlying library for the NVIDIA-supported nvidia-smi
tool.<br>NVML is thread-safe so it is safe to make simultaneous NVML calls from multiple threads.
<p>
<hr size="1">
<p>
<h2 align="center">API Documentation</h2>
<p>
Supported OS platforms:
- Windows: Windows Server 2008 R2 64bit, Windows 7 64bit
- Linux: 32-bit and 64-bit
Supported products:
- Full Support
- NVIDIA Tesla &tm; Line: S1070, S2050, C1060, C2050/70/75, M2050/70/75/90, X2070/90
- NVIDIA Quadro ® Line: 4000, 5000, 6000, 7000, M2070-Q
- NVIDIA GeForce ® Line: None
- Limited Support
- NVIDIA Tesla &tm; Line: None
- NVIDIA Quadro ® Line: All other current and previous generation Quadro-branded parts
- NVIDIA GeForce ® Line: All current and previous generation GeForce-branded parts
The NVML library can be found at \%ProgramW6432\%\\"NVIDIA Corporation"\\NVSMI\\ on Windows, but
will not be added to the path. To dynamically link to NVML, add this path to the PATH environment
variable. To dynamically load NVML, call LoadLibrary with this path.
On Linux the NVML library will be found on the standard library path. For 64 bit Linux, both the 32 bit
and 64 bit NVML libraries will be installed.
The NVML API is divided into five categories:
- Support Methods:
- \ref nvmlInitializationAndCleanup
- Query Methods:
- \ref nvmlSystemQueries
- \ref nvmlDeviceQueries
- \ref nvmlUnitQueries
- Control Methods:
- \ref nvmlUnitCommands
- \ref nvmlDeviceCommands
- Event Handling Methods:
- \ref nvmlEvents
- Error reporting Methods
- \ref nvmlErrorReporting
List of changes can be found in the \ref Changelog
\latexonly
\section{Feature Matrix}
\endlatexonly
\image latex FeatureMatrix_Units.png "This chart shows which unit-level features are available for each S-class product. All GPUs within each S-class product also provide the information listed in the Device chart below." width=10cm
\image latex FeatureMatrix_Fermi.png "This chart shows which features are available for each Fermi and Kepler architecture GPU product." width=15cm
\image latex FeatureMatrix_QuadroAndT10.png "This chart shows which features are available for each Quadro and T10 GPU product." height=22cm
*/
#ifndef __nvml_nvml_h__
#define __nvml_nvml_h__
#ifdef __cplusplus
extern "C" {
#endif
/*
* On Windows, set up methods for DLL export
*/
#if !defined(_WINDOWS)
   /* Not a Windows build: declarations need no import/export decoration. */
#  define DECLDIR
#elif defined(LIB_EXPORT)
   /* Windows build with LIB_EXPORT defined: mark the symbols as exported. */
#  define DECLDIR __declspec(dllexport)
#else
   /* Windows build without LIB_EXPORT: mark the symbols as imported. */
#  define DECLDIR __declspec(dllimport)
#endif
/**
* NVML API versioning support
*/
#define NVML_API_VERSION        3                        /**< API version as an integer constant */
#define NVML_API_VERSION_STR    "3"                      /**< API version as a string literal */
#define nvmlDeviceGetPciInfo    nvmlDeviceGetPciInfo_v2  /**< Unversioned name resolves to the _v2 symbol */
/***************************************************************************************************/
/** @defgroup nvmlDeviceStructs Device Structs
* @{
*/
/***************************************************************************************************/
/**
* Special constant that some fields take when they are not available.
* Used when only part of the struct is not available.
*
* Each structure explicitly states when to check for this value.
*/
#define NVML_VALUE_NOT_AVAILABLE (-1)

/**
 * Device handle type.
 *
 * A pointer to struct nvmlDevice_st, which is only forward-declared here (its
 * layout is not defined in the visible part of this header), so the pointer
 * cannot be dereferenced by code that sees only this declaration.
 */
typedef struct nvmlDevice_st* nvmlDevice_t;
/**
* PCI information about a GPU device.
*/
typedef struct nvmlPciInfo_st
{
    char         busId[16];       /**< PCI identifier as a domain:bus:device.function tuple, including the NULL terminator */
    unsigned int domain;          /**< PCI domain hosting the device's bus, 0 to 0xffff */
    unsigned int bus;             /**< Bus the device resides on, 0 to 0xff */
    unsigned int device;          /**< Id of the device on its bus, 0 to 31 */
    unsigned int pciDeviceId;     /**< 16-bit device id combined with the 16-bit vendor id */

    /* Field added in the NVML 2.285 API */
    unsigned int pciSubSystemId;  /**< 32-bit Sub System Device ID */

    /* Reserved by NVIDIA for internal use only */
    unsigned int reserved0;
    unsigned int reserved1;
    unsigned int reserved2;
    unsigned int reserved3;
} nvmlPciInfo_t;
/**
* Detailed ECC error counts for a device.
*/
typedef struct nvmlEccErrorCounts_st
{
    unsigned long long l1Cache;       /**< Count of L1 cache errors */
    unsigned long long l2Cache;       /**< Count of L2 cache errors */
    unsigned long long deviceMemory;  /**< Count of device memory errors */
    unsigned long long registerFile;  /**< Count of register file errors */
} nvmlEccErrorCounts_t;
/**
* Utilization information for a device.
*/
typedef struct nvmlUtilization_st
{
    unsigned int gpu;     /**< Percentage of the past second in which at least one kernel was executing on the GPU */
    unsigned int memory;  /**< Percentage of the past second in which global (device) memory was being read or written */
} nvmlUtilization_t;
/**
* Memory allocation information for a device.
*/
typedef struct nvmlMemory_st
{
    unsigned long long total;  /**< Total installed FB memory, in bytes */
    unsigned long long free;   /**< FB memory that is not allocated, in bytes */
    unsigned long long used;   /**< FB memory that is allocated, in bytes. The driver/GPU always reserves a small amount of memory for bookkeeping */
} nvmlMemory_t;
/**
* Information about running compute processes on the GPU
*/
typedef struct nvmlProcessInfo_st
{
    unsigned int       pid;            /**< Process ID */
    unsigned long long usedGpuMemory;  /**< GPU memory used, in bytes.
                                        *   Under WDDM this is always reported as \ref NVML_VALUE_NOT_AVAILABLE,
                                        *   because the Windows KMD, not the NVIDIA driver, manages all the memory */
} nvmlProcessInfo_t;
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlDeviceEnumvs Device Enums
* @{
*/
/***************************************************************************************************/
/**
* Generic enable/disable enum.
*/
typedef enum nvmlEnableState_enum
{
    NVML_FEATURE_DISABLED = 0,  /**< The feature is disabled */
    NVML_FEATURE_ENABLED  = 1   /**< The feature is enabled */
} nvmlEnableState_t;
/** Generic flag selecting the default behavior of some functions. The description of each function gives the details. */
#define nvmlFlagDefault     0x00

/** Generic flag that forces some behavior. The description of each function gives the details. */
#define nvmlFlagForce       0x01
/**
* Temperature sensors.
*/
typedef enum nvmlTemperatureSensors_enum
{
    NVML_TEMPERATURE_GPU = 0  /**< Sensor measuring the temperature of the GPU die */
} nvmlTemperatureSensors_t;
/**
* Compute mode.
*
* NVML_COMPUTEMODE_EXCLUSIVE_PROCESS was added in CUDA 4.0.
* Earlier CUDA versions supported a single exclusive mode,
* which is equivalent to NVML_COMPUTEMODE_EXCLUSIVE_THREAD in CUDA 4.0 and beyond.
*/
typedef enum nvmlComputeMode_enum
{
    NVML_COMPUTEMODE_DEFAULT           = 0,  /**< Default mode: multiple contexts per device */
    NVML_COMPUTEMODE_EXCLUSIVE_THREAD  = 1,  /**< Exclusive-thread mode: a single context per device, usable from one thread at a time */
    NVML_COMPUTEMODE_PROHIBITED        = 2,  /**< Prohibited mode: no contexts per device */
    NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3   /**< Exclusive-process mode: a single context per device, usable from multiple threads at a time */
} nvmlComputeMode_t;
/**
* ECC bit types.
*/
typedef enum nvmlEccBitType_enum
{
    NVML_SINGLE_BIT_ECC = 0,  /**< ECC errors affecting a single bit */
    NVML_DOUBLE_BIT_ECC = 1   /**< ECC errors affecting two bits */
} nvmlEccBitType_t;
/**
* ECC counter types.
*
* Note: Volatile counts are reset each time the driver loads. On Windows this is once per boot. On Linux this can be more frequent.
* On Linux the driver unloads when no active clients exist. If persistence mode is enabled or there is always a driver
* client active (e.g. X11), then Linux also sees per-boot behavior. If not, volatile counts are reset each time a compute app
* is run.
*/
typedef enum nvmlEccCounterType_enum
{
    NVML_VOLATILE_ECC  = 0,  /**< Volatile counts: reset every time the driver loads */
    NVML_AGGREGATE_ECC = 1   /**< Aggregate counts: kept across reboots, i.e. for the lifetime of the device */
} nvmlEccCounterType_t;
/**
* Clock types.
*
* All speeds are in MHz.
*/
typedef enum nvmlClockType_enum
{
    NVML_CLOCK_GRAPHICS = 0,  /**< Clock domain for graphics */
    NVML_CLOCK_SM       = 1,  /**< Clock domain for the SM */
    NVML_CLOCK_MEM      = 2   /**< Clock domain for memory */
} nvmlClockType_t;
/**
* Driver models.
*
* Windows only.
*/
typedef enum nvmlDriverModel_enum
{
    NVML_DRIVER_WDDM = 0,  /**< WDDM driver model: the GPU is treated as a display device */
    NVML_DRIVER_WDM  = 1   /**< WDM (TCC) model (recommended): the GPU is treated as a generic device */
} nvmlDriverModel_t;
/**
* Allowed PStates.
*/
typedef enum nvmlPStates_enum
{
    NVML_PSTATE_0       = 0,   /**< Performance state 0 -- Maximum Performance */
    NVML_PSTATE_1       = 1,   /**< Performance state 1 */
    NVML_PSTATE_2       = 2,   /**< Performance state 2 */
    NVML_PSTATE_3       = 3,   /**< Performance state 3 */
    NVML_PSTATE_4       = 4,   /**< Performance state 4 */
    NVML_PSTATE_5       = 5,   /**< Performance state 5 */
    NVML_PSTATE_6       = 6,   /**< Performance state 6 */
    NVML_PSTATE_7       = 7,   /**< Performance state 7 */
    NVML_PSTATE_8       = 8,   /**< Performance state 8 */
    NVML_PSTATE_9       = 9,   /**< Performance state 9 */
    NVML_PSTATE_10      = 10,  /**< Performance state 10 */
    NVML_PSTATE_11      = 11,  /**< Performance state 11 */
    NVML_PSTATE_12      = 12,  /**< Performance state 12 */
    NVML_PSTATE_13      = 13,  /**< Performance state 13 */
    NVML_PSTATE_14      = 14,  /**< Performance state 14 */
    NVML_PSTATE_15      = 15,  /**< Performance state 15 -- Minimum Performance */
    /* No comma after the final enumerator: a trailing comma is rejected by
     * strict C89 and pre-C++11 compilers, and this header is consumed from both. */
    NVML_PSTATE_UNKNOWN = 32   /**< Unknown performance state */
} nvmlPstates_t;
/**
* Available infoROM objects.
*/
typedef enum nvmlInforomObject_enum
{
    NVML_INFOROM_OEM   = 0,  /**< Object defined by the OEM */
    NVML_INFOROM_ECC   = 1,  /**< ECC object, which determines the level of ECC support */
    NVML_INFOROM_POWER = 2   /**< Power management object */
} nvmlInforomObject_t;
/**
* Return values for NVML API calls.
*/
typedef enum nvmlReturn_enum
{
    NVML_SUCCESS                   = 0,   /**< The operation succeeded */
    NVML_ERROR_UNINITIALIZED       = 1,   /**< NVML had not been initialized with nvmlInit() beforehand */
    NVML_ERROR_INVALID_ARGUMENT    = 2,   /**< One of the supplied arguments is invalid */
    NVML_ERROR_NOT_SUPPORTED       = 3,   /**< The target device does not offer the requested operation */
    NVML_ERROR_NO_PERMISSION       = 4,   /**< The current user lacks permission for the operation */
    NVML_ERROR_ALREADY_INITIALIZED = 5,   /**< Deprecated: reference counting now permits multiple initializations */
    NVML_ERROR_NOT_FOUND           = 6,   /**< A query looking for an object did not find it */
    NVML_ERROR_INSUFFICIENT_SIZE   = 7,   /**< An input argument is too small */
    NVML_ERROR_INSUFFICIENT_POWER  = 8,   /**< The external power cables of a device are not attached properly */
    NVML_ERROR_DRIVER_NOT_LOADED   = 9,   /**< The NVIDIA driver is not loaded */
    NVML_ERROR_TIMEOUT             = 10,  /**< The timeout supplied by the user has passed */
    NVML_ERROR_UNKNOWN             = 999  /**< An internal driver error occurred */
} nvmlReturn_t;
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlUnitStructs Unit Structs
* @{
*/
/***************************************************************************************************/
/**
 * Unit handle type.
 *
 * A pointer to struct nvmlUnit_st, which is only forward-declared here (its
 * layout is not defined in the visible part of this header).
 * NOTE(review): presumably identifies an S-class unit, as the other structs in
 * this group describe S-class units -- confirm against the unit query functions.
 */
typedef struct nvmlUnit_st* nvmlUnit_t;
/**
* Description of HWBC entry
*/
typedef struct nvmlHwbcEntry_st
{
    unsigned int hwbcId;               /**< Numeric id of the HWBC entry (NOTE(review): exact meaning is not documented here -- confirm) */
    char         firmwareVersion[32];  /**< 32-byte character buffer; presumably the entry's firmware version string -- confirm */
} nvmlHwbcEntry_t;
/**
* Fan state enum.
*/
typedef enum nvmlFanState_enum
{
    NVML_FAN_NORMAL = 0,  /**< The fan is working properly */
    NVML_FAN_FAILED = 1   /**< The fan has failed */
} nvmlFanState_t;
/**
* Led color enum.
*/
typedef enum nvmlLedColor_enum
{
    NVML_LED_COLOR_GREEN = 0,  /**< GREEN: signals good health */
    NVML_LED_COLOR_AMBER = 1   /**< AMBER: signals a problem */
} nvmlLedColor_t;
/**
* LED states for an S-class unit.
*/
typedef struct nvmlLedState_st
{
    char           cause[256];  /**< Text describing the cause, when the LED is amber */
    nvmlLedColor_t color;       /**< LED color: GREEN or AMBER */
} nvmlLedState_t;
/**
* Static S-class unit info.
*/
typedef struct nvmlUnitInfo_st
{
    char name[96];             /**< Name of the product */
    char id[96];               /**< Identifier of the product */
    char serial[96];           /**< Serial number of the product */
    char firmwareVersion[96];  /**< Version of the firmware */
} nvmlUnitInfo_t;
/**
* Power usage information for an S-class unit.
* The power supply state is a human readable string that equals "Normal" or contains
* a combination of "Abnormal" plus one or more of the following:
*
* - High voltage
* - Fan failure
* - Heatsink temperature
* - Current limit
* - Voltage below UV alarm threshold
* - Low-voltage
* - SI2C remote off command
* - MOD_DISABLE input
* - Short pin transition
*/
typedef struct nvmlPSUInfo_st
{
    char         state[256];  /**< State of the power supply */
    unsigned int current;     /**< Current of the PSU (A) */
    unsigned int voltage;     /**< Voltage of the PSU (V) */
    unsigned int power;       /**< Power drawn by the PSU (W) */
} nvmlPSUInfo_t;
/**
* Fan speed reading for a single fan in an S-class unit.
*/
typedef struct nvmlUnitFanInfo_st
{
    unsigned int   speed;  /**< Speed of the fan (RPM) */
    nvmlFanState_t state;  /**< Indicates whether the fan is working properly */
} nvmlUnitFanInfo_t;
/**
* Fan speed readings for an entire S-class unit.
*/
typedef struct nvmlUnitFanSpeeds_st
{
    nvmlUnitFanInfo_t fans[24];  /**< Per-fan speed data */
    unsigned int      count;     /**< How many fans the unit has */
} nvmlUnitFanSpeeds_t;
/** @} */
/***************************************************************************************************/
/** @addtogroup nvmlEvents
* @{
*/
/***************************************************************************************************/
/**
 * Handle to an event set.
 *
 * A pointer to struct nvmlEventSet_st, which is only forward-declared here
 * (its layout is not defined in the visible part of this header).
 */
typedef struct nvmlEventSet_st* nvmlEventSet_t;
/** @defgroup nvmlEventType Event Types
* @{
* Event types about which the user can be notified.
* See description of particular functions for details.
*
* See \ref nvmlDeviceRegisterEvents and \ref nvmlDeviceGetSupportedEventTypes to check which devices
* support each event.
*
* Types can be combined with bitwise or operator '|' when passed to \ref nvmlDeviceRegisterEvents
*/
/** Event reporting single bit ECC errors */
#define nvmlEventTypeSingleBitEccError     0x0000000000000001LL

/** Event reporting double bit ECC errors */
#define nvmlEventTypeDoubleBitEccError     0x0000000000000002LL

/**
 * Event reporting PState changes
 *
 * \note On Fermi architecture, a PState change also indicates that the GPU is throttling down because
 *       no work is being executed on the GPU, or because of power capping or thermal capping. Typically
 *       a Fermi-based GPU should remain in P0 for as long as the compute process is executing.
 */
#define nvmlEventTypePState                0x0000000000000004LL

/** Event reporting that an Xid critical error occurred */
#define nvmlEventTypeXidCriticalError      0x0000000000000008LL

/** Mask containing no events */
#define nvmlEventTypeNone                  0x0000000000000000LL

/** Mask containing every event */
#define nvmlEventTypeAll (nvmlEventTypeNone                 \
                          | nvmlEventTypeSingleBitEccError  \
                          | nvmlEventTypeDoubleBitEccError  \
                          | nvmlEventTypePState             \
                          | nvmlEventTypeXidCriticalError   \
                         )
/** @} */
/**
* Information about an event that has occurred
*/
typedef struct nvmlEventData_st
{
    nvmlDevice_t       device;     /**< The specific device on which the event occurred */
    unsigned long long eventType;  /**< Tells which specific event occurred */
    unsigned long long reserved;
} nvmlEventData_t;
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlInitializationAndCleanup Initialization and Cleanup
* This chapter describes the methods that handle NVML initialization and cleanup.
* It is the user's responsibility to call \ref nvmlInit() before calling any other methods, and
* nvmlShutdown() once NVML is no longer being used.
* @{
*/
/***************************************************************************************************/
/**
 * Initialize NVML by discovering and attaching to all GPU devices in the system.
 *
 * For all products.
 *
 * This method should be called once before invoking any other methods in the library.
 * A reference count of the number of initializations is maintained. Shutdown only occurs
 * when the reference count reaches zero, so every call to this method should be matched
 * by a call to \ref nvmlShutdown().
 *
 * @return
 *         - \ref NVML_SUCCESS                   if NVML has been properly initialized
 *         - \ref NVML_ERROR_NO_PERMISSION       if the user doesn't have permission to talk to any device
 *         - \ref NVML_ERROR_DRIVER_NOT_LOADED   if NVIDIA driver is not running
 *         - \ref NVML_ERROR_INSUFFICIENT_POWER  if any devices have improperly attached external power cables
 *         - \ref NVML_ERROR_UNKNOWN             on any unexpected error
 */
nvmlReturn_t DECLDIR nvmlInit(void);
/**
 * Shut down NVML by releasing all GPU resources previously allocated with \ref nvmlInit().
 *
 * For all products.
 *
 * This method should be called after NVML work is done, once for each call to \ref nvmlInit().
 * A reference count of the number of initializations is maintained. Shutdown only occurs
 * when the reference count reaches zero. For backwards compatibility, no error is reported if
 * nvmlShutdown() is called more times than nvmlInit().
 *
 * @return
 *         - \ref NVML_SUCCESS                 if NVML has been properly shut down
 *         - \ref NVML_ERROR_UNINITIALIZED     if the library has not been successfully initialized
 *         - \ref NVML_ERROR_UNKNOWN           on any unexpected error
 */
nvmlReturn_t DECLDIR nvmlShutdown(void);
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlErrorReporting Error reporting
* This chapter describes helper functions for error reporting routines.
* @{
*/
/***************************************************************************************************/
/**
 * Helper method for converting NVML error codes into readable strings.
 *
 * For all products.
 *
 * @param result                               NVML error code to convert
 *
 * @return String representation of the error. The string is returned through a pointer to const
 *         char, so the caller must not modify it.
 *         NOTE(review): this header does not state who owns the returned string; presumably the
 *         caller must not free it -- confirm.
 */
const DECLDIR char* nvmlErrorString(nvmlReturn_t result);
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlConstants Constants
* @{
*/
/***************************************************************************************************/
/** Buffer size that is guaranteed to be sufficient for \ref nvmlDeviceGetInforomVersion */
#define NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE  16

/** Buffer size that is guaranteed to be sufficient for \ref nvmlDeviceGetUUID */
#define NVML_DEVICE_UUID_BUFFER_SIZE             80

/** Buffer size that is guaranteed to be sufficient for \ref nvmlSystemGetDriverVersion */
#define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE   80

/** Buffer size that is guaranteed to be sufficient for \ref nvmlSystemGetNVMLVersion */
#define NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE     80

/** Buffer size that is guaranteed to be sufficient for \ref nvmlDeviceGetName */
#define NVML_DEVICE_NAME_BUFFER_SIZE             64

/** Buffer size that is guaranteed to be sufficient for \ref nvmlDeviceGetSerial */
#define NVML_DEVICE_SERIAL_BUFFER_SIZE           30

/** Buffer size that is guaranteed to be sufficient for \ref nvmlDeviceGetVbiosVersion */
#define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE    32
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlSystemQueries System Queries
* This chapter describes the queries that NVML can perform against the local system. These queries
* are not device-specific.
* @{
*/
/***************************************************************************************************/
/**
* Retrieves the version of the system's graphics driver.
*
* For all products.
*
* The version identifier is an alphanumeric string. It will not exceed 80 characters in length
* (including the NULL terminator), so a buffer of
* \ref nvmlConstants::NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE characters is guaranteed to be large enough.
*
* @param version Buffer in which to return the version identifier
* @param length The maximum allowed length of the string returned in \a version
*
* @return
* - \ref NVML_SUCCESS if \a version has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
* - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
*/
nvmlReturn_t DECLDIR nvmlSystemGetDriverVersion(char *version, unsigned int length);
/**
* Retrieves the version of the NVML library.
*
* For all products.
*
* The version identifier is an alphanumeric string. It will not exceed 80 characters in length
* (including the NULL terminator), so a buffer of
* \ref nvmlConstants::NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE characters is guaranteed to be large enough.
*
* @param version Buffer in which to return the version identifier
* @param length The maximum allowed length of the string returned in \a version
*
* @return
* - \ref NVML_SUCCESS if \a version has been set
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
* - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
*/
nvmlReturn_t DECLDIR nvmlSystemGetNVMLVersion(char *version, unsigned int length);
/**
* Gets the name of the process with the provided process id.
*
* For all products.
*
* The returned process name is truncated to the provided \a length.
* The name string is encoded in ANSI.
*
* @param pid The identifier of the process
* @param name Buffer in which to return the process name
* @param length The maximum allowed length of the string returned in \a name
*
* @return
* - \ref NVML_SUCCESS if \a name has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a name is NULL
* - \ref NVML_ERROR_NOT_FOUND if the process does not exist
* - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlSystemGetProcessName(unsigned int pid, char *name, unsigned int length);
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlUnitQueries Unit Queries
* This chapter describes the queries that NVML can perform against each unit. For S-class systems only.
* In each case the unit is identified with an nvmlUnit_t handle. This handle is obtained by
* calling \ref nvmlUnitGetHandleByIndex().
* @{
*/
/***************************************************************************************************/
/**
* Retrieves the number of units in the system.
*
* For S-class products.
*
* @param unitCount Reference in which to return the number of units
*
* @return
* - \ref NVML_SUCCESS if \a unitCount has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unitCount is NULL
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlUnitGetHandleByIndex()
*/
nvmlReturn_t DECLDIR nvmlUnitGetCount(unsigned int *unitCount);
/**
* Acquires the handle for a particular unit, based on its index.
*
* For S-class products.
*
* Valid indices are derived from the \a unitCount returned by \ref nvmlUnitGetCount().
* For example, if \a unitCount is 2 the valid indices are 0 and 1, corresponding to UNIT 0 and UNIT 1.
*
* The order in which NVML enumerates units is not guaranteed to be consistent between reboots.
*
* @param index The index of the target unit, >= 0 and < \a unitCount
* @param unit Reference in which to return the unit handle
*
* @return
* - \ref NVML_SUCCESS if \a unit has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a unit is NULL
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlUnitGetHandleByIndex(unsigned int index, nvmlUnit_t *unit);
/**
* Retrieves the static information associated with a unit.
*
* For S-class products.
*
* See \ref nvmlUnitInfo_t for details on the available unit information.
*
* @param unit The identifier of the target unit
* @param info Reference in which to return the unit information
*
* @return
* - \ref NVML_SUCCESS if \a info has been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a info is NULL
*/
nvmlReturn_t DECLDIR nvmlUnitGetUnitInfo(nvmlUnit_t unit, nvmlUnitInfo_t *info);
/**
* Retrieves the LED state associated with this unit.
*
* For S-class products.
*
* See \ref nvmlLedState_t for details on the allowed states.
*
* @param unit The identifier of the target unit
* @param state Reference in which to return the current LED state
*
* @return
* - \ref NVML_SUCCESS if \a state has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a state is NULL
* - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlUnitSetLedState()
*/
nvmlReturn_t DECLDIR nvmlUnitGetLedState(nvmlUnit_t unit, nvmlLedState_t *state);
/**
* Retrieves the PSU statistics for the unit.
*
* For S-class products.
*
* See \ref nvmlPSUInfo_t for details on the available PSU information.
*
* @param unit The identifier of the target unit
* @param psu Reference in which to return the PSU information
*
* @return
* - \ref NVML_SUCCESS if \a psu has been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a psu is NULL
* - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlUnitGetPsuInfo(nvmlUnit_t unit, nvmlPSUInfo_t *psu);
/**
* Retrieves the temperature readings for the unit, in degrees C.
*
* For S-class products.
*
* Depending on the product, readings may be available for intake (type=0),
* exhaust (type=1) and board (type=2).
*
* @param unit The identifier of the target unit
* @param type The type of reading to take: intake (0), exhaust (1) or board (2)
* @param temp Reference in which to return the temperature reading, in degrees C, for the
* requested \a type
*
* @return
* - \ref NVML_SUCCESS if \a temp has been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit or \a type is invalid or \a temp is NULL
* - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type, unsigned int *temp);
/**
* Retrieves the fan speed readings for the unit.
*
* For S-class products.
*
* See \ref nvmlUnitFanSpeeds_t for details on the available fan speed information.
*
* @param unit The identifier of the target unit
* @param fanSpeeds Reference in which to return the fan speed information
*
* @return
* - \ref NVML_SUCCESS if \a fanSpeeds has been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a fanSpeeds is NULL
* - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, nvmlUnitFanSpeeds_t *fanSpeeds);
/**
* Retrieves the set of GPU devices that are attached to the specified unit.
*
* For S-class products.
*
* On input, the \a deviceCount argument is expected to be set to the size of the \a devices array.
*
* @param unit The identifier of the target unit
* @param deviceCount Reference in which to provide the \a devices array size, and
* to return the number of attached GPU devices
* @param devices Array in which to return the handles of the attached GPU devices
*
* @return
* - \ref NVML_SUCCESS if \a deviceCount and \a devices have been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a deviceCount indicates that the \a devices array is too small
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid, or either of \a deviceCount or \a devices is NULL
*/
nvmlReturn_t DECLDIR nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, nvmlDevice_t *devices);
/**
* Retrieves the IDs and firmware versions for any Host Interface Cards (HICs) in the system.
*
* For S-class products.
*
* On input, the \a hwbcCount argument is expected to be set to the size of the \a hwbcEntries array.
* The HIC must be connected to an S-class system for it to be reported by this function.
*
* @param hwbcCount Reference in which to provide the size of the \a hwbcEntries array;
* populated on success
* @param hwbcEntries Array in which to return the HIC information entries
*
* @return
* - \ref NVML_SUCCESS if \a hwbcCount and \a hwbcEntries have been populated
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if either \a hwbcCount or \a hwbcEntries is NULL
* - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a hwbcCount indicates that the \a hwbcEntries array is too small
*/
nvmlReturn_t DECLDIR nvmlSystemGetHicVersion(unsigned int *hwbcCount, nvmlHwbcEntry_t *hwbcEntries);
/** @} */
/***************************************************************************************************/
/** @defgroup nvmlDeviceQueries Device Queries
* This chapter describes the queries that NVML can perform against each device.
* In each case the device is identified with an nvmlDevice_t handle. This handle is obtained by
* calling one of \ref nvmlDeviceGetHandleByIndex(), \ref nvmlDeviceGetHandleBySerial() or
* \ref nvmlDeviceGetHandleByPciBusId().
* @{
*/
/***************************************************************************************************/
/**
* Retrieves the number of compute devices in the system. A compute device is a single GPU.
*
* For all products.
*
* On some platforms not all devices may be accessible due to permission restrictions. In these
* cases the device count will reflect only the GPUs that NVML can access.
*
* @param deviceCount Reference in which to return the number of accessible devices
*
* @return
* - \ref NVML_SUCCESS if \a deviceCount has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a deviceCount is NULL
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlDeviceGetHandleByIndex()
*/
nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *deviceCount);
/**
* Acquires the handle for a particular device, based on its index.
*
* For all products.
*
* Valid indices are derived from the \a deviceCount (the number of accessible devices) returned by
* \ref nvmlDeviceGetCount(). For example, if \a deviceCount is 2 the valid indices
* are 0 and 1, corresponding to GPU 0 and GPU 1.
*
* The order in which NVML enumerates devices is not guaranteed to be consistent between reboots. For that
* reason it is recommended that devices be looked up by their PCI ids or board serial numbers. See
* \ref nvmlDeviceGetHandleBySerial() and \ref nvmlDeviceGetHandleByPciBusId().
*
* @param index The index of the target GPU, >= 0 and < \a deviceCount
* @param device Reference in which to return the device handle
*
* @return
* - \ref NVML_SUCCESS if \a device has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a device is NULL
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*/
nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
/**
* Acquires the handle for a particular device, based on its board serial number.
*
* For all products.
*
* This number corresponds to the value printed directly on the board, and to the value returned by
* \ref nvmlDeviceGetSerial().
*
* @deprecated Since more than one GPU can exist on a single board this function is deprecated in favor
* of \ref nvmlDeviceGetHandleByUUID().
* For dual GPU boards this function will return \ref NVML_ERROR_INVALID_ARGUMENT.
*
* @param serial The board serial number of the target GPU
* @param device Reference in which to return the device handle
*
* @return
* - \ref NVML_SUCCESS if \a device has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a serial is invalid, \a device is NULL or more than one
* device has the same serial (dual GPU boards)
* - \ref NVML_ERROR_NOT_FOUND if \a serial does not match a valid device on the system
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlDeviceGetSerial
* @see nvmlDeviceGetHandleByUUID
*/
nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_t *device);
/**
* Acquires the handle for a particular device, based on the globally unique, immutable UUID
* associated with each device.
*
* For all products.
*
* @param uuid The UUID of the target GPU
* @param device Reference in which to return the device handle
*
* @return
* - \ref NVML_SUCCESS if \a device has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a uuid is invalid or \a device is NULL
* - \ref NVML_ERROR_NOT_FOUND if \a uuid does not match a valid device on the system
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlDeviceGetUUID
*/
nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device);
/**
* Acquires the handle for a particular device, based on its PCI bus id.
*
* For all products.
*
* This value corresponds to the nvmlPciInfo_t::busId returned by \ref nvmlDeviceGetPciInfo().
*
* @param pciBusId The PCI bus id of the target GPU
* @param device Reference in which to return the device handle
*
* @return
* - \ref NVML_SUCCESS if \a device has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciBusId is invalid or \a device is NULL
* - \ref NVML_ERROR_NOT_FOUND if \a pciBusId does not match a valid device on the system
* - \ref NVML_ERROR_UNKNOWN on any unexpected error
*
* @see nvmlDeviceGetPciInfo()
*/
nvmlReturn_t DECLDIR nvmlDeviceGetHandleByPciBusId(const char *pciBusId, nvmlDevice_t *device);
/**
* Retrieves the name of this device.
*
* For all products.
*
* The name is an alphanumeric string that denotes a particular product, e.g. Tesla &tm; C2070. It will not
* exceed 64 characters in length (including the NULL terminator), so a buffer of
* \ref nvmlConstants::NVML_DEVICE_NAME_BUFFER_SIZE characters is guaranteed to be large enough.
*
* @param device The identifier of the target device
* @param name Buffer in which to return the product name
* @param length The maximum allowed length of the string returned in \a name
*
* @return
* - \ref NVML_SUCCESS if \a name has been set
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a name is NULL
* - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
*/
nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length);