-
Notifications
You must be signed in to change notification settings - Fork 0
/
NvInferRuntimeCommon.h
1379 lines (1238 loc) · 53.1 KB
/
NvInferRuntimeCommon.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NV_INFER_RUNTIME_COMMON_H
#define NV_INFER_RUNTIME_COMMON_H
#include <cstddef>
#include <cstdint>
#include "NvInferVersion.h"
//! Portability wrappers for the C++11 `final` and `override` specifiers.
//! They expand to the real keywords when the compiler reports C++11 or later through __cplusplus,
//! and to nothing otherwise, so the annotated declarations still compile in that case.
//! NOTE(review): MSVC reports __cplusplus as 199711L unless /Zc:__cplusplus is passed, in which case the
//! empty definitions would be selected there - confirm this is the intended behavior on Windows builds.
#if __cplusplus >= 201103L
#define _TENSORRT_FINAL final
#define _TENSORRT_OVERRIDE override
#else
#define _TENSORRT_FINAL
#define _TENSORRT_OVERRIDE
#endif
//! Items that are marked as deprecated will be removed in a future release.
//!
//! Three macros are defined here:
//!  - TRT_DEPRECATED      marks a declaration as deprecated.
//!  - TRT_DEPRECATED_ENUM marks an enumerator as deprecated where that is enabled.
//!  - TRT_DEPRECATED_API  marks an exported symbol; it also carries the export/visibility attribute.
#if __cplusplus >= 201402L
// C++14 and later: use the standard [[deprecated]] attribute.
#define TRT_DEPRECATED [[deprecated]]
// The enumerator attribute is only enabled when __GNUC__ is 6 or greater. An undefined __GNUC__ evaluates
// to 0 inside #if, so compilers that do not define it also get the empty definition.
#if __GNUC__ < 6
#define TRT_DEPRECATED_ENUM
#else
#define TRT_DEPRECATED_ENUM TRT_DEPRECATED
#endif
#ifdef _MSC_VER
// With MSVC the API macro only exports the symbol; no deprecation attribute is attached.
#define TRT_DEPRECATED_API __declspec(dllexport)
#else
#define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default")))
#endif
#else
// Before C++14: fall back to compiler-specific spellings.
#ifdef _MSC_VER
// With MSVC the deprecation markers expand to nothing in this branch.
#define TRT_DEPRECATED
#define TRT_DEPRECATED_ENUM
#define TRT_DEPRECATED_API __declspec(dllexport)
#else
#define TRT_DEPRECATED __attribute__((deprecated))
#define TRT_DEPRECATED_ENUM
#define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default")))
#endif
#endif
//! Defines which symbols are exported.
//!
//! When TENSORRT_BUILD_LIB is defined, TENSORRTAPI expands to the compiler-specific export attribute
//! (__declspec(dllexport) with MSVC, default visibility otherwise). When it is not defined,
//! TENSORRTAPI expands to nothing.
#ifdef TENSORRT_BUILD_LIB
#ifdef _MSC_VER
#define TENSORRTAPI __declspec(dllexport)
#else
#define TENSORRTAPI __attribute__((visibility("default")))
#endif
#else
#define TENSORRTAPI
#endif
//! Defined for use with legacy APIs that have not been updated to noexcept yet.
//! Do not use with new APIs, use noexcept instead.
//! The macro expands to nothing, so functions annotated with it carry no exception specification.
#define TRTNOEXCEPT
//!
//! \file NvInferRuntimeCommon.h
//!
//! This is the top-level API file for TensorRT core runtime library.
//!
// Forward declare some CUDA types to avoid an include dependency.
// The interfaces visible in this header only pass these types around as pointers/handles
// (for example IPluginV2::enqueue() and IPluginV2Ext::attachToContext()), so incomplete types are enough here.
struct cublasContext;
struct cudnnContext;
typedef struct CUstream_st* cudaStream_t; //!< Forward declaration of cudaStream_t.
typedef struct CUevent_st* cudaEvent_t; //!< Forward declaration of cudaEvent_t.
//! TensorRT version packed into a single integer: (major * 1000) + (minor * 100) + patch.
//! NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR and NV_TENSORRT_PATCH are not defined in this file; presumably they
//! come from NvInferVersion.h, which is included above.
//! The plugin interfaces below return this value (or its low 24 bits) from getTensorRTVersion().
static const int32_t NV_TENSORRT_VERSION
= (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH; // major, minor, patch
//!
//! \namespace nvinfer1
//!
//! \brief The TensorRT API version 1 namespace.
//!
namespace nvinfer1
{
class IErrorRecorder; //!< Forward declare IErrorRecorder for use in other interfaces.
class IGpuAllocator; //!< Forward declare IGpuAllocator for use in other interfaces.
//! Maximum number of elements in an enumeration type.
//!
//! Only the primary template is declared here, without a body. Each enumeration supplies its value
//! through an explicit specialization, for example EnumMax<ActivationType>().
template <typename T>
constexpr inline int32_t EnumMax();
//!
//! \enum ActivationType
//!
//! \brief The activation functions that an activation layer can apply.
//!
enum class ActivationType : int32_t
{
    //! Rectified linear activation.
    kRELU = 0,
    //! Sigmoid activation.
    kSIGMOID = 1,
    //! TanH activation.
    kTANH = 2,
    //! LeakyRelu activation: x>=0 ? x : alpha * x.
    kLEAKY_RELU = 3,
    //! Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
    kELU = 4,
    //! Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
    kSELU = 5,
    //! Softsign activation: x / (1+|x|)
    kSOFTSIGN = 6,
    //! Parametric softplus activation: alpha*log(exp(beta*x)+1)
    kSOFTPLUS = 7,
    //! Clip activation: max(alpha, min(beta, x))
    kCLIP = 8,
    //! Hard sigmoid activation: max(0, min(1, alpha*x+beta))
    kHARD_SIGMOID = 9,
    //! Scaled tanh activation: alpha*tanh(beta*x)
    kSCALED_TANH = 10,
    //! Thresholded ReLU activation: x>alpha ? x : 0
    kTHRESHOLDED_RELU = 11
};
//! Maximum number of elements in ActivationType enum. \see ActivationType
template <>
constexpr inline int32_t EnumMax<ActivationType>()
{
    // One past the last enumerator: kTHRESHOLDED_RELU is 11, so the result is 12.
    return static_cast<int32_t>(ActivationType::kTHRESHOLDED_RELU) + 1;
}
//!
//! \enum DataType
//!
//! \brief The element type used for weights and tensors.
//!
enum class DataType : int32_t
{
    kFLOAT = 0, //!< 32-bit floating point format.
    kHALF = 1,  //!< IEEE 16-bit floating-point format.
    kINT8 = 2,  //!< 8-bit integer representing a quantized floating-point value.
    kINT32 = 3, //!< Signed 32-bit integer format.
    kBOOL = 4   //!< 8-bit boolean. 0 = false, 1 = true, other values undefined.
};
//! Maximum number of elements in DataType enum. \see DataType
template <>
constexpr inline int32_t EnumMax<DataType>()
{
    // One past the last enumerator: kBOOL is 4, so the result is 5.
    return static_cast<int32_t>(DataType::kBOOL) + 1;
}
//!
//! \enum DimensionType
//! \brief The kind of data that is laid out along a dimension.
//!
enum class DimensionType : int32_t
{
    //! Elements correspond to different spatial data.
    kSPATIAL = 0,
    //! Elements correspond to different channels.
    kCHANNEL = 1,
    //! Elements correspond to different batch index.
    kINDEX = 2,
    //! Elements correspond to different sequence values.
    kSEQUENCE = 3
};
//! Maximum number of elements in DimensionType enum. \see DimensionType
template <>
constexpr inline int32_t EnumMax<DimensionType>()
{
    // One past the last enumerator: kSEQUENCE is 3, so the result is 4.
    return static_cast<int32_t>(DimensionType::kSEQUENCE) + 1;
}
//!
//! \class Dims
//! \brief Structure to define the dimensions of a tensor.
//!
//! \note Currently the following formats are supported for layer inputs and outputs:
//! * zero or more index dimensions followed by one channel and two spatial dimensions (e.g. CHW)
//! * one time series dimension followed by one index dimension followed by one channel dimension (i.e. TNC)
//!
//! TensorRT can also return an invalid dims structure. This structure is represented by nbDims == -1
//! and d[i] == 0 for all i.
//!
//! TensorRT can also return an "unknown rank" dims structure. This structure is represented by nbDims == -1
//! and d[i] == -1 for all i.
//!
//! The class declares no constructor, so it does not initialize its members itself.
//!
class Dims
{
public:
static const int32_t MAX_DIMS = 8; //!< The maximum number of dimensions supported for a tensor.
int32_t nbDims; //!< The number of dimensions.
int32_t d[MAX_DIMS]; //!< The extent of each dimension.
TRT_DEPRECATED DimensionType type[MAX_DIMS]; //!< The type of each dimension, provided for backwards compatibility
//!< and will be removed in TensorRT 8.0.
};
//!
//! \brief Bit mask that can represent one or more TensorFormat values combined by binary OR.
//!
//! Bit i of the mask stands for the TensorFormat enumerator whose value is i, e.g.
//! (1U << static_cast<uint32_t>(TensorFormat::kCHW4)) | (1U << static_cast<uint32_t>(TensorFormat::kCHW32)).
//!
//! \see ITensor::getAllowedFormats(), ITensor::setAllowedFormats()
//!
typedef uint32_t TensorFormats;
//!
//! \enum TensorFormat
//!
//! \brief Format of the input/output tensors.
//!
//! This enum is extended to be used by both plugins and reformat-free network
//! I/O tensors.
//!
//! \see IPluginExt::getPluginFormats(), safe::ICudaEngine::getBindingFormat()
//!
//! For more information about data formats, see the topic "Data Format Description" located in the
//! TensorRT Developer Guide (https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html).
//!
enum class TensorFormat : int32_t
{
//! Row major linear format.
//! For a tensor with dimensions {N, C, H, W} or {numbers, channels,
//! columns, rows}, the dimensional index corresponds to {3, 2, 1, 0}
//! and thus the order is W minor.
kLINEAR = 0,
kNCHW TRT_DEPRECATED_ENUM = kLINEAR, //!< Deprecated name of kLINEAR, provided for backwards compatibility and will
//!< be removed in TensorRT 8.0.
//! Two wide channel vectorized row major format. This format is bound to
//! FP16. It is only available for dimensions >= 3.
//! For a tensor with dimensions {N, C, H, W},
//! the memory layout is equivalent to a C array with dimensions
//! [N][(C+1)/2][H][W][2], with the tensor coordinates (n, c, h, w)
//! mapping to array subscript [n][c/2][h][w][c%2].
kCHW2 = 1,
kNC2HW2 TRT_DEPRECATED_ENUM = kCHW2, //!< Deprecated name of kCHW2, provided for backwards compatibility and will
//!< be removed in TensorRT 8.0.
//! Eight channel format where C is padded to a multiple of 8. This format
//! is bound to FP16. It is only available for dimensions >= 3.
//! For a tensor with dimensions {N, H, W, C},
//! the memory layout is equivalent to the array with dimensions
//! [N][H][W][(C+7)/8*8], with the tensor coordinates (n, h, w, c)
//! mapping to array subscript [n][h][w][c].
kHWC8 = 2,
kNHWC8 TRT_DEPRECATED_ENUM = kHWC8, //!< Deprecated name of kHWC8, provided for backwards compatibility and will
//!< be removed in TensorRT 8.0.
//! Four wide channel vectorized row major format. This format is bound to
//! INT8 or FP16. It is only available for dimensions >= 3.
//! For INT8, the C dimension must be a build-time constant.
//! For a tensor with dimensions {N, C, H, W},
//! the memory layout is equivalent to a C array with dimensions
//! [N][(C+3)/4][H][W][4], with the tensor coordinates (n, c, h, w)
//! mapping to array subscript [n][c/4][h][w][c%4].
//! If running on the DLA, this format can be used for acceleration
//! with the caveat that C must be less than or equal to 4.
//! If used as DLA input with allowGPUFallback disabled, it must meet the
//! line stride requirement of the DLA format: the column stride in bytes
//! should be a multiple of 32.
kCHW4 = 3,
//! Sixteen wide channel vectorized row major format. This format is bound
//! to FP16. It is only available for dimensions >= 3.
//! For a tensor with dimensions {N, C, H, W},
//! the memory layout is equivalent to a C array with dimensions
//! [N][(C+15)/16][H][W][16], with the tensor coordinates (n, c, h, w)
//! mapping to array subscript [n][c/16][h][w][c%16].
//!
//! For DLA usage, this format maps to the native format for FP16,
//! and the tensor sizes are limited to C,H,W in the range [1,8192].
kCHW16 = 4,
//! Thirty-two wide channel vectorized row major format. This format is
//! only available for dimensions >= 3.
//! For a tensor with dimensions {N, C, H, W},
//! the memory layout is equivalent to a C array with dimensions
//! [N][(C+31)/32][H][W][32], with the tensor coordinates (n, c, h, w)
//! mapping to array subscript [n][c/32][h][w][c%32].
//!
//! For DLA usage, this format maps to the native format for INT8,
//! and the tensor sizes are limited to C,H,W in the range [1,8192].
kCHW32 = 5,
//! Eight channel format where C is padded to a multiple of 8. This format
//! is bound to FP16, and it is only available for dimensions >= 4.
//! For a tensor with dimensions {N, D, H, W, C},
//! the memory layout is equivalent to an array with dimensions
//! [N][D][H][W][(C+7)/8*8], with the tensor coordinates (n, d, h, w, c)
//! mapping to array subscript [n][d][h][w][c].
kDHWC8 = 6,
//! Thirty-two wide channel vectorized row major format. This format is
//! bound to FP16 and INT8 and is only available for dimensions >= 4.
//! For a tensor with dimensions {N, C, D, H, W},
//! the memory layout is equivalent to a C array with dimensions
//! [N][(C+31)/32][D][H][W][32], with the tensor coordinates (n, c, d, h, w)
//! mapping to array subscript [n][c/32][d][h][w][c%32].
kCDHW32 = 7,
//! Non-vectorized channel-last format. This format is bound to FP32
//! and is only available for dimensions >= 3.
kHWC = 8
};
//!
//! \brief PluginFormat is reserved for backward compatibility.
//!
//! It is an alias of TensorFormat, so both names refer to the same type and enumerators.
//!
//! \see IPluginExt::getPluginFormats()
//!
using PluginFormat = TensorFormat;
//! Maximum number of elements in TensorFormat enum. \see TensorFormat
template <>
constexpr inline int32_t EnumMax<TensorFormat>()
{
    // One past the last enumerator: kHWC is 8, so the result is 9.
    return static_cast<int32_t>(TensorFormat::kHWC) + 1;
}
//! \struct PluginTensorDesc
//!
//! \brief Fields that a plugin might see for an input or output.
//!
//! Scale is only valid when data type is DataType::kINT8. TensorRT will set
//! the value to -1.0f if it is invalid.
//!
//! \see IPluginV2IOExt::supportsFormat
//! \see IPluginV2IOExt::configurePlugin
//!
struct PluginTensorDesc
{
Dims dims; //!< Dimensions of the tensor.
DataType type; //!< Data type of the tensor. \warning DataType::kBOOL not supported.
TensorFormat format; //!< Format of the tensor.
float scale; //!< Scale of the tensor; only valid for DataType::kINT8, otherwise set to -1.0f by TensorRT.
};
//! \enum PluginVersion
//!
//! \brief Tags that identify the plugin interface a plugin implements.
//!
//! The tag is stored in the upper byte of the value returned by getTensorRTVersion().
//!
enum class PluginVersion : uint8_t
{
    //! IPluginV2
    kV2 = 0,
    //! IPluginV2Ext
    kV2_EXT = 1,
    //! IPluginV2IOExt
    kV2_IOEXT = 2,
    //! IPluginV2DynamicExt
    kV2_DYNAMICEXT = 3,
};
//! \class IPluginV2
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. When
//! combined with IPluginCreator it provides a mechanism to register plugins and
//! look up the Plugin Registry during de-serialization.
//!
//! \see IPluginCreator
//! \see IPluginRegistry
//!
class IPluginV2
{
public:
//!
//! \brief Return the API version with which this plugin was built.
//!
//! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
//! plugins.
//!
//! \return NV_TENSORRT_VERSION as seen when this header was compiled into the plugin.
//!
virtual int32_t getTensorRTVersion() const TRTNOEXCEPT
{
return NV_TENSORRT_VERSION;
}
//!
//! \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator.
//! \see IPluginCreator::getPluginName()
//!
virtual const char* getPluginType() const TRTNOEXCEPT = 0;
//!
//! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator.
//! \see IPluginCreator::getPluginVersion()
//!
virtual const char* getPluginVersion() const TRTNOEXCEPT = 0;
//!
//! \brief Get the number of outputs from the layer.
//!
//! \return The number of outputs.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
virtual int32_t getNbOutputs() const TRTNOEXCEPT = 0;
//!
//! \brief Get the dimension of an output tensor.
//!
//! \param index The index of the output tensor.
//! \param inputs The dimensions of the input tensors.
//! \param nbInputDims The number of input tensors.
//!
//! \return The dimensions of the output tensor at the given index.
//!
//! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
//! prior to any call to initialize().
//!
virtual Dims getOutputDimensions(int32_t index, const Dims* inputs, int32_t nbInputDims) TRTNOEXCEPT = 0;
//!
//! \brief Check format support.
//!
//! \param type DataType requested.
//! \param format PluginFormat requested.
//! \return true if the plugin supports the type-format combination.
//!
//! This function is called by the implementations of INetworkDefinition, IBuilder, and
//! safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an
//! engine.
//!
//! \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use IPluginV2IOExt
//! or IPluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType::kBOOL not supported.
//!
virtual bool supportsFormat(DataType type, PluginFormat format) const TRTNOEXCEPT = 0;
//!
//! \brief Configure the layer.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of its weights, dimensions, and maximum batch size.
//!
//! \param inputDims The input tensor dimensions.
//! \param nbInputs The number of inputs.
//! \param outputDims The output tensor dimensions.
//! \param nbOutputs The number of outputs.
//! \param type The data type selected for the engine.
//! \param format The format selected for the engine.
//! \param maxBatchSize The maximum batch size.
//!
//! The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be
//! 3-dimensional CHW dimensions).
//!
//! \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
//! will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use IPluginV2IOExt
//! or IPluginV2DynamicExt for other PluginFormats.
//!
//! \warning DataType::kBOOL not supported.
//!
virtual void configureWithFormat(const Dims* inputDims, int32_t nbInputs, const Dims* outputDims, int32_t nbOutputs,
DataType type, PluginFormat format, int32_t maxBatchSize) TRTNOEXCEPT = 0;
//!
//! \brief Initialize the layer for execution. This is called when the engine is created.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
virtual int32_t initialize() TRTNOEXCEPT = 0;
//!
//! \brief Release resources acquired during plugin layer initialization. This is called when the engine is
//! destroyed. \see initialize()
//!
virtual void terminate() TRTNOEXCEPT = 0;
//!
//! \brief Find the workspace size required by the layer.
//!
//! This function is called during engine startup, after initialize(). The workspace size returned should be
//! sufficient for any batch size up to the maximum.
//!
//! \param maxBatchSize The maximum batch size.
//!
//! \return The workspace size.
//!
virtual size_t getWorkspaceSize(int32_t maxBatchSize) const TRTNOEXCEPT = 0;
//!
//! \brief Execute the layer.
//!
//! \param batchSize The number of inputs in the batch.
//! \param inputs The memory for the input tensors.
//! \param outputs The memory for the output tensors.
//! \param workspace Workspace for execution.
//! \param stream The stream in which to execute the kernels.
//!
//! \return 0 for success, else non-zero (which will cause engine termination).
//!
virtual int32_t enqueue(int32_t batchSize, const void* const* inputs, void** outputs, void* workspace,
cudaStream_t stream) TRTNOEXCEPT = 0;
//!
//! \brief Find the size of the serialization buffer required.
//!
//! \return The size of the serialization buffer.
//!
virtual size_t getSerializationSize() const TRTNOEXCEPT = 0;
//!
//! \brief Serialize the layer.
//!
//! \param buffer A pointer to a buffer to serialize data. Size of buffer must be equal to value returned by
//! getSerializationSize.
//!
//! \see getSerializationSize()
//!
virtual void serialize(void* buffer) const TRTNOEXCEPT = 0;
//!
//! \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed.
//!
virtual void destroy() TRTNOEXCEPT = 0;
//!
//! \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object
//! with these parameters.
//!
virtual IPluginV2* clone() const TRTNOEXCEPT = 0;
//!
//! \brief Set the namespace that this plugin object belongs to. Ideally, all plugin
//! objects from the same plugin library should have the same namespace.
//!
virtual void setPluginNamespace(const char* pluginNamespace) TRTNOEXCEPT = 0;
//!
//! \brief Return the namespace of the plugin object.
//!
virtual const char* getPluginNamespace() const TRTNOEXCEPT = 0;
protected:
// The destructor is protected, so code outside the class hierarchy cannot delete a plugin through an
// IPluginV2 pointer; destroy() is the public way to release a plugin object.
virtual ~IPluginV2() {}
};
//! \class IPluginV2Ext
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This
//! interface provides additional capabilities to the IPluginV2 interface by
//! supporting different output data types and broadcast across batch.
//!
//! \note The declaration order of the virtual functions is kept exactly as it was; do not reorder them.
//!
//! \see IPluginV2
//!
class IPluginV2Ext : public IPluginV2
{
public:
    //!
    //! \brief Return the DataType of the plugin output at the requested index.
    //! The default behavior should be to return the type of the first input, or DataType::kFLOAT if the layer has no
    //! inputs. The returned data type must have a format that is supported by the plugin. \see supportsFormat()
    //!
    //! \param index The index of the requested output.
    //! \param inputTypes The data types of the inputs.
    //! \param nbInputs The number of inputs.
    //!
    //! \warning DataType::kBOOL not supported.
    //!
    virtual nvinfer1::DataType getOutputDataType(
        int32_t index, const nvinfer1::DataType* inputTypes, int32_t nbInputs) const TRTNOEXCEPT = 0;

    //! \brief Return true if output tensor is broadcast across a batch.
    //!
    //! \param outputIndex The index of the output
    //! \param inputIsBroadcasted The ith element is true if the tensor for the ith input is broadcast across a batch.
    //! \param nbInputs The number of inputs
    //!
    //! The values in inputIsBroadcasted refer to broadcasting at the semantic level,
    //! i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests
    //! physical replication of the values.
    //!
    virtual bool isOutputBroadcastAcrossBatch(
        int32_t outputIndex, const bool* inputIsBroadcasted, int32_t nbInputs) const TRTNOEXCEPT = 0;

    //! \brief Return true if plugin can use input that is broadcast across batch without replication.
    //!
    //! \param inputIndex Index of input that could be broadcast.
    //!
    //! For each input whose tensor is semantically broadcast across a batch,
    //! TensorRT calls this method before calling configurePlugin.
    //! If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor;
    //! i.e., there will be a single copy that the plugin should share across the batch.
    //! If it returns false, TensorRT will replicate the input tensor
    //! so that it appears like a non-broadcasted tensor.
    //!
    //! This method is called only for inputs that can be broadcast.
    //!
    virtual bool canBroadcastInputAcrossBatch(int32_t inputIndex) const TRTNOEXCEPT = 0;

    //!
    //! \brief Configure the layer with input and output data types.
    //!
    //! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
    //! algorithm choices on the basis of its weights, dimensions, data types and maximum batch size.
    //!
    //! \param inputDims The input tensor dimensions.
    //! \param nbInputs The number of inputs.
    //! \param outputDims The output tensor dimensions.
    //! \param nbOutputs The number of outputs.
    //! \param inputTypes The data types selected for the plugin inputs.
    //! \param outputTypes The data types selected for the plugin outputs.
    //! \param inputIsBroadcast True for each input that the plugin must broadcast across the batch.
    //! \param outputIsBroadcast True for each output that TensorRT will broadcast across the batch.
    //! \param floatFormat The format selected for the engine for the floating point inputs/outputs.
    //! \param maxBatchSize The maximum batch size.
    //!
    //! The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be
    //! 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for
    //! that input or output should be treated as if it is one. \ref inputIsBroadcast[i] is true only if the input is
    //! semantically broadcast across the batch and \ref canBroadcastInputAcrossBatch(i) returned true. \ref
    //! outputIsBroadcast[i] is true only if \ref isOutputBroadcastAcrossBatch(i) returned true.
    //!
    //! \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and
    //! PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use
    //! IPluginV2IOExt or IPluginV2DynamicExt for other PluginFormats.
    //!
    virtual void configurePlugin(const Dims* inputDims, int32_t nbInputs, const Dims* outputDims, int32_t nbOutputs,
        const DataType* inputTypes, const DataType* outputTypes, const bool* inputIsBroadcast,
        const bool* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) TRTNOEXCEPT = 0;

    virtual ~IPluginV2Ext() {}

    //!
    //! \brief Attach the plugin object to an execution context and grant the plugin the access to some context
    //! resource.
    //!
    //! \param cudnn The cudnn context handle of the execution context
    //! \param cublas The cublas context handle of the execution context
    //! \param allocator The allocator used by the execution context
    //!
    //! This function is called automatically for each plugin when a new execution context is created.
    //! If the plugin needs per-context resource, it can be allocated here.
    //! The plugin can also get context-owned CUDNN and CUBLAS context here.
    //!
    //! The default implementation does nothing.
    //!
    virtual void attachToContext(
        cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*allocator*/) TRTNOEXCEPT
    {
    }

    //!
    //! \brief Detach the plugin object from its execution context.
    //!
    //! This function is called automatically for each plugin when an execution context is destroyed.
    //! If the plugin owns per-context resource, it can be released here.
    //!
    //! The default implementation does nothing.
    //!
    virtual void detachFromContext() TRTNOEXCEPT {}

    //!
    //! \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin
    //! object with these parameters.
    //! If the source plugin is pre-configured with configurePlugin(), the returned object should also be
    //! pre-configured. The returned object should allow attachToContext() with a new execution context.
    //! Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object
    //! (e.g. via ref-counting) to avoid duplication.
    //!
    // TRTNOEXCEPT is placed before _TENSORRT_OVERRIDE because the C++ grammar requires an exception
    // specification to precede override/final; the previous order only compiled while TRTNOEXCEPT is empty.
    virtual IPluginV2Ext* clone() const TRTNOEXCEPT _TENSORRT_OVERRIDE = 0;

protected:
    //!
    //! \brief Return the API version with which this plugin was built. The
    //! upper byte is reserved by TensorRT and is used to differentiate this from IPluginV2.
    //!
    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
    //! plugins.
    //!
    //! \return PluginVersion::kV2_EXT shifted into bits 24 and above, combined with the low 24 bits of
    //! NV_TENSORRT_VERSION.
    //!
    int32_t getTensorRTVersion() const TRTNOEXCEPT _TENSORRT_OVERRIDE
    {
        return (static_cast<int32_t>(PluginVersion::kV2_EXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
    }

    //!
    //! \brief Derived classes should not implement this. In a C++11 API it would be override final.
    //!
    //! This override of the IPluginV2 configuration entry point is intentionally empty; configurePlugin() is the
    //! configuration entry point of this interface.
    //!
    void configureWithFormat(const Dims* /*inputDims*/, int32_t /*nbInputs*/, const Dims* /*outputDims*/,
        int32_t /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/,
        int32_t /*maxBatchSize*/) TRTNOEXCEPT _TENSORRT_OVERRIDE
    {
    }
};
//! \class IPluginV2IOExt
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Plugins are a mechanism for applications to implement custom layers. This interface provides additional
//! capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats.
//!
//! \see IPluginV2Ext
//!
class IPluginV2IOExt : public IPluginV2Ext
{
public:
//!
//! \brief Configure the layer.
//!
//! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
//! algorithm choices on the basis of I/O PluginTensorDesc and the maximum batch size.
//!
//! \param in The input tensor attributes that are used for configuration.
//! \param nbInput Number of input tensors.
//! \param out The output tensor attributes that are used for configuration.
//! \param nbOutput Number of output tensors.
//!
virtual void configurePlugin(
const PluginTensorDesc* in, int32_t nbInput, const PluginTensorDesc* out, int32_t nbOutput) TRTNOEXCEPT = 0;
//!
//! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
//!
//! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
//! Using this numbering, pos is an index into inOut, where 0 <= pos < nbInputs+nbOutputs.
//!
//! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
//! by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos]
//! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
//! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
//! that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1],
//! which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only.
//!
//! Some examples:
//!
//! * A definition for a plugin that supports only FP16 NCHW:
//!
//! return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
//!
//! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
//! and FP32 NCHW for its single output:
//!
//! return inOut[pos].format == TensorFormat::kLINEAR
//! && inOut[pos].type == (pos < 2 ? DataType::kHALF : DataType::kFLOAT);
//!
//! * A definition for a "polymorphic" plugin with two inputs and one output that supports
//! any format or type, but the inputs and output must have the same format and type:
//!
//! return pos == 0 || (inOut[pos].format == inOut[0].format && inOut[pos].type == inOut[0].type);
//!
//! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations.
//!
//! \param pos Index into inOut of the input/output being queried.
//! \param inOut Descriptions of the inputs followed by the outputs.
//! \param nbInputs The number of inputs.
//! \param nbOutputs The number of outputs.
//!
virtual bool supportsFormatCombination(
int32_t pos, const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) const TRTNOEXCEPT = 0;
protected:
//!
//! \brief Report the API version this plugin was built against. TensorRT reserves the upper byte and uses it
//! to tell this interface apart from IPluginV2 and IPluginV2Ext.
//!
//! Do not override this method; the TensorRT library relies on it to stay backwards-compatible with plugins.
//!
//! \deprecated Deprecated interface will be removed in TensorRT 8.0.
//!
TRT_DEPRECATED
int32_t getTensorRTVersion() const _TENSORRT_OVERRIDE
{
    // The interface tag occupies the top byte; the library version fills the low 24 bits.
    const int32_t interfaceTag = static_cast<int32_t>(PluginVersion::kV2_IOEXT) << 24;
    const int32_t libraryVersion = NV_TENSORRT_VERSION & 0xFFFFFF;
    return interfaceTag | libraryVersion;
}
//!
//! \brief Deprecated interface inherited from the base class. Derived classes should not implement this. In a C++11
//! API it would be override final.
//!
//! This implementation does nothing; all arguments are ignored.
//!
//! \deprecated Deprecated interface will be removed in TensorRT 8.0.
//!
TRT_DEPRECATED
void configureWithFormat(
const Dims*, int32_t, const Dims*, int32_t, DataType, PluginFormat, int32_t) _TENSORRT_OVERRIDE _TENSORRT_FINAL
{
}
//!
//! \brief Deprecated interface inherited from the base class. Derived classes should not implement this. In a C++11
//! API it would be override final.
//!
//! This implementation does nothing; all arguments are ignored. Plugin configuration is done through the
//! PluginTensorDesc-based configurePlugin() overload of this class instead.
//!
//! \deprecated Deprecated interface will be removed in TensorRT 8.0.
//!
TRT_DEPRECATED
void configurePlugin(const Dims*, int32_t, const Dims*, int32_t, const DataType*, const DataType*, const bool*,
const bool*, PluginFormat, int32_t) _TENSORRT_OVERRIDE _TENSORRT_FINAL
{
}
//!
//! \brief Deprecated interface inherited from the base class. Derived classes should not implement this. In a C++11
//! API it would be override final.
//!
//! This implementation ignores its arguments and always returns false.
//!
//! \deprecated Deprecated interface will be removed in TensorRT 8.0.
//!
TRT_DEPRECATED
bool supportsFormat(DataType, PluginFormat) const _TENSORRT_OVERRIDE _TENSORRT_FINAL
{
return false;
}
};
//!
//! \enum PluginFieldType
//! \brief The possible field types for custom layer.
//!
enum class PluginFieldType : int32_t
{
kFLOAT16 = 0, //!< FP16 field type.
kFLOAT32 = 1, //!< FP32 field type.
kFLOAT64 = 2, //!< FP64 field type.
kINT8 = 3, //!< INT8 field type.
kINT16 = 4, //!< INT16 field type.
kINT32 = 5, //!< INT32 field type.
kCHAR = 6, //!< char field type.
kDIMS = 7, //!< nvinfer1::Dims field type.
kUNKNOWN = 8 //!< Unknown field type; the default type of a PluginField.
};
//!
//! \class PluginField
//!
//! \brief Describes one plugin attribute: its name, a pointer to its data, the type of that data and the
//! number of entries. This information can be parsed to decode the plugin metadata that is needed.
//!
class PluginField
{
public:
    //!
    //! \brief Name of the plugin field attribute.
    //!
    const char* name = nullptr;

    //!
    //! \brief Pointer to the data of the plugin field attribute.
    //!
    const void* data = nullptr;

    //!
    //! \brief Type of the data the attribute holds.
    //! \see PluginFieldType
    //!
    PluginFieldType type = PluginFieldType::kUNKNOWN;

    //!
    //! \brief Number of data entries in the plugin attribute.
    //!
    int32_t length = 0;

    //!
    //! \brief Build a field from its name, data pointer, type and entry count.
    //!
    //! Every argument is optional; with no arguments the field has a null name, null data,
    //! type kUNKNOWN and length 0.
    //!
    PluginField(const char* name_ = nullptr,
                const void* data_ = nullptr,
                const PluginFieldType type_ = PluginFieldType::kUNKNOWN,
                int32_t length_ = 0)
        : name(name_), data(data_), type(type_), length(length_)
    {
    }
};
//!
//! \struct PluginFieldCollection
//!
//! \brief A counted array of PluginField entries.
//!
//! \note The members have no default initializers, so a default-initialized instance holds
//! indeterminate values until both members are assigned.
//!
//! \see PluginField
//!
struct PluginFieldCollection
{
int32_t nbFields; //!< Number of PluginField entries
const PluginField* fields; //!< Pointer to PluginField entries
};
//!
//! \class IPluginCreator
//!
//! \brief Plugin creator class for user implemented layers.
//!
//! \see IPlugin and IPluginFactory
//!
class IPluginCreator
{
public:
//!
//! \brief Return the version of the API the plugin creator was compiled with.
//!
//! The default implementation returns NV_TENSORRT_VERSION.
//!
virtual int32_t getTensorRTVersion() const TRTNOEXCEPT
{
return NV_TENSORRT_VERSION;
}
//!
//! \brief Return the plugin name.
//!
virtual const char* getPluginName() const TRTNOEXCEPT = 0;
//!
//! \brief Return the plugin version.
//!
virtual const char* getPluginVersion() const TRTNOEXCEPT = 0;
//!
//! \brief Return a list of fields that need to be passed to createPlugin.
//! \see PluginFieldCollection
//!
virtual const PluginFieldCollection* getFieldNames() TRTNOEXCEPT = 0;
//!
//! \brief Return a plugin object. Return nullptr in case of error.
//!
virtual IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) TRTNOEXCEPT = 0;
//!
//! \brief Called during deserialization of plugin layer. Return a plugin object.
//!
virtual IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) TRTNOEXCEPT = 0;
//!
//! \brief Set the namespace of the plugin creator based on the plugin
//! library it belongs to. This can be set while registering the plugin creator.
//!
//! \see IPluginRegistry::registerCreator()
//!
virtual void setPluginNamespace(const char* pluginNamespace) TRTNOEXCEPT = 0;
//!
//! \brief Return the namespace of the plugin creator object.
//!
virtual const char* getPluginNamespace() const TRTNOEXCEPT = 0;
//!
//! \brief Public virtual destructor with an empty body.
//!
virtual ~IPluginCreator() {}
};
//!
//! \class IPluginRegistry
//!
//! \brief Single registration point for all plugins in an application. It is
//! used to find plugin implementations during engine deserialization.
//! Internally, the plugin registry is considered to be a singleton so all
//! plugins in an application are part of the same global registry.
//! Note that the plugin registry is only supported for plugins of type
//! IPluginV2 and should also have a corresponding IPluginCreator implementation.
//!
//! \see IPluginV2 and IPluginCreator
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IPluginRegistry
{
public:
//!
//! \brief Register a plugin creator. Returns false if one with same type
//! is already registered.
//!
virtual bool registerCreator(IPluginCreator& creator, const char* pluginNamespace) noexcept = 0;
//!
//! \brief Return all the registered plugin creators and the number of
//! registered plugin creators. Returns nullptr if none found.
//!
virtual IPluginCreator* const* getPluginCreatorList(int32_t* numCreators) const noexcept = 0;
//!
//! \brief Return plugin creator based on plugin type, version and
//! namespace associated with plugin during network creation.
//!
//! pluginNamespace defaults to the empty string when omitted.
//!
virtual IPluginCreator* getPluginCreator(const char* pluginType, const char* pluginVersion, const char* pluginNamespace = "") noexcept = 0;
protected:
//! Protected destructor: code outside the class hierarchy cannot destroy the registry through this interface.
virtual ~IPluginRegistry() noexcept {}
public:
//!
//! \brief Set the ErrorRecorder for this interface
//!
//! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
//! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
//! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
//! a recorder has been registered.
//!
//! \param recorder The error recorder to register with this interface.
//!
//! \see getErrorRecorder
//!
virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
//!
//! \brief Get the ErrorRecorder assigned to this interface.
//!
//! Retrieves the assigned error recorder object for the given class. A default error recorder does not exist,
//! so a nullptr will be returned if setErrorRecorder has not been called, or an ErrorRecorder has not been
//! inherited.
//!
//! \return A pointer to the IErrorRecorder object that has been registered.
//!
//! \see setErrorRecorder
//!
virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
};
//!
//! \enum TensorLocation
//! \brief The location for tensor data storage, device or host.
//!
//! \note EnumMax<TensorLocation>() returns a hard-coded enumerator count; update it when adding an enumerator here.
//!
enum class TensorLocation : int32_t
{
kDEVICE = 0, //!< Data stored on device.
kHOST = 1, //!< Data stored on host.
};
//! Maximum number of elements in TensorLocation enum. \see TensorLocation
//!
//! The value 2 is hard-coded to match the two enumerators (kDEVICE, kHOST) and must be kept in sync by hand.
template <>
constexpr inline int32_t EnumMax<TensorLocation>()
{
return 2;
}
//!
//! \class IGpuAllocator
//!
//! \brief Application-implemented class for controlling allocation on the GPU.