-
Notifications
You must be signed in to change notification settings - Fork 4.3k
/
TritonClient.h
100 lines (83 loc) · 3.16 KB
/
TritonClient.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#ifndef HeterogeneousCore_SonicTriton_TritonClient
#define HeterogeneousCore_SonicTriton_TritonClient
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "HeterogeneousCore/SonicCore/interface/SonicClient.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonService.h"

#include <cstdint>
#include <exception>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "grpc_client.h"
#include "grpc_service.pb.h"
// Batching mode selector used by TritonClient (see batchMode(), setBatchMode()
// and resetBatchMode()). The enumerators carry explicit values starting at 1.
// NOTE(review): the names suggest "all entries share one shape" vs. "entries may
// differ in shape" -- confirm against the implementation before relying on it.
enum class TritonBatchMode {
  Rectangular = 1,
  Ragged = 2,
};
// SonicClient specialization (TritonInputMap -> TritonOutputMap) that owns a
// triton::client::InferenceServerGrpcClient and the per-request InferOptions.
// Only declarations live here; all non-inline members are defined elsewhere.
class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
public:
  // Plain aggregate of server-side counters and timings, as returned by
  // summarizeServerStats() and consumed by reportServerSideStats().
  // Fields ending in _ns_ are presumably nanoseconds -- confirm in the .cc.
  // Every field is zero-initialized so a default-constructed instance never
  // exposes indeterminate values.
  struct ServerSideStats {
    uint64_t inference_count_ = 0;
    uint64_t execution_count_ = 0;
    uint64_t success_count_ = 0;
    uint64_t cumm_time_ns_ = 0;
    uint64_t queue_time_ns_ = 0;
    uint64_t compute_input_time_ns_ = 0;
    uint64_t compute_infer_time_ns_ = 0;
    uint64_t compute_output_time_ns_ = 0;
  };

  // Builds the client from its configuration; debugName is forwarded as an
  // identifying label (exact use is in the out-of-line definition).
  TritonClient(const edm::ParameterSet& params, const std::string& debugName);

  // Overrides the base-class destructor; defined out of line.
  ~TritonClient() override;

  // --- accessors and mutators ---
  unsigned batchSize() const;
  TritonBatchMode batchMode() const { return batchMode_; }
  bool verbose() const { return verbose_; }
  bool useSharedMemory() const { return useSharedMemory_; }
  void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }
  // Returns a bool; presumably whether the requested size was accepted -- confirm in the .cc.
  bool setBatchSize(unsigned bsize);
  void setBatchMode(TritonBatchMode batchMode);
  void resetBatchMode();
  // Overrides the SonicClient reset hook.
  void reset() override;
  TritonServerType serverType() const { return serverType_; }
  bool isLocal() const { return isLocal_; }

  // For fillDescriptions: fills iDesc with this client's parameter description.
  static void fillPSetDescription(edm::ParameterSetDescription& iDesc);

protected:
  // --- helpers ---
  bool noOuterDim() const { return noOuterDim_; }
  unsigned outerDim() const { return outerDim_; }
  unsigned nEntries() const;
  // Consumes the InferResult objects handed back by the Triton client library.
  void getResults(const std::vector<std::shared_ptr<triton::client::InferResult>>& results);
  // Overrides the SonicClient evaluation hook.
  void evaluate() override;

  // Declared only; the definition is not in this header, so it is usable only
  // from translation units that see that definition.
  // NOTE(review): presumably runs `call` and reports via the bool whether it
  // completed without an exception -- confirm in the .cc.
  template <typename F>
  bool handle_exception(F&& call);

  void reportServerSideStats(const ServerSideStats& stats) const;
  // Produces a ServerSideStats from two ModelStatistics snapshots
  // (presumably end minus start -- confirm in the .cc).
  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
                                       const inference::ModelStatistics& end_status) const;
  inference::ModelStatistics getServerSideStatus() const;

  // --- members ---
  // No default initializers are given here; the constructor (defined outside
  // this header) is expected to set them -- confirm. Declaration order is kept
  // as-is because it fixes member initialization order.
  unsigned maxOuterDim_;
  unsigned outerDim_;
  bool noOuterDim_;
  unsigned nEntries_;
  TritonBatchMode batchMode_;
  bool manualBatchMode_;
  bool verbose_;
  bool useSharedMemory_;
  TritonServerType serverType_;
  bool isLocal_;
  grpc_compression_algorithm compressionAlgo_;
  triton::client::Headers headers_;

  std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;
  // stores timeout, model name and version
  std::vector<triton::client::InferOptions> options_;

private:
  // The input/output data classes are friends so they can reach the private
  // accessors below.
  friend TritonInputData;
  friend TritonOutputData;

  // Private accessors only used by the data classes.
  // Returns a non-owning raw pointer; ownership stays with client_.
  auto client() { return client_.get(); }
  void addEntry(unsigned entry);
  void resizeEntries(unsigned entry);
};
#endif