17 #include <cuda_runtime_api.h> 25 #include "NvOnnxParser.h" 26 #include "fastdeploy/runtime/backends/backend.h" 27 #include "fastdeploy/runtime/backends/tensorrt/utils.h" 28 #include "fastdeploy/runtime/backends/tensorrt/option.h" 29 #include "fastdeploy/utils/unique_ptr.h" 31 class Int8EntropyCalibrator2 :
public nvinfer1::IInt8EntropyCalibrator2 {
33 explicit Int8EntropyCalibrator2(
const std::string& calibration_cache)
34 : calibration_cache_(calibration_cache) {}
36 int getBatchSize() const noexcept
override {
return 0; }
38 bool getBatch(
void* bindings[],
const char* names[],
39 int nbBindings) noexcept
override {
43 const void* readCalibrationCache(
size_t& length) noexcept
override {
44 length = calibration_cache_.size();
45 return length ? calibration_cache_.data() :
nullptr;
48 void writeCalibrationCache(
const void* cache,
49 size_t length) noexcept
override {
50 fastdeploy::FDERROR <<
"NOT IMPLEMENT." << std::endl;
54 const std::string calibration_cache_;
61 std::vector<int> shape;
62 nvinfer1::DataType dtype;
63 FDDataType original_dtype;
66 std::vector<int> toVec(
const nvinfer1::Dims& dim);
67 size_t TrtDataTypeSize(
const nvinfer1::DataType& dtype);
68 FDDataType GetFDDataType(
const nvinfer1::DataType& dtype);
70 class TrtBackend :
public BaseBackend {
72 TrtBackend() : engine_(nullptr), context_(nullptr) {}
74 bool Init(
const RuntimeOption& runtime_option);
75 bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
76 bool copy_to_fd =
true)
override;
78 int NumInputs()
const {
return inputs_desc_.size(); }
79 int NumOutputs()
const {
return outputs_desc_.size(); }
80 TensorInfo GetInputInfo(
int index);
81 TensorInfo GetOutputInfo(
int index);
82 std::vector<TensorInfo> GetInputInfos()
override;
83 std::vector<TensorInfo> GetOutputInfos()
override;
84 std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
85 void* stream =
nullptr,
86 int device_id = -1)
override;
95 void BuildOption(
const TrtBackendOption& option);
97 bool InitFromPaddle(
const std::string& model_buffer,
98 const std::string& params_buffer,
99 const TrtBackendOption& option = TrtBackendOption(),
100 bool verbose =
false);
101 bool InitFromOnnx(
const std::string& model_buffer,
102 const TrtBackendOption& option = TrtBackendOption());
104 TrtBackendOption option_;
105 std::shared_ptr<nvinfer1::ICudaEngine> engine_;
106 std::shared_ptr<nvinfer1::IExecutionContext> context_;
107 FDUniquePtr<nvonnxparser::IParser> parser_;
108 FDUniquePtr<nvinfer1::IBuilder> builder_;
109 FDUniquePtr<nvinfer1::INetworkDefinition> network_;
110 cudaStream_t stream_{};
111 std::vector<void*> bindings_;
112 std::vector<TrtValueInfo> inputs_desc_;
113 std::vector<TrtValueInfo> outputs_desc_;
114 std::map<std::string, FDDeviceBuffer> inputs_device_buffer_;
115 std::map<std::string, FDDeviceBuffer> outputs_device_buffer_;
116 std::map<std::string, int> io_name_index_;
118 std::string calibration_str_;
119 bool save_external_ =
false;
120 std::string model_file_name_ =
"";
127 std::map<std::string, int> outputs_order_;
132 std::string onnx_model_buffer_;
136 std::map<std::string, ShapeRangeInfo> shape_range_info_;
143 std::map<std::string, FDTensor> casted_output_tensors_;
145 void GetInputOutputInfo();
146 bool CreateTrtEngineFromOnnx(
const std::string& onnx_model_buffer);
147 bool BuildTrtEngine();
148 bool LoadTrtCache(
const std::string& trt_engine_file);
149 int ShapeRangeInfoUpdated(
const std::vector<FDTensor>& inputs);
150 void SetInputs(
const std::vector<FDTensor>& inputs);
151 void AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
152 bool copy_to_fd =
true);
All C++ FastDeploy APIs are defined inside the `fastdeploy` namespace (declared in option.h, line 16).