19 #include "fastdeploy/core/fd_tensor.h" 20 #include "fastdeploy/function/eigen.h" 25 #define DEFINE_ELEMENTWISE_OP(name) \ 26 template <typename T> struct name##RawKernel { \ 27 void operator()(const FDTensor& x, const FDTensor& y, int axis, \ 29 if (x.Shape() == y.Shape()) { \ 30 SameDimsElementwiseCompute<SameDims##name##Functor<T>>()(x, y, out); \ 32 auto x_dims = x.Shape(); \ 33 auto y_dims = y.Shape(); \ 34 if (x_dims.size() >= y_dims.size()) { \ 35 ElementwiseCompute<name##Functor<T>, T>(x, y, axis, \ 36 name##Functor<T>(), out); \ 38 ElementwiseCompute<Inverse##name##Functor<T>, T>( \ 39 x, y, axis, Inverse##name##Functor<T>(), out); \ 45 inline void GetMidDims(
const std::vector<int64_t>& x_dims,
46 const std::vector<int64_t>& y_dims,
const int axis,
47 int* pre,
int* n,
int* post,
48 int* is_run_common_broadcast) {
52 *is_run_common_broadcast = 0;
53 for (
int i = 0; i < axis; ++i) {
56 for (
int i = 0; i < y_dims.size(); ++i) {
57 if (x_dims[i + axis] != y_dims[i]) {
58 FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1,
59 "Broadcast dimension mismatch. Operands " 60 "could not be broadcast together with the shape of " 61 "X = [%s] and the shape of Y = [%s]. Received [%d] " 62 "in X is not equal to [%d] in Y.",
63 Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis],
65 *is_run_common_broadcast = 1;
70 for (
int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
75 inline std::vector<int64_t>
76 TrimTrailingSingularDims(
const std::vector<int64_t>& dims) {
78 auto actual_dims_size = dims.size();
79 for (; actual_dims_size != 0; --actual_dims_size) {
80 if (dims[actual_dims_size - 1] != 1)
83 if (actual_dims_size == dims.size())
85 std::vector<int64_t> trim_dims;
86 trim_dims.resize(actual_dims_size);
87 for (
int i = 0; i < actual_dims_size; ++i) {
88 trim_dims[i] = dims[i];
// Maps a multi-dimensional coordinate (`index_array`, length `max_dim`) to a
// flat row-major offset into a tensor whose per-axis sizes are `x_dims_array`.
// Axes of size <= 1 are skipped, which implements broadcasting: along a
// broadcast axis the same source element is re-read for every coordinate.
inline int GetElementwiseIndex(const int64_t* x_dims_array, const int max_dim,
                               const int64_t* index_array) {
  int index_ = 0;
  for (int i = 0; i < max_dim; i++) {
    if (x_dims_array[i] > 1) {
      index_ = index_ * x_dims_array[i] + index_array[i];
    }
  }
  return index_;
}
// Advances `index_array` (length `max_dim`) to the next coordinate of a tensor
// shaped `out_dims_array`, in row-major order — odometer-style: increment the
// last axis and carry into more-significant axes on overflow.
inline void UpdateElementwiseIndexArray(const int64_t* out_dims_array,
                                        const int max_dim,
                                        int64_t* index_array) {
  for (int i = max_dim - 1; i >= 0; --i) {
    ++index_array[i];
    if (index_array[i] >= out_dims_array[i]) {
      // Wrap this axis and carry into the next-more-significant one.
      index_array[i] -= out_dims_array[i];
    } else {
      // No carry: the coordinate is fully advanced.
      break;
    }
  }
}
117 inline void GetBroadcastDimsArrays(
const std::vector<int64_t>& x_dims,
118 const std::vector<int64_t>& y_dims,
119 int64_t* x_dims_array, int64_t* y_dims_array,
120 int64_t* out_dims_array,
const int max_dim,
123 "Axis should be great than or equal to 0, but received axis is %d.",
125 FDASSERT(axis < max_dim,
126 "Axis should be less than %d, but received axis is %d.", max_dim,
128 if (x_dims.size() > y_dims.size()) {
129 std::fill(y_dims_array, y_dims_array + axis, 1);
130 if (axis + y_dims.size() < max_dim) {
131 std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1);
133 std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array);
134 std::copy(y_dims.data(), y_dims.data() + y_dims.size(),
135 y_dims_array + axis);
137 std::fill(x_dims_array, x_dims_array + axis, 1);
138 if (axis + x_dims.size() < max_dim) {
139 std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1);
141 std::copy(x_dims.data(), x_dims.data() + x_dims.size(),
142 x_dims_array + axis);
143 std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array);
146 for (
int i = 0; i < max_dim; i++) {
147 FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 ||
148 y_dims_array[i] <= 1,
149 "Broadcast dimension mismatch. Operands " 150 "could not be broadcast together with the shape of " 151 "X = [%s] and the shape of Y = [%s]. Received [%d] " 152 "in X is not equal to [%d] in Y.",
153 Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis],
155 if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) ||
156 (x_dims_array[i] == 1 && y_dims_array[i] == 1)) {
157 out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]);
159 out_dims_array[i] = -1;
164 template <
typename Functor,
typename T,
typename OutType = T>
165 void CommonForwardBroadcastCPU(
const FDTensor& x,
const FDTensor& y,
166 FDTensor* z, int64_t* x_dims_array,
167 int64_t* y_dims_array, int64_t* out_dims_array,
168 int max_dim, Functor func,
169 const bool is_xsize_larger =
true) {
170 std::vector<int64_t> index_array(max_dim, 0);
171 const T* x_data =
reinterpret_cast<const T*
>(x.Data());
172 const T* y_data =
reinterpret_cast<const T*
>(y.Data());
173 FDASSERT(x_data !=
nullptr,
"The input X should not be empty.");
174 FDASSERT(y_data !=
nullptr,
"The input X should not be empty.");
175 OutType* out_data =
reinterpret_cast<OutType*
>(z->Data());
177 const int out_size = std::accumulate(out_dims_array, out_dims_array + max_dim,
178 1, std::multiplies<int64_t>());
179 int x_index, y_index;
180 for (
int out_index = 0; out_index < out_size; ++out_index) {
181 x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data());
182 y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data());
183 if (is_xsize_larger) {
184 out_data[out_index] = func(x_data[x_index], y_data[y_index]);
186 out_data[out_index] = func(y_data[y_index], x_data[x_index]);
189 UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data());
193 template <
typename Functor,
typename T,
typename OutType = T>
194 void CommonElementwiseBroadcastForward(
const FDTensor& x,
const FDTensor& y,
196 const std::vector<int64_t>& x_dims,
197 const std::vector<int64_t>& y_dims,
198 Functor func,
int axis,
199 const bool is_xsize_larger =
true) {
200 int x_dims_size = x_dims.size();
201 int y_dims_size = y_dims.size();
202 int max_dim = (std::max)(x_dims_size, y_dims_size);
203 axis = (axis == -1 ? std::abs(x_dims_size - y_dims_size) : axis);
205 "Axis should be great than or equal to 0, but received axis is %d.",
207 FDASSERT(axis < max_dim,
208 "Axis should be less than %d, but received axis is %d.", max_dim,
210 std::vector<int64_t> x_dims_array(max_dim);
211 std::vector<int64_t> y_dims_array(max_dim);
212 std::vector<int64_t> out_dims_array(max_dim);
213 GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
214 y_dims_array.data(), out_dims_array.data(), max_dim,
217 tmp.Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
218 CommonForwardBroadcastCPU<Functor, T, OutType>(
219 x, y, &tmp, x_dims_array.data(), y_dims_array.data(),
220 out_dims_array.data(), max_dim, func, is_xsize_larger);
224 template <
typename Functor,
typename T,
typename OutType = T>
225 void ElementwiseCompute(
const FDTensor& x,
const FDTensor& y,
int axis,
226 Functor func, FDTensor* z) {
227 auto x_dims = x.Shape();
228 auto y_dims = y.Shape();
229 bool is_xsize_larger =
true;
230 int max_dim = x_dims.size();
231 if (x_dims.size() < y_dims.size()) {
232 is_xsize_larger =
false;
233 max_dim = y_dims.size();
236 int diff_size = x_dims.size() - y_dims.size();
237 axis = (axis == -1 ? std::abs(diff_size) : axis);
239 "Axis should be great than or equal to 0, but received axis is %d.",
241 FDASSERT(axis < max_dim,
242 "Axis should be less than %d, but received axis is %d.", max_dim,
245 int pre, n, post, is_run_common_broadcast, axis_trim = 0;
246 if (is_xsize_larger) {
247 auto y_dims_trimed = TrimTrailingSingularDims(y_dims);
248 axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis;
249 GetMidDims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post,
250 &is_run_common_broadcast);
252 auto x_dims_trimed = TrimTrailingSingularDims(x_dims);
253 axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis;
254 GetMidDims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post,
255 &is_run_common_broadcast);
260 CommonElementwiseBroadcastForward<Functor, T, OutType>(
261 x, y, z, x_dims, y_dims, func, axis, is_xsize_larger);
All C++ FastDeploy APIs are defined inside the fastdeploy namespace.
Definition: option.h, line 16.