// FastDeploy (latest) -- Fast & Easy to Deploy!
// File: elementwise_base.h
1 // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #pragma once
16 
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <numeric>
#include <vector>

#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/function/eigen.h"
21 
22 namespace fastdeploy {
23 namespace function {
24 
// Defines `name##RawKernel<T>`, the dispatch functor for a binary
// elementwise op `name` (e.g. Add, Mul):
//   - identical shapes        -> SameDimsElementwiseCompute fast path;
//   - x has rank >= y's rank  -> ElementwiseCompute with name##Functor;
//   - y has the larger rank   -> ElementwiseCompute with the
//     Inverse##name##Functor, because the broadcast driver always feeds the
//     larger-rank operand's element as the functor's first argument.
// `axis` is the dimension of the larger operand where the smaller operand's
// dimensions start to align (-1 = align trailing dimensions).
//
// NOTE: all documentation lives above the macro on purpose -- a `//` comment
// ending in the line-continuation backslash would swallow the next line.
// The macro deliberately omits the trailing semicolon; users terminate the
// expansion themselves: DEFINE_ELEMENTWISE_OP(Add);
#define DEFINE_ELEMENTWISE_OP(name) \
  template <typename T> struct name##RawKernel { \
    void operator()(const FDTensor& x, const FDTensor& y, int axis, \
                    FDTensor* out) { \
      if (x.Shape() == y.Shape()) { \
        SameDimsElementwiseCompute<SameDims##name##Functor<T>>()(x, y, out); \
      } else { \
        auto x_dims = x.Shape(); \
        auto y_dims = y.Shape(); \
        if (x_dims.size() >= y_dims.size()) { \
          ElementwiseCompute<name##Functor<T>, T>(x, y, axis, \
                                                  name##Functor<T>(), out); \
        } else { \
          ElementwiseCompute<Inverse##name##Functor<T>, T>( \
              x, y, axis, Inverse##name##Functor<T>(), out); \
        } \
      } \
    } \
  }
44 
45 inline void GetMidDims(const std::vector<int64_t>& x_dims,
46  const std::vector<int64_t>& y_dims, const int axis,
47  int* pre, int* n, int* post,
48  int* is_run_common_broadcast) {
49  *pre = 1;
50  *n = 1;
51  *post = 1;
52  *is_run_common_broadcast = 0;
53  for (int i = 0; i < axis; ++i) {
54  (*pre) *= x_dims[i];
55  }
56  for (int i = 0; i < y_dims.size(); ++i) {
57  if (x_dims[i + axis] != y_dims[i]) {
58  FDASSERT(y_dims[i] == 1 || x_dims[i + axis] == 1,
59  "Broadcast dimension mismatch. Operands "
60  "could not be broadcast together with the shape of "
61  "X = [%s] and the shape of Y = [%s]. Received [%d] "
62  "in X is not equal to [%d] in Y.",
63  Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis],
64  y_dims[i]);
65  *is_run_common_broadcast = 1;
66  return;
67  }
68  (*n) *= y_dims[i];
69  }
70  for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
71  (*post) *= x_dims[i];
72  }
73 }
74 
75 inline std::vector<int64_t>
76 TrimTrailingSingularDims(const std::vector<int64_t>& dims) {
77  // Remove trailing dimensions of size 1 for y
78  auto actual_dims_size = dims.size();
79  for (; actual_dims_size != 0; --actual_dims_size) {
80  if (dims[actual_dims_size - 1] != 1)
81  break;
82  }
83  if (actual_dims_size == dims.size())
84  return dims;
85  std::vector<int64_t> trim_dims;
86  trim_dims.resize(actual_dims_size);
87  for (int i = 0; i < actual_dims_size; ++i) {
88  trim_dims[i] = dims[i];
89  }
90  return trim_dims;
91 }
92 
// Maps the multi-dimensional coordinate in `index_array` (length `max_dim`)
// onto a flat offset into a tensor whose shape is `x_dims_array`. Dimensions
// of size <= 1 are broadcast dimensions: they contribute nothing to the
// offset, so the same element is reused across that axis.
inline int GetElementwiseIndex(const int64_t* x_dims_array, const int max_dim,
                               const int64_t* index_array) {
  int offset = 0;
  for (int d = 0; d < max_dim; ++d) {
    const bool is_broadcast_dim = (x_dims_array[d] <= 1);
    if (is_broadcast_dim) {
      continue;  // size-1 axis: coordinate is ignored
    }
    offset = offset * x_dims_array[d] + index_array[d];
  }
  return offset;
}
103 
// Advances `index_array` (a coordinate within the shape `out_dims_array`)
// to the next element in row-major order, like an odometer: bump the
// least-significant axis and propagate the carry toward axis 0.
inline void UpdateElementwiseIndexArray(const int64_t* out_dims_array,
                                        const int max_dim,
                                        int64_t* index_array) {
  int d = max_dim - 1;
  while (d >= 0) {
    index_array[d] += 1;
    if (index_array[d] < out_dims_array[d]) {
      break;  // no carry: done
    }
    index_array[d] -= out_dims_array[d];  // wrap this axis, carry left
    --d;
  }
}
116 
117 inline void GetBroadcastDimsArrays(const std::vector<int64_t>& x_dims,
118  const std::vector<int64_t>& y_dims,
119  int64_t* x_dims_array, int64_t* y_dims_array,
120  int64_t* out_dims_array, const int max_dim,
121  const int axis) {
122  FDASSERT(axis >= 0,
123  "Axis should be great than or equal to 0, but received axis is %d.",
124  axis);
125  FDASSERT(axis < max_dim,
126  "Axis should be less than %d, but received axis is %d.", max_dim,
127  axis);
128  if (x_dims.size() > y_dims.size()) {
129  std::fill(y_dims_array, y_dims_array + axis, 1);
130  if (axis + y_dims.size() < max_dim) {
131  std::fill(y_dims_array + axis + y_dims.size(), y_dims_array + max_dim, 1);
132  }
133  std::copy(x_dims.data(), x_dims.data() + x_dims.size(), x_dims_array);
134  std::copy(y_dims.data(), y_dims.data() + y_dims.size(),
135  y_dims_array + axis);
136  } else {
137  std::fill(x_dims_array, x_dims_array + axis, 1);
138  if (axis + x_dims.size() < max_dim) {
139  std::fill(x_dims_array + axis + x_dims.size(), x_dims_array + max_dim, 1);
140  }
141  std::copy(x_dims.data(), x_dims.data() + x_dims.size(),
142  x_dims_array + axis);
143  std::copy(y_dims.data(), y_dims.data() + y_dims.size(), y_dims_array);
144  }
145 
146  for (int i = 0; i < max_dim; i++) {
147  FDASSERT(x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 ||
148  y_dims_array[i] <= 1,
149  "Broadcast dimension mismatch. Operands "
150  "could not be broadcast together with the shape of "
151  "X = [%s] and the shape of Y = [%s]. Received [%d] "
152  "in X is not equal to [%d] in Y.",
153  Str(x_dims).c_str(), Str(y_dims).c_str(), x_dims[i + axis],
154  y_dims[i]);
155  if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) ||
156  (x_dims_array[i] == 1 && y_dims_array[i] == 1)) {
157  out_dims_array[i] = (std::max)(x_dims_array[i], y_dims_array[i]);
158  } else {
159  out_dims_array[i] = -1;
160  }
161  }
162 }
163 
164 template <typename Functor, typename T, typename OutType = T>
165 void CommonForwardBroadcastCPU(const FDTensor& x, const FDTensor& y,
166  FDTensor* z, int64_t* x_dims_array,
167  int64_t* y_dims_array, int64_t* out_dims_array,
168  int max_dim, Functor func,
169  const bool is_xsize_larger = true) {
170  std::vector<int64_t> index_array(max_dim, 0);
171  const T* x_data = reinterpret_cast<const T*>(x.Data());
172  const T* y_data = reinterpret_cast<const T*>(y.Data());
173  FDASSERT(x_data != nullptr, "The input X should not be empty.");
174  FDASSERT(y_data != nullptr, "The input X should not be empty.");
175  OutType* out_data = reinterpret_cast<OutType*>(z->Data());
176 
177  const int out_size = std::accumulate(out_dims_array, out_dims_array + max_dim,
178  1, std::multiplies<int64_t>());
179  int x_index, y_index;
180  for (int out_index = 0; out_index < out_size; ++out_index) {
181  x_index = GetElementwiseIndex(x_dims_array, max_dim, index_array.data());
182  y_index = GetElementwiseIndex(y_dims_array, max_dim, index_array.data());
183  if (is_xsize_larger) {
184  out_data[out_index] = func(x_data[x_index], y_data[y_index]);
185  } else {
186  out_data[out_index] = func(y_data[y_index], x_data[x_index]);
187  }
188 
189  UpdateElementwiseIndexArray(out_dims_array, max_dim, index_array.data());
190  }
191 }
192 
193 template <typename Functor, typename T, typename OutType = T>
194 void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
195  FDTensor* z,
196  const std::vector<int64_t>& x_dims,
197  const std::vector<int64_t>& y_dims,
198  Functor func, int axis,
199  const bool is_xsize_larger = true) {
200  int x_dims_size = x_dims.size();
201  int y_dims_size = y_dims.size();
202  int max_dim = (std::max)(x_dims_size, y_dims_size);
203  axis = (axis == -1 ? std::abs(x_dims_size - y_dims_size) : axis);
204  FDASSERT(axis >= 0,
205  "Axis should be great than or equal to 0, but received axis is %d.",
206  axis);
207  FDASSERT(axis < max_dim,
208  "Axis should be less than %d, but received axis is %d.", max_dim,
209  axis);
210  std::vector<int64_t> x_dims_array(max_dim);
211  std::vector<int64_t> y_dims_array(max_dim);
212  std::vector<int64_t> out_dims_array(max_dim);
213  GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
214  y_dims_array.data(), out_dims_array.data(), max_dim,
215  axis);
216  FDTensor tmp;
217  tmp.Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
218  CommonForwardBroadcastCPU<Functor, T, OutType>(
219  x, y, &tmp, x_dims_array.data(), y_dims_array.data(),
220  out_dims_array.data(), max_dim, func, is_xsize_larger);
221  *z = std::move(tmp);
222 }
223 
// Binary elementwise entry point: validates that x and y are
// broadcast-compatible along `axis` (-1 = align trailing dimensions), then
// evaluates `func` over the broadcasted shapes into *z.
//
// NOTE(review): `func` always receives the larger-rank operand's element
// first (see CommonForwardBroadcastCPU / is_xsize_larger); callers with
// y-rank > x-rank pass an Inverse functor, per DEFINE_ELEMENTWISE_OP.
template <typename Functor, typename T, typename OutType = T>
void ElementwiseCompute(const FDTensor& x, const FDTensor& y, int axis,
                        Functor func, FDTensor* z) {
  auto x_dims = x.Shape();
  auto y_dims = y.Shape();
  // The operand with the larger rank determines the broadcast layout.
  bool is_xsize_larger = true;
  int max_dim = x_dims.size();
  if (x_dims.size() < y_dims.size()) {
    is_xsize_larger = false;
    max_dim = y_dims.size();
  }

  // axis == -1 means "align trailing dimensions": offset by rank difference.
  int diff_size = x_dims.size() - y_dims.size();
  axis = (axis == -1 ? std::abs(diff_size) : axis);
  FDASSERT(axis >= 0,
           "Axis should be great than or equal to 0, but received axis is %d.",
           axis);
  FDASSERT(axis < max_dim,
           "Axis should be less than %d, but received axis is %d.", max_dim,
           axis);

  // NOTE(review): pre/n/post/is_run_common_broadcast are never consumed
  // below -- the GetMidDims call is kept for its FDASSERT side effect, which
  // rejects incompatible shapes early. Presumably a pre/n/post fast path
  // existed (or was planned) here; confirm before removing this call.
  int pre, n, post, is_run_common_broadcast, axis_trim = 0;
  if (is_xsize_larger) {
    // Trailing size-1 dims of the smaller operand never affect broadcasting.
    auto y_dims_trimed = TrimTrailingSingularDims(y_dims);
    axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis;
    GetMidDims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post,
               &is_run_common_broadcast);
  } else {
    auto x_dims_trimed = TrimTrailingSingularDims(x_dims);
    axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis;
    GetMidDims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post,
               &is_run_common_broadcast);
  }
  // special case for common implementation.
  // case 1: x=[2,3,1,5], y=[2,1,4,1]
  // case 2: x=[2,3,4], y=[1,1,4]
  CommonElementwiseBroadcastForward<Functor, T, OutType>(
      x, y, z, x_dims, y_dims, func, axis, is_xsize_larger);
}
263 
264 } // namespace function
265 } // namespace fastdeploy
// All C++ FastDeploy APIs are defined inside this namespace.
// Definition: option.h:16