10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H 23 template<
typename Broadcast,
typename XprType>
26 typedef typename XprType::Scalar
Scalar;
30 typedef typename XprType::Nested
Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
36 template<
typename Broadcast,
typename XprType>
42 template<
typename Broadcast,
typename XprType>
48 template <
typename Dims>
50 static const bool value =
false;
54 static const bool value =
true;
56 #ifndef EIGEN_EMULATE_CXX11_META_H 57 template <
typename std::size_t... Indices>
59 static const bool value = (
Sizes<Indices...>::total_size == 1);
67 template<
typename Broadcast,
typename XprType>
79 : m_xpr(expr), m_broadcast(broadcast) {}
82 const Broadcast&
broadcast()
const {
return m_broadcast; }
95 template<
typename Broadcast,
typename ArgType,
typename Device>
116 : m_broadcast(op.broadcast()),m_impl(op.expression(), device)
122 const InputDimensions& input_dims = m_impl.dimensions();
123 const Broadcast& broadcast = op.
broadcast();
124 for (
int i = 0; i < NumDims; ++i) {
126 m_dimensions[i] = input_dims[i] * broadcast[i];
129 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
130 m_inputStrides[0] = 1;
131 m_outputStrides[0] = 1;
132 for (
int i = 1; i < NumDims; ++i) {
133 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
134 m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
137 m_inputStrides[NumDims-1] = 1;
138 m_outputStrides[NumDims-1] = 1;
139 for (
int i = NumDims-2; i >= 0; --i) {
140 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
141 m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
149 m_impl.evalSubExprsIfNeeded(NULL);
160 return m_impl.coeff(0);
163 if (static_cast<int>(Layout) == static_cast<int>(
ColMajor)) {
164 return coeffColMajor(index);
166 return coeffRowMajor(index);
173 Index inputIndex = 0;
174 for (
int i = NumDims - 1; i > 0; --i) {
175 const Index idx = index / m_outputStrides[i];
176 if (internal::index_statically_eq<Broadcast>(i, 1)) {
178 inputIndex += idx * m_inputStrides[i];
180 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
183 inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
186 index -= idx * m_outputStrides[i];
188 if (internal::index_statically_eq<Broadcast>(0, 1)) {
192 if (internal::index_statically_eq<InputDimensions>(0, 1)) {
195 inputIndex += (index % m_impl.dimensions()[0]);
198 return m_impl.coeff(inputIndex);
203 Index inputIndex = 0;
204 for (
int i = 0; i < NumDims - 1; ++i) {
205 const Index idx = index / m_outputStrides[i];
206 if (internal::index_statically_eq<Broadcast>(i, 1)) {
208 inputIndex += idx * m_inputStrides[i];
210 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
213 inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
216 index -= idx * m_outputStrides[i];
218 if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
222 if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
223 eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
225 inputIndex += (index % m_impl.dimensions()[NumDims-1]);
228 return m_impl.coeff(inputIndex);
231 template<
int LoadMode>
235 return internal::pset1<PacketReturnType>(m_impl.coeff(0));
238 if (static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
239 return packetColMajor<LoadMode>(index);
241 return packetRowMajor<LoadMode>(index);
247 template<
int LoadMode>
251 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
253 const Index originalIndex = index;
255 Index inputIndex = 0;
256 for (
int i = NumDims - 1; i > 0; --i) {
257 const Index idx = index / m_outputStrides[i];
258 if (internal::index_statically_eq<Broadcast>(i, 1)) {
260 inputIndex += idx * m_inputStrides[i];
262 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
265 inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
268 index -= idx * m_outputStrides[i];
271 if (internal::index_statically_eq<Broadcast>(0, 1)) {
273 innermostLoc = index;
275 if (internal::index_statically_eq<InputDimensions>(0, 1)) {
279 innermostLoc = index % m_impl.dimensions()[0];
282 inputIndex += innermostLoc;
286 if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
287 return m_impl.template packet<Unaligned>(inputIndex);
290 values[0] = m_impl.coeff(inputIndex);
291 for (
int i = 1; i < PacketSize; ++i) {
292 values[i] = coeffColMajor(originalIndex+i);
294 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
299 template<
int LoadMode>
303 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
305 const Index originalIndex = index;
307 Index inputIndex = 0;
308 for (
int i = 0; i < NumDims - 1; ++i) {
309 const Index idx = index / m_outputStrides[i];
310 if (internal::index_statically_eq<Broadcast>(i, 1)) {
312 inputIndex += idx * m_inputStrides[i];
314 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
317 inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
320 index -= idx * m_outputStrides[i];
323 if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
325 innermostLoc = index;
327 if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
328 eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
331 innermostLoc = index % m_impl.dimensions()[NumDims-1];
334 inputIndex += innermostLoc;
338 if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
339 return m_impl.template packet<Unaligned>(inputIndex);
342 values[0] = m_impl.coeff(inputIndex);
343 for (
int i = 1; i < PacketSize; ++i) {
344 values[i] = coeffRowMajor(originalIndex+i);
346 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
353 double compute_cost = TensorOpCost::AddCost<Index>();
355 for (
int i = NumDims - 1; i > 0; --i) {
356 compute_cost += TensorOpCost::DivCost<Index>();
357 if (internal::index_statically_eq<Broadcast>(i, 1)) {
359 TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
361 if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
362 compute_cost += TensorOpCost::MulCost<Index>() +
363 TensorOpCost::ModCost<Index>() +
364 TensorOpCost::AddCost<Index>();
368 TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
371 return m_impl.costPerCoeff(vectorized) +
372 TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
375 EIGEN_DEVICE_FUNC Scalar*
data()
const {
return NULL; }
379 Broadcast
functor()
const {
return m_broadcast; }
392 #endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H const Broadcast m_broadcast
Eigen::internal::traits< TensorBroadcastingOp >::Index Index
#define EIGEN_ALWAYS_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
#define EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar *)
Eigen::internal::traits< TensorBroadcastingOp >::Scalar Scalar
TensorBroadcastingOp< Broadcast, ArgType > XprType
DSizes< Index, NumDims > Dimensions
std::vector< double > values
PacketType< CoeffReturnType, Device >::type PacketReturnType
const Broadcast m_broadcast
traits< XprType > XprTraits
remove_reference< Nested >::type _Nested
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
A cost model used to limit the number of threads used for evaluating tensor expression.
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
TensorEvaluator< ArgType, Device >::Dimensions InputDimensions
Broadcast functor() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
Eigen::internal::traits< TensorBroadcastingOp >::StorageKind StorageKind
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
XprType::CoeffReturnType CoeffReturnType
EIGEN_DEVICE_FUNC const internal::remove_all< typename XprType::Nested >::type & expression() const
XprTraits::StorageKind StorageKind
TensorBroadcastingOp< Broadcast, XprType > type
array< Index, NumDims > m_inputStrides
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
Eigen::internal::nested< TensorBroadcastingOp >::type Nested
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType &expr, const Broadcast &broadcast)
EIGEN_DEVICE_FUNC Scalar * data() const
const TensorEvaluator< ArgType, Device > & impl() const
const TensorBroadcastingOp< Broadcast, XprType > & type
Eigen::NumTraits< Scalar >::Real RealScalar
TensorEvaluator< ArgType, Device > m_impl
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
XprType::CoeffReturnType CoeffReturnType
EIGEN_DEVICE_FUNC const Broadcast & broadcast() const
internal::packet_traits< Scalar >::type type
array< Index, NumDims > m_outputStrides
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const