10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
// Fragment of a trait-style declaration parameterised on <Broadcast, XprType>.
// NOTE(review): the struct/class header and most of its members are not part
// of this excerpt.
23 template<
typename Broadcast,
typename XprType>
// Re-exports XprType's own Nested type under the same name.
30 typedef typename XprType::Nested
Nested;
// Rank and storage layout are copied unchanged from XprTraits.
// NOTE(review): XprTraits is declared outside this excerpt — presumably the
// traits of XprType; confirm against the full file.
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
// Two further template parameter lists over <Broadcast, XprType>; the
// declarations they introduce are not visible in this excerpt.
36 template<
typename Broadcast,
typename XprType>
42 template<
typename Broadcast,
typename XprType>
// Compile-time boolean `value`, generic case: false for an arbitrary Dims type.
// NOTE(review): the name of the enclosing struct is not visible in this excerpt.
48 template <
typename Dims>
50 static const bool value =
false;
// Variant taking a pack of std::size_t Indices, compiled only when
// EIGEN_EMULATE_CXX11_META_H is not defined: `value` is true exactly when
// Sizes<Indices...>::total_size equals 1.
56 #ifndef EIGEN_EMULATE_CXX11_META_H
57 template <
typename std::size_t... Indices>
59 static const bool value = (
Sizes<Indices...>::total_size == 1);
// Template parameter list over <Broadcast, XprType>; the declaration it
// introduces falls outside this excerpt.
67 template<
typename Broadcast,
typename XprType>
// Fragment of a type templated on <Broadcast, ArgType, Device> and of what
// appears to be its constructor (the signature is not part of this excerpt).
95 template<
typename Broadcast,
typename ArgType,
typename Device>
// Member initialisers: keep the broadcast factors from `op` and build the
// wrapped evaluator m_impl from op.expression() and the device.
116 : m_broadcast(op.broadcast()),
m_impl(op.expression(),
device)
123 const Broadcast& broadcast = op.
broadcast();
// Each output extent is the input extent multiplied by the broadcast factor
// for that dimension. NOTE(review): input_dims is declared on a line that is
// missing from this excerpt.
124 for (
int i = 0; i < NumDims; ++i) {
126 m_dimensions[i] = input_dims[i] * broadcast[i];
// Stride tables, first variant: dimension 0 has stride 1 and each later
// stride is the running product of the preceding extents.
// NOTE(review): the condition selecting this variant over the next one is not
// visible — presumably a column-major layout check; confirm.
130 m_inputStrides[0] = 1;
131 m_outputStrides[0] = 1;
132 for (
int i = 1; i < NumDims; ++i) {
133 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
134 m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
// Stride tables, second variant: the last dimension has stride 1 and strides
// grow toward dimension 0 (the mirror image of the loop above).
137 m_inputStrides[NumDims-1] = 1;
138 m_outputStrides[NumDims-1] = 1;
139 for (
int i = NumDims-2; i >= 0; --i) {
140 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
141 m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
// Forwards sub-expression evaluation to the wrapped evaluator, passing NULL as
// its argument. NOTE(review): the enclosing function is not visible here.
149 m_impl.evalSubExprsIfNeeded(NULL);
// Two alternative returns that delegate a coefficient lookup at `index` to
// coeffColMajor or coeffRowMajor. NOTE(review): the condition choosing between
// them (presumably on the storage layout) is missing from this excerpt.
164 return coeffColMajor(index);
166 return coeffRowMajor(index);
// Fragment that converts a linear output `index` into a linear input index and
// reads that coefficient from m_impl. Dimensions NumDims-1 down to 1 are peeled
// off in the loop; dimension 0 is handled afterwards.
// NOTE(review): presumably the body of coeffColMajor — the signature, the
// `else` keywords and the closing braces are not part of this excerpt.
173 Index inputIndex = 0;
174 for (
int i = NumDims - 1; i > 0; --i) {
// idx: quotient of the remaining index by the output stride of dimension i.
175 const Index idx = index / m_outputStrides[i];
// Broadcast factor statically 1 for dimension i: idx is used directly.
176 if (internal::index_statically_eq<Broadcast>(i, 1)) {
178 inputIndex += idx * m_inputStrides[i];
// Input extent statically 1 for dimension i: this check guards lines that are
// missing here; the visible statement after it wraps idx into the input's
// extent with a modulo before scaling by the input stride.
180 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
183 inputIndex += (idx %
m_impl.dimensions()[i]) * m_inputStrides[i];
// Remove dimension i's contribution before processing the next dimension.
186 index -= idx * m_outputStrides[i];
// Dimension 0: same static checks as in the loop; the visible statement adds
// the remaining index wrapped into the input's extent along dimension 0.
188 if (internal::index_statically_eq<Broadcast>(0, 1)) {
192 if (internal::index_statically_eq<InputDimensions>(0, 1)) {
195 inputIndex += (index %
m_impl.dimensions()[0]);
198 return m_impl.coeff(inputIndex);
// Fragment that converts a linear output `index` into a linear input index and
// reads that coefficient from m_impl. Dimensions 0 up to NumDims-2 are peeled
// off in the loop; dimension NumDims-1 is handled afterwards.
// NOTE(review): presumably the body of coeffRowMajor — the signature, the
// `else` keywords and the closing braces are not part of this excerpt.
203 Index inputIndex = 0;
204 for (
int i = 0; i < NumDims - 1; ++i) {
// idx: quotient of the remaining index by the output stride of dimension i.
205 const Index idx = index / m_outputStrides[i];
// Broadcast factor statically 1 for dimension i: idx is used directly.
206 if (internal::index_statically_eq<Broadcast>(i, 1)) {
208 inputIndex += idx * m_inputStrides[i];
// Input extent statically 1 for dimension i: this check guards lines that are
// missing here; the visible statement after it wraps idx into the input's
// extent with a modulo before scaling by the input stride.
210 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
213 inputIndex += (idx %
m_impl.dimensions()[i]) * m_inputStrides[i];
// Remove dimension i's contribution before processing the next dimension.
216 index -= idx * m_outputStrides[i];
// Last dimension: same static checks as in the loop; the visible statement adds
// the remaining index wrapped into the input's extent along dimension NumDims-1.
218 if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
222 if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
225 inputIndex += (index %
m_impl.dimensions()[NumDims-1]);
228 return m_impl.coeff(inputIndex);
// Fragment of a packet-load entry point templated on LoadMode.
// NOTE(review): the signature and the conditions guarding each return are not
// part of this excerpt.
231 template<
int LoadMode>
// Builds the returned packet from the input's coefficient 0 via pset1.
// NOTE(review): presumably taken when the input holds a single value; confirm.
235 return internal::pset1<PacketReturnType>(
m_impl.coeff(0));
// Otherwise delegate to one of the two layout-specific packet routines.
239 return packetColMajor<LoadMode>(index);
241 return packetRowMajor<LoadMode>(index);
// Fragment of a packet load templated on LoadMode that treats dimension 0 as
// the innermost one. NOTE(review): presumably packetColMajor — the signature,
// the declarations of innermostLoc and values, the `else` keywords and the
// closing braces are not part of this excerpt.
247 template<
int LoadMode>
// Keep the untouched output index for the element-wise fallback at the end.
253 const Index originalIndex = index;
// Same output-to-input index mapping as the coefficient path, for dimensions
// NumDims-1 down to 1.
255 Index inputIndex = 0;
256 for (
int i = NumDims - 1; i > 0; --i) {
257 const Index idx = index / m_outputStrides[i];
258 if (internal::index_statically_eq<Broadcast>(i, 1)) {
260 inputIndex += idx * m_inputStrides[i];
262 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
265 inputIndex += (idx %
m_impl.dimensions()[i]) * m_inputStrides[i];
268 index -= idx * m_outputStrides[i];
// innermostLoc: position inside the input along dimension 0 — the remaining
// index itself when the broadcast factor is statically 1, otherwise (in the
// visible branch) the remaining index modulo the input extent.
271 if (internal::index_statically_eq<Broadcast>(0, 1)) {
273 innermostLoc = index;
275 if (internal::index_statically_eq<InputDimensions>(0, 1)) {
279 innermostLoc = index %
m_impl.dimensions()[0];
282 inputIndex += innermostLoc;
// If PacketSize consecutive elements fit inside the input's dimension 0
// starting at innermostLoc, load them from m_impl with one unaligned packet read.
286 if (innermostLoc + PacketSize <=
m_impl.dimensions()[0]) {
287 return m_impl.template packet<Unaligned>(inputIndex);
// Otherwise fill `values` one element at a time: the first from the input
// index already computed, the rest by mapping each following output index
// through coeffColMajor. What is done with `values` afterwards is not visible.
290 values[0] =
m_impl.coeff(inputIndex);
291 for (
int i = 1; i < PacketSize; ++i) {
292 values[i] = coeffColMajor(originalIndex+i);
// Fragment of a packet load templated on LoadMode that treats dimension
// NumDims-1 as the innermost one. NOTE(review): presumably packetRowMajor — the
// signature, the declarations of innermostLoc and values, the `else` keywords
// and the closing braces are not part of this excerpt.
299 template<
int LoadMode>
// Keep the untouched output index for the element-wise fallback at the end.
305 const Index originalIndex = index;
// Same output-to-input index mapping as the coefficient path, for dimensions
// 0 up to NumDims-2.
307 Index inputIndex = 0;
308 for (
int i = 0; i < NumDims - 1; ++i) {
309 const Index idx = index / m_outputStrides[i];
310 if (internal::index_statically_eq<Broadcast>(i, 1)) {
312 inputIndex += idx * m_inputStrides[i];
314 if (internal::index_statically_eq<InputDimensions>(i, 1)) {
317 inputIndex += (idx %
m_impl.dimensions()[i]) * m_inputStrides[i];
320 index -= idx * m_outputStrides[i];
// innermostLoc: position inside the input along the last dimension — the
// remaining index itself when the broadcast factor is statically 1, otherwise
// (in the visible branch) the remaining index modulo the input extent.
323 if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
325 innermostLoc = index;
327 if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
331 innermostLoc = index %
m_impl.dimensions()[NumDims-1];
334 inputIndex += innermostLoc;
// If PacketSize consecutive elements fit inside the input's last dimension
// starting at innermostLoc, load them from m_impl with one unaligned packet read.
338 if (innermostLoc + PacketSize <=
m_impl.dimensions()[NumDims-1]) {
339 return m_impl.template packet<Unaligned>(inputIndex);
// Otherwise fill `values` one element at a time: the first from the input
// index already computed, the rest by mapping each following output index
// through coeffRowMajor. What is done with `values` afterwards is not visible.
342 values[0] =
m_impl.coeff(inputIndex);
343 for (
int i = 1; i < PacketSize; ++i) {
344 values[i] = coeffRowMajor(originalIndex+i);
// Fragment of a per-coefficient cost estimate. NOTE(review): presumably the
// body of this evaluator's costPerCoeff(vectorized) — the signature, some
// `compute_cost +=` left-hand sides and the closing braces are missing here.
// The estimate starts with one Index addition.
353 double compute_cost = TensorOpCost::AddCost<Index>();
// One iteration per dimension from NumDims-1 down to 1, each charged one
// Index division up front.
355 for (
int i = NumDims - 1; i > 0; --i) {
356 compute_cost += TensorOpCost::DivCost<Index>();
357 if (internal::index_statically_eq<Broadcast>(i, 1)) {
// NOTE(review): the left-hand side of the next expression is on a missing
// line; presumably it is added to compute_cost.
359 TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
// The modulo (plus a multiply and an add) is charged only when the input
// extent of dimension i is not statically 1.
361 if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
362 compute_cost += TensorOpCost::MulCost<Index>() +
363 TensorOpCost::ModCost<Index>() +
364 TensorOpCost::AddCost<Index>();
// NOTE(review): as above, the left-hand side of this expression is missing.
368 TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
// Total: the wrapped evaluator's own cost plus a TensorOpCost whose first two
// arguments are 0 and whose third is the compute estimate accumulated above.
371 return m_impl.costPerCoeff(vectorized) +
372 TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
// Accessor: returns the stored broadcast specification (m_broadcast) by value.
379 Broadcast
functor()
const {
return m_broadcast; }
392 #endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H