10 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H 11 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H 45 int Mode,
bool LhsIsTriangular,
46 int LhsStorageOrder,
bool ConjugateLhs,
47 int RhsStorageOrder,
bool ConjugateRhs,
52 int Mode,
bool LhsIsTriangular,
53 int LhsStorageOrder,
bool ConjugateLhs,
54 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
56 LhsStorageOrder,ConjugateLhs,
57 RhsStorageOrder,ConjugateRhs,
RowMajor,Version>
61 const Scalar* lhs, Index lhsStride,
62 const Scalar* rhs, Index rhsStride,
71 LhsStorageOrder==RowMajor ?
ColMajor : RowMajor,
74 ::
run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
79 template <
typename Scalar,
typename Index,
int Mode,
80 int LhsStorageOrder,
bool ConjugateLhs,
81 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
83 LhsStorageOrder,ConjugateLhs,
84 RhsStorageOrder,ConjugateRhs,
ColMajor,Version>
95 Index _rows, Index _cols, Index _depth,
96 const Scalar* _lhs, Index lhsStride,
97 const Scalar* _rhs, Index rhsStride,
102 template <
typename Scalar,
typename Index,
int Mode,
103 int LhsStorageOrder,
bool ConjugateLhs,
104 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
106 LhsStorageOrder,ConjugateLhs,
108 Index _rows, Index _cols, Index _depth,
109 const Scalar* _lhs, Index lhsStride,
110 const Scalar* _rhs, Index rhsStride,
111 Scalar* _res, Index resStride,
115 Index diagSize = (
std::min)(_rows,_depth);
116 Index
rows = IsLower ? _rows : diagSize;
117 Index depth = IsLower ? diagSize : _depth;
123 LhsMapper lhs(_lhs,lhsStride);
124 RhsMapper rhs(_rhs,rhsStride);
125 ResMapper
res(_res, resStride);
127 Index kc = blocking.
kc();
149 triangularBuffer.diagonal().
setZero();
151 triangularBuffer.diagonal().
setOnes();
157 for(Index k2=IsLower ? depth : 0;
158 IsLower ? k2>0 : k2<depth;
159 IsLower ? k2-=kc : k2+=kc)
161 Index actual_kc = (
std::min)(IsLower ? k2 : depth-k2, kc);
162 Index actual_k2 = IsLower ? k2-actual_kc : k2;
165 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>
rows))
168 k2 = k2+actual_kc-kc;
171 pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc,
cols);
179 if(IsLower || actual_k2<rows)
182 for (Index k1=0; k1<actual_kc; k1+=panelWidth)
184 Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
185 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
186 Index startBlock = actual_k2+k1;
187 Index blockBOffset = k1;
192 for (Index k=0;k<actualPanelWidth;++k)
195 triangularBuffer.
coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
196 for (Index
i=IsLower ? k+1 : 0; IsLower ?
i<actualPanelWidth :
i<k; ++
i)
197 triangularBuffer.
coeffRef(
i,k) = lhs(startBlock+
i,startBlock+k);
199 pack_lhs(blockA, LhsMapper(triangularBuffer.
data(), triangularBuffer.
outerStride()), actualPanelWidth, actualPanelWidth);
201 gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
202 actualPanelWidth, actualPanelWidth,
cols,
alpha,
203 actualPanelWidth, actual_kc, 0, blockBOffset);
208 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
210 pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
212 gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
213 lengthTarget, actualPanelWidth,
cols,
alpha,
214 actualPanelWidth, actual_kc, 0, blockBOffset);
220 Index start = IsLower ? k2 : 0;
221 Index
end = IsLower ? rows : (
std::min)(actual_k2,rows);
222 for(Index i2=start; i2<
end; i2+=mc)
224 const Index actual_mc = (
std::min)(i2+mc,end)-i2;
226 (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
228 gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
236 template <
typename Scalar,
typename Index,
int Mode,
237 int LhsStorageOrder,
bool ConjugateLhs,
238 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
240 LhsStorageOrder,ConjugateLhs,
241 RhsStorageOrder,ConjugateRhs,ColMajor,Version>
251 Index _rows, Index _cols, Index _depth,
252 const Scalar* _lhs, Index lhsStride,
253 const Scalar* _rhs, Index rhsStride,
254 Scalar*
res, Index resStride,
258 template <
typename Scalar,
typename Index,
int Mode,
259 int LhsStorageOrder,
bool ConjugateLhs,
260 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
262 LhsStorageOrder,ConjugateLhs,
264 Index _rows, Index _cols, Index _depth,
265 const Scalar* _lhs, Index lhsStride,
266 const Scalar* _rhs, Index rhsStride,
267 Scalar* _res, Index resStride,
272 Index diagSize = (
std::min)(_cols,_depth);
274 Index depth = IsLower ? _depth : diagSize;
275 Index
cols = IsLower ? diagSize : _cols;
280 LhsMapper lhs(_lhs,lhsStride);
281 RhsMapper rhs(_rhs,rhsStride);
282 ResMapper
res(_res, resStride);
284 Index kc = blocking.
kc();
297 triangularBuffer.diagonal().
setZero();
299 triangularBuffer.diagonal().
setOnes();
306 for(Index k2=IsLower ? 0 : depth;
307 IsLower ? k2<depth : k2>0;
308 IsLower ? k2+=kc : k2-=kc)
310 Index actual_kc = (
std::min)(IsLower ? depth-k2 : k2, kc);
311 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
314 if(IsLower && (k2<cols) && (actual_k2+actual_kc>
cols))
317 k2 = actual_k2 + actual_kc - kc;
321 Index rs = IsLower ? (
std::min)(cols,actual_k2) : cols - k2;
323 Index ts = (IsLower && actual_k2>=
cols) ? 0 : actual_kc;
325 Scalar* geb = blockB+ts*ts;
326 geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/
sizeof(
Scalar));
328 pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
333 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
335 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
336 Index actual_j2 = actual_k2 + j2;
337 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
338 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
340 pack_rhs_panel(blockB+j2*actual_kc,
341 rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
342 panelLength, actualPanelWidth,
343 actual_kc, panelOffset);
346 for (Index
j=0;
j<actualPanelWidth;++
j)
349 triangularBuffer.
coeffRef(
j,
j) = rhs(actual_j2+
j,actual_j2+
j);
350 for (Index k=IsLower ?
j+1 : 0; IsLower ? k<actualPanelWidth : k<
j; ++k)
351 triangularBuffer.
coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
354 pack_rhs_panel(blockB+j2*actual_kc,
356 actualPanelWidth, actualPanelWidth,
361 for (Index i2=0; i2<
rows; i2+=mc)
363 const Index actual_mc = (
std::min)(mc,rows-i2);
364 pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
369 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
371 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
372 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
373 Index blockOffset = IsLower ? j2 : 0;
375 gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
376 blockA, blockB+j2*actual_kc,
377 actual_mc, panelLength, actualPanelWidth,
379 actual_kc, actual_kc,
380 blockOffset, blockOffset);
383 gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
384 blockA, geb, actual_mc, actual_kc, rs,
398 template<
int Mode,
bool LhsIsTriangular,
typename Lhs,
typename Rhs>
408 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
411 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
417 LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
418 RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
419 Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
422 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
425 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (
std::min)(lhs.rows(),lhs.cols());
426 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (
std::min)(rhs.cols(),rhs.rows());
427 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (
std::min)(lhs.cols(),lhs.rows()))
428 : ((IsLower) ? rhs.rows() : (
std::min)(rhs.rows(),rhs.cols()));
430 BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1,
false);
433 Mode, LhsIsTriangular,
438 stripedRows, stripedCols, stripedDepth,
439 &lhs.coeffRef(0,0), lhs.outerStride(),
440 &rhs.coeffRef(0,0), rhs.outerStride(),
441 &dst.coeffRef(0,0), dst.outerStride(),
442 actualAlpha, blocking
448 if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
450 Index diagSize = (
std::min)(lhs.rows(),lhs.cols());
451 dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
453 else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
455 Index diagSize = (
std::min)(rhs.rows(),rhs.cols());
456 dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
466 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
#define EIGEN_STRONG_INLINE
EIGEN_DEVICE_FUNC Matrix< _Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols > & setZero(Index size)
#define EIGEN_MAX_ALIGN_BYTES
gebp_traits< Scalar, Scalar > Traits
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar * data() const
#define EIGEN_PLAIN_ENUM_MAX(a, b)
Namespace containing all symbols from the Eigen library.
gebp_traits< Scalar, Scalar > Traits
EIGEN_DEVICE_FUNC Matrix< _Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols > & setOnes(Index size)
const unsigned int RowMajorBit
#define EIGEN_DONT_INLINE
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar & coeffRef(Index rowId, Index colId)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
EIGEN_DEVICE_FUNC Index outerStride() const
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar &alpha)
The matrix class, also used for vectors and row-vectors.
void run(Expr &expr, Dev &dev)