10 #ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
11 #define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
45 int Mode,
bool LhsIsTriangular,
46 int LhsStorageOrder,
bool ConjugateLhs,
47 int RhsStorageOrder,
bool ConjugateRhs,
52 int Mode,
bool LhsIsTriangular,
53 int LhsStorageOrder,
bool ConjugateLhs,
54 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
56 LhsStorageOrder,ConjugateLhs,
57 RhsStorageOrder,ConjugateRhs,
RowMajor,Version>
74 ::
run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride,
alpha, blocking);
79 template <
typename Scalar,
typename Index,
int Mode,
80 int LhsStorageOrder,
bool ConjugateLhs,
81 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
83 LhsStorageOrder,ConjugateLhs,
84 RhsStorageOrder,ConjugateRhs,
ColMajor,Version>
102 template <
typename Scalar,
typename Index,
int Mode,
103 int LhsStorageOrder,
bool ConjugateLhs,
104 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
106 LhsStorageOrder,ConjugateLhs,
116 Index rows = IsLower ? _rows : diagSize;
117 Index depth = IsLower ? diagSize : _depth;
123 LhsMapper lhs(_lhs,lhsStride);
124 RhsMapper rhs(_rhs,rhsStride);
125 ResMapper res(_res, resStride);
134 std::size_t sizeA = kc*mc;
135 std::size_t sizeB = kc*cols;
149 triangularBuffer.diagonal().
setZero();
151 triangularBuffer.diagonal().
setOnes();
157 for(
Index k2=IsLower ? depth : 0;
158 IsLower ? k2>0 : k2<depth;
159 IsLower ? k2-=kc : k2+=kc)
162 Index actual_k2 = IsLower ? k2-actual_kc : k2;
165 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
168 k2 = k2+actual_kc-kc;
171 pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
179 if(IsLower || actual_k2<rows)
182 for (
Index k1=0; k1<actual_kc; k1+=panelWidth)
184 Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
185 Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
186 Index startBlock = actual_k2+k1;
187 Index blockBOffset = k1;
192 for (
Index k=0;k<actualPanelWidth;++k)
195 triangularBuffer.
coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
196 for (
Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
197 triangularBuffer.
coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
199 pack_lhs(blockA, LhsMapper(triangularBuffer.
data(), triangularBuffer.
outerStride()), actualPanelWidth, actualPanelWidth);
201 gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
202 actualPanelWidth, actualPanelWidth, cols,
alpha,
203 actualPanelWidth, actual_kc, 0, blockBOffset);
208 Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
210 pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
212 gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
213 lengthTarget, actualPanelWidth, cols,
alpha,
214 actualPanelWidth, actual_kc, 0, blockBOffset);
220 Index start = IsLower ? k2 : 0;
222 for(
Index i2=start; i2<end; i2+=mc)
226 (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
228 gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
229 actual_kc, cols,
alpha, -1, -1, 0, 0);
236 template <
typename Scalar,
typename Index,
int Mode,
237 int LhsStorageOrder,
bool ConjugateLhs,
238 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
240 LhsStorageOrder,ConjugateLhs,
241 RhsStorageOrder,ConjugateRhs,
ColMajor,Version>
258 template <
typename Scalar,
typename Index,
int Mode,
259 int LhsStorageOrder,
bool ConjugateLhs,
260 int RhsStorageOrder,
bool ConjugateRhs,
int Version>
262 LhsStorageOrder,ConjugateLhs,
274 Index depth = IsLower ? _depth : diagSize;
275 Index cols = IsLower ? diagSize : _cols;
280 LhsMapper lhs(_lhs,lhsStride);
281 RhsMapper rhs(_rhs,rhsStride);
282 ResMapper res(_res, resStride);
287 std::size_t sizeA = kc*mc;
297 triangularBuffer.diagonal().
setZero();
299 triangularBuffer.diagonal().
setOnes();
306 for(
Index k2=IsLower ? 0 : depth;
307 IsLower ? k2<depth : k2>0;
308 IsLower ? k2+=kc : k2-=kc)
311 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
314 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
317 k2 = actual_k2 + actual_kc - kc;
323 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
325 Scalar* geb = blockB+ts*ts;
326 geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/
sizeof(
Scalar));
328 pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
333 for (
Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
335 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
336 Index actual_j2 = actual_k2 + j2;
337 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
338 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
340 pack_rhs_panel(blockB+j2*actual_kc,
341 rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
342 panelLength, actualPanelWidth,
343 actual_kc, panelOffset);
346 for (
Index j=0;j<actualPanelWidth;++j)
349 triangularBuffer.
coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
350 for (
Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
351 triangularBuffer.
coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
354 pack_rhs_panel(blockB+j2*actual_kc,
356 actualPanelWidth, actualPanelWidth,
361 for (
Index i2=0; i2<rows; i2+=mc)
364 pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
369 for (
Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
371 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
372 Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
373 Index blockOffset = IsLower ? j2 : 0;
376 blockA, blockB+j2*actual_kc,
377 actual_mc, panelLength, actualPanelWidth,
379 actual_kc, actual_kc,
380 blockOffset, blockOffset);
383 gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
384 blockA, geb, actual_mc, actual_kc, rs,
398 template<
int Mode,
bool LhsIsTriangular,
typename Lhs,
typename Rhs>
408 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
411 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
417 LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
418 RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
419 Scalar actualAlpha =
alpha * lhs_alpha * rhs_alpha;
422 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
425 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (
std::min)(lhs.rows(),lhs.cols());
426 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (
std::min)(rhs.cols(),rhs.rows());
427 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (
std::min)(lhs.cols(),lhs.rows()))
428 : ((IsLower) ? rhs.rows() : (
std::min)(rhs.rows(),rhs.cols()));
430 BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1,
false);
433 Mode, LhsIsTriangular,
438 stripedRows, stripedCols, stripedDepth,
439 &lhs.coeffRef(0,0), lhs.outerStride(),
440 &rhs.coeffRef(0,0), rhs.outerStride(),
441 &dst.coeffRef(0,0), dst.outerStride(),
442 actualAlpha, blocking
448 if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
451 dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
453 else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
456 dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
466 #endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H