00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef SPARSELU_KERNEL_BMOD_H
00012 #define SPARSELU_KERNEL_BMOD_H
00013
00014 namespace Eigen {
00015 namespace internal {
00016
00031 template <int SegSizeAtCompileTime> struct LU_kernel_bmod
00032 {
00033 template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
00034 static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
00035 const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
00036 };
00037
00038 template <int SegSizeAtCompileTime>
00039 template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
00040 EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
00041 const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
00042 {
00043 typedef typename ScalarVector::Scalar Scalar;
00044
00045
00046
00047 Index isub = lptr + no_zeros;
00048 int i;
00049 Index irow;
00050 for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
00051 {
00052 irow = lsub(isub);
00053 tempv(i) = dense(irow);
00054 ++isub;
00055 }
00056
00057 luptr += lda * no_zeros + no_zeros;
00058
00059 Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
00060 Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
00061
00062 u = A.template triangularView<UnitLower>().solve(u);
00063
00064
00065 luptr += segsize;
00066 const Index PacketSize = internal::packet_traits<Scalar>::size;
00067 Index ldl = internal::first_multiple(nrow, PacketSize);
00068 Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
00069 Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
00070 Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
00071 Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
00072
00073 l.setZero();
00074 internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride());
00075
00076
00077 isub = lptr + no_zeros;
00078 for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
00079 {
00080 irow = lsub(isub++);
00081 dense(irow) = tempv(i);
00082 }
00083
00084
00085 for (i = 0; i < nrow; i++)
00086 {
00087 irow = lsub(isub++);
00088 dense(irow) -= l(i);
00089 }
00090 }
00091
00092 template <> struct LU_kernel_bmod<1>
00093 {
00094 template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
00095 static EIGEN_DONT_INLINE void run(const int , BlockScalarVector& dense, ScalarVector& , ScalarVector& lusup, Index& luptr,
00096 const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
00097 };
00098
00099
00100 template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
00101 EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int , BlockScalarVector& dense, ScalarVector& , ScalarVector& lusup, Index& luptr,
00102 const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
00103 {
00104 typedef typename ScalarVector::Scalar Scalar;
00105 Scalar f = dense(lsub(lptr + no_zeros));
00106 luptr += lda * no_zeros + no_zeros + 1;
00107 const Scalar* a(lusup.data() + luptr);
00108 const Index* irow(lsub.data()+lptr + no_zeros + 1);
00109 Index i = 0;
00110 for (; i+1 < nrow; i+=2)
00111 {
00112 Index i0 = *(irow++);
00113 Index i1 = *(irow++);
00114 Scalar a0 = *(a++);
00115 Scalar a1 = *(a++);
00116 Scalar d0 = dense.coeff(i0);
00117 Scalar d1 = dense.coeff(i1);
00118 d0 -= f*a0;
00119 d1 -= f*a1;
00120 dense.coeffRef(i0) = d0;
00121 dense.coeffRef(i1) = d1;
00122 }
00123 if(i<nrow)
00124 dense.coeffRef(*(irow++)) -= f * *(a++);
00125 }
00126
00127 }
00128
00129 }
00130 #endif // SPARSELU_KERNEL_BMOD_H