.. _program_listing_file__tmp_ws_src_proxsuite_include_proxsuite_linalg_dense_factorize.hpp: Program Listing for File factorize.hpp ====================================== |exhale_lsh| :ref:`Return to documentation for file ` (``/tmp/ws/src/proxsuite/include/proxsuite/linalg/dense/factorize.hpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp // // Copyright (c) 2022 INRIA // #ifndef PROXSUITE_LINALG_DENSE_LDLT_FACTORIZE_HPP #define PROXSUITE_LINALG_DENSE_LDLT_FACTORIZE_HPP #include "proxsuite/linalg/dense/core.hpp" #include #include namespace proxsuite { namespace linalg { namespace dense { namespace _detail { template VEG_NO_INLINE void compute_permutation_impl(isize* perm_indices, isize* perm_inv_indices, isize n, T const* diagonal_data, isize stride) { for (isize k = 0; k < n; ++k) { perm_indices[k] = k; } { std::sort(perm_indices, perm_indices + n, [diagonal_data, stride](isize i, isize j) noexcept -> bool { using std::fabs; auto lhs = fabs(diagonal_data[stride * i]); auto rhs = fabs(diagonal_data[stride * j]); if (lhs == rhs) { return i < j; } return lhs > rhs; }); } for (isize k = 0; k < n; ++k) { perm_inv_indices[perm_indices[k]] = k; } } template VEG_NO_INLINE void compute_permutation(isize* perm_indices, isize* perm_inv_indices, Diag const& diagonal) { _detail::compute_permutation_impl( perm_indices, perm_inv_indices, diagonal.rows(), diagonal.data(), diagonal.innerStride()); } template void apply_permutation_tri_lower(Mat&& mat, Work&& work, isize const* perm_indices) { using T = typename proxsuite::linalg::veg::uncvref_t::Scalar; isize n = mat.rows(); VEG_ASSERT_ALL_OF( // n == mat.rows(), n == mat.cols(), n == work.rows(), n == work.cols()); auto mat_coeff = [&](isize i, isize j) noexcept -> T& { return i >= j ? mat(i, j) : mat(j, i); }; for (isize j = 0; j < n; ++j) { for (isize i = j; i < n; ++i) { work(i, j) = mat_coeff(perm_indices[i], perm_indices[j]); } } mat.template triangularView() = work.template triangularView(); } template void factorize_unblocked_impl(Mat mat, proxsuite::linalg::veg::dynstack::DynStackMut stack) { // left looking cholesky // https://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition_2 using T = typename Mat::Scalar; isize n = mat.rows(); if (n == 0) { return; } auto _work = stack.make_new_for_overwrite( // proxsuite::linalg::veg::Tag{}, n, _detail::align()); auto work_storage = Eigen::Map, Eigen::Unaligned>{ _work.ptr_mut(), n, 1, }; isize j = 0; while (true) { /* * L00 * l = l10 1 * L20 l21 L22 * * D0 * d = d1 * D2 * * compute d1 and l21 */ auto l10 = util::subcols(util::row(mat, j), 0, j); auto d0 = util::subrows(mat.diagonal(), 0, j); auto work = util::subrows(work_storage, 0, j); work = util::trans(l10).cwiseProduct(d0); mat(j, j) -= work.dot(l10); if (j + 1 == n) { break; } isize rem = n - j - 1; auto l20 = util::submatrix(mat, j + 1, 0, rem, j); auto l21 = util::subrows(util::col(mat, j), j + 1, rem); util::noalias_mul_add(l21, l20, work, T(-1)); l21 *= 1 / mat(j, j); ++j; } } template void factorize_blocked_impl(Mat mat, isize block_size, proxsuite::linalg::veg::dynstack::DynStackMut stack) { // right looking blocked cholesky using T = typename Mat::Scalar; VEG_ASSERT(mat.rows() == mat.cols()); isize n = mat.rows(); if (n == 0) { return; } isize j = 0; while (true) { isize bs = min2(n - j, block_size); auto ld11 = util::submatrix(mat, j, j, bs, bs); auto d1 = util::diagonal(ld11); _detail::factorize_unblocked_impl(ld11, stack); if (j + bs == n) { break; } isize rem = n - j - bs; isize work_stride = _detail::adjusted_stride(rem); auto _work = stack.make_new_for_overwrite( // proxsuite::linalg::veg::Tag{}, bs * work_stride, _detail::align()); auto work = Eigen::Map, Eigen::Unaligned, Eigen::OuterStride>{ _work.ptr_mut(), rem, bs, Eigen::OuterStride{ work_stride }, }; auto l21 = util::submatrix(mat, j + bs, j, rem, bs); util::trans(ld11) .template triangularView() .template solveInPlace(l21); work = l21; l21 = l21 * d1.asDiagonal().inverse(); auto l22 = util::submatrix(mat, j + bs, j + bs, rem, rem); l22.template triangularView() -= l21 * util::trans(work); j += bs; } } using factorize_recursive_threshold = proxsuite::linalg::veg::meta::constant; template void factorize_recursive_impl(Mat mat, proxsuite::linalg::veg::dynstack::DynStackMut stack) { // right looking recursive cholesky using T = typename Mat::Scalar; VEG_ASSERT(mat.rows() == mat.cols()); isize n = mat.rows(); if (n < factorize_recursive_threshold::value) { _detail::factorize_unblocked_impl(mat, stack); } else { /* * L00 * L = L10 L11 * * D0 * D = D1 * * compute L00 and D0 recursively * compute L10 by solving a triangular system * compute L11 recursively */ isize bs = (n + 1) / 2; isize rem = n - bs; auto l00 = util::submatrix(mat, 0, 0, bs, bs); auto l10 = util::submatrix(mat, bs, 0, rem, bs); auto l11 = util::submatrix(mat, bs, bs, rem, rem); _detail::factorize_recursive_impl(l00, stack); auto d0 = util::diagonal(l00); isize work_stride = _detail::adjusted_stride(rem); util::trans(l00) .template triangularView() .template solveInPlace(l10); { auto _work = stack.make_new_for_overwrite( // proxsuite::linalg::veg::Tag{}, bs * work_stride, _detail::align()); auto work = Eigen::Map, Eigen::Unaligned, Eigen::OuterStride>{ _work.ptr_mut(), rem, bs, Eigen::OuterStride{ work_stride }, }; work = l10; l10 = l10 * d0.asDiagonal().inverse(); l11.template triangularView() -= l10 * util::trans(work); } _detail::factorize_recursive_impl(l11, stack); } } } // namespace _detail template auto factorize_unblocked_req(proxsuite::linalg::veg::Tag /*tag*/, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq { return { n * isize{ sizeof(T) }, _detail::align(), }; } template auto factorize_blocked_req(proxsuite::linalg::veg::Tag tag, isize n, isize block_size) noexcept -> proxsuite::linalg::veg::dynstack::StackReq { return proxsuite::linalg::dense::factorize_unblocked_req(tag, block_size) | proxsuite::linalg::veg::dynstack::StackReq{ _detail::adjusted_stride( _detail::max2(n - block_size, isize(0))) * block_size * isize{ sizeof(T) }, _detail::align(), }; } template auto factorize_recursive_req(proxsuite::linalg::veg::Tag tag, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq { auto req0 = proxsuite::linalg::dense::factorize_unblocked_req( tag, _detail::min2(n, _detail::factorize_recursive_threshold::value)); if (n < _detail::factorize_recursive_threshold::value) { return req0; } isize bs = (n + 1) / 2; isize rem = n - bs; return req0 | proxsuite::linalg::veg::dynstack::StackReq{ bs * _detail::adjusted_stride(rem) * isize{ sizeof(T) }, _detail::align(), }; } template void factorize_unblocked(Mat&& mat, proxsuite::linalg::veg::dynstack::DynStackMut stack) { _detail::factorize_unblocked_impl(util::to_view_dyn(mat), stack); } template void factorize_blocked(Mat&& mat, isize block_size, proxsuite::linalg::veg::dynstack::DynStackMut stack) { _detail::factorize_blocked_impl(util::to_view_dyn(mat), block_size, stack); } template void factorize_recursive(Mat&& mat, proxsuite::linalg::veg::dynstack::DynStackMut stack) { _detail::factorize_recursive_impl(util::to_view_dyn(mat), stack); } template auto factorize_req(proxsuite::linalg::veg::Tag tag, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq { return proxsuite::linalg::dense::factorize_blocked_req(tag, n, 128) | proxsuite::linalg::dense::factorize_recursive_req(tag, n); } template void factorize(Mat&& mat, proxsuite::linalg::veg::dynstack::DynStackMut stack) { isize n = mat.rows(); if (n > 2048) { proxsuite::linalg::dense::factorize_blocked(mat, 128, stack); } else { proxsuite::linalg::dense::factorize_recursive(mat, stack); } } } // namespace dense } // namespace linalg } // namespace proxsuite #endif /* end of include guard PROXSUITE_LINALG_DENSE_LDLT_FACTORIZE_HPP */