5#ifndef PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
6#define PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
9#include <Eigen/OrderingMethods>
20 return { nrows * isize(
sizeof(I)), isize(
alignof(I)) };
24template<
typename T,
typename I>
31 using namespace _detail;
35 at.nrows() == a.ncols(),
36 at.ncols() == a.nrows(),
39 auto pai = a.row_indices();
40 auto pax = a.values();
42 auto patp =
at.col_ptrs_mut();
43 auto pati =
at.row_indices_mut();
44 auto patx =
at.values_mut();
47 auto work =
_work.ptr_mut();
50 if (a.is_compressed()) {
51 for (usize p = 0; p < usize(a.nnz()); ++p) {
52 util::wrapping_inc(mut(work[util::zero_extend(
pai[p])]));
55 for (usize
j = 0;
j < a.ncols(); ++
j) {
56 isize col_start = a.col_start(
j);
57 isize col_end = a.col_end(
j);
58 for (isize p = col_start; p < col_end; ++p) {
59 util::wrapping_inc(mut(work[util::zero_extend(
pai[p])]));
65 for (usize
j = 0;
j < usize(
at.ncols()); ++
j) {
66 patp[
j + 1] = util::checked_non_negative_plus(
patp[
j], work[
j]);
70 for (usize
j = 0;
j < usize(a.ncols()); ++
j) {
71 auto col_start = a.col_start(
j);
72 auto col_end = a.col_end(
j);
73 for (usize p = col_start; p < col_end; ++p) {
74 auto i = util::zero_extend(
pai[p]);
75 auto q = util::zero_extend(work[
i]);
79 util::wrapping_inc(mut(work[
i]));
90 return { nrows * isize(
sizeof(I)), isize(
alignof(I)) };
100 using namespace _detail;
104 at.nrows() == a.ncols(),
105 at.ncols() == a.nrows(),
106 at.nnz() == a.nnz());
108 auto pai = a.row_indices();
110 auto patp =
at.col_ptrs_mut();
111 auto pati =
at.row_indices_mut();
114 auto work =
_work.ptr_mut();
117 for (usize p = 0; p < usize(a.nnz()); ++p) {
118 util::wrapping_inc(mut(work[util::zero_extend(
pai[p])]));
122 for (usize
j = 0;
j < usize(
at.ncols()); ++
j) {
123 patp[
j + 1] = util::checked_non_negative_plus(
patp[
j], work[
j]);
127 for (usize
j = 0;
j < usize(a.ncols()); ++
j) {
128 auto col_start = a.col_start(
j);
129 auto col_end = a.col_end(
j);
130 for (usize p = col_start; p < col_end; ++p) {
131 auto i = util::zero_extend(
pai[p]);
132 auto q = util::zero_extend(work[
i]);
135 util::wrapping_inc(mut(work[
i]));
147template<
typename T,
typename I>
151 using namespace _detail;
154 l.nrows() == l.ncols(),
155 x.nrows() == l.nrows()
159 usize n = usize(l.nrows());
161 auto pli = l.row_indices();
162 auto plx = l.values();
164 auto px = x.as_slice_mut().ptr_mut();
166 for (usize
j = 0;
j < n; ++
j) {
167 auto const xj =
px[
j];
168 auto col_start = l.col_start(
j);
169 auto col_end = l.col_end(
j);
172 for (usize p = col_start + 1; p < col_end; ++p) {
173 auto i = util::zero_extend(
pli[p]);
186template<
typename T,
typename I>
190 using namespace _detail;
193 l.nrows() == l.ncols(),
194 x.nrows() == l.nrows()
198 usize n = usize(l.nrows());
200 auto pli = l.row_indices();
201 auto plx = l.values();
203 auto px = x.as_slice_mut().ptr_mut();
212 auto col_start = l.col_start(
j);
213 auto col_end = l.col_end(
j);
220 usize
pstart = col_start + 1;
225 auto i0 = util::zero_extend(
pli[p + 0]);
226 auto i1 = util::zero_extend(
pli[p + 1]);
227 auto i2 = util::zero_extend(
pli[p + 2]);
228 auto i3 = util::zero_extend(
pli[p + 3]);
235 auto i0 = util::zero_extend(
pli[p + 0]);
255 return { n * isize{
sizeof(I) },
alignof(I) };
274 using namespace _detail;
276 usize n = usize(a.ncols());
277 auto pai = a.row_indices();
284 for (usize
k = 0;
k < n; ++
k) {
289 auto col_start = a.col_start(
k);
290 auto col_end = a.col_end(
k);
293 for (usize p = col_start; p < col_end; ++p) {
295 auto i = util::zero_extend(
pai[p]);
303 auto next = usize(-1);
319 if (
next == usize(-1)) {
334 return { (
k + 1) * isize{
sizeof(
bool) },
alignof(
bool) };
349 usize n = usize(a.ncols());
352 auto pai = a.row_indices();
355 auto col_start = a.col_start(
k_);
356 auto col_end = a.col_end(
k_);
363 for (usize p = col_start; p < col_end; ++p) {
364 auto i = util::zero_extend(
pai[p]);
380 s[isize(len)] = I(
i);
381 util::wrapping_inc(mut(len));
392 usize(len) *
sizeof(I));
395 top = util::wrapping_plus(
top, -len);
398 for (usize
q =
top;
q < n; ++
q) {
421 using namespace _detail;
427 while (
top != usize(-1)) {
437 util::wrapping_dec(mut(
top));
440 util::wrapping_inc(mut(
top));
462 return { (3 * n) * isize(
sizeof(I)),
alignof(I) };
477 using namespace _detail;
488 for (usize
j = 0;
j < usize(n); ++
j) {
492 for (usize
_j = 0;
_j < usize(n); ++
_j) {
495 usize
j = usize(n) - 1 -
_j;
498 if (parent[isize(
j)] != I(-1)) {
508 if (parent[isize(
root)] == I(-1)) {
528 using namespace _detail;
533 if (
i <=
j || util::wrapping_plus(
pfirst[
j], I(1)) <=
550 if (
j_prev == usize(-1)) {
589 isize{
sizeof(I) } * (1 + 5 * n + nnz),
603 using namespace _detail;
604 usize n = usize(a.nrows());
606 1 + 5 * isize(n) + a.nnz());
612 from_raw_parts, isize(n), isize(n), a.nnz(),
617 auto patp =
at.col_ptrs();
618 auto pati =
at.row_indices();
633 for (usize
i = 0;
i < 3 * n; ++
i) {
636 for (usize
i = 0;
i < n; ++
i) {
641 for (usize
k = 0;
k < n; ++
k) {
643 auto j = util::zero_extend(
ppost[
k]);
657 if (
j == usize(-1) ||
pfirst[
j] != I(-1)) {
666 for (usize
k = 0;
k < n; ++
k) {
668 auto j = util::zero_extend(
ppost[
k]);
677 auto col_start = util::zero_extend(
patp[
j]);
678 auto col_end = util::zero_extend(
patp[
j + 1]);
681 for (usize p = col_start; p < col_end; ++p) {
682 auto i = util::zero_extend(
pati[p]);
693 util::wrapping_inc(mut(
pdelta[
j]));
697 util::wrapping_dec(mut(
pdelta[util::zero_extend(
lca)]));
709 for (usize
j = 0;
j < n; ++
j) {
710 if (parent[isize(
j)] != I(-1)) {
711 pcounts[util::zero_extend(parent[isize(
j)])] = util::wrapping_plus(
723 return { nnz * isize{
sizeof(
char) },
alignof(
char) };
733 isize n =
mat.nrows();
734 isize nnz =
mat.nnz();
736 Eigen::PermutationMatrix<-1, -1, I>
perm_eigen;
739 Eigen::AMDOrdering<I>{}(
740 Eigen::Map<Eigen::SparseMatrix<char, Eigen::ColMajor, I>
const>{
755 usize(n) *
sizeof(I));
761inv_perm(I* perm_inv, I
const* perm, isize n)
noexcept
763 for (usize
i = 0;
i < usize(n); ++
i) {
764 perm_inv[util::zero_extend(perm[
i])] = I(
i);
774 return { n * isize{
sizeof(I) },
alignof(I) };
781 return { n * isize{
sizeof(I) },
alignof(I) };
798 for (usize p = col_start; p < col_end; ++p) {
799 usize
old_i = util::zero_extend(
old_a.row_indices()[p]);
809 for (usize
i = 0;
i < n; ++
i) {
824 usize n = usize(
new_a.nrows());
847 for (usize p = col_start; p < col_end; ++p) {
864template<
typename T,
typename I>
871 usize n = usize(
new_a.nrows());
896 for (usize p = col_start; p < col_end; ++p) {
940 constexpr isize
sz{
sizeof(I) };
941 constexpr isize
al{
alignof(I) };
995 bool id_perm = perm_inv ==
nullptr;
1004 usize n = usize(a.ncols());
1011 auto amd_perm = stack.make_new_for_overwrite(tag, isize(n));
1025 .make_new_for_overwrite(tag,
id_perm ? 0 : (a.ncols() + 1));
1028 .make_new_for_overwrite(tag,
id_perm ? 0 : (a.nnz()));
1058 auto _post = stack.make_new_for_overwrite(tag, isize(n));
1092 usize n = usize(a.ncols());
1095 for (usize
i = 0;
i < n; ++
i) {
1109template<
typename T,
typename I>
1120 constexpr isize
sz{
sizeof(I) };
1121 constexpr isize
al{
alignof(I) };
1123 constexpr isize
tsz{
sizeof(T) };
1124 constexpr isize
tal{
alignof(T) };
1131 & (StackReq{
tsz * n,
tal }
1133 & (StackReq{ 2 * n *
sz,
al }
1134 & (StackReq{ n *
tsz,
tal }
1135 & StackReq{ n * isize{
sizeof(
bool) },
alignof(
bool) }))));
1157template<
typename T,
typename I>
1170 using namespace _detail;
1171 isize n = a.nrows();
1173 bool id_perm = perm_inv ==
nullptr;
1183 stack.make_new_for_overwrite(tag,
id_perm ? 0 : (a.ncols() + 1));
1185 stack.make_new_for_overwrite(tag,
id_perm ? 0 : a.nnz());
1219 T*
px =
_x.ptr_mut();
1224 usize(n) *
sizeof(I));
1225 for (usize
i = 0;
i < usize(n); ++
i) {
1231 I
const*
plp = col_ptrs;
1234 for (usize iter = 0; iter < usize(n); ++iter) {
1248 I*
pli = row_indices;
1258 for (usize p = col_start; p < col_end; ++p) {
1259 auto i = util::zero_extend(
pai[p]);
1263 T d =
px[iter] + ((
diag_to_add ==
nullptr || perm ==
nullptr)
1272 auto col_start = util::zero_extend(
plp[
j]);
1276 T
const dj =
plx[col_start];
1283 for (usize p = col_start + 1; p <
row_idx; ++p) {
1284 auto i = util::zero_extend(
pli[p]);
1295 auto col_start = util::zero_extend(
plp[iter]);
1296 pli[col_start] = I(iter);
#define VEG_ASSERT_ALL_OF(...)
#define HEDLEY_FALL_THROUGH
VEG_INLINE auto postorder_depth_first_search(I *post, usize root, usize start_index, I *pstack, I *pfirst_child, I *pnext_child) noexcept -> usize
void inv_perm(I *perm_inv, I const *perm, isize n) noexcept
void symmetric_permute(MatMut< T, I > new_a, MatRef< T, I > old_a, I const *perm_inv, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
void symmetric_permute_common(usize n, I const *pperm_inv, SymbolicMatRef< I > old_a, I *pnew_ap, I *pcol_counts)
VEG_INLINE auto least_common_ancestor(usize i, usize j, I const *pfirst, I *pmax_first, I *pprev_leaf, I *pancestor) noexcept -> I
auto symmetric_permute_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
VEG_NODISCARD VEG_INLINE auto ereach(usize &count, I *s, SymbolicMatRef< I > a, I const *parent, isize k, bool *pmarked) noexcept -> I *
auto ereach_req(isize k) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void symmetric_permute_symbolic(SymbolicMatMut< I > new_a, SymbolicMatRef< I > old_a, I const *perm_inv, DynStackMut stack) noexcept
auto symmetric_permute_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_non_zeros(I *nnz_per_col, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void factorize_numeric(T *values, I *row_indices, proxsuite::linalg::veg::DoNotDeduce< T const * > diag_to_add, proxsuite::linalg::veg::DoNotDeduce< I const * > perm, I const *col_ptrs, I const *etree, I const *perm_inv, MatRef< T, I > a, DynStackMut stack) noexcept(false)
auto amd_req(proxsuite::linalg::veg::Tag< I >, isize, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto postorder_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void postorder(I *post, I const *parent, isize n, DynStackMut stack) noexcept
auto transpose_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void transpose(MatMut< T, I > at, MatRef< T, I > a, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
VEG_INLINE void etree(I *parent, SymbolicMatRef< I > a, DynStackMut stack) noexcept
auto factorize_symbolic_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_col_counts(I *col_ptrs, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void transpose_symbolic(SymbolicMatMut< I > at, SymbolicMatRef< I > a, DynStackMut stack) noexcept
auto transpose_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto factorize_numeric_req(proxsuite::linalg::veg::Tag< T >, proxsuite::linalg::veg::Tag< I >, isize n, isize a_nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_ltsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
void column_counts(I *counts, SymbolicMatRef< I > a, I const *parent, I const *post, DynStackMut stack) noexcept
auto etree_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_lsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
auto column_counts_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq