proxsuite 0.7.1
The Advanced Proximal Optimization Toolbox
Loading...
Searching...
No Matches
factorize.hpp
Go to the documentation of this file.
1
2//
3// Copyright (c) 2022 INRIA
4//
5#ifndef PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
6#define PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
7
9#include <Eigen/OrderingMethods>
10
11namespace proxsuite {
12namespace linalg {
13namespace sparse {
14
15template<typename I>
16auto
17transpose_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize nrows) noexcept
19{
20 return { nrows * isize(sizeof(I)), isize(alignof(I)) };
21}
22
23// at = a.T
24template<typename T, typename I>
25void
27 MatMut<T, I> at,
29 DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable<T>))
30{
31 using namespace _detail;
32
34 at.is_compressed(),
35 at.nrows() == a.ncols(),
36 at.ncols() == a.nrows(),
37 at.nnz() == a.nnz());
38
39 auto pai = a.row_indices();
40 auto pax = a.values();
41
42 auto patp = at.col_ptrs_mut();
43 auto pati = at.row_indices_mut();
44 auto patx = at.values_mut();
45
46 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, at.ncols());
47 auto work = _work.ptr_mut();
48
49 // work[i] = num zeros in ith row of A
50 if (a.is_compressed()) {
51 for (usize p = 0; p < usize(a.nnz()); ++p) {
52 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
53 }
54 } else {
55 for (usize j = 0; j < a.ncols(); ++j) {
56 isize col_start = a.col_start(j);
57 isize col_end = a.col_end(j);
58 for (isize p = col_start; p < col_end; ++p) {
59 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
60 }
61 }
62 }
63
64 // compute the cumulative sum
65 for (usize j = 0; j < usize(at.ncols()); ++j) {
66 patp[j + 1] = util::checked_non_negative_plus(patp[j], work[j]);
67 work[j] = patp[j];
68 }
69
70 for (usize j = 0; j < usize(a.ncols()); ++j) {
71 auto col_start = a.col_start(j);
72 auto col_end = a.col_end(j);
73 for (usize p = col_start; p < col_end; ++p) {
74 auto i = util::zero_extend(pai[p]);
75 auto q = util::zero_extend(work[i]);
76
77 pati[q] = j;
78 patx[q] = pax[p];
79 util::wrapping_inc(mut(work[i]));
80 }
81 }
82}
83
84template<typename I>
85auto
86transpose_symbolic_req(proxsuite::linalg::veg::Tag<I> /*tag*/,
87 isize nrows) noexcept
89{
90 return { nrows * isize(sizeof(I)), isize(alignof(I)) };
91}
92
93template<typename I>
94void
98 DynStackMut stack) noexcept
99{
100 using namespace _detail;
101
103 at.is_compressed(),
104 at.nrows() == a.ncols(),
105 at.ncols() == a.nrows(),
106 at.nnz() == a.nnz());
107
108 auto pai = a.row_indices();
109
110 auto patp = at.col_ptrs_mut();
111 auto pati = at.row_indices_mut();
112
113 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, at.ncols());
114 auto work = _work.ptr_mut();
115
116 // work[i] = num zeros in ith row of A
117 for (usize p = 0; p < usize(a.nnz()); ++p) {
118 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
119 }
120
121 // compute the cumulative sum
122 for (usize j = 0; j < usize(at.ncols()); ++j) {
123 patp[j + 1] = util::checked_non_negative_plus(patp[j], work[j]);
124 work[j] = patp[j];
125 }
126
127 for (usize j = 0; j < usize(a.ncols()); ++j) {
128 auto col_start = a.col_start(j);
129 auto col_end = a.col_end(j);
130 for (usize p = col_start; p < col_end; ++p) {
131 auto i = util::zero_extend(pai[p]);
132 auto q = util::zero_extend(work[i]);
133
134 pati[q] = I(j);
135 util::wrapping_inc(mut(work[i]));
136 }
137 }
138}
139
147template<typename T, typename I>
148void
150{
151 using namespace _detail;
152
154 l.nrows() == l.ncols(),
155 x.nrows() == l.nrows()
156 /* l is unit lower triangular */
157 );
158
159 usize n = usize(l.nrows());
160
161 auto pli = l.row_indices();
162 auto plx = l.values();
163
164 auto px = x.as_slice_mut().ptr_mut();
165
166 for (usize j = 0; j < n; ++j) {
167 auto const xj = px[j];
168 auto col_start = l.col_start(j);
169 auto col_end = l.col_end(j);
170
171 // skip the diagonal entry
172 for (usize p = col_start + 1; p < col_end; ++p) {
173 auto i = util::zero_extend(pli[p]);
174 px[i] -= plx[p] * xj;
175 }
176 }
177}
178
186template<typename T, typename I>
187void
189{
190 using namespace _detail;
191
193 l.nrows() == l.ncols(),
194 x.nrows() == l.nrows()
195 /* l is unit lower triangular */
196 );
197
198 usize n = usize(l.nrows());
199
200 auto pli = l.row_indices();
201 auto plx = l.values();
202
203 auto px = x.as_slice_mut().ptr_mut();
204
205 usize j = n;
206 while (true) {
207 if (j == 0) {
208 break;
209 }
210 --j;
211
212 auto col_start = l.col_start(j);
213 auto col_end = l.col_end(j);
214 T acc0 = 0;
215 T acc1 = 0;
216 T acc2 = 0;
217 T acc3 = 0;
218
219 // skip the diagonal entry
220 usize pstart = col_start + 1;
221 usize pcount = col_end - pstart;
222
223 usize p = pstart;
224 for (; p < pstart + pcount / 4 * 4; p += 4) {
225 auto i0 = util::zero_extend(pli[p + 0]);
226 auto i1 = util::zero_extend(pli[p + 1]);
227 auto i2 = util::zero_extend(pli[p + 2]);
228 auto i3 = util::zero_extend(pli[p + 3]);
229 acc0 += plx[p + 0] * px[i0];
230 acc1 += plx[p + 1] * px[i1];
231 acc2 += plx[p + 2] * px[i2];
232 acc3 += plx[p + 3] * px[i3];
233 }
234 for (; p < pstart + pcount; ++p) {
235 auto i0 = util::zero_extend(pli[p + 0]);
236 acc0 += plx[p + 0] * px[i0];
237 }
238
239 acc0 = (acc0 + acc1) + (acc2 + acc3);
240
241 px[j] -= acc0;
242 }
243}
244
250template<typename I>
251auto
252etree_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize n) noexcept
254{
255 return { n * isize{ sizeof(I) }, alignof(I) };
256}
257
267template<typename I>
268VEG_INLINE void
270 I* parent,
272 DynStackMut stack) noexcept
273{
274 using namespace _detail;
275
276 usize n = usize(a.ncols());
277 auto pai = a.row_indices();
278
279 auto _work =
280 stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{}, isize(n));
281 auto pancestors = _work.ptr_mut();
282
283 // for each column of a
284 for (usize k = 0; k < n; ++k) {
285 parent[k] = I(-1);
286 pancestors[k] = I(-1);
287 // assuming elimination subtree T_{k-1} is known, compute T_k
288
289 auto col_start = a.col_start(k);
290 auto col_end = a.col_end(k);
291
292 // for each non zero element of a
293 for (usize p = col_start; p < col_end; ++p) {
294 // get the row
295 auto i = util::zero_extend(pai[p]);
296 // skip if looking at lower triangular half
297 if (i >= k) {
298 continue;
299 }
300
301 // go up towards the root of the tree
302 usize node = i;
303 auto next = usize(-1);
304 while (true) {
305 if (node == usize(-1) || node >= k) {
306 break;
307 }
308
309 // use the highest known ancestor instead of the parent
310 next = util::sign_extend(pancestors[node]);
311
312 // set the highest known ancestor to k, since we know we're going
313 // to find it eventually
314 pancestors[node] = I(k);
315
316 // if there is no highest known ancestor, we must have hit the root of
317 // the tree
318 // set the parent to k
319 if (next == usize(-1)) {
320 parent[node] = I(k);
321 break;
322 }
323 // go to the highest ancestor
324 node = next;
325 }
326 }
327 }
328}
329
330namespace _detail {
331inline auto
333{
334 return { (k + 1) * isize{ sizeof(bool) }, alignof(bool) };
335}
336
337// compute the set of reachable nodes from the non zero pattern of a_{.,k}
338// not including the node k itself
339template<typename I>
342 I* s,
344 I const* parent,
345 isize k,
346 bool* pmarked) noexcept -> I*
347{
348
349 usize n = usize(a.ncols());
350 auto k_ = usize(k);
351
352 auto pai = a.row_indices();
353 auto pparent = parent;
354
355 auto col_start = a.col_start(k_);
356 auto col_end = a.col_end(k_);
357
358 pmarked[k_] = true;
359
360 usize top = n;
361
362 // for each non zero element of a_{.,k}
363 for (usize p = col_start; p < col_end; ++p) {
364 auto i = util::zero_extend(pai[p]);
365
366 // only triu part of a
367 if (i > k_) {
368 continue;
369 }
370
371 usize len = 0;
372 while (true) {
373 // if we reach a marked node
374 if (pmarked[i]) {
375 break;
376 }
377
378 // can't overwrite top of the stack since elements of s are unique
379 // and s is large enough to hold all the nodes
380 s[isize(len)] = I(i);
381 util::wrapping_inc(mut(len));
382
383 // mark node i as reached
384 pmarked[i] = true;
385 i = util::sign_extend(pparent[i]);
386 }
387
388 // make sure that we can memmove
389 std::memmove( //
390 s + (top - len),
391 s,
392 usize(len) * sizeof(I));
393
394 // move down the top of the stack
395 top = util::wrapping_plus(top, -len);
396 }
397
398 for (usize q = top; q < n; ++q) {
399 pmarked[s[q]] = false;
400 }
401 pmarked[k_] = false;
402
403 // [top, end[
404 count = n - top;
405 return s + top;
406}
407} // namespace _detail
408
409namespace _detail {
410// return the next start_index
411template<typename I>
412VEG_INLINE auto
414 I* post,
415 usize root,
416 usize start_index,
417 I* pstack,
418 I* pfirst_child,
419 I* pnext_child) noexcept -> usize
420{
421 using namespace _detail;
422
423 usize top = 0;
424 pstack[0] = I(root);
425
426 // stack is non empty
427 while (top != usize(-1)) {
428 auto current_node = util::zero_extend(pstack[top]);
429 auto current_child = util::sign_extend(pfirst_child[current_node]);
430
431 // no more children
432 if (current_child == usize(-1)) {
433 post[start_index] = I(current_node);
434 ++start_index;
435
436 // pop node from the stack
437 util::wrapping_dec(mut(top));
438 } else {
439 // add current child to the stack
440 util::wrapping_inc(mut(top));
441 pstack[top] = I(current_child);
442
443 // next child is now the first child
444 pfirst_child[current_node] = pnext_child[current_child];
445 }
446 }
447 return start_index;
448}
449} // namespace _detail
450
457template<typename I>
458auto
459postorder_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize n) noexcept
461{
462 return { (3 * n) * isize(sizeof(I)), alignof(I) };
463}
464
473template<typename I>
474void
475postorder(I* post, I const* parent, isize n, DynStackMut stack) noexcept
476{
477 using namespace _detail;
478
479 auto _work = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{},
480 3 * isize(n));
481 I* pwork = _work.ptr_mut();
482
483 I* pstack = pwork;
484 I* pfirst_child = pstack + n;
485 I* pnext_child = pfirst_child + n;
486
487 // no children are found yet
488 for (usize j = 0; j < usize(n); ++j) {
489 pfirst_child[j] = I(-1);
490 }
491
492 for (usize _j = 0; _j < usize(n); ++_j) {
493 // traverse in reverse order, since the children appear in reverse order
494 // of insertion in the linked list
495 usize j = usize(n) - 1 - _j;
496
497 // if not a root node
498 if (parent[isize(j)] != I(-1)) {
499 // next child of this node is the previous first child
500 pnext_child[j] = pfirst_child[util::zero_extend(parent[isize(j)])];
501 // set this node to be the new first child
502 pfirst_child[util::zero_extend(parent[isize(j)])] = I(j);
503 }
504 }
505
506 usize start_index = 0;
507 for (usize root = 0; root < usize(n); ++root) {
508 if (parent[isize(root)] == I(-1)) {
510 post, root, start_index, pstack, pfirst_child, pnext_child);
511 }
512 }
513}
514
515namespace _detail {
516// returns -2 if j is not a leaf
517// returns -1 if j is a first leaf
518// returns the least common ancestor of j and the previous j otherwise
519template<typename I>
520VEG_INLINE auto
522 usize j,
523 I const* pfirst,
524 I* pmax_first,
525 I* pprev_leaf,
526 I* pancestor) noexcept -> I
527{
528 using namespace _detail;
529
530 // if upper triangular part, or not a leaf
531 // leaves always have a new larger value of pfirst
532 // add 1 to get the correct result when comparing with -1
533 if (i <= j || util::wrapping_plus(pfirst[j], I(1)) <=
534 util::wrapping_plus(pmax_first[i], I(1))) {
535 return I(-2);
536 }
537
538 // update the largest max_first
539 // no need to compare because the value of j increases
540 // inbetween successive calls, and so does first_j
541 pmax_first[i] = pfirst[j];
542
543 // get the previous j
544 usize j_prev = util::sign_extend(pprev_leaf[i]);
545
546 // set the previous j to the current j
547 pprev_leaf[i] = I(j);
548
549 // if first leaf
550 if (j_prev == usize(-1)) {
551 return I(-1);
552 }
553
554 // else, subsequent leaf
555 // get the least common ancestor of j and j_prev
556 usize lca = j_prev;
557 while (true) {
558 if (lca == util::zero_extend(pancestor[lca])) {
559 break;
560 }
561 lca = util::zero_extend(pancestor[lca]);
562 }
563
564 // compress the path to speed up the subsequent calls
565 // to this function
566 usize node = j_prev;
567 while (true) {
568 if (node == lca) {
569 break;
570 }
571 usize next = util::zero_extend(pancestor[node]);
572 pancestor[node] = I(lca);
573 node = next;
574 }
575
576 return I(lca);
577}
578} // namespace _detail
579
580template<typename I>
581auto
582column_counts_req(proxsuite::linalg::veg::Tag<I> tag,
583 isize n,
584 isize nnz) noexcept
586{
588 return StackReq{
589 isize{ sizeof(I) } * (1 + 5 * n + nnz),
590 alignof(I),
592}
593
594template<typename I>
595void
598 I const* parent,
599 I const* post,
600 DynStackMut stack) noexcept
601{
602 // https://youtu.be/uZKJPTo4dZs
603 using namespace _detail;
604 usize n = usize(a.nrows());
605 auto _at_work = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{},
606 1 + 5 * isize(n) + a.nnz());
607 auto pat_work = _at_work.ptr_mut();
608 pat_work[0] = 0;
609 pat_work[n] = I(a.nnz());
610
612 from_raw_parts, isize(n), isize(n), a.nnz(),
613 pat_work, nullptr, pat_work + n + 1,
614 };
615 sparse::transpose_symbolic(at, a, stack);
616
617 auto patp = at.col_ptrs();
618 auto pati = at.row_indices();
619
620 auto pwork = pat_work + n + 1 + a.nnz();
621
622 auto pdelta = counts;
623
624 auto pfirst = pwork;
625 auto pmax_first = pwork + n;
626 auto pprev_leaf = pwork + 2 * n;
627 auto pancestor = pwork + 3 * n;
628
629 auto pcounts = counts;
630 auto ppost = post;
631 auto pparent = parent;
632
633 for (usize i = 0; i < 3 * n; ++i) {
634 pwork[i] = I(-1);
635 }
636 for (usize i = 0; i < n; ++i) {
637 pancestor[i] = I(i);
638 }
639
640 // for each column in a
641 for (usize k = 0; k < n; ++k) {
642 // in postordered fashion
643 auto j = util::zero_extend(ppost[k]);
644
645 // if first_j isn't computed, j must be a leaf
646 // because if it's not a leaf then first_j will be initialized from
647 // the init loop of its first descendant
648 //
649 // in which case initialize delta_j to 1
650 pdelta[j] = (pfirst[j] == I(-1)) ? I(1) : I(0);
651
652 // init loop
653 while (true) {
654 // while j is not a root, and the first descendant of j isn't computed
655 // set the first descendant of j to k, as well as all of its ancestors
656 // that don't yet have a first descendant
657 if (j == usize(-1) || pfirst[j] != I(-1)) {
658 break;
659 }
660 pfirst[j] = I(k);
661 j = util::sign_extend(pparent[j]);
662 }
663 }
664
665 // for each node
666 for (usize k = 0; k < n; ++k) {
667 // in postordered fashion
668 auto j = util::zero_extend(ppost[k]);
669
670 // if this node is the child of some other node
671 if (pparent[j] != I(-1)) {
672 // decrement the delta of that node
673 // corresponding to the correction term e_j
674 util::wrapping_dec(mut(pdelta[util::zero_extend(pparent[j])]));
675 }
676
677 auto col_start = util::zero_extend(patp[j]);
678 auto col_end = util::zero_extend(patp[j + 1]);
679
680 // iterate over lower triangular half of a
681 for (usize p = col_start; p < col_end; ++p) {
682 auto i = util::zero_extend(pati[p]);
684 i,
685 j,
686 pfirst,
687 pmax_first,
688 pprev_leaf,
689 pancestor);
690
691 // if j is a leaf of T^i
692 if (lca != I(-2)) {
693 util::wrapping_inc(mut(pdelta[j]));
694
695 // if j is a subsequent leaf
696 if (lca != I(-1)) {
697 util::wrapping_dec(mut(pdelta[util::zero_extend(lca)]));
698 }
699 }
700 }
701
702 if (pparent[j] != -1) {
703 // set the ancestor of j
704 pancestor[j] = pparent[j];
705 }
706 }
707
708 // sum up the deltas
709 for (usize j = 0; j < n; ++j) {
710 if (parent[isize(j)] != I(-1)) {
711 pcounts[util::zero_extend(parent[isize(j)])] = util::wrapping_plus(
712 pcounts[util::zero_extend(parent[isize(j)])], pcounts[j]);
713 }
714 }
715}
716
717template<typename I>
718auto
719amd_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize /*n*/, isize nnz) noexcept
721{
722 return { nnz * isize{ sizeof(char) }, alignof(char) };
723}
724
725template<typename I>
726void
727amd(I* perm, SymbolicMatRef<I> mat, DynStackMut stack) noexcept
728{
729 // TODO: reimplement amd under BSD-3
730 // https://github.com/DrTimothyAldenDavis/SuiteSparse/tree/master/AMD
731
732 isize n = mat.nrows();
733 isize nnz = mat.nnz();
734
735 Eigen::PermutationMatrix<-1, -1, I> perm_eigen;
736 auto _ = stack.make_new(proxsuite::linalg::veg::Tag<char>{}, nnz);
737
738 Eigen::AMDOrdering<I>{}(
739 Eigen::Map<Eigen::SparseMatrix<char, Eigen::ColMajor, I> const>{
740 n,
741 n,
742 nnz,
743 mat.col_ptrs(),
744 mat.row_indices(),
745 _.ptr(),
746 mat.nnz_per_col(),
747 }
748 .template selfadjointView<Eigen::Upper>(),
749
750 perm_eigen);
751 std::memmove( //
752 perm,
753 perm_eigen.indices().data(),
754 usize(n) * sizeof(I));
755}
756
757namespace _detail {
758template<typename I>
759void
760inv_perm(I* perm_inv, I const* perm, isize n) noexcept
761{
762 for (usize i = 0; i < usize(n); ++i) {
763 perm_inv[util::zero_extend(perm[i])] = I(i);
764 }
765}
766
767template<typename I>
768auto
769symmetric_permute_symbolic_req(proxsuite::linalg::veg::Tag<I> /*tag*/,
770 isize n) noexcept
772{
773 return { n * isize{ sizeof(I) }, alignof(I) };
774}
775template<typename I>
776auto
777symmetric_permute_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize n) noexcept
779{
780 return { n * isize{ sizeof(I) }, alignof(I) };
781}
782
783template<typename I>
784void
786 I const* pperm_inv,
787 SymbolicMatRef<I> old_a,
788 I* pnew_ap,
789 I* pcol_counts)
790{
791 for (usize old_j = 0; old_j < n; ++old_j) {
792 usize new_j = util::zero_extend(pperm_inv[old_j]);
793
794 auto col_start = old_a.col_start(old_j);
795 auto col_end = old_a.col_end(old_j);
796
797 for (usize p = col_start; p < col_end; ++p) {
798 usize old_i = util::zero_extend(old_a.row_indices()[p]);
799
800 if (old_i <= old_j) {
801 usize new_i = util::zero_extend(pperm_inv[old_i]);
802 util::wrapping_inc(mut(pcol_counts[new_i > new_j ? new_i : new_j]));
803 }
804 }
805 }
806
807 pnew_ap[0] = I(0);
808 for (usize i = 0; i < n; ++i) {
809 pnew_ap[i + 1] =
810 util::checked_non_negative_plus(pnew_ap[i], pcol_counts[i]);
811 pcol_counts[i] = pnew_ap[i];
812 }
813}
814
815template<typename I>
816void
818 SymbolicMatRef<I> old_a,
819 I const* perm_inv,
820 DynStackMut stack) noexcept
821{
822
823 usize n = usize(new_a.nrows());
824
825 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, isize(n));
826 I* pcol_counts = _work.ptr_mut();
827
828 VEG_ASSERT(new_a.is_compressed());
829 auto pold_ai = old_a.row_indices();
830
831 auto pnew_ap = new_a.col_ptrs_mut();
832 auto pnew_ai = new_a.row_indices_mut();
833
834 auto pperm_inv = perm_inv;
835
836 _detail::symmetric_permute_common(n, pperm_inv, old_a, pnew_ap, pcol_counts);
837
838 auto pcurrent_row_index = pcol_counts;
839
840 for (usize old_j = 0; old_j < n; ++old_j) {
841 usize new_j = util::zero_extend(pperm_inv[old_j]);
842
843 auto col_start = old_a.col_start(old_j);
844 auto col_end = old_a.col_end(old_j);
845
846 for (usize p = col_start; p < col_end; ++p) {
847 usize old_i = util::zero_extend(pold_ai[p]);
848
849 if (old_i <= old_j) {
850 usize new_i = util::zero_extend(pperm_inv[old_i]);
851
852 usize new_max = new_i > new_j ? new_i : new_j;
853 usize new_min = new_i < new_j ? new_i : new_j;
854
855 auto row_idx = pcurrent_row_index[new_max];
856 pnew_ai[row_idx] = I(new_min);
857 pcurrent_row_index[new_max] = util::wrapping_plus(row_idx, I(1));
858 }
859 }
860 }
861}
862
863template<typename T, typename I>
864void
866 MatRef<T, I> old_a,
867 I const* perm_inv,
868 DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable<T>))
869{
870 usize n = usize(new_a.nrows());
871 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, isize(n));
872 I* pcol_counts = _work.ptr_mut();
873
874 VEG_ASSERT(new_a.is_compressed());
875 auto pold_ai = old_a.row_indices();
876
877 auto pnew_ap = new_a.col_ptrs_mut();
878 auto pnew_ai = new_a.row_indices_mut();
879
880 auto pperm_inv = perm_inv;
881
883 n, pperm_inv, old_a.symbolic(), pnew_ap, pcol_counts);
884
885 auto pcurrent_row_index = pcol_counts;
886
887 auto pold_ax = old_a.values();
888 auto pnew_ax = new_a.values_mut();
889 for (usize old_j = 0; old_j < n; ++old_j) {
890 usize new_j = util::zero_extend(pperm_inv[old_j]);
891
892 auto col_start = old_a.col_start(old_j);
893 auto col_end = old_a.col_end(old_j);
894
895 for (usize p = col_start; p < col_end; ++p) {
896 usize old_i = util::zero_extend(pold_ai[p]);
897
898 if (old_i <= old_j) {
899 usize new_i = util::zero_extend(pperm_inv[old_i]);
900
901 usize new_max = new_i > new_j ? new_i : new_j;
902 usize new_min = new_i < new_j ? new_i : new_j;
903
904 auto row_idx = pcurrent_row_index[new_max];
905 pnew_ai[row_idx] = I(new_min);
906 pnew_ax[row_idx] = pold_ax[p];
907 pcurrent_row_index[new_max] = util::wrapping_plus(row_idx, I(1));
908 }
909 }
910 }
911}
912} // namespace _detail
913
914enum struct Ordering : unsigned char
915{
916 natural,
918 amd,
919 ENUM_END,
920};
921
930template<typename I>
931auto
932factorize_symbolic_req(proxsuite::linalg::veg::Tag<I> tag,
933 isize n,
934 isize nnz,
935 Ordering o) noexcept
937{
939 constexpr isize sz{ sizeof(I) };
940 constexpr isize al{ alignof(I) };
941
942 StackReq perm_req{ 0, al };
943 StackReq amd_req{ 0, al };
944 switch (o) {
946 break;
947 case Ordering::amd:
948 amd_req =
949 StackReq{ n * sz, al } & StackReq{ sparse::amd_req(tag, n, nnz) };
952 perm_req = perm_req & StackReq{ (n + 1 + nnz) * sz, al };
953 perm_req = perm_req & _detail::symmetric_permute_symbolic_req(tag, n);
954 default:
955 break;
956 }
957
958 StackReq parent_req = { n * sz, al };
959 StackReq post_req = { n * sz, al };
960
961 StackReq etree_req = sparse::etree_req(tag, n);
962 StackReq postorder_req = sparse::postorder_req(tag, n);
963 StackReq colcount_req = sparse::column_counts_req(tag, n, nnz);
964
965 return amd_req //
966 | (perm_req //
967 & (parent_req //
968 & (etree_req //
969 | (post_req //
970 & (postorder_req | colcount_req)))));
971}
972
984template<typename I>
985void
987 I* etree,
988 I* perm_inv,
989 I const* perm,
991 DynStackMut stack) noexcept
992{
993
994 bool id_perm = perm_inv == nullptr;
995 bool user_perm = perm != nullptr;
996
997 Ordering o = user_perm ? Ordering::user_provided
998 : id_perm ? Ordering::natural
1000
1001 proxsuite::linalg::veg::Tag<I> tag{};
1002
1003 usize n = usize(a.ncols());
1004
1005 switch (o) {
1006 case Ordering::natural:
1007 break;
1008
1009 case Ordering::amd: {
1010 auto amd_perm = stack.make_new_for_overwrite(tag, isize(n));
1011 sparse::amd(amd_perm.ptr_mut(), a, stack);
1012 perm = amd_perm.ptr();
1013 }
1016 _detail::inv_perm(perm_inv, perm, isize(n));
1017 }
1018 default:
1019 break;
1020 }
1021
1022 auto _permuted_a_col_ptrs =
1023 stack //
1024 .make_new_for_overwrite(tag, id_perm ? 0 : (a.ncols() + 1));
1025 auto _permuted_a_row_indices =
1026 stack //
1027 .make_new_for_overwrite(tag, id_perm ? 0 : (a.nnz()));
1028
1029 if (!id_perm) {
1030 _permuted_a_col_ptrs.as_mut()[0] = 0;
1031 _permuted_a_col_ptrs.as_mut()[isize(n)] = I(a.nnz());
1032 SymbolicMatMut<I> permuted_a{
1033 from_raw_parts,
1034 isize(n),
1035 isize(n),
1036 a.nnz(),
1037 _permuted_a_col_ptrs.ptr_mut(),
1038 nullptr,
1039 _permuted_a_row_indices.ptr_mut(),
1040 };
1041 _detail::symmetric_permute_symbolic(permuted_a, a, perm_inv, stack);
1042 }
1043
1044 SymbolicMatRef<I> permuted_a = id_perm ? a
1046 from_raw_parts,
1047 isize(n),
1048 isize(n),
1049 a.nnz(),
1050 _permuted_a_col_ptrs.ptr(),
1051 nullptr,
1052 _permuted_a_row_indices.ptr(),
1053 };
1054
1055 sparse::etree(etree, permuted_a, stack);
1056
1057 auto _post = stack.make_new_for_overwrite(tag, isize(n));
1058 sparse::postorder(_post.ptr_mut(), etree, isize(n), stack);
1059 sparse::column_counts(nnz_per_col, permuted_a, etree, _post.ptr(), stack);
1060}
1061
1073template<typename I>
1074void
1076 I* etree,
1077 I* perm_inv,
1078 I const* perm,
1080 DynStackMut stack) noexcept
1081{
1082
1084 col_ptrs + 1,
1085 etree,
1086 perm_inv,
1087 perm,
1088 a,
1089 stack);
1090
1091 usize n = usize(a.ncols());
1092 auto pcol_ptrs = col_ptrs;
1093 pcol_ptrs[0] = I(0);
1094 for (usize i = 0; i < n; ++i) {
1095 pcol_ptrs[i + 1] =
1096 util::checked_non_negative_plus(pcol_ptrs[i + 1], pcol_ptrs[i]);
1097 }
1098}
1099
1108template<typename T, typename I>
1109auto
1110factorize_numeric_req(proxsuite::linalg::veg::Tag<T> /*ttag*/,
1111 proxsuite::linalg::veg::Tag<I> /*itag*/,
1112 isize n,
1113 isize a_nnz,
1114 Ordering o) noexcept
1116{
1118
1119 constexpr isize sz{ sizeof(I) };
1120 constexpr isize al{ alignof(I) };
1121
1122 constexpr isize tsz{ sizeof(T) };
1123 constexpr isize tal{ alignof(T) };
1124
1125 bool id_perm = o == Ordering::natural;
1126
1127 auto symb_perm_req = StackReq{ sz * (id_perm ? 0 : (n + 1 + a_nnz)), al };
1128 auto num_perm_req = StackReq{ tsz * (id_perm ? 0 : a_nnz), tal };
1129 return num_perm_req //
1130 & (StackReq{ tsz * n, tal } //
1131 & (symb_perm_req //
1132 & (StackReq{ 2 * n * sz, al } //
1133 & (StackReq{ n * tsz, tal } //
1134 & StackReq{ n * isize{ sizeof(bool) }, alignof(bool) }))));
1135}
1136
1156template<typename T, typename I>
1157void
1159 T* values,
1160 I* row_indices,
1163 I const* col_ptrs,
1164 I const* etree,
1165 I const* perm_inv,
1166 MatRef<T, I> a,
1167 DynStackMut stack) noexcept(false)
1168{
1169 using namespace _detail;
1170 isize n = a.nrows();
1171
1172 bool id_perm = perm_inv == nullptr;
1173
1174 proxsuite::linalg::veg::Tag<I> tag{};
1175
1176 auto _permuted_a_values = stack.make_new_for_overwrite(
1177 proxsuite::linalg::veg::Tag<T>{}, id_perm ? 0 : a.nnz());
1178
1179 auto _x = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<T>{}, n);
1180
1181 auto _permuted_a_col_ptrs =
1182 stack.make_new_for_overwrite(tag, id_perm ? 0 : (a.ncols() + 1));
1183 auto _permuted_a_row_indices =
1184 stack.make_new_for_overwrite(tag, id_perm ? 0 : a.nnz());
1185
1186 if (!id_perm) {
1187 _permuted_a_col_ptrs.as_mut()[0] = 0;
1188 _permuted_a_col_ptrs.as_mut()[n] = I(a.nnz());
1189 MatMut<T, I> permuted_a{
1190 from_raw_parts,
1191 n,
1192 n,
1193 a.nnz(),
1194 _permuted_a_col_ptrs.ptr_mut(),
1195 nullptr,
1196 _permuted_a_row_indices.ptr_mut(),
1197 _permuted_a_values.ptr_mut(),
1198 };
1199 _detail::symmetric_permute(permuted_a, a, perm_inv, stack);
1200 }
1201
1202 MatRef<T, I> permuted_a = id_perm ? a
1203 : MatRef<T, I>{
1204 from_raw_parts,
1205 isize(n),
1206 isize(n),
1207 a.nnz(),
1208 _permuted_a_col_ptrs.ptr(),
1209 nullptr,
1210 _permuted_a_row_indices.ptr(),
1211 _permuted_a_values.ptr(),
1212 };
1213
1214 auto _current_row_index = stack.make_new_for_overwrite(tag, n);
1215 auto _ereach_stack_storage = stack.make_new_for_overwrite(tag, n);
1216
1217 I* pcurrent_row_index = _current_row_index.ptr_mut();
1218 T* px = _x.ptr_mut();
1219
1220 std::memcpy( //
1221 pcurrent_row_index,
1222 col_ptrs,
1223 usize(n) * sizeof(I));
1224 for (usize i = 0; i < usize(n); ++i) {
1225 px[i] = 0;
1226 }
1227
1228 // compute the iter-th row of L using the iter-th column of permuted_a
1229 // the diagonal element is filled with the diagonal of D instead of 1
1230 I const* plp = col_ptrs;
1231
1232 auto _marked = stack.make_new(proxsuite::linalg::veg::Tag<bool>{}, n);
1233 for (usize iter = 0; iter < usize(n); ++iter) {
1234 usize ereach_count = 0;
1235 auto ereach_stack = _detail::ereach(ereach_count,
1236 _ereach_stack_storage.ptr_mut(),
1237 permuted_a.symbolic(),
1238 etree,
1239 isize(iter),
1240 _marked.ptr_mut());
1241
1242 auto pereach_stack = ereach_stack;
1243
1244 I const* pai = permuted_a.row_indices();
1245 T const* pax = permuted_a.values();
1246
1247 I* pli = row_indices;
1248 T* plx = values;
1249
1250 {
1251 auto col_start = permuted_a.col_start(iter);
1252 auto col_end = permuted_a.col_end(iter);
1253
1254 // scatter permuted_a column into x
1255 // untouched columns are already zeroed
1256
1257 for (usize p = col_start; p < col_end; ++p) {
1258 auto i = util::zero_extend(pai[p]);
1259 px[i] = pax[p];
1260 }
1261 }
1262 T d = px[iter] + ((diag_to_add == nullptr || perm == nullptr)
1263 ? T(0)
1264 : diag_to_add[util::zero_extend(perm[iter])]);
1265
1266 // zero for next iteration
1267 px[iter] = 0;
1268
1269 for (usize q = 0; q < ereach_count; ++q) {
1270 usize j = util::zero_extend(pereach_stack[q]);
1271 auto col_start = util::zero_extend(plp[j]);
1272 auto row_idx = util::zero_extend(pcurrent_row_index[j]) + 1;
1273
1274 T const xj = px[j];
1275 T const dj = plx[col_start];
1276 T const lkj = xj / dj;
1277
1278 // zero for the next iteration
1279 px[j] = 0;
1280
1281 // skip first element, to put diagonal there later
1282 for (usize p = col_start + 1; p < row_idx; ++p) {
1283 auto i = util::zero_extend(pli[p]);
1284 px[i] -= plx[p] * xj;
1285 }
1286
1287 d -= lkj * xj;
1288
1289 pli[row_idx] = I(iter);
1290 plx[row_idx] = lkj;
1291 pcurrent_row_index[j] = I(row_idx);
1292 }
1293 {
1294 auto col_start = util::zero_extend(plp[iter]);
1295 pli[col_start] = I(iter);
1296 plx[col_start] = d;
1297 }
1298 }
1299}
1300} // namespace sparse
1301} // namespace linalg
1302} // namespace proxsuite
1303#endif /* end of include guard PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP */
#define VEG_ASSERT_ALL_OF(...)
#define VEG_ASSERT(...)
#define HEDLEY_FALL_THROUGH
#define VEG_CONCEPT(...)
Definition macros.hpp:1241
#define VEG_INLINE
Definition macros.hpp:118
VEG_INLINE auto postorder_depth_first_search(I *post, usize root, usize start_index, I *pstack, I *pfirst_child, I *pnext_child) noexcept -> usize
void inv_perm(I *perm_inv, I const *perm, isize n) noexcept
void symmetric_permute(MatMut< T, I > new_a, MatRef< T, I > old_a, I const *perm_inv, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
void symmetric_permute_common(usize n, I const *pperm_inv, SymbolicMatRef< I > old_a, I *pnew_ap, I *pcol_counts)
VEG_INLINE auto least_common_ancestor(usize i, usize j, I const *pfirst, I *pmax_first, I *pprev_leaf, I *pancestor) noexcept -> I
auto symmetric_permute_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
VEG_NODISCARD VEG_INLINE auto ereach(usize &count, I *s, SymbolicMatRef< I > a, I const *parent, isize k, bool *pmarked) noexcept -> I *
auto ereach_req(isize k) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void symmetric_permute_symbolic(SymbolicMatMut< I > new_a, SymbolicMatRef< I > old_a, I const *perm_inv, DynStackMut stack) noexcept
auto symmetric_permute_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_non_zeros(I *nnz_per_col, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void factorize_numeric(T *values, I *row_indices, proxsuite::linalg::veg::DoNotDeduce< T const * > diag_to_add, proxsuite::linalg::veg::DoNotDeduce< I const * > perm, I const *col_ptrs, I const *etree, I const *perm_inv, MatRef< T, I > a, DynStackMut stack) noexcept(false)
auto amd_req(proxsuite::linalg::veg::Tag< I >, isize, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto postorder_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void postorder(I *post, I const *parent, isize n, DynStackMut stack) noexcept
auto transpose_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
Definition factorize.hpp:17
void transpose(MatMut< T, I > at, MatRef< T, I > a, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
Definition factorize.hpp:26
VEG_INLINE void etree(I *parent, SymbolicMatRef< I > a, DynStackMut stack) noexcept
auto factorize_symbolic_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_col_counts(I *col_ptrs, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void transpose_symbolic(SymbolicMatMut< I > at, SymbolicMatRef< I > a, DynStackMut stack) noexcept
Definition factorize.hpp:95
auto transpose_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
Definition factorize.hpp:86
auto factorize_numeric_req(proxsuite::linalg::veg::Tag< T >, proxsuite::linalg::veg::Tag< I >, isize n, isize a_nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_ltsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
void column_counts(I *counts, SymbolicMatRef< I > a, I const *parent, I const *post, DynStackMut stack) noexcept
auto etree_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_lsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
auto column_counts_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
meta::type_identity_t< T > DoNotDeduce
Definition core.hpp:292
_detail::_meta::make_signed< usize >::Type isize
Definition typedefs.hpp:43
decltype(sizeof(0)) usize
Definition macros.hpp:702
#define VEG_NODISCARD
Definition prologue.hpp:97
auto symbolic() const noexcept -> SymbolicMatRef< I >
Definition core.hpp:416
auto values() const noexcept -> T const *
Definition core.hpp:415
auto col_start(usize j) const noexcept -> usize
Definition core.hpp:279
auto col_end(usize j) const noexcept -> usize
Definition core.hpp:287
auto row_indices() const noexcept -> I const *
Definition core.hpp:277