proxsuite 0.6.7
The Advanced Proximal Optimization Toolbox
Loading...
Searching...
No Matches
factorize.hpp
Go to the documentation of this file.
1
2//
3// Copyright (c) 2022 INRIA
4//
5#ifndef PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
6#define PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP
7
9#include <Eigen/OrderingMethods>
10
11namespace proxsuite {
12namespace linalg {
13namespace sparse {
14
15template<typename I>
16auto
17transpose_req(proxsuite::linalg::veg::Tag<I> /*tag*/, isize nrows) noexcept
19{
20 return { nrows * isize(sizeof(I)), isize(alignof(I)) };
21}
22
23// at = a.T
24template<typename T, typename I>
25void
30{
31 using namespace _detail;
32
34 at.is_compressed(),
35 at.nrows() == a.ncols(),
36 at.ncols() == a.nrows(),
37 at.nnz() == a.nnz());
38
39 auto pai = a.row_indices();
40 auto pax = a.values();
41
42 auto patp = at.col_ptrs_mut();
43 auto pati = at.row_indices_mut();
44 auto patx = at.values_mut();
45
46 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, at.ncols());
47 auto work = _work.ptr_mut();
48
49 // work[i] = num zeros in ith row of A
50 if (a.is_compressed()) {
51 for (usize p = 0; p < usize(a.nnz()); ++p) {
52 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
53 }
54 } else {
55 for (usize j = 0; j < a.ncols(); ++j) {
56 isize col_start = a.col_start(j);
57 isize col_end = a.col_end(j);
58 for (isize p = col_start; p < col_end; ++p) {
59 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
60 }
61 }
62 }
63
64 // compute the cumulative sum
65 for (usize j = 0; j < usize(at.ncols()); ++j) {
66 patp[j + 1] = util::checked_non_negative_plus(patp[j], work[j]);
67 work[j] = patp[j];
68 }
69
70 for (usize j = 0; j < usize(a.ncols()); ++j) {
71 auto col_start = a.col_start(j);
72 auto col_end = a.col_end(j);
73 for (usize p = col_start; p < col_end; ++p) {
74 auto i = util::zero_extend(pai[p]);
75 auto q = util::zero_extend(work[i]);
76
77 pati[q] = j;
78 patx[q] = pax[p];
79 util::wrapping_inc(mut(work[i]));
80 }
81 }
82}
83
84template<typename I>
85auto
87 isize nrows) noexcept
89{
90 return { nrows * isize(sizeof(I)), isize(alignof(I)) };
91}
92
93template<typename I>
94void
98 DynStackMut stack) noexcept
99{
100 using namespace _detail;
101
103 at.is_compressed(),
104 at.nrows() == a.ncols(),
105 at.ncols() == a.nrows(),
106 at.nnz() == a.nnz());
107
108 auto pai = a.row_indices();
109
110 auto patp = at.col_ptrs_mut();
111 auto pati = at.row_indices_mut();
112
113 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, at.ncols());
114 auto work = _work.ptr_mut();
115
116 // work[i] = num zeros in ith row of A
117 for (usize p = 0; p < usize(a.nnz()); ++p) {
118 util::wrapping_inc(mut(work[util::zero_extend(pai[p])]));
119 }
120
121 // compute the cumulative sum
122 for (usize j = 0; j < usize(at.ncols()); ++j) {
123 patp[j + 1] = util::checked_non_negative_plus(patp[j], work[j]);
124 work[j] = patp[j];
125 }
126
127 for (usize j = 0; j < usize(a.ncols()); ++j) {
128 auto col_start = a.col_start(j);
129 auto col_end = a.col_end(j);
130 for (usize p = col_start; p < col_end; ++p) {
131 auto i = util::zero_extend(pai[p]);
132 auto q = util::zero_extend(work[i]);
133
134 pati[q] = I(j);
135 util::wrapping_inc(mut(work[i]));
136 }
137 }
138}
139
147template<typename T, typename I>
148void
150{
151 using namespace _detail;
152
154 l.nrows() == l.ncols(),
155 x.nrows() == l.nrows()
156 /* l is unit lower triangular */
157 );
158
159 usize n = usize(l.nrows());
160
161 auto pli = l.row_indices();
162 auto plx = l.values();
163
164 auto px = x.as_slice_mut().ptr_mut();
165
166 for (usize j = 0; j < n; ++j) {
167 auto const xj = px[j];
168 auto col_start = l.col_start(j);
169 auto col_end = l.col_end(j);
170
171 // skip the diagonal entry
172 for (usize p = col_start + 1; p < col_end; ++p) {
173 auto i = util::zero_extend(pli[p]);
174 px[i] -= plx[p] * xj;
175 }
176 }
177}
178
186template<typename T, typename I>
187void
189{
190 using namespace _detail;
191
193 l.nrows() == l.ncols(),
194 x.nrows() == l.nrows()
195 /* l is unit lower triangular */
196 );
197
198 usize n = usize(l.nrows());
199
200 auto pli = l.row_indices();
201 auto plx = l.values();
202
203 auto px = x.as_slice_mut().ptr_mut();
204
205 usize j = n;
206 while (true) {
207 if (j == 0) {
208 break;
209 }
210 --j;
211
212 auto col_start = l.col_start(j);
213 auto col_end = l.col_end(j);
214 T acc0 = 0;
215 T acc1 = 0;
216 T acc2 = 0;
217 T acc3 = 0;
218
219 // skip the diagonal entry
220 usize pstart = col_start + 1;
221 usize pcount = col_end - pstart;
222
223 usize p = pstart;
224 for (; p < pstart + pcount / 4 * 4; p += 4) {
225 auto i0 = util::zero_extend(pli[p + 0]);
226 auto i1 = util::zero_extend(pli[p + 1]);
227 auto i2 = util::zero_extend(pli[p + 2]);
228 auto i3 = util::zero_extend(pli[p + 3]);
229 acc0 += plx[p + 0] * px[i0];
230 acc1 += plx[p + 1] * px[i1];
231 acc2 += plx[p + 2] * px[i2];
232 acc3 += plx[p + 3] * px[i3];
233 }
234 for (; p < pstart + pcount; ++p) {
235 auto i0 = util::zero_extend(pli[p + 0]);
236 acc0 += plx[p + 0] * px[i0];
237 }
238
239 acc0 = (acc0 + acc1) + (acc2 + acc3);
240
241 px[j] -= acc0;
242 }
243}
244
250template<typename I>
251auto
254{
255 return { n * isize{ sizeof(I) }, alignof(I) };
256}
257
267template<typename I>
268VEG_INLINE void
270 I* parent,
272 DynStackMut stack) noexcept
273{
274 using namespace _detail;
275
276 usize n = usize(a.ncols());
277 auto pai = a.row_indices();
278
279 auto _work =
280 stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{}, isize(n));
281 auto pancestors = _work.ptr_mut();
282
283 // for each column of a
284 for (usize k = 0; k < n; ++k) {
285 parent[k] = I(-1);
286 pancestors[k] = I(-1);
287 // assuming elimination subtree T_{k-1} is known, compute T_k
288
289 auto col_start = a.col_start(k);
290 auto col_end = a.col_end(k);
291
292 // for each non zero element of a
293 for (usize p = col_start; p < col_end; ++p) {
294 // get the row
295 auto i = util::zero_extend(pai[p]);
296 // skip if looking at lower triangular half
297 if (i >= k) {
298 continue;
299 }
300
301 // go up towards the root of the tree
302 usize node = i;
303 auto next = usize(-1);
304 while (true) {
305 if (node == usize(-1) || node >= k) {
306 break;
307 }
308
309 // use the highest known ancestor instead of the parent
310 next = util::sign_extend(pancestors[node]);
311
312 // set the highest known ancestor to k, since we know we're going
313 // to find it eventually
314 pancestors[node] = I(k);
315
316 // if there is no highest known ancestor, we must have hit the root of
317 // the tree
318 // set the parent to k
319 if (next == usize(-1)) {
320 parent[node] = I(k);
321 break;
322 }
323 // go to the highest ancestor
324 node = next;
325 }
326 }
327 }
328}
329
330namespace _detail {
331inline auto
333{
334 return { (k + 1) * isize{ sizeof(bool) }, alignof(bool) };
335}
336
337// compute the set of reachable nodes from the non zero pattern of a_{.,k}
338// not including the node k itself
339template<typename I>
342 I* s,
344 I const* parent,
345 isize k,
346 bool* pmarked) noexcept -> I*
347{
348
349 usize n = usize(a.ncols());
350 auto k_ = usize(k);
351
352 auto pai = a.row_indices();
353 auto pparent = parent;
354
355 auto col_start = a.col_start(k_);
356 auto col_end = a.col_end(k_);
357
358 pmarked[k_] = true;
359
360 usize top = n;
361
362 // for each non zero element of a_{.,k}
363 for (usize p = col_start; p < col_end; ++p) {
364 auto i = util::zero_extend(pai[p]);
365
366 // only triu part of a
367 if (i > k_) {
368 continue;
369 }
370
371 usize len = 0;
372 while (true) {
373 // if we reach a marked node
374 if (pmarked[i]) {
375 break;
376 }
377
378 // can't overwrite top of the stack since elements of s are unique
379 // and s is large enough to hold all the nodes
380 s[isize(len)] = I(i);
381 util::wrapping_inc(mut(len));
382
383 // mark node i as reached
384 pmarked[i] = true;
385 i = util::sign_extend(pparent[i]);
386 }
387
388 // make sure that we can memmove
389 std::memmove( //
390 s + (top - len),
391 s,
392 usize(len) * sizeof(I));
393
394 // move down the top of the stack
395 top = util::wrapping_plus(top, -len);
396 }
397
398 for (usize q = top; q < n; ++q) {
399 pmarked[s[q]] = false;
400 }
401 pmarked[k_] = false;
402
403 // [top, end[
404 count = n - top;
405 return s + top;
406}
407} // namespace _detail
408
409namespace _detail {
410// return the next start_index
411template<typename I>
412VEG_INLINE auto
414 I* post,
415 usize root,
416 usize start_index,
417 I* pstack,
418 I* pfirst_child,
419 I* pnext_child) noexcept -> usize
420{
421 using namespace _detail;
422
423 usize top = 0;
424 pstack[0] = I(root);
425
426 // stack is non empty
427 while (top != usize(-1)) {
428 auto current_node = util::zero_extend(pstack[top]);
429 auto current_child = util::sign_extend(pfirst_child[current_node]);
430
431 // no more children
432 if (current_child == usize(-1)) {
434 ++start_index;
435
436 // pop node from the stack
437 util::wrapping_dec(mut(top));
438 } else {
439 // add current child to the stack
440 util::wrapping_inc(mut(top));
442
443 // next child is now the first child
445 }
446 }
447 return start_index;
448}
449} // namespace _detail
450
457template<typename I>
458auto
461{
462 return { (3 * n) * isize(sizeof(I)), alignof(I) };
463}
464
473template<typename I>
474void
475postorder(I* post, I const* parent, isize n, DynStackMut stack) noexcept
476{
477 using namespace _detail;
478
479 auto _work = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{},
480 3 * isize(n));
481 I* pwork = _work.ptr_mut();
482
483 I* pstack = pwork;
484 I* pfirst_child = pstack + n;
485 I* pnext_child = pfirst_child + n;
486
487 // no children are found yet
488 for (usize j = 0; j < usize(n); ++j) {
489 pfirst_child[j] = I(-1);
490 }
491
492 for (usize _j = 0; _j < usize(n); ++_j) {
493 // traverse in reverse order, since the children appear in reverse order
494 // of insertion in the linked list
495 usize j = usize(n) - 1 - _j;
496
497 // if not a root node
498 if (parent[isize(j)] != I(-1)) {
499 // next child of this node is the previous first child
500 pnext_child[j] = pfirst_child[util::zero_extend(parent[isize(j)])];
501 // set this node to be the new first child
502 pfirst_child[util::zero_extend(parent[isize(j)])] = I(j);
503 }
504 }
505
506 usize start_index = 0;
507 for (usize root = 0; root < usize(n); ++root) {
508 if (parent[isize(root)] == I(-1)) {
511 }
512 }
513}
514
515namespace _detail {
516// returns -2 if j is not a leaf
517// returns -1 if j is a first leaf
518// returns the least common ancestor of j and the previous j otherwise
519template<typename I>
520VEG_INLINE auto
522 usize j,
523 I const* pfirst,
524 I* pmax_first,
525 I* pprev_leaf,
526 I* pancestor) noexcept -> I
527{
528 using namespace _detail;
529
530 // if upper triangular part, or not a leaf
531 // leaves always have a new larger value of pfirst
532 // add 1 to get the correct result when comparing with -1
533 if (i <= j || util::wrapping_plus(pfirst[j], I(1)) <=
534 util::wrapping_plus(pmax_first[i], I(1))) {
535 return I(-2);
536 }
537
538 // update the largest max_first
539 // no need to compare because the value of j increases
540 // inbetween successive calls, and so does first_j
541 pmax_first[i] = pfirst[j];
542
543 // get the previous j
544 usize j_prev = util::sign_extend(pprev_leaf[i]);
545
546 // set the previous j to the current j
547 pprev_leaf[i] = I(j);
548
549 // if first leaf
550 if (j_prev == usize(-1)) {
551 return I(-1);
552 }
553
554 // else, subsequent leaf
555 // get the least common ancestor of j and j_prev
556 usize lca = j_prev;
557 while (true) {
558 if (lca == util::zero_extend(pancestor[lca])) {
559 break;
560 }
561 lca = util::zero_extend(pancestor[lca]);
562 }
563
564 // compress the path to speed up the subsequent calls
565 // to this function
566 usize node = j_prev;
567 while (true) {
568 if (node == lca) {
569 break;
570 }
571 usize next = util::zero_extend(pancestor[node]);
572 pancestor[node] = I(lca);
573 node = next;
574 }
575
576 return I(lca);
577}
578} // namespace _detail
579
580template<typename I>
581auto
583 isize n,
584 isize nnz) noexcept
586{
588 return StackReq{
589 isize{ sizeof(I) } * (1 + 5 * n + nnz),
590 alignof(I),
592}
593
594template<typename I>
595void
598 I const* parent,
599 I const* post,
600 DynStackMut stack) noexcept
601{
602 // https://youtu.be/uZKJPTo4dZs
603 using namespace _detail;
604 usize n = usize(a.nrows());
605 auto _at_work = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<I>{},
606 1 + 5 * isize(n) + a.nnz());
607 auto pat_work = _at_work.ptr_mut();
608 pat_work[0] = 0;
609 pat_work[n] = I(a.nnz());
610
612 from_raw_parts, isize(n), isize(n), a.nnz(),
613 pat_work, nullptr, pat_work + n + 1,
614 };
616
617 auto patp = at.col_ptrs();
618 auto pati = at.row_indices();
619
620 auto pwork = pat_work + n + 1 + a.nnz();
621
622 auto pdelta = counts;
623
624 auto pfirst = pwork;
625 auto pmax_first = pwork + n;
626 auto pprev_leaf = pwork + 2 * n;
627 auto pancestor = pwork + 3 * n;
628
629 auto pcounts = counts;
630 auto ppost = post;
631 auto pparent = parent;
632
633 for (usize i = 0; i < 3 * n; ++i) {
634 pwork[i] = I(-1);
635 }
636 for (usize i = 0; i < n; ++i) {
637 pancestor[i] = I(i);
638 }
639
640 // for each column in a
641 for (usize k = 0; k < n; ++k) {
642 // in postordered fashion
643 auto j = util::zero_extend(ppost[k]);
644
645 // if first_j isn't computed, j must be a leaf
646 // because if it's not a leaf then first_j will be initialized from
647 // the init loop of its first descendant
648 //
649 // in which case initialize delta_j to 1
650 pdelta[j] = (pfirst[j] == I(-1)) ? I(1) : I(0);
651
652 // init loop
653 while (true) {
654 // while j is not a root, and the first descendant of j isn't computed
655 // set the first descendant of j to k, as well as all of its ancestors
656 // that don't yet have a first descendant
657 if (j == usize(-1) || pfirst[j] != I(-1)) {
658 break;
659 }
660 pfirst[j] = I(k);
661 j = util::sign_extend(pparent[j]);
662 }
663 }
664
665 // for each node
666 for (usize k = 0; k < n; ++k) {
667 // in postordered fashion
668 auto j = util::zero_extend(ppost[k]);
669
670 // if this node is the child of some other node
671 if (pparent[j] != I(-1)) {
672 // decrement the delta of that node
673 // corresponding to the correction term e_j
674 util::wrapping_dec(mut(pdelta[util::zero_extend(pparent[j])]));
675 }
676
677 auto col_start = util::zero_extend(patp[j]);
678 auto col_end = util::zero_extend(patp[j + 1]);
679
680 // iterate over lower triangular half of a
681 for (usize p = col_start; p < col_end; ++p) {
682 auto i = util::zero_extend(pati[p]);
684 i,
685 j,
686 pfirst,
689 pancestor);
690
691 // if j is a leaf of T^i
692 if (lca != I(-2)) {
693 util::wrapping_inc(mut(pdelta[j]));
694
695 // if j is a subsequent leaf
696 if (lca != I(-1)) {
697 util::wrapping_dec(mut(pdelta[util::zero_extend(lca)]));
698 }
699 }
700 }
701
702 if (pparent[j] != -1) {
703 // set the ancestor of j
704 pancestor[j] = pparent[j];
705 }
706 }
707
708 // sum up the deltas
709 for (usize j = 0; j < n; ++j) {
710 if (parent[isize(j)] != I(-1)) {
711 pcounts[util::zero_extend(parent[isize(j)])] = util::wrapping_plus(
712 pcounts[util::zero_extend(parent[isize(j)])], pcounts[j]);
713 }
714 }
715}
716
717template<typename I>
718auto
720 isize /*n*/,
721 isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
722{
723 return { nnz * isize{ sizeof(char) }, alignof(char) };
724}
725
726template<typename I>
727void
728amd(I* perm, SymbolicMatRef<I> mat, DynStackMut stack) noexcept
729{
730 // TODO: reimplement amd under BSD-3
731 // https://github.com/DrTimothyAldenDavis/SuiteSparse/tree/master/AMD
732
733 isize n = mat.nrows();
734 isize nnz = mat.nnz();
735
736 Eigen::PermutationMatrix<-1, -1, I> perm_eigen;
737 auto _ = stack.make_new(proxsuite::linalg::veg::Tag<char>{}, nnz);
738
739 Eigen::AMDOrdering<I>{}(
740 Eigen::Map<Eigen::SparseMatrix<char, Eigen::ColMajor, I> const>{
741 n,
742 n,
743 nnz,
744 mat.col_ptrs(),
745 mat.row_indices(),
746 _.ptr(),
747 mat.nnz_per_col(),
748 }
750
751 perm_eigen);
752 std::memmove( //
753 perm,
754 perm_eigen.indices().data(),
755 usize(n) * sizeof(I));
756}
757
758namespace _detail {
759template<typename I>
760void
761inv_perm(I* perm_inv, I const* perm, isize n) noexcept
762{
763 for (usize i = 0; i < usize(n); ++i) {
764 perm_inv[util::zero_extend(perm[i])] = I(i);
765 }
766}
767
768template<typename I>
769auto
771 isize n) noexcept
773{
774 return { n * isize{ sizeof(I) }, alignof(I) };
775}
776template<typename I>
777auto
780{
781 return { n * isize{ sizeof(I) }, alignof(I) };
782}
783
784template<typename I>
785void
787 I const* pperm_inv,
789 I* pnew_ap,
790 I* pcol_counts)
791{
792 for (usize old_j = 0; old_j < n; ++old_j) {
793 usize new_j = util::zero_extend(pperm_inv[old_j]);
794
795 auto col_start = old_a.col_start(old_j);
796 auto col_end = old_a.col_end(old_j);
797
798 for (usize p = col_start; p < col_end; ++p) {
799 usize old_i = util::zero_extend(old_a.row_indices()[p]);
800
801 if (old_i <= old_j) {
802 usize new_i = util::zero_extend(pperm_inv[old_i]);
803 util::wrapping_inc(mut(pcol_counts[new_i > new_j ? new_i : new_j]));
804 }
805 }
806 }
807
808 pnew_ap[0] = I(0);
809 for (usize i = 0; i < n; ++i) {
810 pnew_ap[i + 1] =
811 util::checked_non_negative_plus(pnew_ap[i], pcol_counts[i]);
813 }
814}
815
816template<typename I>
817void
820 I const* perm_inv,
821 DynStackMut stack) noexcept
822{
823
824 usize n = usize(new_a.nrows());
825
826 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, isize(n));
827 I* pcol_counts = _work.ptr_mut();
828
829 VEG_ASSERT(new_a.is_compressed());
830 auto pold_ai = old_a.row_indices();
831
832 auto pnew_ap = new_a.col_ptrs_mut();
833 auto pnew_ai = new_a.row_indices_mut();
834
835 auto pperm_inv = perm_inv;
836
838
840
841 for (usize old_j = 0; old_j < n; ++old_j) {
842 usize new_j = util::zero_extend(pperm_inv[old_j]);
843
844 auto col_start = old_a.col_start(old_j);
845 auto col_end = old_a.col_end(old_j);
846
847 for (usize p = col_start; p < col_end; ++p) {
848 usize old_i = util::zero_extend(pold_ai[p]);
849
850 if (old_i <= old_j) {
851 usize new_i = util::zero_extend(pperm_inv[old_i]);
852
853 usize new_max = new_i > new_j ? new_i : new_j;
854 usize new_min = new_i < new_j ? new_i : new_j;
855
857 pnew_ai[row_idx] = I(new_min);
858 pcurrent_row_index[new_max] = util::wrapping_plus(row_idx, I(1));
859 }
860 }
861 }
862}
863
864template<typename T, typename I>
865void
868 I const* perm_inv,
870{
871 usize n = usize(new_a.nrows());
872 auto _work = stack.make_new(proxsuite::linalg::veg::Tag<I>{}, isize(n));
873 I* pcol_counts = _work.ptr_mut();
874
875 VEG_ASSERT(new_a.is_compressed());
876 auto pold_ai = old_a.row_indices();
877
878 auto pnew_ap = new_a.col_ptrs_mut();
879 auto pnew_ai = new_a.row_indices_mut();
880
881 auto pperm_inv = perm_inv;
882
884 n, pperm_inv, old_a.symbolic(), pnew_ap, pcol_counts);
885
887
888 auto pold_ax = old_a.values();
889 auto pnew_ax = new_a.values_mut();
890 for (usize old_j = 0; old_j < n; ++old_j) {
891 usize new_j = util::zero_extend(pperm_inv[old_j]);
892
893 auto col_start = old_a.col_start(old_j);
894 auto col_end = old_a.col_end(old_j);
895
896 for (usize p = col_start; p < col_end; ++p) {
897 usize old_i = util::zero_extend(pold_ai[p]);
898
899 if (old_i <= old_j) {
900 usize new_i = util::zero_extend(pperm_inv[old_i]);
901
902 usize new_max = new_i > new_j ? new_i : new_j;
903 usize new_min = new_i < new_j ? new_i : new_j;
904
906 pnew_ai[row_idx] = I(new_min);
907 pnew_ax[row_idx] = pold_ax[p];
908 pcurrent_row_index[new_max] = util::wrapping_plus(row_idx, I(1));
909 }
910 }
911 }
912}
913} // namespace _detail
914
915enum struct Ordering : unsigned char
916{
917 natural,
919 amd,
920 ENUM_END,
921};
922
931template<typename I>
932auto
934 isize n,
935 isize nnz,
936 Ordering o) noexcept
938{
940 constexpr isize sz{ sizeof(I) };
941 constexpr isize al{ alignof(I) };
942
943 StackReq perm_req{ 0, al };
944 StackReq amd_req{ 0, al };
945 switch (o) {
947 break;
948 case Ordering::amd:
949 amd_req =
950 StackReq{ n * sz, al } & StackReq{ sparse::amd_req(tag, n, nnz) };
953 perm_req = perm_req & StackReq{ (n + 1 + nnz) * sz, al };
955 default:
956 break;
957 }
958
959 StackReq parent_req = { n * sz, al };
960 StackReq post_req = { n * sz, al };
961
962 StackReq etree_req = sparse::etree_req(tag, n);
963 StackReq postorder_req = sparse::postorder_req(tag, n);
964 StackReq colcount_req = sparse::column_counts_req(tag, n, nnz);
965
966 return amd_req //
967 | (perm_req //
968 & (parent_req //
969 & (etree_req //
970 | (post_req //
971 & (postorder_req | colcount_req)))));
972}
973
985template<typename I>
986void
988 I* etree,
989 I* perm_inv,
990 I const* perm,
992 DynStackMut stack) noexcept
993{
994
995 bool id_perm = perm_inv == nullptr;
996 bool user_perm = perm != nullptr;
997
1000 : Ordering::amd;
1001
1003
1004 usize n = usize(a.ncols());
1005
1006 switch (o) {
1007 case Ordering::natural:
1008 break;
1009
1010 case Ordering::amd: {
1011 auto amd_perm = stack.make_new_for_overwrite(tag, isize(n));
1012 sparse::amd(amd_perm.ptr_mut(), a, stack);
1013 perm = amd_perm.ptr();
1014 }
1017 _detail::inv_perm(perm_inv, perm, isize(n));
1018 }
1019 default:
1020 break;
1021 }
1022
1024 stack //
1025 .make_new_for_overwrite(tag, id_perm ? 0 : (a.ncols() + 1));
1027 stack //
1028 .make_new_for_overwrite(tag, id_perm ? 0 : (a.nnz()));
1029
1030 if (!id_perm) {
1031 _permuted_a_col_ptrs.as_mut()[0] = 0;
1032 _permuted_a_col_ptrs.as_mut()[isize(n)] = I(a.nnz());
1034 from_raw_parts,
1035 isize(n),
1036 isize(n),
1037 a.nnz(),
1038 _permuted_a_col_ptrs.ptr_mut(),
1039 nullptr,
1040 _permuted_a_row_indices.ptr_mut(),
1041 };
1043 }
1044
1047 from_raw_parts,
1048 isize(n),
1049 isize(n),
1050 a.nnz(),
1052 nullptr,
1054 };
1055
1057
1058 auto _post = stack.make_new_for_overwrite(tag, isize(n));
1059 sparse::postorder(_post.ptr_mut(), etree, isize(n), stack);
1060 sparse::column_counts(nnz_per_col, permuted_a, etree, _post.ptr(), stack);
1061}
1062
1074template<typename I>
1075void
1077 I* etree,
1078 I* perm_inv,
1079 I const* perm,
1081 DynStackMut stack) noexcept
1082{
1083
1085 col_ptrs + 1,
1086 etree,
1087 perm_inv,
1088 perm,
1089 a,
1090 stack);
1091
1092 usize n = usize(a.ncols());
1093 auto pcol_ptrs = col_ptrs;
1094 pcol_ptrs[0] = I(0);
1095 for (usize i = 0; i < n; ++i) {
1096 pcol_ptrs[i + 1] =
1097 util::checked_non_negative_plus(pcol_ptrs[i + 1], pcol_ptrs[i]);
1098 }
1099}
1100
1109template<typename T, typename I>
1110auto
1113 isize n,
1114 isize a_nnz,
1115 Ordering o) noexcept
1117{
1119
1120 constexpr isize sz{ sizeof(I) };
1121 constexpr isize al{ alignof(I) };
1122
1123 constexpr isize tsz{ sizeof(T) };
1124 constexpr isize tal{ alignof(T) };
1125
1126 bool id_perm = o == Ordering::natural;
1127
1128 auto symb_perm_req = StackReq{ sz * (id_perm ? 0 : (n + 1 + a_nnz)), al };
1129 auto num_perm_req = StackReq{ tsz * (id_perm ? 0 : a_nnz), tal };
1130 return num_perm_req //
1131 & (StackReq{ tsz * n, tal } //
1132 & (symb_perm_req //
1133 & (StackReq{ 2 * n * sz, al } //
1134 & (StackReq{ n * tsz, tal } //
1135 & StackReq{ n * isize{ sizeof(bool) }, alignof(bool) }))));
1136}
1137
1157template<typename T, typename I>
1158void
1160 T* values,
1161 I* row_indices,
1164 I const* col_ptrs,
1165 I const* etree,
1166 I const* perm_inv,
1167 MatRef<T, I> a,
1168 DynStackMut stack) noexcept(false)
1169{
1170 using namespace _detail;
1171 isize n = a.nrows();
1172
1173 bool id_perm = perm_inv == nullptr;
1174
1176
1177 auto _permuted_a_values = stack.make_new_for_overwrite(
1178 proxsuite::linalg::veg::Tag<T>{}, id_perm ? 0 : a.nnz());
1179
1180 auto _x = stack.make_new_for_overwrite(proxsuite::linalg::veg::Tag<T>{}, n);
1181
1183 stack.make_new_for_overwrite(tag, id_perm ? 0 : (a.ncols() + 1));
1185 stack.make_new_for_overwrite(tag, id_perm ? 0 : a.nnz());
1186
1187 if (!id_perm) {
1188 _permuted_a_col_ptrs.as_mut()[0] = 0;
1189 _permuted_a_col_ptrs.as_mut()[n] = I(a.nnz());
1191 from_raw_parts,
1192 n,
1193 n,
1194 a.nnz(),
1195 _permuted_a_col_ptrs.ptr_mut(),
1196 nullptr,
1197 _permuted_a_row_indices.ptr_mut(),
1198 _permuted_a_values.ptr_mut(),
1199 };
1200 _detail::symmetric_permute(permuted_a, a, perm_inv, stack);
1201 }
1202
1204 : MatRef<T, I>{
1205 from_raw_parts,
1206 isize(n),
1207 isize(n),
1208 a.nnz(),
1210 nullptr,
1212 _permuted_a_values.ptr(),
1213 };
1214
1215 auto _current_row_index = stack.make_new_for_overwrite(tag, n);
1216 auto _ereach_stack_storage = stack.make_new_for_overwrite(tag, n);
1217
1219 T* px = _x.ptr_mut();
1220
1221 std::memcpy( //
1223 col_ptrs,
1224 usize(n) * sizeof(I));
1225 for (usize i = 0; i < usize(n); ++i) {
1226 px[i] = 0;
1227 }
1228
1229 // compute the iter-th row of L using the iter-th column of permuted_a
1230 // the diagonal element is filled with the diagonal of D instead of 1
1231 I const* plp = col_ptrs;
1232
1233 auto _marked = stack.make_new(proxsuite::linalg::veg::Tag<bool>{}, n);
1234 for (usize iter = 0; iter < usize(n); ++iter) {
1235 usize ereach_count = 0;
1237 _ereach_stack_storage.ptr_mut(),
1238 permuted_a.symbolic(),
1239 etree,
1240 isize(iter),
1241 _marked.ptr_mut());
1242
1244
1245 I const* pai = permuted_a.row_indices();
1246 T const* pax = permuted_a.values();
1247
1248 I* pli = row_indices;
1249 T* plx = values;
1250
1251 {
1252 auto col_start = permuted_a.col_start(iter);
1253 auto col_end = permuted_a.col_end(iter);
1254
1255 // scatter permuted_a column into x
1256 // untouched columns are already zeroed
1257
1258 for (usize p = col_start; p < col_end; ++p) {
1259 auto i = util::zero_extend(pai[p]);
1260 px[i] = pax[p];
1261 }
1262 }
1263 T d = px[iter] + ((diag_to_add == nullptr || perm == nullptr)
1264 ? T(0)
1265 : diag_to_add[util::zero_extend(perm[iter])]);
1266
1267 // zero for next iteration
1268 px[iter] = 0;
1269
1270 for (usize q = 0; q < ereach_count; ++q) {
1271 usize j = util::zero_extend(pereach_stack[q]);
1272 auto col_start = util::zero_extend(plp[j]);
1273 auto row_idx = util::zero_extend(pcurrent_row_index[j]) + 1;
1274
1275 T const xj = px[j];
1276 T const dj = plx[col_start];
1277 T const lkj = xj / dj;
1278
1279 // zero for the next iteration
1280 px[j] = 0;
1281
1282 // skip first element, to put diagonal there later
1283 for (usize p = col_start + 1; p < row_idx; ++p) {
1284 auto i = util::zero_extend(pli[p]);
1285 px[i] -= plx[p] * xj;
1286 }
1287
1288 d -= lkj * xj;
1289
1290 pli[row_idx] = I(iter);
1291 plx[row_idx] = lkj;
1293 }
1294 {
1295 auto col_start = util::zero_extend(plp[iter]);
1296 pli[col_start] = I(iter);
1297 plx[col_start] = d;
1298 }
1299 }
1300}
1301} // namespace sparse
1302} // namespace linalg
1303} // namespace proxsuite
1304#endif /* end of include guard PROXSUITE_LINALG_SPARSE_LDLT_FACTORIZE_HPP */
#define VEG_ASSERT_ALL_OF(...)
#define VEG_ASSERT(...)
#define HEDLEY_FALL_THROUGH
#define VEG_CONCEPT(...)
Definition macros.hpp:1243
#define VEG_INLINE
Definition macros.hpp:118
VEG_INLINE auto postorder_depth_first_search(I *post, usize root, usize start_index, I *pstack, I *pfirst_child, I *pnext_child) noexcept -> usize
void inv_perm(I *perm_inv, I const *perm, isize n) noexcept
void symmetric_permute(MatMut< T, I > new_a, MatRef< T, I > old_a, I const *perm_inv, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
void symmetric_permute_common(usize n, I const *pperm_inv, SymbolicMatRef< I > old_a, I *pnew_ap, I *pcol_counts)
VEG_INLINE auto least_common_ancestor(usize i, usize j, I const *pfirst, I *pmax_first, I *pprev_leaf, I *pancestor) noexcept -> I
auto symmetric_permute_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
VEG_NODISCARD VEG_INLINE auto ereach(usize &count, I *s, SymbolicMatRef< I > a, I const *parent, isize k, bool *pmarked) noexcept -> I *
auto ereach_req(isize k) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void symmetric_permute_symbolic(SymbolicMatMut< I > new_a, SymbolicMatRef< I > old_a, I const *perm_inv, DynStackMut stack) noexcept
auto symmetric_permute_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_non_zeros(I *nnz_per_col, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void factorize_numeric(T *values, I *row_indices, proxsuite::linalg::veg::DoNotDeduce< T const * > diag_to_add, proxsuite::linalg::veg::DoNotDeduce< I const * > perm, I const *col_ptrs, I const *etree, I const *perm_inv, MatRef< T, I > a, DynStackMut stack) noexcept(false)
auto amd_req(proxsuite::linalg::veg::Tag< I >, isize, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto postorder_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void postorder(I *post, I const *parent, isize n, DynStackMut stack) noexcept
auto transpose_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
Definition factorize.hpp:17
void transpose(MatMut< T, I > at, MatRef< T, I > a, DynStackMut stack) noexcept(VEG_CONCEPT(nothrow_copyable< T >))
Definition factorize.hpp:26
VEG_INLINE void etree(I *parent, SymbolicMatRef< I > a, DynStackMut stack) noexcept
auto factorize_symbolic_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void factorize_symbolic_col_counts(I *col_ptrs, I *etree, I *perm_inv, I const *perm, SymbolicMatRef< I > a, DynStackMut stack) noexcept
void transpose_symbolic(SymbolicMatMut< I > at, SymbolicMatRef< I > a, DynStackMut stack) noexcept
Definition factorize.hpp:95
auto transpose_symbolic_req(proxsuite::linalg::veg::Tag< I >, isize nrows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
Definition factorize.hpp:86
auto factorize_numeric_req(proxsuite::linalg::veg::Tag< T >, proxsuite::linalg::veg::Tag< I >, isize n, isize a_nnz, Ordering o) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_ltsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
void column_counts(I *counts, SymbolicMatRef< I > a, I const *parent, I const *post, DynStackMut stack) noexcept
auto etree_req(proxsuite::linalg::veg::Tag< I >, isize n) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
void dense_lsolve(DenseVecMut< T > x, MatRef< T, I > l) noexcept(false)
auto column_counts_req(proxsuite::linalg::veg::Tag< I > tag, isize n, isize nnz) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
#define VEG_NODISCARD
Definition prologue.hpp:97