5#ifndef PROXSUITE_LINALG_DENSE_LDLT_UPDATE_HPP
6#define PROXSUITE_LINALG_DENSE_LDLT_UPDATE_HPP
// Body fragment: computes the offset from `ptr` to an address with the low
// bits selected by `align - 1` cleared (i.e. rounded downwards).
// NOTE(review): the enclosing signature is not adjacent in this excerpt;
// presumably this is `bytes_to_prev_aligned(void*, usize)` - confirm.
17 using UPtr = std::uintptr_t;
// `align - 1` only acts as a low-bit mask when `align` is a power of two -
// assumption, nothing visible here checks it.
19 UPtr mask =
align - 1;
// Work on the integer value of the pointer.
20 UPtr iptr = UPtr(ptr);
// Clearing the masked bits can never increase the value, so
// `aligned_ptr <= iptr`.
21 UPtr aligned_ptr = iptr & ~mask;
// The unsigned difference wraps when `aligned_ptr < iptr`; the conversion to
// `isize` is what is expected to turn it into a zero-or-negative offset
// (relies on modular unsigned-to-signed conversion - TODO confirm intent).
22 return isize(aligned_ptr - iptr);
// Body fragment: computes the offset from `ptr` to the address obtained by
// adding `align - 1` and then clearing the low bits selected by `align - 1`
// (i.e. rounded upwards).
// NOTE(review): the enclosing signature is not adjacent in this excerpt;
// presumably this is `bytes_to_next_aligned(void*, usize)` - confirm.
27 using UPtr = std::uintptr_t;
// `align - 1` only acts as a low-bit mask when `align` is a power of two -
// assumption, nothing visible here checks it.
29 UPtr mask =
align - 1;
// Work on the integer value of the pointer.
30 UPtr iptr = UPtr(ptr);
// Add-then-mask: leaves `iptr` unchanged when its masked bits are already
// zero, otherwise moves to the next value whose masked bits are zero.
31 UPtr aligned_ptr = (iptr + mask) & ~mask;
// Difference between the rounded value and the original, converted to `isize`
// (zero or positive unless `iptr + mask` overflowed).
32 return isize(aligned_ptr - iptr);
35template<usize... Is,
typename Fn>
43template<usize N,
typename Fn>
51template<
typename T, usize N>
64template<
typename T, usize N>
79template<
typename T, usize N>
92template<usize R,
typename T, usize N>
108 p_in_l.store_unaligned(inout_l);
111template<
bool VECTORIZABLE>
114template<
typename T, usize N>
// Fragment of a loop implementation templated on a count `R` and element
// type `T`. Only the tail of the parameter list and the main loop are visible.
// NOTE(review): presumably the non-vectorized variant, since it has a single
// loop and no pack-width constant - confirm against the full file.
131 template<usize R,
typename T>
137 T
const* mu)
noexcept
// One-past-the-end sentinel: `n` elements starting at `inout_l`.
145 auto inout_l_finish = inout_l + n;
// Repeatedly applies the per-iteration helper until the sentinel is reached.
// The statements that advance `inout_l` (and possibly `pw`) are not part of
// this excerpt.
146 while (inout_l < inout_l_finish) {
// `p_p` and `p_mu` are declared on lines not shown here.
147 _detail::rank_r_update_inner_loop_iter<R>(
148 p_p, p_mu, inout_l, pw, w_stride);
// Fragment of a second loop implementation templated on `R` and `T`. It
// splits the `n` elements into a part whose length is a multiple of `N` and
// a remainder, and runs one loop over each.
// NOTE(review): presumably the vectorized variant - confirm against the
// full file.
158 template<usize R,
typename T>
164 T
const* mu)
noexcept
// `Info` is defined on a line not shown; `N` is presumably the number of
// elements per pack - TODO confirm.
171 constexpr usize N = Info::N;
// Integer division then multiplication: the largest multiple of `N` that
// does not exceed `n`, used as the end of the first loop.
172 auto inout_l_vectorized_end = inout_l + usize(n) / N * N;
// One-past-the-end of the whole range.
173 auto inout_l_end = inout_l + usize(n);
// First loop: runs up to the multiple-of-`N` boundary. The pointer
// increments are not part of this excerpt.
182 while (inout_l < inout_l_vectorized_end) {
183 _detail::rank_r_update_inner_loop_iter<R>(
184 p_p, p_mu, inout_l, pw, w_stride);
// Second loop: handles whatever is left between the boundary and the end.
// The same helper name is called; `p_p` / `p_mu` may be redeclared with a
// different type between the loops (those lines are not visible).
196 while (inout_l < inout_l_end) {
197 _detail::rank_r_update_inner_loop_iter<R>(
198 p_p, p_mu, inout_l, pw, w_stride);
// Fragment: template head (`R`, `T`) followed by the tail of a call that
// passes `n, inout_l, pw, w_stride, p, mu` through in that order.
// NOTE(review): the callee name sits on a line not included in this excerpt;
// presumably this selects one of the loop implementations - confirm.
206template<usize R,
typename T>
216 n, inout_l, pw, w_stride, p, mu);
// Fragment of a routine templated on a matrix-like type `LD`, a scalar type
// `T` and a callable type `Fn`. Several original lines are missing between
// the statements below, so the comments describe only what is visible.
219template<
typename LD,
typename T,
typename Fn>
// Compile-time requirements on `LD`: unit inner stride ...
228 static_assert(LD::InnerStrideAtCompileTime == 1,
".");
// ... and not row-major storage.
229 static_assert(!bool(LD::IsRowMajor),
".");
// Outer loop over indices `j` in [0, n). `n` is defined on a line not shown.
233 for (isize j = 0; j < n; ++j) {
// Process at most 4 of the remaining `r - r_done` items per pass.
242 isize r_chunk = min2(isize(4), r - r_done);
// Compute the per-item coefficients for this chunk.
248 for (isize k = 0; k < r_chunk; ++k) {
// References into chunk-local arrays and into the caller's `palpha` array,
// so the assignments below write through to them.
249 auto& p = (+p_array)[k];
250 auto& mu = (+mu_array)[k];
251 auto& alpha = palpha[r_done + k];
// Read one element of `pw`, `w_stride` elements apart for consecutive items.
253 p = pw[(r_done + k) * w_stride];
// new_dj = dj + alpha * p^2. Where `dj` comes from, and whether `new_dj` is
// stored back, is on lines not shown.
254 T new_dj = dj + (alpha * p) * p;
// mu = alpha * p / new_dj.
// NOTE(review): no guard against `new_dj == 0` is visible in this excerpt.
255 mu = (alpha * p) / new_dj;
// `alpha` is updated in place, so `palpha` is modified.
256 alpha -= new_dj * (mu * mu);
// Number of indices strictly after `j`.
262 isize rem = n - j - 1;
// Function-pointer type matching the six-argument inner-loop signature.
264 using FnType = void (*)(isize, T*, T*, isize, T
const*, T
const*);
// Table of the inner loop instantiated for R = 1..4, so the chunk size
// chosen at run time can select a compile-time `R`.
265 FnType fn_table[] = {
266 rank_r_update_inner_loop<1, T>,
267 rank_r_update_inner_loop<2, T>,
268 rank_r_update_inner_loop<3, T>,
269 rank_r_update_inner_loop<4, T>,
// `r_chunk` is in 1..4 when this is reached with `r - r_done >= 1`, so
// `r_chunk - 1` indexes the table entry with R == r_chunk.
272 (*fn_table[r_chunk - 1])(
// One of the call arguments: `pw` advanced by one element plus `r_done`
// strides. The other arguments are on lines not shown.
275 pw + 1 + r_done * w_stride,
297 typename T =
typename proxsuite::linalg::veg::uncvref_t<LD>::Scalar>
307 proxsuite::linalg::veg::mem::addressof(alpha),
314 typename T =
typename proxsuite::linalg::veg::uncvref_t<LD>::Scalar>
#define VEG_EVAL_ALL(...)
typename NativePackInfo< T >::Type NativePack
VEG_INLINE void unroll_impl(proxsuite::linalg::veg::meta::index_sequence< Is... >, Fn fn)
auto bytes_to_prev_aligned(void *ptr, usize align) noexcept -> isize
auto bytes_to_next_aligned(void *ptr, usize align) noexcept -> isize
VEG_INLINE void rank_r_update_inner_loop_iter(_simd::Pack< T, N > const *p_p, _simd::Pack< T, N > const *p_mu, T *inout_l, T *pw, isize w_stride)
void rank_r_update_clobber_w_impl(LD ld, T *pw, isize w_stride, T *palpha, Fn r_fn)
VEG_INLINE void rank_r_update_inner_loop(isize n, T *inout_l, T *pw, isize w_stride, T const *p, T const *mu)
VEG_INLINE void unroll(Fn fn)
auto align() noexcept -> isize
auto matrix_elem_addr(Mat &&mat, isize row, isize col) noexcept -> decltype(mat.data())
auto to_view_dyn(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedMatrix< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
void rank_r_update_clobber_inputs(LD &&ld, W &&w, A &&alpha)
void rank_1_update_clobber_w(LD &&ld, W &&w, proxsuite::linalg::veg::DoNotDeduce< T > alpha)
VEG_INLINE auto operator()() const noexcept -> isize
static VEG_INLINE void fn(isize n, T *inout_l, T *pw, isize w_stride, T const *p, T const *mu) noexcept
static VEG_INLINE void fn(isize n, T *inout_l, T *pw, isize w_stride, T const *p, T const *mu) noexcept
VEG_INLINE void operator()(usize i) const
_simd::Pack< T, N > * p_p
_simd::Pack< T, N > * p_mu
_simd::Pack< T, N > * p_wr
VEG_INLINE void operator()(usize i) const
_simd::Pack< T, N > const * p_wr
VEG_INLINE void operator()(usize i) const
VEG_INLINE void operator()(usize i) const
_simd::Pack< T, N > const * p_p
_simd::Pack< T, N > const * p_mu
_simd::Pack< T, N > & p_in_l
_simd::Pack< T, N > * p_wr