5#ifndef PROXSUITE_LINALG_DENSE_LDLT_CORE_HPP
6#define PROXSUITE_LINALG_DENSE_LDLT_CORE_HPP
12#if !(defined(__aarch64__) || defined(__PPC64__) || defined(__ppc64__) || \
17#ifdef PROXSUITE_VECTORIZE
19#include <simde/x86/avx2.h>
20#include <simde/x86/fma.h>
25#define LDLT_ID(id) __VEG_PP_CAT(id, __LINE__)
27#define __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, Make) \
28 auto LDLT_ID(vec_storage) = (Stack).Make( \
29 ::proxsuite::linalg::veg::Tag<__VEG_PP_REMOVE_PAREN(Type)>{}, \
31 ::proxsuite::linalg::dense::_detail::align<__VEG_PP_REMOVE_PAREN( \
33 auto Name = ::Eigen::Map< \
34 ::Eigen::Matrix<__VEG_PP_REMOVE_PAREN(Type), ::Eigen::Dynamic, 1>, \
36 ::Eigen::Stride<::Eigen::Dynamic, 1>>{ \
37 LDLT_ID(vec_storage).ptr_mut(), \
38 LDLT_ID(vec_storage).len(), \
39 ::Eigen::Stride<::Eigen::Dynamic, 1>{ \
40 LDLT_ID(vec_storage).len(), \
44 static_assert(true, ".")
46#define __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, Make) \
47 ::proxsuite::linalg::veg::isize LDLT_ID(rows) = (Rows); \
48 ::proxsuite::linalg::veg::isize LDLT_ID(cols) = (Cols); \
49 ::proxsuite::linalg::veg::isize LDLT_ID(stride) = \
50 ::proxsuite::linalg::dense::_detail::adjusted_stride< \
51 __VEG_PP_REMOVE_PAREN(Type)>(LDLT_ID(rows)); \
52 auto LDLT_ID(vec_storage) = (Stack).Make( \
53 ::proxsuite::linalg::veg::Tag<__VEG_PP_REMOVE_PAREN(Type)>{}, \
54 LDLT_ID(stride) * LDLT_ID(cols), \
55 ::proxsuite::linalg::dense::_detail::align<__VEG_PP_REMOVE_PAREN( \
58 ::Eigen::Map<::Eigen::Matrix<__VEG_PP_REMOVE_PAREN(Type), \
63 ::Eigen::Stride<::Eigen::Dynamic, 1>>{ \
64 LDLT_ID(vec_storage).ptr_mut(), \
67 ::Eigen::Stride<::Eigen::Dynamic, 1>{ \
72 static_assert(true, ".")
74#define LDLT_TEMP_VEC(Type, Name, Rows, Stack) \
75 __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, make_new)
76#define LDLT_TEMP_VEC_UNINIT(Type, Name, Rows, Stack) \
77 __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, make_new_for_overwrite)
79#define LDLT_TEMP_MAT(Type, Name, Rows, Cols, Stack) \
80 __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, make_new)
81#define LDLT_TEMP_MAT_UNINIT(Type, Name, Rows, Cols, Stack) \
82 __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, make_new_for_overwrite)
98#define DENSE_LDLT_FP_PRAGMA _Pragma("STDC FP_CONTRACT ON")
100#define DENSE_LDLT_FP_PRAGMA
103static_assert(
static_cast<unsigned char>(-1) == 255,
"char should have 8 bits");
104static_assert(
sizeof(
f32) == 4,
"f32 should be 32 bits");
105static_assert(
sizeof(
f64) == 8,
"f64 should be 64 bits");
107#define LDLT_FN_IMPL3(Fn, Prefix, Suffix) \
108 VEG_INLINE static auto Fn(Pack a, Pack b, Pack c) noexcept -> Pack \
110 return Pack{ simde_mm##Prefix##_##Fn##_##Suffix( \
111 a.inner, b.inner, c.inner) }; \
115#define LDLT_ARITHMETIC_IMPL(Prefix, Suffix) \
116 LDLT_FN_IMPL3(fmadd, Prefix, Suffix); \
117 LDLT_FN_IMPL3(fnmadd, Prefix, Suffix);
119#define LDLT_LOAD_STORE(Prefix, Suffix) \
120 VEG_INLINE static auto load_unaligned(ScalarType const* ptr) noexcept \
123 return Pack{ simde_mm##Prefix##_loadu_##Suffix(ptr) }; \
125 VEG_INLINE static auto broadcast(ScalarType value) noexcept -> Pack \
127 return Pack{ simde_mm##Prefix##_set1_##Suffix(value) }; \
129 VEG_INLINE void store_unaligned(ScalarType* ptr) const noexcept \
131 simde_mm##Prefix##_storeu_##Suffix(ptr, inner); \
135template<
typename T, usize N>
148 return { a.inner * b.inner + c.inner };
152 return fmadd({ -a.inner }, b, c);
168#ifdef PROXSUITE_VECTORIZE
172 using ScalarType =
f32;
182 using ScalarType =
f32;
193 using ScalarType =
f32;
196 VEG_INLINE static auto fmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
199 return { _mm512_fmadd_ps(a.inner, b.inner, c.inner) };
201 VEG_INLINE static auto fnmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
203 return { _mm512_fnmadd_ps(a.inner, b.inner, c.inner) };
205 VEG_INLINE static auto load_unaligned(ScalarType
const* ptr)
noexcept -> Pack
207 return { _mm512_loadu_ps(ptr) };
209 VEG_INLINE static auto broadcast(ScalarType value)
noexcept -> Pack
211 return { _mm512_set1_ps(value) };
213 VEG_INLINE void store_unaligned(ScalarType* ptr)
const noexcept
215 _mm512_storeu_ps(ptr, inner);
223 using ScalarType =
f64;
232 using ScalarType =
f64;
243 using ScalarType =
f64;
246 VEG_INLINE static auto fmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
249 return { _mm512_fmadd_pd(a.inner, b.inner, c.inner) };
251 VEG_INLINE static auto fnmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
253 return { _mm512_fnmadd_pd(a.inner, b.inner, c.inner) };
255 VEG_INLINE static auto load_unaligned(ScalarType
const* ptr)
noexcept -> Pack
257 return { _mm512_loadu_pd(ptr) };
259 VEG_INLINE static auto broadcast(ScalarType value)
noexcept -> Pack
261 return { _mm512_set1_pd(value) };
263 VEG_INLINE void store_unaligned(ScalarType* ptr)
const noexcept
265 _mm512_storeu_pd(ptr, inner);
279#ifdef PROXSUITE_VECTORIZE
283 static constexpr usize N = SIMDE_NATURAL_VECTOR_SIZE / 32;
287struct NativePackInfo<
f64>
289 static constexpr usize N = SIMDE_NATURAL_VECTOR_SIZE / 64;
290 using Type = Pack<f64, N>;
301template<
bool COND,
typename T>
312 return a + (b - 1) / b * b;
315#ifdef PROXSUITE_VECTORIZE
329#ifndef SIMDE_NATURAL_FLOAT_VECTOR_SIZE
330 isize simd_stride = 1;
333 (SIMDE_NATURAL_VECTOR_SIZE / CHAR_BIT) /
isize{
sizeof(T) };
362 return a > b ? a : b;
370 return (a < b) ? a : b;
381 for (
usize i = 0; i < n; ++i) {
388 Eigen::Matrix<
typename T::Scalar,
391 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
395 Eigen::Matrix<
typename T::Scalar,
396 T::RowsAtCompileTime,
397 T::ColsAtCompileTime,
398 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
402 Eigen::Matrix<
typename T::Scalar,
404 T::ColsAtCompileTime,
405 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
409 Eigen::Matrix<
typename T::Scalar,
410 T::RowsAtCompileTime,
412 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
427template<
bool ROWMAJOR>
434 static auto fn(T* ptr,
438 isize inner_stride)
noexcept -> T*
440 return ptr + (inner_stride * row + outer_stride * col);
447 static auto fn(T* ptr,
451 isize inner_stride)
noexcept -> T*
453 return ptr + (outer_stride * row + inner_stride * col);
457template<
bool COLMAJOR>
468 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>>;
474 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>>;
480 mat.data() + col_idx * mat.outerStride(),
483 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>{
492 mat.data() + row_idx * mat.innerStride(),
495 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>{
509 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>>;
515 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>>;
521 mat.data() + col_idx * mat.innerStride(),
524 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>{
533 mat.data() + row_idx * mat.outerStride(),
536 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>{
544 T::OuterStrideAtCompileTime,
545 T::InnerStrideAtCompileTime>;
549template<
bool COLMAJOR,
typename T>
555 isize inner_stride)
noexcept -> T*
558 ptr,
row,
col, outer_stride, inner_stride);
561template<
typename Mat>
564 ->
decltype(mat.data())
592template<
typename Mat>
594trans(Mat&& mat)
noexcept -> Eigen::Map<
617template<
typename Mat>
627 Eigen::InnerStride<Eigen::Dynamic>>
630 mat.rows() == mat.cols());
634 Eigen::InnerStride<Eigen::Dynamic>{ mat.outerStride() + 1 } };
637template<
typename Mat>
643 isize ncols)
noexcept
652 mat.data(), row_start, col_start, mat.outerStride(), mat.innerStride()),
662template<
typename Mat>
681template<
typename Mat>
700template<
typename Mat>
719template<
typename Mat>
739template<
typename Mat>
749 mat.data(), row_start, 0, mat.outerStride(), mat.innerStride()),
759template<
typename Mat>
769 mat.data(), 0, col_start, mat.outerStride(), mat.innerStride()),
781template<
typename Dst,
typename Lhs,
typename Rhs,
typename T>
786 dst.cols() == rhs.cols(),
787 lhs.cols() == rhs.rows());
789 isize nrows = dst.rows();
790 isize ncols = dst.cols();
791 isize depth = lhs.cols();
793 if (nrows == 0 || ncols == 0 || depth == 0) {
797#if !EIGEN_VERSION_AT_LEAST(3, 3, 8)
798#define LAZY_PRODUCT(a, b) a.lazyProduct(b)
800#define LAZY_PRODUCT(a, b) a.operator*(b)
803#if !EIGEN_VERSION_AT_LEAST(3, 3, 8)
804 if ((dst.rows() < 20) && (dst.cols() < 20) && (rhs.rows() < 20)) {
809 Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor, 20, 20>;
811 Eigen::Map<Mat, Eigen::Unaligned, Eigen::OuterStride<Eigen::Dynamic>>;
814 MapMut(dst.data(), dst.rows(), dst.cols(), { dst.outerStride() });
815 dst_.noalias().operator+=(factor *
LAZY_PRODUCT(lhs, rhs));
819 dst.noalias().operator+=(factor *
LAZY_PRODUCT(lhs, rhs));
826template<
typename Dst,
typename Lhs,
typename Rhs,
typename T>
855 rows *
isize{
sizeof(T) },
#define VEG_DEBUG_ASSERT(...)
#define VEG_ASSERT_ALL_OF(...)
#define DENSE_LDLT_FP_PRAGMA
#define LAZY_PRODUCT(a, b)
#define LDLT_LOAD_STORE(Prefix, Suffix)
#define LDLT_ARITHMETIC_IMPL(Prefix, Suffix)
#define VEG_NIEBLOID(Name)
typename NativePackInfo< T >::Type NativePack
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, Eigen::Dynamic, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedMatrix
constexpr auto round_up(T a, T b) noexcept -> T
void set_zero(T *dest, usize n)
void noalias_mul_add_impl(Dst dst, Lhs lhs, Rhs rhs, T factor)
auto adjusted_stride(isize n) noexcept -> isize
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, 1, Eigen::ColMajor > OwnedColVector
Eigen::Matrix< typename T::Scalar, T::RowsAtCompileTime, Eigen::Dynamic, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedCols
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, T::ColsAtCompileTime, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedRows
Eigen::Stride< T::OuterStrideAtCompileTime, T::InnerStrideAtCompileTime > StrideOf
proxsuite::linalg::veg::meta::if_t< COND, T const, T > const_if
Eigen::Matrix< typename T::Scalar, 1, Eigen::Dynamic, Eigen::RowMajor > OwnedRowVector
Eigen::Matrix< typename T::Scalar, T::RowsAtCompileTime, T::ColsAtCompileTime, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedAll
proxsuite::linalg::veg::meta::bool_constant< false > should_vectorize
auto align() noexcept -> isize
auto matrix_elem_addr(Mat &&mat, isize row, isize col) noexcept -> decltype(mat.data())
auto trans(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, Eigen::Matrix< typename proxsuite::linalg::veg::uncvref_t< Mat >::Scalar, proxsuite::linalg::veg::uncvref_t< Mat >::ColsAtCompileTime, proxsuite::linalg::veg::uncvref_t< Mat >::RowsAtCompileTime, bool(proxsuite::linalg::veg::uncvref_t< Mat >::IsRowMajor) ? Eigen::ColMajor :Eigen::RowMajor > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
void noalias_mul_add(Dst &&dst, Lhs const &lhs, Rhs const &rhs, T factor)
auto to_view_dyn_rows(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedRows< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto subcols(Mat &&mat, isize col_start, isize ncols) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedCols< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto diagonal(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, Eigen::Matrix< typename proxsuite::linalg::veg::uncvref_t< Mat >::Scalar, Eigen::Dynamic, 1, Eigen::ColMajor > >, Eigen::Unaligned, Eigen::InnerStride< Eigen::Dynamic > >
auto row(T &&mat, isize row_idx) noexcept -> typename _detail::RowColAccessImpl< !bool(proxsuite::linalg::veg::uncvref_t< T >::IsRowMajor)>::template Row< T >
auto subrows(Mat &&mat, isize row_start, isize nrows) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedRows< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto submatrix(Mat &&mat, isize row_start, isize col_start, isize nrows, isize ncols) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedMatrix< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto elem_addr(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
auto to_view_dyn(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedMatrix< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto to_view(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedAll< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto col(T &&mat, isize col_idx) noexcept -> typename _detail::RowColAccessImpl< !bool(proxsuite::linalg::veg::uncvref_t< T >::IsRowMajor)>::template Col< T >
auto to_view_dyn_cols(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedCols< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto temp_vec_req(proxsuite::linalg::veg::Tag< T >, isize rows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto temp_mat_req(proxsuite::linalg::veg::Tag< T >, isize rows, isize cols) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
_detail::_meta::make_signed< usize >::Type isize
decltype(sizeof(0)) usize
static auto fn(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
static auto fn(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
NoCopy(NoCopy const &)=delete
auto operator=(NoCopy &&) -> NoCopy &=delete
auto operator=(NoCopy const &) -> NoCopy &=delete
static auto col(T &&mat, isize col_idx) noexcept -> Col< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedRowVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::InnerStrideAtCompileTime > > Row
static auto row(T &&mat, isize row_idx) noexcept -> Row< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedColVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::OuterStrideAtCompileTime > > Col
static auto col(T &&mat, isize col_idx) noexcept -> Col< T >
static auto row(T &&mat, isize row_idx) noexcept -> Row< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedColVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::InnerStrideAtCompileTime > > Col
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedRowVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::OuterStrideAtCompileTime > > Row
VEG_INLINE void store_unaligned(ScalarType *ptr) const noexcept
static VEG_INLINE auto load_unaligned(ScalarType const *ptr) noexcept -> Pack
static VEG_INLINE auto fmadd(Pack a, Pack b, Pack c) noexcept -> Pack
static VEG_INLINE auto broadcast(ScalarType value) noexcept -> Pack
static VEG_INLINE auto fnmadd(Pack a, Pack b, Pack c) noexcept -> Pack
VEG_INLINE constexpr auto operator()(T const &a, T const &b) const -> T const &
VEG_INLINE constexpr auto operator()(T a, T b) const -> T