5#ifndef PROXSUITE_LINALG_DENSE_LDLT_CORE_HPP
6#define PROXSUITE_LINALG_DENSE_LDLT_CORE_HPP
12#if !(defined(__aarch64__) || defined(__PPC64__) || defined(__ppc64__) || \
17#ifdef PROXSUITE_VECTORIZE
19#include <simde/x86/avx2.h>
20#include <simde/x86/fma.h>
25#define LDLT_ID(id) __VEG_PP_CAT(id, __LINE__)
27#define __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, Make) \
28 auto LDLT_ID(vec_storage) = (Stack).Make( \
29 ::proxsuite::linalg::veg::Tag<__VEG_PP_REMOVE_PAREN(Type)>{}, \
31 ::proxsuite::linalg::dense::_detail::align<__VEG_PP_REMOVE_PAREN( \
33 auto Name = ::Eigen::Map< \
34 ::Eigen::Matrix<__VEG_PP_REMOVE_PAREN(Type), ::Eigen::Dynamic, 1>, \
36 ::Eigen::Stride<::Eigen::Dynamic, 1>>{ \
37 LDLT_ID(vec_storage).ptr_mut(), \
38 LDLT_ID(vec_storage).len(), \
39 ::Eigen::Stride<::Eigen::Dynamic, 1>{ \
40 LDLT_ID(vec_storage).len(), \
44 static_assert(true, ".")
46#define __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, Make) \
47 ::proxsuite::linalg::veg::isize LDLT_ID(rows) = (Rows); \
48 ::proxsuite::linalg::veg::isize LDLT_ID(cols) = (Cols); \
49 ::proxsuite::linalg::veg::isize LDLT_ID(stride) = \
50 ::proxsuite::linalg::dense::_detail::adjusted_stride< \
51 __VEG_PP_REMOVE_PAREN(Type)>(LDLT_ID(rows)); \
52 auto LDLT_ID(vec_storage) = (Stack).Make( \
53 ::proxsuite::linalg::veg::Tag<__VEG_PP_REMOVE_PAREN(Type)>{}, \
54 LDLT_ID(stride) * LDLT_ID(cols), \
55 ::proxsuite::linalg::dense::_detail::align<__VEG_PP_REMOVE_PAREN( \
58 ::Eigen::Map<::Eigen::Matrix<__VEG_PP_REMOVE_PAREN(Type), \
63 ::Eigen::Stride<::Eigen::Dynamic, 1>>{ \
64 LDLT_ID(vec_storage).ptr_mut(), \
67 ::Eigen::Stride<::Eigen::Dynamic, 1>{ \
72 static_assert(true, ".")
74#define LDLT_TEMP_VEC(Type, Name, Rows, Stack) \
75 __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, make_new)
76#define LDLT_TEMP_VEC_UNINIT(Type, Name, Rows, Stack) \
77 __LDLT_TEMP_VEC_IMPL(Type, Name, Rows, Stack, make_new_for_overwrite)
79#define LDLT_TEMP_MAT(Type, Name, Rows, Cols, Stack) \
80 __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, make_new)
81#define LDLT_TEMP_MAT_UNINIT(Type, Name, Rows, Cols, Stack) \
82 __LDLT_TEMP_MAT_IMPL(Type, Name, Rows, Cols, Stack, make_new_for_overwrite)
98#define DENSE_LDLT_FP_PRAGMA _Pragma("STDC FP_CONTRACT ON")
100#define DENSE_LDLT_FP_PRAGMA
103static_assert(
static_cast<unsigned char>(-1) == 255,
"char should have 8 bits");
104static_assert(
sizeof(
f32) == 4,
"f32 should be 32 bits");
105static_assert(
sizeof(
f64) == 8,
"f64 should be 64 bits");
107#define LDLT_FN_IMPL3(Fn, Prefix, Suffix) \
108 VEG_INLINE static auto Fn(Pack a, Pack b, Pack c) noexcept -> Pack \
110 return Pack{ simde_mm##Prefix##_##Fn##_##Suffix( \
111 a.inner, b.inner, c.inner) }; \
115#define LDLT_ARITHMETIC_IMPL(Prefix, Suffix) \
116 LDLT_FN_IMPL3(fmadd, Prefix, Suffix); \
117 LDLT_FN_IMPL3(fnmadd, Prefix, Suffix);
119#define LDLT_LOAD_STORE(Prefix, Suffix) \
120 VEG_INLINE static auto load_unaligned( \
121 ScalarType const* ptr) noexcept -> Pack \
123 return Pack{ simde_mm##Prefix##_loadu_##Suffix(ptr) }; \
125 VEG_INLINE static auto broadcast(ScalarType value) noexcept -> Pack \
127 return Pack{ simde_mm##Prefix##_set1_##Suffix(value) }; \
129 VEG_INLINE void store_unaligned(ScalarType* ptr) const noexcept \
131 simde_mm##Prefix##_storeu_##Suffix(ptr, inner); \
135template<
typename T, usize N>
148 return { a.inner * b.inner + c.inner };
152 return fmadd({ -a.inner }, b, c);
168#ifdef PROXSUITE_VECTORIZE
172 using ScalarType =
f32;
182 using ScalarType =
f32;
193 using ScalarType =
f32;
196 VEG_INLINE static auto fmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
199 return { _mm512_fmadd_ps(a.inner, b.inner, c.inner) };
201 VEG_INLINE static auto fnmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
203 return { _mm512_fnmadd_ps(a.inner, b.inner, c.inner) };
205 VEG_INLINE static auto load_unaligned(ScalarType
const* ptr)
noexcept -> Pack
207 return { _mm512_loadu_ps(ptr) };
209 VEG_INLINE static auto broadcast(ScalarType value)
noexcept -> Pack
211 return { _mm512_set1_ps(value) };
213 VEG_INLINE void store_unaligned(ScalarType* ptr)
const noexcept
215 _mm512_storeu_ps(ptr, inner);
223 using ScalarType =
f64;
232 using ScalarType =
f64;
243 using ScalarType =
f64;
246 VEG_INLINE static auto fmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
249 return { _mm512_fmadd_pd(a.inner, b.inner, c.inner) };
251 VEG_INLINE static auto fnmadd(Pack a, Pack b, Pack c)
noexcept -> Pack
253 return { _mm512_fnmadd_pd(a.inner, b.inner, c.inner) };
255 VEG_INLINE static auto load_unaligned(ScalarType
const* ptr)
noexcept -> Pack
257 return { _mm512_loadu_pd(ptr) };
259 VEG_INLINE static auto broadcast(ScalarType value)
noexcept -> Pack
261 return { _mm512_set1_pd(value) };
263 VEG_INLINE void store_unaligned(ScalarType* ptr)
const noexcept
265 _mm512_storeu_pd(ptr, inner);
275 static constexpr usize
N = 1;
279#ifdef PROXSUITE_VECTORIZE
283 static constexpr usize
N = SIMDE_NATURAL_VECTOR_SIZE / 32;
287struct NativePackInfo<
f64>
289 static constexpr usize
N = SIMDE_NATURAL_VECTOR_SIZE / 64;
290 using Type = Pack<f64, N>;
300using proxsuite::linalg::veg::uncvref_t;
301template<
bool COND,
typename T>
312 return a + (b - 1) / b * b;
315#ifdef PROXSUITE_VECTORIZE
329#ifndef SIMDE_NATURAL_FLOAT_VECTOR_SIZE
330 isize simd_stride = 1;
333 (SIMDE_NATURAL_VECTOR_SIZE / CHAR_BIT) / isize{
sizeof(T) };
342 return isize{
alignof(T) } * _detail::adjusted_stride<T>(1);
362 return a > b ? a : b;
370 return (a < b) ? a : b;
381 for (usize i = 0; i < n; ++i) {
388 Eigen::Matrix<
typename T::Scalar,
391 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
395 Eigen::Matrix<
typename T::Scalar,
396 T::RowsAtCompileTime,
397 T::ColsAtCompileTime,
398 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
402 Eigen::Matrix<
typename T::Scalar,
404 T::ColsAtCompileTime,
405 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
409 Eigen::Matrix<
typename T::Scalar,
410 T::RowsAtCompileTime,
412 bool(T::IsRowMajor) ? Eigen::RowMajor : Eigen::ColMajor>;
427template<
bool ROWMAJOR>
434 static auto fn(T* ptr,
438 isize inner_stride)
noexcept -> T*
440 return ptr + (inner_stride * row + outer_stride * col);
447 static auto fn(T* ptr,
451 isize inner_stride)
noexcept -> T*
453 return ptr + (outer_stride * row + inner_stride * col);
457template<
bool COLMAJOR>
468 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>>;
474 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>>;
477 static auto col(T&& mat, isize col_idx)
noexcept ->
Col<T>
480 mat.data() + col_idx * mat.outerStride(),
483 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>{
489 static auto row(T&& mat, isize row_idx)
noexcept ->
Row<T>
492 mat.data() + row_idx * mat.innerStride(),
495 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>{
509 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>>;
515 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>>;
518 static auto col(T&& mat, isize col_idx)
noexcept ->
Col<T>
521 mat.data() + col_idx * mat.innerStride(),
524 Eigen::InnerStride<uncvref_t<T>::OuterStrideAtCompileTime>{
530 static auto row(T&& mat, isize row_idx)
noexcept ->
Row<T>
533 mat.data() + row_idx * mat.outerStride(),
536 Eigen::InnerStride<uncvref_t<T>::InnerStrideAtCompileTime>{
544 T::OuterStrideAtCompileTime,
545 T::InnerStrideAtCompileTime>;
549template<
bool COLMAJOR,
typename T>
555 isize inner_stride)
noexcept -> T*
558 ptr,
row,
col, outer_stride, inner_stride);
561template<
typename Mat>
565 isize
col)
noexcept ->
decltype(mat.data())
568 proxsuite::linalg::veg::uncvref_t<Mat>::IsRowMajor)>(
578col(T&& mat, isize col_idx)
noexcept ->
580 !bool(proxsuite::linalg::veg::uncvref_t<T>::IsRowMajor)>::template Col<T>
583 proxsuite::linalg::veg::uncvref_t<T>::IsRowMajor)>
::col(mat, col_idx);
587row(T&& mat, isize row_idx)
noexcept ->
589 !bool(proxsuite::linalg::veg::uncvref_t<T>::IsRowMajor)>::template Row<T>
592 proxsuite::linalg::veg::uncvref_t<T>::IsRowMajor)>
::row(mat, row_idx);
595template<
typename Mat>
602 typename proxsuite::linalg::veg::uncvref_t<Mat>::Scalar,
603 proxsuite::linalg::veg::uncvref_t<Mat>::ColsAtCompileTime,
604 proxsuite::linalg::veg::uncvref_t<Mat>::RowsAtCompileTime,
605 bool(proxsuite::linalg::veg::uncvref_t<Mat>::IsRowMajor)
622template<
typename Mat>
628 typename proxsuite::linalg::veg::uncvref_t<Mat>::Scalar,
633 Eigen::InnerStride<Eigen::Dynamic>>
636 mat.rows() == mat.cols());
640 Eigen::InnerStride<Eigen::Dynamic>{ mat.outerStride() + 1 } };
643template<
typename Mat>
649 isize ncols)
noexcept
657 util::elem_addr<!bool(proxsuite::linalg::veg::uncvref_t<Mat>::IsRowMajor)>(
658 mat.data(), row_start, col_start, mat.outerStride(), mat.innerStride()),
668template<
typename Mat>
688template<
typename Mat>
708template<
typename Mat>
728template<
typename Mat>
748template<
typename Mat>
750subrows(Mat&& mat, isize row_start, isize nrows)
noexcept
758 util::elem_addr<!bool(proxsuite::linalg::veg::uncvref_t<Mat>::IsRowMajor)>(
759 mat.data(), row_start, 0, mat.outerStride(), mat.innerStride()),
769template<
typename Mat>
771subcols(Mat&& mat, isize col_start, isize ncols)
noexcept
779 util::elem_addr<!bool(proxsuite::linalg::veg::uncvref_t<Mat>::IsRowMajor)>(
780 mat.data(), 0, col_start, mat.outerStride(), mat.innerStride()),
792template<
typename Dst,
typename Lhs,
typename Rhs,
typename T>
797 dst.cols() == rhs.cols(),
798 lhs.cols() == rhs.rows());
800 isize nrows = dst.rows();
801 isize ncols = dst.cols();
802 isize depth = lhs.cols();
804 if (nrows == 0 || ncols == 0 || depth == 0) {
808#if !EIGEN_VERSION_AT_LEAST(3, 3, 8)
809#define LAZY_PRODUCT(a, b) a.lazyProduct(b)
811#define LAZY_PRODUCT(a, b) a.operator*(b)
814#if !EIGEN_VERSION_AT_LEAST(3, 3, 8)
815 if ((dst.rows() < 20) && (dst.cols() < 20) && (rhs.rows() < 20)) {
820 Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor, 20, 20>;
822 Eigen::Map<Mat, Eigen::Unaligned, Eigen::OuterStride<Eigen::Dynamic>>;
825 MapMut(dst.data(), dst.rows(), dst.cols(), { dst.outerStride() });
826 dst_.noalias().operator+=(factor *
LAZY_PRODUCT(lhs, rhs));
830 dst.noalias().operator+=(factor *
LAZY_PRODUCT(lhs, rhs));
837template<
typename Dst,
typename Lhs,
typename Rhs,
typename T>
855 _detail::adjusted_stride<T>(rows) * cols * isize{
sizeof(T) },
866 rows * isize{
sizeof(T) },
#define VEG_DEBUG_ASSERT(...)
#define VEG_ASSERT_ALL_OF(...)
#define DENSE_LDLT_FP_PRAGMA
#define LAZY_PRODUCT(a, b)
#define LDLT_LOAD_STORE(Prefix, Suffix)
#define LDLT_ARITHMETIC_IMPL(Prefix, Suffix)
#define VEG_NIEBLOID(Name)
typename NativePackInfo< T >::Type NativePack
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, Eigen::Dynamic, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedMatrix
constexpr auto round_up(T a, T b) noexcept -> T
void set_zero(T *dest, usize n)
void noalias_mul_add_impl(Dst dst, Lhs lhs, Rhs rhs, T factor)
auto adjusted_stride(isize n) noexcept -> isize
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, 1, Eigen::ColMajor > OwnedColVector
Eigen::Matrix< typename T::Scalar, T::RowsAtCompileTime, Eigen::Dynamic, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedCols
Eigen::Matrix< typename T::Scalar, Eigen::Dynamic, T::ColsAtCompileTime, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedRows
Eigen::Stride< T::OuterStrideAtCompileTime, T::InnerStrideAtCompileTime > StrideOf
proxsuite::linalg::veg::meta::if_t< COND, T const, T > const_if
Eigen::Matrix< typename T::Scalar, 1, Eigen::Dynamic, Eigen::RowMajor > OwnedRowVector
Eigen::Matrix< typename T::Scalar, T::RowsAtCompileTime, T::ColsAtCompileTime, bool(T::IsRowMajor) ? Eigen::RowMajor :Eigen::ColMajor > OwnedAll
proxsuite::linalg::veg::meta::bool_constant< false > should_vectorize
auto align() noexcept -> isize
auto matrix_elem_addr(Mat &&mat, isize row, isize col) noexcept -> decltype(mat.data())
auto trans(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, Eigen::Matrix< typename proxsuite::linalg::veg::uncvref_t< Mat >::Scalar, proxsuite::linalg::veg::uncvref_t< Mat >::ColsAtCompileTime, proxsuite::linalg::veg::uncvref_t< Mat >::RowsAtCompileTime, bool(proxsuite::linalg::veg::uncvref_t< Mat >::IsRowMajor) ? Eigen::ColMajor :Eigen::RowMajor > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
void noalias_mul_add(Dst &&dst, Lhs const &lhs, Rhs const &rhs, T factor)
auto to_view_dyn_rows(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedRows< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto subcols(Mat &&mat, isize col_start, isize ncols) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedCols< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto diagonal(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, Eigen::Matrix< typename proxsuite::linalg::veg::uncvref_t< Mat >::Scalar, Eigen::Dynamic, 1, Eigen::ColMajor > >, Eigen::Unaligned, Eigen::InnerStride< Eigen::Dynamic > >
auto row(T &&mat, isize row_idx) noexcept -> typename _detail::RowColAccessImpl< !bool(proxsuite::linalg::veg::uncvref_t< T >::IsRowMajor)>::template Row< T >
auto subrows(Mat &&mat, isize row_start, isize nrows) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedRows< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto submatrix(Mat &&mat, isize row_start, isize col_start, isize nrows, isize ncols) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedMatrix< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto elem_addr(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
auto to_view_dyn(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedMatrix< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto to_view(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedAll< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto col(T &&mat, isize col_idx) noexcept -> typename _detail::RowColAccessImpl< !bool(proxsuite::linalg::veg::uncvref_t< T >::IsRowMajor)>::template Col< T >
auto to_view_dyn_cols(Mat &&mat) noexcept -> Eigen::Map< _detail::const_if< _detail::ptr_is_const< decltype(mat.data())>::value, _detail::OwnedCols< proxsuite::linalg::veg::uncvref_t< Mat > > >, Eigen::Unaligned, _detail::StrideOf< proxsuite::linalg::veg::uncvref_t< Mat > > >
auto temp_vec_req(proxsuite::linalg::veg::Tag< T >, isize rows) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
auto temp_mat_req(proxsuite::linalg::veg::Tag< T >, isize rows, isize cols) noexcept -> proxsuite::linalg::veg::dynstack::StackReq
_detail::_meta::make_signed< usize >::Type isize
decltype(sizeof(0)) usize
static auto fn(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
static auto fn(T *ptr, isize row, isize col, isize outer_stride, isize inner_stride) noexcept -> T *
NoCopy(NoCopy const &)=delete
auto operator=(NoCopy &&) -> NoCopy &=delete
auto operator=(NoCopy const &) -> NoCopy &=delete
static auto col(T &&mat, isize col_idx) noexcept -> Col< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedRowVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::InnerStrideAtCompileTime > > Row
static auto row(T &&mat, isize row_idx) noexcept -> Row< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedColVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::OuterStrideAtCompileTime > > Col
static auto col(T &&mat, isize col_idx) noexcept -> Col< T >
static auto row(T &&mat, isize row_idx) noexcept -> Row< T >
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedColVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::InnerStrideAtCompileTime > > Col
Eigen::Map< const_if< ptr_is_const< decltype(VEG_DECLVAL(T &&).data())>::value, OwnedRowVector< uncvref_t< T > > >, Eigen::Unaligned, Eigen::InnerStride< uncvref_t< T >::OuterStrideAtCompileTime > > Row
VEG_INLINE void store_unaligned(ScalarType *ptr) const noexcept
static VEG_INLINE auto load_unaligned(ScalarType const *ptr) noexcept -> Pack
static VEG_INLINE auto fmadd(Pack a, Pack b, Pack c) noexcept -> Pack
static VEG_INLINE auto broadcast(ScalarType value) noexcept -> Pack
static VEG_INLINE auto fnmadd(Pack a, Pack b, Pack c) noexcept -> Pack
VEG_INLINE constexpr auto operator()(T const &a, T const &b) const -> T const &
VEG_INLINE constexpr auto operator()(T a, T b) const -> T