// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009-2010 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H namespace Eigen { template struct selfadjoint_rank1_update; namespace internal { /********************************************************************** * This file implements a general A * B product while * evaluating only one triangular part of the product. * This is more general version of self adjoint product (C += A A^T) * as the level 3 SYRK Blas routine. **********************************************************************/ // forward declarations (defined at the end of this file) template struct tribb_kernel; /* Optimized matrix-matrix product evaluating only one triangular half */ template struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template struct general_matrix_matrix_triangular_product { typedef typename scalar_product_traits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) { general_matrix_matrix_triangular_product ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); } }; template struct general_matrix_matrix_triangular_product { typedef typename scalar_product_traits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) { const_blas_data_mapper lhs(_lhs,lhsStride); const_blas_data_mapper rhs(_rhs,rhsStride); typedef gebp_traits Traits; Index kc = depth; // cache block size along the K direction Index mc = size; // cache block size along the M direction Index nc = size; // cache block size along the N direction computeProductBlockingSizes(kc, mc, nc); // !!! mc must be a multiple of nr: if(mc > Traits::nr) mc = (mc/Traits::nr)*Traits::nr; std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeB = sizeW + kc*size; ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0); RhsScalar* blockB = allocatedBlockB + sizeW; gemm_pack_lhs pack_lhs; gemm_pack_rhs pack_rhs; gebp_kernel gebp; tribb_kernel sybb; for(Index k2=0; k2 processed with gebp or skipped // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel // 3 - after the diagonal => processed with gebp or skipped if (UpLo==Lower) gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, -1, -1, 0, 0, allocatedBlockB); sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); if (UpLo==Upper) { Index j2 = i2+actual_mc; gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0, allocatedBlockB); } } } } }; // Optimized packed Block * packed Block product kernel evaluating only one given triangular part // This kernel is built on top of the gebp kernel: // - the current destination block is processed per panel of actual_mc x BlockSize // where BlockSize is set to the minimal value allowing gebp to be as fast as possible // - then, as usual, each panel is split into three parts along the diagonal, // the sub blocks above and below the diagonal are processed as usual, // while the triangular block overlapping the diagonal is evaluated into a // small temporary buffer which is then accumulated into the result using a // triangular traversal. template struct tribb_kernel { typedef gebp_traits Traits; typedef typename Traits::ResScalar ResScalar; enum { BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) }; void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace) { gebp_kernel gebp_kernel; Matrix buffer; // let's process the block per panel of actual_mc x BlockSize, // again, each is split into three parts, etc. for (Index j=0; j(BlockSize,size - j); const RhsScalar* actual_b = blockB+j*depth; if(UpLo==Upper) gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, -1, -1, 0, 0, workspace); // selfadjoint micro block { Index i = j; buffer.setZero(); // 1 - apply the kernel on the temporary buffer gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, -1, -1, 0, 0, workspace); // 2 - triangular accumulation for(Index j1=0; j1 struct general_product_to_triangular_selector; template struct general_product_to_triangular_selector { static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) { typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Index Index; typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; typedef typename internal::remove_all::type _ActualRhs; typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); enum { StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 }; internal::gemv_static_vector_if static_lhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), (UseLhsDirectly ? const_cast(actualLhs.data()) : static_lhs.data())); if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; internal::gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), (UseRhsDirectly ? const_cast(actualRhs.data()) : static_rhs.data())); if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; selfadjoint_rank1_update::IsComplex, RhsBlasTraits::NeedToConjugate && NumTraits::IsComplex> ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); } }; template struct general_product_to_triangular_selector { static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) { typedef typename MatrixType::Index Index; typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; typedef typename internal::remove_all::type _ActualRhs; typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); internal::general_matrix_matrix_triangular_product ::run(mat.cols(), actualLhs.cols(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), mat.data(), mat.outerStride(), actualAlpha); } }; template template TriangularView& TriangularView::assignProduct(const ProductBase& prod, const Scalar& alpha) { general_product_to_triangular_selector::run(m_matrix.const_cast_derived(), prod.derived(), alpha); return *this; } } // end namespace Eigen #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H