// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2009-2010 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H namespace Eigen { template struct selfadjoint_rank1_update; namespace internal { /********************************************************************** * This file implements a general A * B product while * evaluating only one triangular part of the product. * This is a more general version of self adjoint product (C += A A^T) * as the level 3 SYRK Blas routine. **********************************************************************/ // forward declarations (defined at the end of this file) template struct tribb_kernel; /* Optimized matrix-matrix product evaluating only one triangular half */ template struct general_matrix_matrix_triangular_product; // as usual if the result is row major => we transpose the product template struct general_matrix_matrix_triangular_product { typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) { general_matrix_matrix_triangular_product ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking); } }; template struct general_matrix_matrix_triangular_product { typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) { typedef gebp_traits Traits; typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; typedef blas_data_mapper ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); ResMapper res(_res, resStride); Index kc = blocking.kc(); Index mc = (std::min)(size,blocking.mc()); // !!! mc must be a multiple of nr: if(mc > Traits::nr) mc = (mc/Traits::nr)*Traits::nr; std::size_t sizeA = kc*mc; std::size_t sizeB = kc*size; ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB()); gemm_pack_lhs pack_lhs; gemm_pack_rhs pack_rhs; gebp_kernel gebp; tribb_kernel sybb; for(Index k2=0; k2 processed with gebp or skipped // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel // 3 - after the diagonal => processed with gebp or skipped if (UpLo==Lower) gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, -1, -1, 0, 0); sybb(_res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha); if (UpLo==Upper) { Index j2 = i2+actual_mc; gebp(res.getSubMapper(i2, j2), blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0); } } } } }; // Optimized packed Block * packed Block product kernel evaluating only one given triangular part // This kernel is built on top of the gebp kernel: // - the current destination block is processed per panel of actual_mc x BlockSize // where BlockSize is set to the minimal value allowing gebp to be as fast as possible // - then, as usual, each panel is split into three parts along the diagonal, // the sub blocks above and below the diagonal are processed as usual, // while the triangular block overlapping the diagonal is evaluated into a // small temporary buffer which is then accumulated into the result using a // triangular traversal. template struct tribb_kernel { typedef gebp_traits Traits; typedef typename Traits::ResScalar ResScalar; enum { BlockSize = meta_least_common_multiple::ret }; void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) { typedef blas_data_mapper ResMapper; ResMapper res(_res, resStride); gebp_kernel gebp_kernel; Matrix buffer((internal::constructor_without_unaligned_array_assert())); // let's process the block per panel of actual_mc x BlockSize, // again, each is split into three parts, etc. for (Index j=0; j(BlockSize,size - j); const RhsScalar* actual_b = blockB+j*depth; if(UpLo==Upper) gebp_kernel(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha, -1, -1, 0, 0); // selfadjoint micro block { Index i = j; buffer.setZero(); // 1 - apply the kernel on the temporary buffer gebp_kernel(ResMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, -1, -1, 0, 0); // 2 - triangular accumulation for(Index j1=0; j1 struct general_product_to_triangular_selector; template struct general_product_to_triangular_selector { static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { typedef typename MatrixType::Scalar Scalar; typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; typedef typename internal::remove_all::type _ActualRhs; typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); if(!beta) mat.template triangularView().setZero(); enum { StorageOrder = (internal::traits::Flags&RowMajorBit) ? RowMajor : ColMajor, UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 }; internal::gemv_static_vector_if static_lhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), (UseLhsDirectly ? const_cast(actualLhs.data()) : static_lhs.data())); if(!UseLhsDirectly) Map(actualLhsPtr, actualLhs.size()) = actualLhs; internal::gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), (UseRhsDirectly ? const_cast(actualRhs.data()) : static_rhs.data())); if(!UseRhsDirectly) Map(actualRhsPtr, actualRhs.size()) = actualRhs; selfadjoint_rank1_update::IsComplex, RhsBlasTraits::NeedToConjugate && NumTraits::IsComplex> ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); } }; template struct general_product_to_triangular_selector { static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta) { typedef typename internal::remove_all::type Lhs; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; typedef typename internal::remove_all::type _ActualLhs; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(prod.lhs()); typedef typename internal::remove_all::type Rhs; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; typedef typename internal::remove_all::type _ActualRhs; typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(prod.rhs()); typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); if(!beta) mat.template triangularView().setZero(); enum { IsRowMajor = (internal::traits::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); if(SkipDiag) size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space BlockingType; BlockingType blocking(size, size, depth, 1, false); internal::general_matrix_matrix_triangular_product ::run(size, depth, &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template template EIGEN_DEVICE_FUNC TriangularView& TriangularViewImpl::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); general_product_to_triangular_selector::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); return derived(); } } // end namespace Eigen #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H