// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H

namespace Eigen {

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
  typedef typename remove_all<ResultType>::type::Scalar ResScalar;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  ei_declare_aligned_stack_constructed_variable(bool,      mask,    rows, 0);
  ei_declare_aligned_stack_constructed_variable(ResScalar, values,  rows, 0);
  ei_declare_aligned_stack_constructed_variable(Index,     indices, rows, 0);

  std::memset(mask,0,sizeof(bool)*rows);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // Estimate the number of non-zero entries:
  // given a rhs column containing Y non-zeros, we assume that the corresponding Y columns
  // of the lhs differ from one another by about one non-zero on average; the product of a
  // rhs column with the lhs thus holds roughly X+Y non-zeros, where X is the average
  // number of non-zeros per column of the lhs.
  // Summed over all columns, this gives nnz(lhs*rhs) = nnz(lhs) + nnz(rhs).
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.setZero();
  res.reserve(Index(estimated_nnz_prod));
  // we compute each column of the result, one after the other
  for (Index j=0; j<cols; ++j)
  {
    res.startVec(j);
    Index nnz = 0;
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      RhsScalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        LhsScalar x = lhsIt.value();
        // scatter x*y into the dense accumulator, recording each row index only once
        if(!mask[i])
        {
          mask[i] = true;
          values[i] = x * y;
          indices[nnz] = i;
          ++nnz;
        }
        else
          values[i] += x * y;
      }
    }
    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
        Index i = indices[k];
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;
      }
    }
    else
    {
      // alternative ordered insertion code:
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139; // 139 == (100*log2(e))

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // To avoid an expensive log2 when the result is clearly very sparse,
      // we use a linear bound up to 200.
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        if(nnz>1) std::sort(indices,indices+nnz);
        for(Index k=0; k<nnz; ++k)
        {
          Index i = indices[k];
          res.insertBackByOuterInner(j,i) = values[i];
          mask[i] = false;
        }
      }
      else
      {
        // dense path
        for(Index i=0; i<rows; ++i)
        {
          if(mask[i])
          {
            mask[i] = false;
            res.insertBackByOuterInner(j,i) = values[i];
          }
        }
      }
    }
  }
  res.finalize();
}

} // end namespace internal
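
// Usage sketch: the accumulator above is what a plain sparse*sparse product
// boils down to. Assuming the usual public API, a minimal example looks like:
//
//   #include <Eigen/Sparse>
//
//   Eigen::SparseMatrix<double> A(rows,cols), B(cols,cols);
//   // ... fill A and B, e.g. with setFromTriplets() ...
//   Eigen::SparseMatrix<double> C = A * B;  // "conservative" product: numerical
//                                           // zeros produced by cancellation are
//                                           // kept; use (A*B).pruned() to drop them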

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename LhsCleaned::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;

    // If the result is tall and thin (in the extreme case a column vector),
    // then it is faster to sort the coefficients in place instead of transposing twice.
    // FIXME: the following heuristic is probably not very good.
    if(lhs.rows()>rhs.cols())
    {
      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol.markAsRValue();
    }
    else
    {
      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
      // resort to a transpose to sort the entries
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
      RowMajorMatrix resRow(resCol);
      res = resRow.markAsRValue();
    }
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
    RowMajorRhs rhsRow = rhs;
    RowMajorRes resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
    RowMajorLhs lhsRow = lhs;
    RowMajorRes resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
    ColMajorLhs lhsCol = lhs;
    ColMajorRes resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
    ColMajorRhs rhsCol = rhs;
    ColMajorRes resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    // sort the non-zeros:
    ColMajorMatrix resCol(resRow);
    res = resCol;
  }
};

} // end namespace internal
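
// The selectors above cover sparse destinations. The section below handles the
// remaining case of a *dense* destination, which permits random access via
// coeffRef and therefore needs no mask/sort machinery. A minimal sketch,
// assuming the usual public API (dense = sparse * sparse assignment):
//
//   #include <Eigen/Dense>
//   #include <Eigen/Sparse>
//
//   Eigen::SparseMatrix<double> A(n,n), B(n,n);
//   Eigen::MatrixXd D(n,n);
//   D = A * B;  // evaluates the sparse product directly into the dense matrix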

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
  typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
  typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  for (Index j=0; j<cols; ++j)
  {
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      RhsScalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        LhsScalar x = lhsIt.value();
        res.coeffRef(i,j) += x * y;
      }
    }
  }
}

} // end namespace internal

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct sparse_sparse_to_dense_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorLhs;
    ColMajorLhs lhsCol(lhs);
    internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Rhs::StorageIndex> ColMajorRhs;
    ColMajorRhs rhsCol(rhs);
    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    Transpose<ResultType> trRes(res);
    internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H